Online.Bandit.ArrayProbSpace

`streamMeasure`🔗

DefinitionBandits.streamMeasure

Details

Measure of an infinite stream of rewards from each action.

def

Bandits.streamMeasure.{u_1, u_2} {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R) : MeasureTheory.Measure (ℕ → 𝓐 → R)
Bandits.streamMeasure.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R) :
  MeasureTheory.Measure (ℕ → 𝓐 → R)

Code

noncomputable
def streamMeasure (ν : Kernel 𝓐 R) : Measure (ℕ → 𝓐 → R) :=
  Measure.infinitePi fun _ ↦ Measure.infinitePi ν

Used by (56)

Actions: Source · Open Issue

`instIsProbabilityMeasureForallNatForallStreamMeasureOfIsMarkovKernel`🔗

InstanceBandits.instIsProbabilityMeasureForallNatForallStreamMeasureOfIsMarkovKernel

Details

No docstring.

theorem

Bandits.instIsProbabilityMeasureForallNatForallStreamMeasureOfIsMarkovKernel.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] :
  MeasureTheory.IsProbabilityMeasure (streamMeasure ν)
Bandits.instIsProbabilityMeasureForallNatForallStreamMeasureOfIsMarkovKernel.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] :
  MeasureTheory.IsProbabilityMeasure
    (streamMeasure ν)

Code

instance (ν : Kernel 𝓐 R) [IsMarkovKernel ν] : IsProbabilityMeasure (streamMeasure ν)

Type uses (1)

streamMeasure

Used by (22)

Actions: Source · Open Issue

Proof

by
  unfold streamMeasure
  infer_instance

`hasLaw_eval_infinitePi`🔗

LemmahasLaw_eval_infinitePi

Details

No docstring.

theorem

hasLaw_eval_infinitePi.{u_3, u_4} {ι : Type u_3} {X : ι → Type u_4}
  {mX : (i : ι) → MeasurableSpace (X i)}
  (μ : (i : ι) → MeasureTheory.Measure (X i))
  [hμ : ∀ (i : ι), MeasureTheory.IsProbabilityMeasure (μ i)] (i : ι) :
  ProbabilityTheory.HasLaw (Function.eval i) (μ i)
    (MeasureTheory.Measure.infinitePi μ)
hasLaw_eval_infinitePi.{u_3, u_4}
  {ι : Type u_3} {X : ι → Type u_4}
  {mX : (i : ι) → MeasurableSpace (X i)}
  (μ :
    (i : ι) → MeasureTheory.Measure (X i))
  [hμ :
    ∀ (i : ι),
      MeasureTheory.IsProbabilityMeasure
        (μ i)]
  (i : ι) :
  ProbabilityTheory.HasLaw
    (Function.eval i) (μ i)
    (MeasureTheory.Measure.infinitePi μ)

Code

lemma _root_.hasLaw_eval_infinitePi {ι : Type*} {X : ι → Type*} {mX : ∀ i, MeasurableSpace (X i)}
  (μ : (i : ι) → Measure (X i)) [hμ : ∀ i, IsProbabilityMeasure (μ i)] (i : ι) :
    HasLaw (Function.eval i) (μ i) (Measure.infinitePi μ) where
  aemeasurable

Used by (2)

Actions: Source · Open Issue

Proof

Measurable.aemeasurable (by fun_prop)
  map_eq := by exact (measurePreserving_eval_infinitePi μ i).map_eq

`hasLaw_eval_streamMeasure`🔗

LemmaBandits.hasLaw_eval_streamMeasure

Details

No docstring.

theorem

Bandits.hasLaw_eval_streamMeasure.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
  ProbabilityTheory.HasLaw (fun h => h n)
    (MeasureTheory.Measure.infinitePi ⇑ν) (streamMeasure ν)
Bandits.hasLaw_eval_streamMeasure.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  (n : ℕ) :
  ProbabilityTheory.HasLaw (fun h => h n)
    (MeasureTheory.Measure.infinitePi ⇑ν)
    (streamMeasure ν)

Code

lemma hasLaw_eval_streamMeasure (ν : Kernel 𝓐 R) [IsMarkovKernel ν] (n : ℕ) :
    HasLaw (fun h : ℕ → 𝓐 → R ↦ h n) (Measure.infinitePi ν) (streamMeasure ν)

Type uses (1)

streamMeasure

Body uses (1)

hasLaw_eval_infinitePi

Used by (1)

hasLaw_eval_eval_streamMeasure

Actions: Source · Open Issue

Proof

hasLaw_eval_infinitePi (fun _ ↦ Measure.infinitePi ν) n

`hasLaw_eval_eval_streamMeasure`🔗

LemmaBandits.hasLaw_eval_eval_streamMeasure

Details

No docstring.

theorem

Bandits.hasLaw_eval_eval_streamMeasure.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) (a : 𝓐) :
  ProbabilityTheory.HasLaw (fun h => h n a) (ν a) (streamMeasure ν)
Bandits.hasLaw_eval_eval_streamMeasure.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  (n : ℕ) (a : 𝓐) :
  ProbabilityTheory.HasLaw
    (fun h => h n a) (ν a)
    (streamMeasure ν)

Code

lemma hasLaw_eval_eval_streamMeasure (ν : Kernel 𝓐 R) [IsMarkovKernel ν] (n : ℕ) (a : 𝓐) :
    HasLaw (fun h : ℕ → 𝓐 → R ↦ h n a) (ν a) (streamMeasure ν)

Type uses (1)

streamMeasure

Body uses (2)

Used by (2)

Actions: Source · Open Issue

Proof

(hasLaw_eval_infinitePi ν a).comp (hasLaw_eval_streamMeasure ν n)

`identDistrib_eval_eval_id_streamMeasure`🔗

LemmaBandits.identDistrib_eval_eval_id_streamMeasure

Details

No docstring.

theorem

Bandits.identDistrib_eval_eval_id_streamMeasure.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) (a : 𝓐) :
  ProbabilityTheory.IdentDistrib (fun h => h n a) id (streamMeasure ν)
    (ν a)
Bandits.identDistrib_eval_eval_id_streamMeasure.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  (n : ℕ) (a : 𝓐) :
  ProbabilityTheory.IdentDistrib
    (fun h => h n a) id (streamMeasure ν)
    (ν a)

Code

lemma identDistrib_eval_eval_id_streamMeasure (ν : Kernel 𝓐 R) [IsMarkovKernel ν] (n : ℕ) (a : 𝓐) :
    IdentDistrib (fun h : ℕ → 𝓐 → R ↦ h n a) id (streamMeasure ν) (ν a) where
  aemeasurable_fst

Type uses (1)

streamMeasure

Body uses (1)

hasLaw_eval_eval_streamMeasure

Used by (7)

Actions: Source · Open Issue

Proof

Measurable.aemeasurable (by fun_prop)
  aemeasurable_snd := Measurable.aemeasurable (by fun_prop)
  map_eq := by
    rw [← (hasLaw_eval_eval_streamMeasure ν n a).map_eq,
      Measure.map_map (by fun_prop) (by fun_prop)]
    simp

`integrable_eval_streamMeasure`🔗

LemmaBandits.integrable_eval_streamMeasure

Details

No docstring.

theorem

Bandits.integrable_eval_streamMeasure.{u_1} {𝓐 : Type u_1}
  {m𝓐 : MeasurableSpace 𝓐} (ν : ProbabilityTheory.Kernel 𝓐 ℝ)
  [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) (a : 𝓐)
  (h_int : MeasureTheory.Integrable id (ν a)) :
  MeasureTheory.Integrable (fun h => h n a) (streamMeasure ν)
Bandits.integrable_eval_streamMeasure.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  (ν : ProbabilityTheory.Kernel 𝓐 ℝ)
  [ProbabilityTheory.IsMarkovKernel ν]
  (n : ℕ) (a : 𝓐)
  (h_int :
    MeasureTheory.Integrable id (ν a)) :
  MeasureTheory.Integrable
    (fun h => h n a) (streamMeasure ν)

Code

lemma integrable_eval_streamMeasure (ν : Kernel 𝓐 ℝ) [IsMarkovKernel ν] (n : ℕ) (a : 𝓐)
    (h_int : Integrable id (ν a)) :
    Integrable (fun h : ℕ → 𝓐 → ℝ ↦ h n a) (streamMeasure ν)

Type uses (1)

streamMeasure

Body uses (2)

Actions: Source · Open Issue

Proof

Integrable.congr_identDistrib h_int (identDistrib_eval_eval_id_streamMeasure ν n a).symm

`integral_eval_streamMeasure`🔗

LemmaBandits.integral_eval_streamMeasure

Details

No docstring.

theorem

Bandits.integral_eval_streamMeasure.{u_1} {𝓐 : Type u_1}
  {m𝓐 : MeasurableSpace 𝓐} (ν : ProbabilityTheory.Kernel 𝓐 ℝ)
  [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) (a : 𝓐) :
  ∫ (h : ℕ → 𝓐 → ℝ), h n a ∂streamMeasure ν = ∫ (x : ℝ), id x ∂ν a
Bandits.integral_eval_streamMeasure.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  (ν : ProbabilityTheory.Kernel 𝓐 ℝ)
  [ProbabilityTheory.IsMarkovKernel ν]
  (n : ℕ) (a : 𝓐) :
  ∫ (h : ℕ → 𝓐 → ℝ),
      h n a ∂streamMeasure ν =
    ∫ (x : ℝ), id x ∂ν a

Code

lemma integral_eval_streamMeasure (ν : Kernel 𝓐 ℝ) [IsMarkovKernel ν] (n : ℕ) (a : 𝓐) :
    ∫ h, h n a ∂(streamMeasure ν) = (ν a)[id]

Type uses (1)

streamMeasure

Body uses (1)

hasLaw_eval_eval_streamMeasure

Used by (1)

probReal_sum_le_sum_streamMeasure

Actions: Source · Open Issue

Proof

by
  calc ∫ h, h n a ∂(streamMeasure ν)
  _ = ∫ x, x ∂((streamMeasure ν).map (fun h ↦ h n a)) := by
    rw [integral_map (Measurable.aemeasurable (by fun_prop)) (by fun_prop)]
  _ = (ν a)[id] := by simp [(hasLaw_eval_eval_streamMeasure ν n a).map_eq]

`iIndepFun_eval_streamMeasure'`🔗

LemmaBandits.iIndepFun_eval_streamMeasure'

Details

No docstring.

theorem

Bandits.iIndepFun_eval_streamMeasure'.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] :
  ProbabilityTheory.iIndepFun (fun n ω => ω n) (streamMeasure ν)
Bandits.iIndepFun_eval_streamMeasure'.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] :
  ProbabilityTheory.iIndepFun
    (fun n ω => ω n) (streamMeasure ν)

Code

lemma iIndepFun_eval_streamMeasure' (ν : Kernel 𝓐 R) [IsMarkovKernel ν] :
    iIndepFun (fun n ω ↦ ω n) (streamMeasure ν)

Type uses (1)

streamMeasure

Used by (1)

iIndepFun_eval_streamMeasure''

Actions: Source · Open Issue

Proof

iIndepFun_infinitePi (P := fun (_ : ℕ) ↦ Measure.infinitePi ν) (Ω := fun _ ↦ 𝓐 → R)
    (X := fun i u ↦ u) (fun i ↦ by fun_prop)

`iIndepFun_eval_streamMeasure''`🔗

LemmaBandits.iIndepFun_eval_streamMeasure''

Details

No docstring.

theorem

Bandits.iIndepFun_eval_streamMeasure''.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) :
  ProbabilityTheory.iIndepFun (fun n ω => ω n a) (streamMeasure ν)
Bandits.iIndepFun_eval_streamMeasure''.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  (a : 𝓐) :
  ProbabilityTheory.iIndepFun
    (fun n ω => ω n a) (streamMeasure ν)

Code

lemma iIndepFun_eval_streamMeasure'' (ν : Kernel 𝓐 R) [IsMarkovKernel ν] (a : 𝓐) :
    iIndepFun (fun n ω ↦ ω n a) (streamMeasure ν)

Type uses (1)

streamMeasure

Body uses (1)

iIndepFun_eval_streamMeasure'

Used by (5)

Actions: Source · Open Issue

Proof

(iIndepFun_eval_streamMeasure' ν).comp (g := fun i ω ↦ ω a) (by fun_prop)

`iIndepFun_eval_streamMeasure`🔗

LemmaBandits.iIndepFun_eval_streamMeasure

Details

No docstring.

theorem

Bandits.iIndepFun_eval_streamMeasure.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] :
  ProbabilityTheory.iIndepFun (fun p ω => ω (Prod.fst p) (Prod.snd p))
    (streamMeasure ν)
Bandits.iIndepFun_eval_streamMeasure.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] :
  ProbabilityTheory.iIndepFun
    (fun p ω =>
      ω (Prod.fst p) (Prod.snd p))
    (streamMeasure ν)

Code

lemma iIndepFun_eval_streamMeasure (ν : Kernel 𝓐 R) [IsMarkovKernel ν] :
    iIndepFun (fun (p : ℕ × 𝓐) ω ↦ ω p.1 p.2) (streamMeasure ν)

Type uses (1)

streamMeasure

Used by (1)

indepFun_eval_streamMeasure

Actions: Source · Open Issue

Proof

iIndepFun_uncurry_infinitePi' (X := fun _ _ ↦ id) (fun _ ↦ ν) (by fun_prop)

`indepFun_eval_streamMeasure`🔗

LemmaBandits.indepFun_eval_streamMeasure

Details

No docstring.

theorem

Bandits.indepFun_eval_streamMeasure.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] {n m : ℕ} {a b : 𝓐}
  (h : n ≠ m ∨ a ≠ b) :
  ProbabilityTheory.IndepFun (fun ω => ω n a) (fun ω => ω m b)
    (streamMeasure ν)
Bandits.indepFun_eval_streamMeasure.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  {n m : ℕ} {a b : 𝓐}
  (h : n ≠ m ∨ a ≠ b) :
  ProbabilityTheory.IndepFun
    (fun ω => ω n a) (fun ω => ω m b)
    (streamMeasure ν)

Code

lemma indepFun_eval_streamMeasure (ν : Kernel 𝓐 R) [IsMarkovKernel ν] {n m : ℕ} {a b : 𝓐}
    (h : n ≠ m ∨ a ≠ b) :
    IndepFun (fun ω ↦ ω n a) (fun ω ↦ ω m b) (streamMeasure ν)

Type uses (1)

streamMeasure

Body uses (1)

iIndepFun_eval_streamMeasure

Actions: Source · Open Issue

Proof

by
  change IndepFun (fun ω ↦ ω (n, a).1 (n, a).2) (fun ω ↦ ω (m, b).1 (m, b).2)
    (streamMeasure ν)
  exact (iIndepFun_eval_streamMeasure ν).indepFun (by grind)

`indepFun_eval_streamMeasure'`🔗

LemmaBandits.indepFun_eval_streamMeasure'

Details

No docstring.

theorem

Bandits.indepFun_eval_streamMeasure'.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] {a b : 𝓐} (h : a ≠ b) :
  ProbabilityTheory.IndepFun (fun ω n => ω n a) (fun ω n => ω n b)
    (streamMeasure ν)
Bandits.indepFun_eval_streamMeasure'.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  {a b : 𝓐} (h : a ≠ b) :
  ProbabilityTheory.IndepFun
    (fun ω n => ω n a) (fun ω n => ω n b)
    (streamMeasure ν)

Code

lemma indepFun_eval_streamMeasure' (ν : Kernel 𝓐 R) [IsMarkovKernel ν] {a b : 𝓐} (h : a ≠ b) :
    IndepFun (fun ω n ↦ ω n a) (fun ω n ↦ ω n b) (streamMeasure ν)

Type uses (1)

streamMeasure

Body uses (1)

indepFun_proj_infinitePi_infinitePi

Used by (1)

probReal_sum_le_sum_streamMeasure

Actions: Source · Open Issue

Proof

indepFun_proj_infinitePi_infinitePi h

`probSpace`🔗

DefinitionBandits.ArrayModel.probSpace

Details

Probability space for the array model of stochastic bandits.

def

Bandits.ArrayModel.probSpace.{u_1, u_2} (𝓐 : Type u_1) (R : Type u_2) :
  Type (max u_1 u_2)
Bandits.ArrayModel.probSpace.{u_1, u_2}
  (𝓐 : Type u_1) (R : Type u_2) :
  Type (max u_1 u_2)

Code

def probSpace : Type _ := (ℕ → I) × (ℕ → 𝓐 → R)

Used by (64)

Actions: Source · Open Issue

`instMeasurableSpaceProbSpace`🔗

InstanceBandits.ArrayModel.instMeasurableSpaceProbSpace

Details

No docstring.

def

Bandits.ArrayModel.instMeasurableSpaceProbSpace.{u_3, u_4}
  {𝓐 : Type u_3} {R : Type u_4} [MeasurableSpace R] :
  MeasurableSpace (probSpace 𝓐 R)
Bandits.ArrayModel.instMeasurableSpaceProbSpace.{u_3,
    u_4}
  {𝓐 : Type u_3} {R : Type u_4}
  [MeasurableSpace R] :
  MeasurableSpace (probSpace 𝓐 R)

Code

instance {𝓐 R : Type*} [MeasurableSpace R] : MeasurableSpace (probSpace 𝓐 R)

Type uses (1)

probSpace

Used by (41)

Actions: Source · Open Issue

Proof

inferInstanceAs (MeasurableSpace ((ℕ → I) × (ℕ → 𝓐 → R)))

`instStandardBorelSpaceProbSpaceOfCountable`🔗

InstanceBandits.ArrayModel.instStandardBorelSpaceProbSpaceOfCountable

Details

No docstring.

theorem

Bandits.ArrayModel.instStandardBorelSpaceProbSpaceOfCountable.{u_3, u_4}
  {𝓐 : Type u_3} {R : Type u_4} [Countable 𝓐] [MeasurableSpace R]
  [StandardBorelSpace R] : StandardBorelSpace (probSpace 𝓐 R)
Bandits.ArrayModel.instStandardBorelSpaceProbSpaceOfCountable.{u_3,
    u_4}
  {𝓐 : Type u_3} {R : Type u_4}
  [Countable 𝓐] [MeasurableSpace R]
  [StandardBorelSpace R] :
  StandardBorelSpace (probSpace 𝓐 R)

Code

instance {𝓐 R : Type*} [Countable 𝓐] [MeasurableSpace R] [StandardBorelSpace R] :
    StandardBorelSpace (probSpace 𝓐 R)

Type uses (2)

Used by (2)

Actions: Source · Open Issue

Proof

inferInstanceAs (StandardBorelSpace ((ℕ → I) × (ℕ → 𝓐 → R)))

`arrayMeasure`🔗

DefinitionBandits.ArrayModel.arrayMeasure

Details

Probability measure for the array model of stochastic bandits.

def

Bandits.ArrayModel.arrayMeasure.{u_1, u_2} {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R) :
  MeasureTheory.Measure (probSpace 𝓐 R)
Bandits.ArrayModel.arrayMeasure.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R) :
  MeasureTheory.Measure (probSpace 𝓐 R)

Code

noncomputable
def arrayMeasure (ν : Kernel 𝓐 R) : Measure (probSpace 𝓐 R) :=
  (Measure.infinitePi fun _ ↦ volume).prod (streamMeasure ν)

Type uses (2)

Body uses (1)

streamMeasure

Used by (29)

Actions: Source · Open Issue

`instIsProbabilityMeasureProbSpaceArrayMeasureOfIsMarkovKernel`🔗

InstanceBandits.ArrayModel.instIsProbabilityMeasureProbSpaceArrayMeasureOfIsMarkovKernel

Details

No docstring.

theorem

Bandits.ArrayModel.instIsProbabilityMeasureProbSpaceArrayMeasureOfIsMarkovKernel.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] :
  MeasureTheory.IsProbabilityMeasure (arrayMeasure ν)
Bandits.ArrayModel.instIsProbabilityMeasureProbSpaceArrayMeasureOfIsMarkovKernel.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] :
  MeasureTheory.IsProbabilityMeasure
    (arrayMeasure ν)

Code

instance (ν : Kernel 𝓐 R) [IsMarkovKernel ν] : IsProbabilityMeasure (arrayMeasure ν)

Type uses (3)

Body uses (2)

Used by (10)

Actions: Source · Open Issue

Proof

Measure.prod.instIsProbabilityMeasure _ _

`initAlgFunction`🔗

DefinitionBandits.ArrayModel.initAlgFunction

Details

The initial action is the image of a uniform random variable by this function.

def

Bandits.ArrayModel.initAlgFunction.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] (alg : Learning.Algorithm 𝓐 R) :
  ↑unitInterval → 𝓐
Bandits.ArrayModel.initAlgFunction.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐]
  (alg : Learning.Algorithm 𝓐 R) :
  ↑unitInterval → 𝓐

Code

noncomputable
def initAlgFunction (alg : Algorithm 𝓐 R) : I → 𝓐 :=
  (Measure.exists_measurable_map_eq alg.p0).choose

Type uses (1)

Algorithm

Body uses (1)

instIsProbabilityMeasureP0

Used by (12)

Actions: Source · Open Issue

`initAlgFunction_map`🔗

LemmaBandits.ArrayModel.initAlgFunction_map

Details

No docstring.

theorem

Bandits.ArrayModel.initAlgFunction_map.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] (alg : Learning.Algorithm 𝓐 R) :
  MeasureTheory.Measure.map (initAlgFunction alg) MeasureTheory.volume =
    Learning.Algorithm.p0 alg
Bandits.ArrayModel.initAlgFunction_map.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐]
  (alg : Learning.Algorithm 𝓐 R) :
  MeasureTheory.Measure.map
      (initAlgFunction alg)
      MeasureTheory.volume =
    Learning.Algorithm.p0 alg

Code

lemma initAlgFunction_map (alg : Algorithm 𝓐 R) : volume.map (initAlgFunction alg) = alg.p0

Type uses (2)

Body uses (1)

instIsProbabilityMeasureP0

Used by (1)

hasLaw_action_zero

Actions: Source · Open Issue

Proof

(Measure.exists_measurable_map_eq alg.p0).choose_spec.2

`measurable_initAlgFunction`🔗

LemmaBandits.ArrayModel.measurable_initAlgFunction

Details

No docstring.

theorem

Bandits.ArrayModel.measurable_initAlgFunction.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] (alg : Learning.Algorithm 𝓐 R) :
  Measurable (initAlgFunction alg)
Bandits.ArrayModel.measurable_initAlgFunction.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐]
  (alg : Learning.Algorithm 𝓐 R) :
  Measurable (initAlgFunction alg)

Code

lemma measurable_initAlgFunction (alg : Algorithm 𝓐 R) :
    Measurable (initAlgFunction alg)

Type uses (2)

Body uses (1)

instIsProbabilityMeasureP0

Used by (3)

Actions: Source · Open Issue

Proof

(Measure.exists_measurable_map_eq alg.p0).choose_spec.1

`algFunction`🔗

DefinitionBandits.ArrayModel.algFunction

Details

The next action is the image of the history and a uniform random variable by this function.

def

Bandits.ArrayModel.algFunction.{u_1, u_2} {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  (↥(Finset.Iic n) → 𝓐 × R) → ↑unitInterval → 𝓐
Bandits.ArrayModel.algFunction.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  (↥(Finset.Iic n) → 𝓐 × R) →
    ↑unitInterval → 𝓐

Code

noncomputable
def algFunction (alg : Algorithm 𝓐 R) (n : ℕ) :
    (Iic n → 𝓐 × R) → I → 𝓐 :=
  (Kernel.exists_measurable_map_eq_unitInterval (alg.policy n)).choose

Type uses (1)

Algorithm

Body uses (1)

instIsMarkovKernelForallSubtypeNatMemFinsetIicProdPolicy

Used by (17)

Actions: Source · Open Issue

`algFunction_map`🔗

LemmaBandits.ArrayModel.algFunction_map

Details

No docstring.

theorem

Bandits.ArrayModel.algFunction_map.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] (alg : Learning.Algorithm 𝓐 R)
  (n : ℕ) (h : ↥(Finset.Iic n) → 𝓐 × R) :
  MeasureTheory.Measure.map (algFunction alg n h) MeasureTheory.volume =
    (Learning.Algorithm.policy alg n) h
Bandits.ArrayModel.algFunction_map.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ)
  (h : ↥(Finset.Iic n) → 𝓐 × R) :
  MeasureTheory.Measure.map
      (algFunction alg n h)
      MeasureTheory.volume =
    (Learning.Algorithm.policy alg n) h

Code

lemma algFunction_map (alg : Algorithm 𝓐 R) (n : ℕ) (h : Iic n → 𝓐 × R) :
      volume.map (algFunction alg n h) = alg.policy n h

Type uses (2)

Body uses (1)

instIsMarkovKernelForallSubtypeNatMemFinsetIicProdPolicy

Used by (1)

hasCondDistrib_action'

Actions: Source · Open Issue

Proof

(Kernel.exists_measurable_map_eq_unitInterval (alg.policy n)).choose_spec.2 h

`measurable_algFunction`🔗

LemmaBandits.ArrayModel.measurable_algFunction

Details

No docstring.

theorem

Bandits.ArrayModel.measurable_algFunction.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] (alg : Learning.Algorithm 𝓐 R)
  (n : ℕ) : Measurable (Function.uncurry (algFunction alg n))
Bandits.ArrayModel.measurable_algFunction.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  Measurable
    (Function.uncurry (algFunction alg n))

Code

lemma measurable_algFunction (alg : Algorithm 𝓐 R) (n : ℕ) :
    Measurable (Function.uncurry (algFunction alg n))

Type uses (2)

Body uses (1)

instIsMarkovKernelForallSubtypeNatMemFinsetIicProdPolicy

Used by (4)

Actions: Source · Open Issue

Proof

(Kernel.exists_measurable_map_eq_unitInterval (alg.policy n)).choose_spec.1

`hist`🔗

DefinitionBandits.ArrayModel.hist

Details

History of actions and rewards up to time n in the array model.

def

Bandits.ArrayModel.hist.{u_1, u_2} {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐] (alg : Learning.Algorithm 𝓐 R)
  (ω : probSpace 𝓐 R) (n : ℕ) : ↥(Finset.Iic n) → 𝓐 × R
Bandits.ArrayModel.hist.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R)
  (ω : probSpace 𝓐 R) (n : ℕ) :
  ↥(Finset.Iic n) → 𝓐 × R

Code

noncomputable
def hist [DecidableEq 𝓐] (alg : Algorithm 𝓐 R) (ω : probSpace 𝓐 R) : (n : ℕ) → Iic n → 𝓐 × R
| 0 => fun _ ↦ (initAlgFunction alg (ω.1 0), ω.2 0 (initAlgFunction alg (ω.1 0)))
| n + 1 =>
  let hn : Iic n → 𝓐 × R := hist alg ω n
  let a : 𝓐 := algFunction alg n hn (ω.1 (n + 1))
  fun i ↦ if hin : i ≤ n then hn ⟨i, by simp [hin]⟩ else (a, ω.2 (pullCount' n hn a) a)

Type uses (2)

Body uses (3)

Used by (30)

Actions: Source · Open Issue

`hist_zero`🔗

LemmaBandits.ArrayModel.hist_zero

Details

No docstring.

theorem

Bandits.ArrayModel.hist_zero.{u_1, u_2} {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐] (alg : Learning.Algorithm 𝓐 R)
  (ω : probSpace 𝓐 R) :
  hist alg ω 0 = fun x =>
    (initAlgFunction alg (Prod.fst ω 0),
      Prod.snd ω 0 (initAlgFunction alg (Prod.fst ω 0)))
Bandits.ArrayModel.hist_zero.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R)
  (ω : probSpace 𝓐 R) :
  hist alg ω 0 = fun x =>
    (initAlgFunction alg (Prod.fst ω 0),
      Prod.snd ω 0
        (initAlgFunction alg
          (Prod.fst ω 0)))

Code

lemma hist_zero [DecidableEq 𝓐] (alg : Algorithm 𝓐 R) (ω : probSpace 𝓐 R) :
    hist alg ω 0 = fun _ ↦ (initAlgFunction alg (ω.1 0), ω.2 0 (initAlgFunction alg (ω.1 0)))

Type uses (4)

Actions: Source · Open Issue

Proof

rfl

`hist_add_one`🔗

LemmaBandits.ArrayModel.hist_add_one

Details

No docstring.

theorem

Bandits.ArrayModel.hist_add_one.{u_1, u_2} {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐] (alg : Learning.Algorithm 𝓐 R)
  (ω : probSpace 𝓐 R) (n : ℕ) :
  have a := algFunction alg n (hist alg ω n) (Prod.fst ω (n + 1));
  hist alg ω (n + 1) = fun i =>
    if hin : ↑i ≤ n then hist alg ω n ⟨↑i, ⋯⟩
    else (a, Prod.snd ω (Learning.pullCount' n (hist alg ω n) a) a)
Bandits.ArrayModel.hist_add_one.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R)
  (ω : probSpace 𝓐 R) (n : ℕ) :
  have a :=
    algFunction alg n (hist alg ω n)
      (Prod.fst ω (n + 1));
  hist alg ω (n + 1) = fun i =>
    if hin : ↑i ≤ n then
      hist alg ω n ⟨↑i, ⋯⟩
    else
      (a,
        Prod.snd ω
          (Learning.pullCount' n
            (hist alg ω n) a)
          a)

Code

lemma hist_add_one [DecidableEq 𝓐] (alg : Algorithm 𝓐 R) (ω : probSpace 𝓐 R) (n : ℕ) :
    let a : 𝓐

Type uses (5)

Used by (5)

Actions: Source · Open Issue

Proof

algFunction alg n (hist alg ω n) (ω.1 (n + 1))
    hist alg ω (n + 1) =
      fun (i : Iic (n + 1)) ↦ if hin : i ≤ n then hist alg ω n ⟨i, by simp [hin]⟩
        else (a, ω.2 (pullCount' n (hist alg ω n) a) a) := rfl

`hist_eq`🔗

LemmaBandits.ArrayModel.hist_eq

Details

No docstring.

theorem

Bandits.ArrayModel.hist_eq.{u_1, u_2} {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐] (alg : Learning.Algorithm 𝓐 R)
  (ω : probSpace 𝓐 R) (n : ℕ) :
  hist alg ω n = fun i => hist alg ω ↑i ⟨↑i, ⋯⟩
Bandits.ArrayModel.hist_eq.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R)
  (ω : probSpace 𝓐 R) (n : ℕ) :
  hist alg ω n = fun i =>
    hist alg ω ↑i ⟨↑i, ⋯⟩

Code

lemma hist_eq [DecidableEq 𝓐] (alg : Algorithm 𝓐 R) (ω : probSpace 𝓐 R) (n : ℕ) :
    hist alg ω n = fun i : Iic n ↦ hist alg ω i ⟨i.1, by simp⟩

Type uses (3)

Body uses (4)

Used by (7)

Actions: Source · Open Issue

Proof

by
  induction n with
  | zero =>
    ext i : 1
    simp only [hist]
    rw [Unique.eq_default i]
    simp [coe_default_Iic_zero]
  | succ n hn =>
    ext i : 1
    by_cases hin : i ≤ n
    · rw [hist_add_one]
      simp only [hin, ↓reduceDIte]
      rw [funext_iff] at hn
      simp_rw [hn]
    · grind

`hist_add_one_eq_IicSuccProd'`🔗

LemmaBandits.ArrayModel.hist_add_one_eq_IicSuccProd'

Details

No docstring.

theorem

Bandits.ArrayModel.hist_add_one_eq_IicSuccProd'.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐] [StandardBorelSpace 𝓐]
  [DecidableEq 𝓐] (alg : Learning.Algorithm 𝓐 R) (ω : probSpace 𝓐 R)
  (n : ℕ) :
  have a := algFunction alg n (hist alg ω n) (Prod.fst ω (n + 1));
  hist alg ω (n + 1) =
    (MeasurableEquiv.symm
        (MeasurableEquiv.IicSuccProd (fun x => 𝓐 × R) n))
      (hist alg ω n, a,
        Prod.snd ω (Learning.pullCount' n (hist alg ω n) a) a)
Bandits.ArrayModel.hist_add_one_eq_IicSuccProd'.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R)
  (ω : probSpace 𝓐 R) (n : ℕ) :
  have a :=
    algFunction alg n (hist alg ω n)
      (Prod.fst ω (n + 1));
  hist alg ω (n + 1) =
    (MeasurableEquiv.symm
        (MeasurableEquiv.IicSuccProd
          (fun x => 𝓐 × R) n))
      (hist alg ω n, a,
        Prod.snd ω
          (Learning.pullCount' n
            (hist alg ω n) a)
          a)

Code

lemma hist_add_one_eq_IicSuccProd' [DecidableEq 𝓐] (alg : Algorithm 𝓐 R) (ω : probSpace 𝓐 R)
    (n : ℕ) :
    let a : 𝓐

Type uses (6)

Body uses (1)

hist_add_one

Used by (1)

hist_add_one_eq_IicSuccProd

Actions: Source · Open Issue

Proof

algFunction alg n (hist alg ω n) (ω.1 (n + 1))
    hist alg ω (n + 1) =
      (MeasurableEquiv.IicSuccProd (fun _ ↦ 𝓐 × R) n).symm
        (hist alg ω n, (a, ω.2 (pullCount' n (hist alg ω n) a) a)) := by
  intro a
  rw [hist_add_one]
  ext i : 1
  simp only [Kernel.symm_IicSuccProd, MeasurableEquiv.prodCongr, MeasurableEquiv.refl_toEquiv,
    MeasurableEquiv.piSingleton, eq_rec_constant, MeasurableEquiv.IicProdIoc,
    MeasurableEquiv.trans_apply, MeasurableEquiv.coe_mk, Equiv.prodCongr_apply, Equiv.coe_refl,
    Equiv.coe_fn_mk, Prod.map_apply, id_eq]
  rfl

`action`🔗

DefinitionBandits.ArrayModel.action

Details

Action taken at time n in the array model.

def

Bandits.ArrayModel.action.{u_1, u_2} {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐] (alg : Learning.Algorithm 𝓐 R)
  (n : ℕ) (ω : probSpace 𝓐 R) : 𝓐
Bandits.ArrayModel.action.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ)
  (ω : probSpace 𝓐 R) : 𝓐

Code

noncomputable
def action [DecidableEq 𝓐] (alg : Algorithm 𝓐 R) (n : ℕ) (ω : probSpace 𝓐 R) : 𝓐 :=
  (hist alg ω n ⟨n, by simp⟩).1

Type uses (2)

Body uses (1)

hist

Used by (43)

Actions: Source · Open Issue

`action_zero`🔗

LemmaBandits.ArrayModel.action_zero

Details

No docstring.

theorem

Bandits.ArrayModel.action_zero.{u_1, u_2} {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) :
  action alg 0 = fun ω => initAlgFunction alg (Prod.fst ω 0)
Bandits.ArrayModel.action_zero.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) :
  action alg 0 = fun ω =>
    initAlgFunction alg (Prod.fst ω 0)

Code

lemma action_zero [DecidableEq 𝓐] (alg : Algorithm 𝓐 R) :
    action alg 0 = fun ω ↦ initAlgFunction alg (ω.1 0)

Type uses (4)

Used by (3)

Actions: Source · Open Issue

Proof

by
  ext
  simp [action, hist_zero]

`action_add_one_eq`🔗

LemmaBandits.ArrayModel.action_add_one_eq

Details

No docstring.

theorem

Bandits.ArrayModel.action_add_one_eq.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  action alg (n + 1) = fun ω =>
    algFunction alg n (hist alg ω n) (Prod.fst ω (n + 1))
Bandits.ArrayModel.action_add_one_eq.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  action alg (n + 1) = fun ω =>
    algFunction alg n (hist alg ω n)
      (Prod.fst ω (n + 1))

Code

lemma action_add_one_eq [DecidableEq 𝓐] (alg : Algorithm 𝓐 R) (n : ℕ) :
    action alg (n + 1) = fun ω ↦ algFunction alg n (hist alg ω n) (ω.1 (n + 1))

Type uses (5)

Body uses (2)

Used by (7)

Actions: Source · Open Issue

Proof

by
  ext ω
  rw [action, hist_add_one]
  simp only [add_le_iff_nonpos_right, nonpos_iff_eq_zero, one_ne_zero, ↓reduceDIte]

`reward`🔗

DefinitionBandits.ArrayModel.reward

Details

Reward received at time n in the array model.

def

Bandits.ArrayModel.reward.{u_1, u_2} {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐] (alg : Learning.Algorithm 𝓐 R)
  (n : ℕ) (ω : probSpace 𝓐 R) : R
Bandits.ArrayModel.reward.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ)
  (ω : probSpace 𝓐 R) : R

Code

noncomputable
def reward [DecidableEq 𝓐] (alg : Algorithm 𝓐 R) (n : ℕ) (ω : probSpace 𝓐 R) : R :=
  (hist alg ω n ⟨n, by simp⟩).2

Type uses (2)

Body uses (1)

hist

Used by (24)

Actions: Source · Open Issue

`reward_zero`🔗

LemmaBandits.ArrayModel.reward_zero

Details

No docstring.

theorem

Bandits.ArrayModel.reward_zero.{u_1, u_2} {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) :
  reward alg 0 = fun ω => Prod.snd ω 0 (action alg 0 ω)
Bandits.ArrayModel.reward_zero.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) :
  reward alg 0 = fun ω =>
    Prod.snd ω 0 (action alg 0 ω)

Code

lemma reward_zero [DecidableEq 𝓐] (alg : Algorithm 𝓐 R) :
    reward alg 0 = fun ω ↦ ω.2 0 (action alg 0 ω)

Type uses (4)

Body uses (2)

Used by (2)

Actions: Source · Open Issue

Proof

by
  ext
  simp [reward, hist_zero, action_zero]

`reward_add_one`🔗

LemmaBandits.ArrayModel.reward_add_one

Details

No docstring.

theorem

Bandits.ArrayModel.reward_add_one.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  reward alg (n + 1) = fun ω =>
    Prod.snd ω
      (Learning.pullCount' n (hist alg ω n) (action alg (n + 1) ω))
      (action alg (n + 1) ω)
Bandits.ArrayModel.reward_add_one.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  reward alg (n + 1) = fun ω =>
    Prod.snd ω
      (Learning.pullCount' n
        (hist alg ω n)
        (action alg (n + 1) ω))
      (action alg (n + 1) ω)

Code

lemma reward_add_one [DecidableEq 𝓐] (alg : Algorithm 𝓐 R) (n : ℕ) :
    reward alg (n + 1) =
      fun ω ↦ ω.2 (pullCount' n (hist alg ω n) (action alg (n + 1) ω)) (action alg (n + 1) ω)

Type uses (6)

Body uses (3)

Used by (2)

Actions: Source · Open Issue

Proof

by
  ext ω
  simp [reward, hist_add_one, action_add_one_eq]

`reward_eq`🔗

LemmaBandits.ArrayModel.reward_eq

Details

No docstring.

theorem

Bandits.ArrayModel.reward_eq.{u_1, u_2} {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐] (alg : Learning.Algorithm 𝓐 R)
  (n : ℕ) :
  reward alg n = fun ω =>
    Prod.snd ω (Learning.pullCount (action alg) (action alg n ω) n ω)
      (action alg n ω)
Bandits.ArrayModel.reward_eq.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  reward alg n = fun ω =>
    Prod.snd ω
      (Learning.pullCount (action alg)
        (action alg n ω) n ω)
      (action alg n ω)

Code

lemma reward_eq [DecidableEq 𝓐] (alg : Algorithm 𝓐 R) (n : ℕ) :
    reward alg n = fun ω ↦ ω.2 (pullCount (action alg) (action alg n ω) n ω) (action alg n ω)

Type uses (5)

Body uses (11)

Used by (3)

Actions: Source · Open Issue

Proof

by
  cases n with
  | zero => ext; simp [reward_zero, action_zero]
  | succ n =>
    ext ω
    rw [reward, hist_add_one]
    simp only [add_le_iff_nonpos_right, nonpos_iff_eq_zero, one_ne_zero, ↓reduceDIte]
    rw [action_add_one_eq, pullCount_eq_pullCount' (R' := reward alg) (by simp)]
    simp only [Nat.add_one_sub_one]
    rw [hist_eq]
    rfl

`sumRewards_eq`🔗

LemmaBandits.ArrayModel.sumRewards_eq

Details

No docstring.

theorem

Bandits.ArrayModel.sumRewards_eq.{u_1} {𝓐 : Type u_1}
  {m𝓐 : MeasurableSpace 𝓐} [Nonempty 𝓐] [StandardBorelSpace 𝓐]
  [DecidableEq 𝓐] (alg : Learning.Algorithm 𝓐 ℝ) (a : 𝓐) (n : ℕ)
  (ω : probSpace 𝓐 ℝ) :
  Learning.sumRewards (action alg) (reward alg) a n ω =
    ∑ i ∈ Finset.range (Learning.pullCount (action alg) a n ω),
      Prod.snd ω i a
Bandits.ArrayModel.sumRewards_eq.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐]
  [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 ℝ) (a : 𝓐)
  (n : ℕ) (ω : probSpace 𝓐 ℝ) :
  Learning.sumRewards (action alg)
      (reward alg) a n ω =
    ∑
      i ∈
        Finset.range
          (Learning.pullCount (action alg)
            a n ω),
      Prod.snd ω i a

Code

lemma sumRewards_eq [DecidableEq 𝓐] (alg : Algorithm 𝓐 ℝ) (a : 𝓐) (n : ℕ) (ω : probSpace 𝓐 ℝ) :
    sumRewards (action alg) (reward alg) a n ω =
      ∑ i ∈ range (pullCount (action alg) a n ω), ω.2 i a

Type uses (6)

Body uses (6)

Used by (3)

Actions: Source · Open Issue

Proof

by
  induction n with
  | zero => simp
  | succ n ih =>
    by_cases ha : action alg n ω = a
    · simp [ha, sumRewards_add_one, pullCount_add_one, sum_range_succ, ih, reward_eq]
    · simp [ha, sumRewards_add_one, pullCount_eq_pullCount_of_action_ne, ih]

`measurable_action_add_one'`🔗

LemmaBandits.ArrayModel.measurable_action_add_one'

Details

No docstring.

theorem

Bandits.ArrayModel.measurable_action_add_one'.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  {alg : Learning.Algorithm 𝓐 R} (n : ℕ)
  (h : Measurable fun x => hist alg x n) :
  Measurable fun x =>
    algFunction alg n (hist alg x n) (Prod.fst x (n + 1))
Bandits.ArrayModel.measurable_action_add_one'.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  {alg : Learning.Algorithm 𝓐 R} (n : ℕ)
  (h : Measurable fun x => hist alg x n) :
  Measurable fun x =>
    algFunction alg n (hist alg x n)
      (Prod.fst x (n + 1))

Code

lemma measurable_action_add_one' [DecidableEq 𝓐] {alg : Algorithm 𝓐 R}
    (n : ℕ) (h : Measurable (hist alg · n)) :
    Measurable (fun x ↦ algFunction alg n (hist alg x n) (x.1 (n + 1)))

Type uses (5)

Body uses (1)

measurable_algFunction

Used by (1)

measurable_pullCount'_action_add_one

Actions: Source · Open Issue

Proof

by fun_prop

`measurable_pullCount'_action_add_one`🔗

LemmaBandits.ArrayModel.measurable_pullCount'_action_add_one

Details

No docstring.

theorem

Bandits.ArrayModel.measurable_pullCount'_action_add_one.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐] [StandardBorelSpace 𝓐]
  [DecidableEq 𝓐] {alg : Learning.Algorithm 𝓐 R} (n : ℕ)
  (h_hist : Measurable fun x => hist alg x n) :
  Measurable fun x =>
    Learning.pullCount' n (hist alg x n)
      (algFunction alg n (hist alg x n) (Prod.fst x (n + 1)))
Bandits.ArrayModel.measurable_pullCount'_action_add_one.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  {alg : Learning.Algorithm 𝓐 R} (n : ℕ)
  (h_hist :
    Measurable fun x => hist alg x n) :
  Measurable fun x =>
    Learning.pullCount' n (hist alg x n)
      (algFunction alg n (hist alg x n)
        (Prod.fst x (n + 1)))

Code

lemma measurable_pullCount'_action_add_one [DecidableEq 𝓐] {alg : Algorithm 𝓐 R}
    (n : ℕ) (h_hist : Measurable (hist alg · n)) :
    Measurable (fun x ↦
      pullCount' n (hist alg x n) (algFunction alg n (hist alg x n) (x.1 (n + 1))))

Type uses (6)

Body uses (3)

Used by (1)

measurable_hist

Actions: Source · Open Issue

Proof

by
  have h_alg_meas : Measurable (fun x ↦ algFunction alg n (hist alg x n) (x.1 (n + 1))) :=
    measurable_action_add_one' n h_hist
  exact (measurable_uncurry_pullCount' (𝓐 := 𝓐) n).comp (h_hist.prodMk h_alg_meas)

`measurable_hist`🔗

LemmaBandits.ArrayModel.measurable_hist

Details

No docstring.

theorem

Bandits.ArrayModel.measurable_hist.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐] [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  Measurable fun ω => hist alg ω n
Bandits.ArrayModel.measurable_hist.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  Measurable fun ω => hist alg ω n

Code

lemma measurable_hist [DecidableEq 𝓐] [Countable 𝓐] (alg : Algorithm 𝓐 R) (n : ℕ) :
    Measurable (fun ω ↦ hist alg ω n)

Type uses (4)

Body uses (7)

Used by (8)

Actions: Source · Open Issue

Proof

by
  induction n with
  | zero =>
    simp_rw [hist_zero, measurable_pi_iff]
    refine fun _ ↦ Measurable.prodMk (by fun_prop) ?_
    change Measurable ((fun x : 𝓐 × ((ℕ → I) × (ℕ → 𝓐 → R)) ↦ x.2.2 0 x.1) ∘
        (fun x : (ℕ → I) × (ℕ → 𝓐 → R) ↦ (initAlgFunction alg (x.1 0), x)))
    have : Measurable (fun x : 𝓐 × ((ℕ → I) × (ℕ → 𝓐 → R)) ↦ x.2.2 0 x.1) :=
      measurable_from_prod_countable_right fun p ↦ by simp only; fun_prop
    exact Measurable.comp (by fun_prop) (Measurable.prodMk (by fun_prop) (by fun_prop))
  | succ n hn =>
    refine measurable_pi_iff.mpr fun i ↦ ?_
    by_cases hin : i ≤ n
    · simp only [hist, hin, ↓reduceDIte]
      rw [measurable_pi_iff] at hn
      exact hn ⟨i.1, by simp [hin]⟩
    · simp only [hist, hin, ↓reduceDIte]
      refine Measurable.prodMk (by fun_prop) ?_
      change Measurable ((fun (x : (ℕ → 𝓐 → R) × ℕ × 𝓐) ↦ x.1 x.2.1 x.2.2) ∘
        (fun x ↦ (x.2, pullCount' n (hist alg x n) (algFunction alg n (hist alg x n) (x.1 (n + 1))),
          (algFunction alg n (hist alg x n) (x.1 (n + 1))))))
      have h1 : Measurable (fun (x : (ℕ → 𝓐 → R) × ℕ × 𝓐) ↦ x.1 x.2.1 x.2.2) :=
        measurable_from_prod_countable_left fun p : ℕ × 𝓐 ↦ (by simp only; fun_prop)
      refine Measurable.comp (by fun_prop) (Measurable.prodMk (by fun_prop) ?_)
      refine Measurable.prodMk ?_ (by fun_prop)
      exact measurable_pullCount'_action_add_one n hn

`measurable_action`🔗

LemmaBandits.ArrayModel.measurable_action

Details

No docstring.

theorem

Bandits.ArrayModel.measurable_action.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐] [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ) : Measurable (action alg n)
Bandits.ArrayModel.measurable_action.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  Measurable (action alg n)

Code

lemma measurable_action [DecidableEq 𝓐] [Countable 𝓐] (alg : Algorithm 𝓐 R) (n : ℕ) :
    Measurable (action alg n)

Type uses (4)

Body uses (2)

Used by (13)

Actions: Source · Open Issue

Proof

by unfold action; fun_prop

`measurable_reward`🔗

LemmaBandits.ArrayModel.measurable_reward

Details

No docstring.

theorem

Bandits.ArrayModel.measurable_reward.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐] [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ) : Measurable (reward alg n)
Bandits.ArrayModel.measurable_reward.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  Measurable (reward alg n)

Code

lemma measurable_reward [DecidableEq 𝓐] [Countable 𝓐] (alg : Algorithm 𝓐 R) (n : ℕ) :
    Measurable (reward alg n)

Type uses (4)

Body uses (2)

Used by (8)

Actions: Source · Open Issue

Proof

by unfold reward; fun_prop

`hist_add_one_eq_IicSuccProd`🔗

LemmaBandits.ArrayModel.hist_add_one_eq_IicSuccProd

Details

No docstring.

theorem

Bandits.ArrayModel.hist_add_one_eq_IicSuccProd.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (ω : probSpace 𝓐 R) (n : ℕ) :
  hist alg ω (n + 1) =
    (MeasurableEquiv.symm
        (MeasurableEquiv.IicSuccProd (fun x => 𝓐 × R) n))
      (hist alg ω n, action alg (n + 1) ω, reward alg (n + 1) ω)
Bandits.ArrayModel.hist_add_one_eq_IicSuccProd.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R)
  (ω : probSpace 𝓐 R) (n : ℕ) :
  hist alg ω (n + 1) =
    (MeasurableEquiv.symm
        (MeasurableEquiv.IicSuccProd
          (fun x => 𝓐 × R) n))
      (hist alg ω n, action alg (n + 1) ω,
        reward alg (n + 1) ω)

Code

lemma hist_add_one_eq_IicSuccProd [DecidableEq 𝓐] (alg : Algorithm 𝓐 R) (ω : probSpace 𝓐 R)
    (n : ℕ) :
    hist alg ω (n + 1) =
      (MeasurableEquiv.IicSuccProd (fun _ ↦ 𝓐 × R) n).symm
        (hist alg ω n, (action alg (n + 1) ω, reward alg (n + 1) ω))

Type uses (6)

Body uses (5)

Used by (1)

hist_congr

Actions: Source · Open Issue

Proof

by
  rw [hist_add_one_eq_IicSuccProd', reward_add_one, action_add_one_eq]

`measurable_pullCount_action_add_one`🔗

LemmaBandits.ArrayModel.measurable_pullCount_action_add_one

Details

No docstring.

theorem

Bandits.ArrayModel.measurable_pullCount_action_add_one.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐] [StandardBorelSpace 𝓐]
  [DecidableEq 𝓐] [Countable 𝓐] (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  Measurable fun ω =>
    Learning.pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω
Bandits.ArrayModel.measurable_pullCount_action_add_one.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  Measurable fun ω =>
    Learning.pullCount (action alg)
      (action alg (n + 1) ω) (n + 1) ω

Code

lemma measurable_pullCount_action_add_one [DecidableEq 𝓐] [Countable 𝓐] (alg : Algorithm 𝓐 R)
    (n : ℕ) :
    Measurable (fun ω ↦ pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω)

Type uses (5)

Body uses (3)

Used by (6)

Actions: Source · Open Issue

Proof

by
  change Measurable ((fun p : (probSpace 𝓐 R) × 𝓐 ↦ pullCount (action alg) p.2 (n + 1) p.1) ∘
    (fun ω : probSpace 𝓐 R ↦ (ω, action alg (n + 1) ω)))
  exact (measurable_uncurry_pullCount (by fun_prop) _).comp (by fun_prop)

`hist_congr`🔗

LemmaBandits.ArrayModel.hist_congr

Details

No docstring.

theorem

Bandits.ArrayModel.hist_congr.{u_1, u_2} {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐] (alg : Learning.Algorithm 𝓐 R)
  (n : ℕ) {ω ω' : probSpace 𝓐 R}
  (hω1 : ∀ i ≤ n, Prod.fst ω i = Prod.fst ω' i)
  (hω2 :
    ∀ (i : ℕ) (a : 𝓐),
      i < Learning.pullCount (action alg) a (n + 1) ω →
        Prod.snd ω i a = Prod.snd ω' i a) :
  hist alg ω n = hist alg ω' n
Bandits.ArrayModel.hist_congr.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ)
  {ω ω' : probSpace 𝓐 R}
  (hω1 :
    ∀ i ≤ n, Prod.fst ω i = Prod.fst ω' i)
  (hω2 :
    ∀ (i : ℕ) (a : 𝓐),
      i <
          Learning.pullCount (action alg)
            a (n + 1) ω →
        Prod.snd ω i a =
          Prod.snd ω' i a) :
  hist alg ω n = hist alg ω' n

Code

lemma hist_congr (alg : Algorithm 𝓐 R) (n : ℕ) {ω ω' : probSpace 𝓐 R}
    (hω1 : ∀ i ≤ n, ω.1 i = ω'.1 i)
    (hω2 : ∀ i a, i < pullCount (action alg) a (n + 1) ω → ω.2 i a = ω'.2 i a) :
    hist alg ω n = hist alg ω' n

Type uses (5)

Body uses (13)

Used by (3)

Actions: Source · Open Issue

Proof

by
  induction n with
  | zero =>
    simp only [zero_add, pullCount_one] at hω2
    simp_rw [hist_zero]
    ext i : 1
    simp only [le_refl, hω1, Prod.mk.injEq, true_and]
    refine hω2 0 _ ?_
    simp [action, hω1]
  | succ n hn =>
    simp_rw [hist_add_one_eq_IicSuccProd]
    specialize hn fun i hin ↦ hω1 i (by grind)
    have h_hist : hist alg ω n = hist alg ω' n := by
      refine hn fun i a hi ↦ hω2 i a (hi.trans_le ?_)
      exact pullCount_mono _ (by lia) _
    have h_action : action alg (n + 1) ω = action alg (n + 1) ω' := by
      simp_rw [action_add_one_eq]
      rw [h_hist, hω1 _ le_rfl]
    congr 3
    simp only [reward_add_one, h_hist, h_action]
    refine hω2 _ _ ?_
    rw [pullCount_add_one, h_action]
    simp only [↓reduceIte]
    rw [pullCount_eq_pullCount' (R' := reward alg) (by simp)]
    simp only [Nat.add_one_sub_one]
    rw [← h_hist, hist_eq]
    change pullCount' n  (fun i ↦ (action alg i ω, reward alg i ω)) (action alg (n + 1) ω') <
      pullCount' n (fun i ↦ (action alg i ω, reward alg i ω)) (action alg (n + 1) ω') + 1
    grind

`stepsUntil_congr_aux`🔗

LemmaBandits.ArrayModel.stepsUntil_congr_aux

Details

No docstring.

theorem

Bandits.ArrayModel.stepsUntil_congr_aux.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (a : 𝓐) (m n : ℕ)
  {ω ω' : probSpace 𝓐 R} (hω1 : ∀ (i : ℕ), Prod.fst ω i = Prod.fst ω' i)
  (hω2_ne : ∀ (i : ℕ) (b : 𝓐), b ≠ a → Prod.snd ω i b = Prod.snd ω' i b)
  (hω2_eq : ∀ (i : ℕ), i + 1 ≤ m → Prod.snd ω i a = Prod.snd ω' i a)
  (h_eq :
    action alg (n + 1) ω = a ∧
      Learning.pullCount (action alg) a (n + 1) ω = m) :
  action alg (n + 1) ω' = a ∧
    Learning.pullCount (action alg) a (n + 1) ω' = m
Bandits.ArrayModel.stepsUntil_congr_aux.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (a : 𝓐)
  (m n : ℕ) {ω ω' : probSpace 𝓐 R}
  (hω1 :
    ∀ (i : ℕ),
      Prod.fst ω i = Prod.fst ω' i)
  (hω2_ne :
    ∀ (i : ℕ) (b : 𝓐),
      b ≠ a →
        Prod.snd ω i b = Prod.snd ω' i b)
  (hω2_eq :
    ∀ (i : ℕ),
      i + 1 ≤ m →
        Prod.snd ω i a = Prod.snd ω' i a)
  (h_eq :
    action alg (n + 1) ω = a ∧
      Learning.pullCount (action alg) a
          (n + 1) ω =
        m) :
  action alg (n + 1) ω' = a ∧
    Learning.pullCount (action alg) a
        (n + 1) ω' =
      m

Code

lemma stepsUntil_congr_aux (alg : Algorithm 𝓐 R)
    (a : 𝓐) (m n : ℕ) {ω ω' : probSpace 𝓐 R}
    (hω1 : ∀ i, ω.1 i = ω'.1 i) (hω2_ne : ∀ i b, b ≠ a → ω.2 i b = ω'.2 i b)
    (hω2_eq : ∀ i, i + 1 ≤ m → ω.2 i a = ω'.2 i a)
    (h_eq : action alg (n + 1) ω = a ∧ pullCount (action alg) a (n + 1) ω = m) :
    action alg (n + 1) ω' = a ∧ pullCount (action alg) a (n + 1) ω' = m

Type uses (4)

Body uses (6)

Used by (1)

stepsUntil_congr

Actions: Source · Open Issue

Proof

by
  obtain ⟨h_action, h_pc⟩ := h_eq
  have h_hist := hist_congr alg n (ω := ω) (ω' := ω') (by grind) fun i b hi ↦ ?_
  swap
  · rcases eq_or_ne b a with (rfl | hba)
    · refine hω2_eq i ?_
      rw [h_pc] at hi
      grind
    · grind
  constructor
  · rw [← h_action, action_add_one_eq]
    simp [h_hist, hω1]
  · simp_rw [← h_pc, pullCount_eq_sum]
    refine Finset.sum_congr rfl fun i hi ↦ ?_
    congr 2
    rw [hist_eq _ _ n, hist_eq _ _ n, funext_iff] at h_hist
    unfold action
    specialize h_hist ⟨i, by grind⟩
    simp only at h_hist
    rw [h_hist]

`stepsUntil_congr`🔗

LemmaBandits.ArrayModel.stepsUntil_congr

Details

No docstring.

theorem

Bandits.ArrayModel.stepsUntil_congr.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (a : 𝓐) (m n : ℕ)
  {ω ω' : probSpace 𝓐 R} (hω1 : ∀ (i : ℕ), Prod.fst ω i = Prod.fst ω' i)
  (hω2_ne : ∀ (i : ℕ) (b : 𝓐), b ≠ a → Prod.snd ω i b = Prod.snd ω' i b)
  (hω2_eq : ∀ (i : ℕ), i + 1 ≤ m → Prod.snd ω i a = Prod.snd ω' i a) :
  action alg (n + 1) ω = a ∧
      Learning.pullCount (action alg) a (n + 1) ω = m ↔
    action alg (n + 1) ω' = a ∧
      Learning.pullCount (action alg) a (n + 1) ω' = m
Bandits.ArrayModel.stepsUntil_congr.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (a : 𝓐)
  (m n : ℕ) {ω ω' : probSpace 𝓐 R}
  (hω1 :
    ∀ (i : ℕ),
      Prod.fst ω i = Prod.fst ω' i)
  (hω2_ne :
    ∀ (i : ℕ) (b : 𝓐),
      b ≠ a →
        Prod.snd ω i b = Prod.snd ω' i b)
  (hω2_eq :
    ∀ (i : ℕ),
      i + 1 ≤ m →
        Prod.snd ω i a =
          Prod.snd ω' i a) :
  action alg (n + 1) ω = a ∧
      Learning.pullCount (action alg) a
          (n + 1) ω =
        m ↔
    action alg (n + 1) ω' = a ∧
      Learning.pullCount (action alg) a
          (n + 1) ω' =
        m

Code

lemma stepsUntil_congr (alg : Algorithm 𝓐 R) (a : 𝓐) (m n : ℕ) {ω ω' : probSpace 𝓐 R}
    (hω1 : ∀ i, ω.1 i = ω'.1 i) (hω2_ne : ∀ i b, b ≠ a → ω.2 i b = ω'.2 i b)
    (hω2_eq : ∀ i, i + 1 ≤ m → ω.2 i a = ω'.2 i a) :
    (action alg (n + 1) ω = a ∧ pullCount (action alg) a (n + 1) ω = m) ↔
      (action alg (n + 1) ω' = a ∧ pullCount (action alg) a (n + 1) ω' = m)

Type uses (4)

Body uses (1)

stepsUntil_congr_aux

Used by (1)

stepsUntil_indicator_congr

Actions: Source · Open Issue

Proof

⟨stepsUntil_congr_aux alg a m n hω1 hω2_ne hω2_eq,
    stepsUntil_congr_aux alg a m n (by grind) (by grind) (by grind)⟩

`stepsUntil_indicator_congr`🔗

LemmaBandits.ArrayModel.stepsUntil_indicator_congr

Details

No docstring.

theorem

Bandits.ArrayModel.stepsUntil_indicator_congr.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (a : 𝓐) (m n : ℕ)
  {ω ω' : probSpace 𝓐 R} (hω1 : ∀ (i : ℕ), Prod.fst ω i = Prod.fst ω' i)
  (hω2_ne : ∀ (i : ℕ) (b : 𝓐), b ≠ a → Prod.snd ω i b = Prod.snd ω' i b)
  (hω2_eq : ∀ (i : ℕ), i + 1 ≤ m → Prod.snd ω i a = Prod.snd ω' i a) :
  Set.indicator
      {ω |
        action alg (n + 1) ω = a ∧
          Learning.pullCount (action alg) a (n + 1) ω = m}
      (fun x => 1) ω =
    Set.indicator
      {ω |
        action alg (n + 1) ω = a ∧
          Learning.pullCount (action alg) a (n + 1) ω = m}
      (fun x => 1) ω'
Bandits.ArrayModel.stepsUntil_indicator_congr.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (a : 𝓐)
  (m n : ℕ) {ω ω' : probSpace 𝓐 R}
  (hω1 :
    ∀ (i : ℕ),
      Prod.fst ω i = Prod.fst ω' i)
  (hω2_ne :
    ∀ (i : ℕ) (b : 𝓐),
      b ≠ a →
        Prod.snd ω i b = Prod.snd ω' i b)
  (hω2_eq :
    ∀ (i : ℕ),
      i + 1 ≤ m →
        Prod.snd ω i a =
          Prod.snd ω' i a) :
  Set.indicator
      {ω |
        action alg (n + 1) ω = a ∧
          Learning.pullCount (action alg)
              a (n + 1) ω =
            m}
      (fun x => 1) ω =
    Set.indicator
      {ω |
        action alg (n + 1) ω = a ∧
          Learning.pullCount (action alg)
              a (n + 1) ω =
            m}
      (fun x => 1) ω'

Code

lemma stepsUntil_indicator_congr (alg : Algorithm 𝓐 R) (a : 𝓐) (m n : ℕ) {ω ω' : probSpace 𝓐 R}
    (hω1 : ∀ i, ω.1 i = ω'.1 i) (hω2_ne : ∀ i b, b ≠ a → ω.2 i b = ω'.2 i b)
    (hω2_eq : ∀ i, i + 1 ≤ m → ω.2 i a = ω'.2 i a) :
    {ω | action alg (n + 1) ω = a ∧ pullCount (action alg) a (n + 1) ω = m}.indicator (fun _ ↦ 1)
        ω =
      {ω | action alg (n + 1) ω = a ∧ pullCount (action alg) a (n + 1) ω = m}.indicator
        (fun _ ↦ 1) ω'

Type uses (4)

Body uses (1)

stepsUntil_congr

Used by (1)

measurable_stepsUntil

Actions: Source · Open Issue

Proof

by
  simp only [Set.indicator_apply, Set.mem_setOf_eq]
  simp_rw [stepsUntil_congr alg a m n hω1 hω2_ne hω2_eq]

`measurable_hist_comap`🔗

LemmaBandits.ArrayModel.measurable_hist_comap

Details

No docstring.

theorem

Bandits.ArrayModel.measurable_hist_comap.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐] [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  Measurable fun x => hist alg x n
Bandits.ArrayModel.measurable_hist_comap.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  Measurable fun x => hist alg x n

Code

lemma measurable_hist_comap [Countable 𝓐] (alg : Algorithm 𝓐 R) (n : ℕ) :
    Measurable[MeasurableSpace.comap (fun ω ↦ (fun (i : Iic n) ↦ ω.1 i, ω.2)) inferInstance]
      (hist alg · n)

Type uses (3)

Body uses (6)

Used by (1)

indepFun_fst_add_one_hist

Actions: Source · Open Issue

Proof

by
  have h_eq : (hist alg · n) =
      ((hist alg · n) ∘ (fun p ↦ (fun i : ℕ ↦ p.1 ⟨min i n, by grind⟩, p.2))) ∘
        (fun ω ↦ (fun (i : Iic n) ↦ ω.1 i, ω.2)) := by
    ext ω : 1
    exact hist_congr alg n (by grind) (by simp)
  rw [h_eq]
  refine measurable_comp_comap _ (Measurable.comp (by fun_prop) ?_)
  refine Measurable.prodMk ?_ (by fun_prop)
  rw [measurable_pi_iff]
  intro i
  change Measurable ((fun p ↦ p ⟨min i n, by simp⟩) ∘ (fun x : (Iic n → I) × (ℕ → 𝓐 → R) ↦ x.1))
  exact Measurable.comp (by fun_prop) measurable_fst

`truePast`🔗

DefinitionBandits.ArrayModel.truePast

Details

All random variables in the space, except for the unseen rewards for action a after time n.

def

Bandits.ArrayModel.truePast.{u_1, u_2} {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐] [Nonempty R]
  (alg : Learning.Algorithm 𝓐 R) (a : 𝓐) (n : ℕ) (ω : probSpace 𝓐 R) :
  probSpace 𝓐 R
Bandits.ArrayModel.truePast.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Nonempty R]
  (alg : Learning.Algorithm 𝓐 R) (a : 𝓐)
  (n : ℕ) (ω : probSpace 𝓐 R) :
  probSpace 𝓐 R

Code

noncomputable
def truePast (alg : Algorithm 𝓐 R) (a : 𝓐) (n : ℕ) (ω : probSpace 𝓐 R) :
    probSpace 𝓐 R :=
  (ω.1, fun i b ↦ if b = a then if pullCount (action alg) a (n + 1) ω ≠ 0 then
      ω.2 (min i ((pullCount (action alg) a (n + 1) ω) - 1)) a else Nonempty.some inferInstance
    else ω.2 i b)

Type uses (2)

Body uses (2)

Used by (6)

Actions: Source · Open Issue

`truePast_eq_of_pullCount_eq`🔗

LemmaBandits.ArrayModel.truePast_eq_of_pullCount_eq

Details

No docstring.

theorem

Bandits.ArrayModel.truePast_eq_of_pullCount_eq.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐] [Nonempty R]
  (alg : Learning.Algorithm 𝓐 R) (a : 𝓐) (n m : ℕ) (ω : probSpace 𝓐 R)
  (h_pc : Learning.pullCount (action alg) a (n + 1) ω = m) :
  truePast alg a n ω =
    (Prod.fst ω, fun i b =>
      if b = a then
        if m ≠ 0 then Prod.snd ω (min i (m - 1)) a else Nonempty.some ⋯
      else Prod.snd ω i b)
Bandits.ArrayModel.truePast_eq_of_pullCount_eq.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Nonempty R]
  (alg : Learning.Algorithm 𝓐 R) (a : 𝓐)
  (n m : ℕ) (ω : probSpace 𝓐 R)
  (h_pc :
    Learning.pullCount (action alg) a
        (n + 1) ω =
      m) :
  truePast alg a n ω =
    (Prod.fst ω, fun i b =>
      if b = a then
        if m ≠ 0 then
          Prod.snd ω (min i (m - 1)) a
        else Nonempty.some ⋯
      else Prod.snd ω i b)

Code

lemma truePast_eq_of_pullCount_eq (alg : Algorithm 𝓐 R)
    (a : 𝓐) (n m : ℕ) (ω : probSpace 𝓐 R)
    (h_pc : pullCount (action alg) a (n + 1) ω = m) :
    truePast alg a n ω = (ω.1, fun i b ↦ if b = a then if m ≠ 0 then
      ω.2 (min i (m - 1)) a else Nonempty.some inferInstance else ω.2 i b)

Type uses (5)

Actions: Source · Open Issue

Proof

by
  simp [truePast, h_pc]

`truePast_eq_of_pullCount_eq_of_ne_zero`🔗

LemmaBandits.ArrayModel.truePast_eq_of_pullCount_eq_of_ne_zero

Details

No docstring.

theorem

Bandits.ArrayModel.truePast_eq_of_pullCount_eq_of_ne_zero.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐] [StandardBorelSpace 𝓐]
  [DecidableEq 𝓐] [Nonempty R] (alg : Learning.Algorithm 𝓐 R) (a : 𝓐)
  (n m : ℕ) (ω : probSpace 𝓐 R)
  (h_pc : Learning.pullCount (action alg) a (n + 1) ω = m)
  (hm : m ≠ 0) :
  truePast alg a n ω =
    (Prod.fst ω, fun i b =>
      if b = a then Prod.snd ω (min i (m - 1)) a else Prod.snd ω i b)
Bandits.ArrayModel.truePast_eq_of_pullCount_eq_of_ne_zero.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Nonempty R]
  (alg : Learning.Algorithm 𝓐 R) (a : 𝓐)
  (n m : ℕ) (ω : probSpace 𝓐 R)
  (h_pc :
    Learning.pullCount (action alg) a
        (n + 1) ω =
      m)
  (hm : m ≠ 0) :
  truePast alg a n ω =
    (Prod.fst ω, fun i b =>
      if b = a then
        Prod.snd ω (min i (m - 1)) a
      else Prod.snd ω i b)

Code

lemma truePast_eq_of_pullCount_eq_of_ne_zero (alg : Algorithm 𝓐 R)
    (a : 𝓐) (n m : ℕ) (ω : probSpace 𝓐 R)
    (h_pc : pullCount (action alg) a (n + 1) ω = m) (hm : m ≠ 0) :
    truePast alg a n ω = (ω.1, fun i b ↦ if b = a then
      ω.2 (min i (m - 1)) a else ω.2 i b)

Type uses (5)

Actions: Source · Open Issue

Proof

by
  simp [truePast, h_pc, hm]

`measurable_hist_truePast`🔗

LemmaBandits.ArrayModel.measurable_hist_truePast

Details

No docstring.

theorem

Bandits.ArrayModel.measurable_hist_truePast.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐] [Nonempty R]
  [Countable 𝓐] (alg : Learning.Algorithm 𝓐 R) (a : 𝓐) (n : ℕ) :
  Measurable fun x => hist alg x n
Bandits.ArrayModel.measurable_hist_truePast.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Nonempty R] [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (a : 𝓐)
  (n : ℕ) :
  Measurable fun x => hist alg x n

Code

lemma measurable_hist_truePast [Countable 𝓐] (alg : Algorithm 𝓐 R)
    (a : 𝓐) (n : ℕ) :
    Measurable[MeasurableSpace.comap (truePast alg a n) inferInstance] (hist alg · n)

Type uses (5)

Body uses (4)

Used by (3)

Actions: Source · Open Issue

Proof

by
  have h_eq : (hist alg · n) = (hist alg · n) ∘ (truePast alg a n) := by
    ext ω : 1
    refine hist_congr alg n (fun _ _ ↦ rfl) fun i b hi ↦ ?_
    by_cases hb : b = a
    · subst hb
      simp only [truePast, ↓reduceIte]
      rw [min_eq_left, if_pos (by grind)]
      grind
    · simp [truePast, hb]
  rw [h_eq]
  refine Measurable.comp ?_ (Measurable.of_comap_le le_rfl)
  fun_prop

`measurable_action_add_one_truePast`🔗

LemmaBandits.ArrayModel.measurable_action_add_one_truePast

Details

No docstring.

theorem

Bandits.ArrayModel.measurable_action_add_one_truePast.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐] [StandardBorelSpace 𝓐]
  [DecidableEq 𝓐] [Nonempty R] [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (a : 𝓐) (n : ℕ) :
  Measurable (action alg (n + 1))
Bandits.ArrayModel.measurable_action_add_one_truePast.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Nonempty R] [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (a : 𝓐)
  (n : ℕ) :
  Measurable (action alg (n + 1))

Code

lemma measurable_action_add_one_truePast [Countable 𝓐] (alg : Algorithm 𝓐 R)
    (a : 𝓐) (n : ℕ) :
    Measurable[MeasurableSpace.comap (truePast alg a n) inferInstance]
      (action alg (n + 1))

Type uses (5)

Body uses (5)

Actions: Source · Open Issue

Proof

by
  rw [action_add_one_eq]
  change Measurable[MeasurableSpace.comap (truePast alg a n) inferInstance]
    ((fun p ↦ algFunction alg n p.1 p.2) ∘ (fun ω ↦ (hist alg ω n, ω.1 (n + 1))))
  refine (measurable_algFunction alg n).comp (Measurable.prodMk ?_ ?_)
  · exact measurable_hist_truePast alg a n
  · have : (fun ω ↦ ω.1 (n + 1)) =
      (fun (p : probSpace 𝓐 R) ↦ p.1 (n + 1)) ∘ (truePast alg a n) := rfl
    rw [this]
    exact Measurable.comp (by fun_prop) (Measurable.of_comap_le le_rfl)

`measurable_pullCount_add_one_truePast`🔗

LemmaBandits.ArrayModel.measurable_pullCount_add_one_truePast

Details

No docstring.

theorem

Bandits.ArrayModel.measurable_pullCount_add_one_truePast.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐] [StandardBorelSpace 𝓐]
  [DecidableEq 𝓐] [Nonempty R] [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (a : 𝓐) (n : ℕ) :
  Measurable (Learning.pullCount (action alg) a (n + 1))
Bandits.ArrayModel.measurable_pullCount_add_one_truePast.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Nonempty R] [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (a : 𝓐)
  (n : ℕ) :
  Measurable
    (Learning.pullCount (action alg) a
      (n + 1))

Code

lemma measurable_pullCount_add_one_truePast [Countable 𝓐] (alg : Algorithm 𝓐 R) (a : 𝓐) (n : ℕ) :
    Measurable[MeasurableSpace.comap (truePast alg a n) inferInstance]
      (pullCount (action alg) a (n + 1))

Type uses (6)

Body uses (5)

Actions: Source · Open Issue

Proof

by
  change Measurable[MeasurableSpace.comap (truePast alg a n) inferInstance]
    (fun ω ↦ pullCount (action alg) a (n + 1) ω)
  simp_rw [pullCount_eq_sum]
  refine measurable_sum _ fun i hi ↦ Measurable.ite ?_ (by fun_prop) (by fun_prop)
  refine (measurableSet_singleton _).preimage ?_
  have h_meas := measurable_hist_truePast alg a n
  simp_rw [hist_eq _ _ n, @measurable_pi_iff] at h_meas
  exact (h_meas ⟨i, by grind⟩).fst

`measurable_stepsUntil`🔗

LemmaBandits.ArrayModel.measurable_stepsUntil

Details

No docstring.

theorem

Bandits.ArrayModel.measurable_stepsUntil.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐] [Nonempty R]
  [Countable 𝓐] (alg : Learning.Algorithm 𝓐 R) (a : 𝓐) (m n : ℕ) :
  Measurable
    (Set.indicator
      {ω |
        action alg (n + 1) ω = a ∧
          Learning.pullCount (action alg) a (n + 1) ω = m}
      fun x => 1)
Bandits.ArrayModel.measurable_stepsUntil.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Nonempty R] [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (a : 𝓐)
  (m n : ℕ) :
  Measurable
    (Set.indicator
      {ω |
        action alg (n + 1) ω = a ∧
          Learning.pullCount (action alg)
              a (n + 1) ω =
            m}
      fun x => 1)

Code

lemma measurable_stepsUntil [Countable 𝓐] (alg : Algorithm 𝓐 R) (a : 𝓐) (m n : ℕ) :
    Measurable[MeasurableSpace.comap
        (fun ω ↦ (ω.1, fun k b ↦ if b = a then if m ≠ 0 then ω.2 (min k (m - 1)) b
          else Nonempty.some inferInstance else ω.2 k b)) inferInstance]
      (({ω | action alg (n + 1) ω = a ∧
        pullCount (action alg) a (n + 1) ω = m}).indicator (fun _ ↦ 1))

Type uses (4)

Body uses (4)

Used by (2)

Actions: Source · Open Issue

Proof

by
  let f := ({ω | action alg (n + 1) ω = a ∧ pullCount (action alg) a (n + 1) ω = m}).indicator
    (fun _ ↦ 1)
  have h_eq : f = f ∘
      fun ω ↦ (ω.1, fun k b ↦ if b = a then if m ≠ 0 then ω.2 (min k (m - 1)) b
        else Nonempty.some inferInstance else ω.2 k b) := by
    ext ω
    exact stepsUntil_indicator_congr alg a m n (by grind) (by grind) (by grind)
  change Measurable[MeasurableSpace.comap
    (fun ω ↦ (ω.1, fun k b ↦ if b = a then if m ≠ 0 then ω.2 (min k (m - 1)) b
      else Nonempty.some inferInstance else ω.2 k b)) inferInstance] f
  rw [h_eq]
  refine Measurable.comp ?_ (Measurable.of_comap_le le_rfl)
  refine Measurable.indicator (by fun_prop) ?_
  exact MeasurableSet.inter ((measurableSet_singleton _).preimage (by fun_prop))
    ((measurableSet_singleton _).preimage (by fun_prop))

`measurable_pullCount_action_add_one_hist`🔗

LemmaBandits.ArrayModel.measurable_pullCount_action_add_one_hist

Details

No docstring.

theorem

Bandits.ArrayModel.measurable_pullCount_action_add_one_hist.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐] [StandardBorelSpace 𝓐]
  [DecidableEq 𝓐] (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  Measurable fun ω =>
    Learning.pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω
Bandits.ArrayModel.measurable_pullCount_action_add_one_hist.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (n : ℕ) :
  Measurable fun ω =>
    Learning.pullCount (action alg)
      (action alg (n + 1) ω) (n + 1) ω

Code

lemma measurable_pullCount_action_add_one_hist (alg : Algorithm 𝓐 R) (n : ℕ) :
    Measurable[MeasurableSpace.comap (fun ω ↦ (action alg (n + 1) ω, hist alg ω n)) inferInstance]
      (fun ω ↦ pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω)

Type uses (5)

Body uses (4)

Used by (1)

hasCondDistrib_reward'

Actions: Source · Open Issue

Proof

by
  simp_rw [pullCount_eq_sum]
  refine measurable_sum _ fun i hi ↦ Measurable.ite ?_ (by fun_prop) (by fun_prop)
  refine measurableSet_eq_fun ?_ (measurable_comp_comap _ measurable_fst)
  rw [measurable_iff_comap_le]
  simp_rw [hist_eq _ _ n]
  rw [← measurable_iff_comap_le]
  unfold action
  refine Measurable.fst (mγ := inferInstance) ?_
  have : (hist alg · i ⟨i, by grind⟩) =
      (fun ω : 𝓐 × (Iic n → 𝓐 × R) ↦ ω.2 ⟨i, by grind⟩) ∘
        (fun ω ↦ (action alg (n + 1) ω, fun i : Iic n ↦ hist alg ω i ⟨i, by grind⟩)) := rfl
  rw [this]
  exact measurable_comp_comap _ (Measurable.prodMk (by fun_prop) (by fun_prop))

`map_snd_apply_arrayMeasure`🔗

LemmaBandits.ArrayModel.map_snd_apply_arrayMeasure

Details

No docstring.

theorem

Bandits.ArrayModel.map_snd_apply_arrayMeasure.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  {ν : ProbabilityTheory.Kernel 𝓐 R}
  [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) (a : 𝓐) :
  MeasureTheory.Measure.map (fun ω => Prod.snd ω n a) (arrayMeasure ν) =
    ν a
Bandits.ArrayModel.map_snd_apply_arrayMeasure.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R}
  {ν : ProbabilityTheory.Kernel 𝓐 R}
  [ProbabilityTheory.IsMarkovKernel ν]
  (n : ℕ) (a : 𝓐) :
  MeasureTheory.Measure.map
      (fun ω => Prod.snd ω n a)
      (arrayMeasure ν) =
    ν a

Code

lemma map_snd_apply_arrayMeasure {ν : Kernel 𝓐 R} [IsMarkovKernel ν] (n : ℕ) (a : 𝓐) :
    (arrayMeasure ν).map (fun ω ↦ ω.2 n a) = ν a

Type uses (3)

Body uses (2)

Used by (3)

Actions: Source · Open Issue

Proof

by
  calc (arrayMeasure ν).map (fun ω ↦ ω.2 n a)
  _ = (arrayMeasure ν).snd.map (fun ω ↦ ω n a) := by
    rw [Measure.snd, Measure.map_map (by fun_prop) (by fun_prop)]
    rfl
  _ = ν a := by
    rw [arrayMeasure, Measure.snd_prod, streamMeasure]
    have : (fun ω ↦ ω n a) = (fun h : 𝓐 → R ↦ h a) ∘ (fun ω : ℕ → 𝓐 → R ↦ ω n) := rfl
    rw [this, ← Measure.map_map (by fun_prop) (by fun_prop), Measure.infinitePi_map_eval,
      Measure.infinitePi_map_eval]

`indepFun_fst_snd`🔗

LemmaBandits.ArrayModel.indepFun_fst_snd

Details

No docstring.

theorem

Bandits.ArrayModel.indepFun_fst_snd.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] :
  ProbabilityTheory.IndepFun Prod.fst Prod.snd (arrayMeasure ν)
Bandits.ArrayModel.indepFun_fst_snd.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] :
  ProbabilityTheory.IndepFun Prod.fst
    Prod.snd (arrayMeasure ν)

Code

lemma indepFun_fst_snd (ν : Kernel 𝓐 R) [IsMarkovKernel ν] :
    IndepFun Prod.fst Prod.snd (arrayMeasure ν)

Type uses (2)

Body uses (2)

Actions: Source · Open Issue

Proof

indepFun_prod measurable_id measurable_id

`indepFun_fst_zero_snd_zero_action`🔗

LemmaBandits.ArrayModel.indepFun_fst_zero_snd_zero_action

Details

No docstring.

theorem

Bandits.ArrayModel.indepFun_fst_zero_snd_zero_action.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) :
  ProbabilityTheory.IndepFun (fun ω => Prod.fst ω 0)
    (fun ω => Prod.snd ω 0 a) (arrayMeasure ν)
Bandits.ArrayModel.indepFun_fst_zero_snd_zero_action.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  (a : 𝓐) :
  ProbabilityTheory.IndepFun
    (fun ω => Prod.fst ω 0)
    (fun ω => Prod.snd ω 0 a)
    (arrayMeasure ν)

Code

lemma indepFun_fst_zero_snd_zero_action (ν : Kernel 𝓐 R) [IsMarkovKernel ν] (a : 𝓐) :
    IndepFun (fun ω ↦ ω.1 0) (fun ω ↦ ω.2 0 a) (arrayMeasure ν)

Type uses (3)

Body uses (2)

Used by (1)

hasCondDistrib_reward_zero

Actions: Source · Open Issue

Proof

indepFun_prod (X := fun ω : ℕ → I ↦ ω 0) (Y := fun ω : ℕ → 𝓐 → R ↦ ω 0 a)
    (by fun_prop) (by fun_prop)

`indepFun_fst_add_one_aux`🔗

LemmaBandits.ArrayModel.indepFun_fst_add_one_aux

Details

No docstring.

theorem

Bandits.ArrayModel.indepFun_fst_add_one_aux.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
  ProbabilityTheory.IndepFun (fun ω => Prod.fst ω (n + 1))
    (fun ω => (fun i => Prod.fst ω ↑i, Prod.snd ω)) (arrayMeasure ν)
Bandits.ArrayModel.indepFun_fst_add_one_aux.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R}
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  (n : ℕ) :
  ProbabilityTheory.IndepFun
    (fun ω => Prod.fst ω (n + 1))
    (fun ω =>
      (fun i => Prod.fst ω ↑i,
        Prod.snd ω))
    (arrayMeasure ν)

Code

lemma indepFun_fst_add_one_aux (ν : Kernel 𝓐 R) [IsMarkovKernel ν] (n : ℕ) :
    (fun ω ↦ ω.1 (n + 1)) ⟂ᵢ[arrayMeasure ν] (fun ω ↦ (fun (i : Iic n) ↦ ω.1 i, ω.2))

Type uses (3)

Body uses (2)

Used by (1)

indepFun_fst_add_one_hist

Actions: Source · Open Issue

Proof

by
  let μ₁ : Measure (ℕ → I) := Measure.infinitePi fun _ ↦ volume
  let μ₂ : Measure (ℕ → 𝓐 → R) := streamMeasure ν
  -- Coordinates of μ₁ are independent
  have h_indep : iIndepFun (fun i (ω : ℕ → I) ↦ ω i) μ₁ :=
    iIndepFun_infinitePi (fun _ ↦ measurable_id)
  have h_indep_n : IndepFun (fun ω ↦ ω (n + 1)) (fun ω ↦ fun i : Iic n ↦ ω i) μ₁ := by
    have h := h_indep.indepFun_finset₀ {n + 1} (Iic n) (by simp)
      (fun i ↦ (measurable_pi_apply i).aemeasurable)
    convert h.comp (measurable_pi_apply ⟨n + 1, by simp⟩) measurable_id using 1
    · rfl
    · rfl
  rw [indepFun_iff_measure_inter_preimage_eq_mul]
  intro s t hs ht
  let X : (ℕ → I) × (ℕ → 𝓐 → R) → I := fun ω ↦ ω.1 (n + 1)
  let Y : (ℕ → I) × (ℕ → 𝓐 → R) → (Iic n → I) × (ℕ → 𝓐 → R) := fun ω ↦ (fun i ↦ ω.1 i, ω.2)
  change (μ₁.prod μ₂) (X ⁻¹' s ∩ Y ⁻¹' t) = (μ₁.prod μ₂) (X ⁻¹' s) * (μ₁.prod μ₂) (Y ⁻¹' t)
  -- Rewrite using Fubini
  rw [Measure.prod_apply (hs.preimage (by fun_prop : Measurable X)),
    Measure.prod_apply (ht.preimage (by fun_prop : Measurable Y)),
    Measure.prod_apply ((hs.preimage (by fun_prop : Measurable X)).inter
      (ht.preimage (by fun_prop : Measurable Y)))]
  -- Compute fibers
  have hX_fst ω₁ : μ₂ (Prod.mk ω₁ ⁻¹' (X ⁻¹' s)) = s.indicator 1 (ω₁ (n + 1)) := by
    simp only [X, Set.preimage_preimage]
    by_cases h : ω₁ (n + 1) ∈ s <;> simp [h]
  have hY_fst ω₁ : μ₂ (Prod.mk ω₁ ⁻¹' (Y ⁻¹' t)) = μ₂ {y | ((fun i : Iic n ↦ ω₁ i), y) ∈ t} := rfl
  have hXY ω₁ : μ₂ (Prod.mk ω₁ ⁻¹' (X ⁻¹' s ∩ Y ⁻¹' t)) =
      s.indicator 1 (ω₁ (n + 1)) * μ₂ {y | ((fun i : Iic n ↦ ω₁ i), y) ∈ t} := by
    simp only [X, Y, Set.preimage_inter, Set.preimage_preimage]
    by_cases h : ω₁ (n + 1) ∈ s
    · simp [h]
      congr
    · simp [h]
  simp_rw [hY_fst, hX_fst, hXY]
  -- Factor the integral using independence
  let g : (Iic n → I) → ENNReal := fun x ↦ μ₂ {y | (x, y) ∈ t}
  have hg_meas : Measurable g := measurable_measure_prodMk_left ht
  have hf_meas : Measurable (fun ω₁ : ℕ → I ↦ s.indicator (1 : I → ENNReal) (ω₁ (n + 1))) :=
    (measurable_one.indicator hs).comp (measurable_pi_apply _)
  have hindep_fg : IndepFun (fun ω₁ ↦ s.indicator (1 : I → ENNReal) (ω₁ (n + 1)))
      (fun ω₁ ↦ g (fun i ↦ ω₁ i)) μ₁ :=
    h_indep_n.comp (measurable_one.indicator hs) hg_meas
  have h_eq (ω₁ : ℕ → I) : μ₂ {y | ((fun i : Iic n ↦ ω₁ i), y) ∈ t} = g (fun i ↦ ω₁ i) := rfl
  simp_rw [h_eq]
  exact lintegral_mul_eq_lintegral_mul_lintegral_of_indepFun hf_meas (by fun_prop) hindep_fg

`indepFun_fst_add_one_hist`🔗

LemmaBandits.ArrayModel.indepFun_fst_add_one_hist

Details

No docstring.

theorem

Bandits.ArrayModel.indepFun_fst_add_one_hist.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [StandardBorelSpace R] [Nonempty R] [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
  ProbabilityTheory.IndepFun (fun ω => Prod.fst ω (n + 1))
    (fun x => hist alg x n) (arrayMeasure ν)
Bandits.ArrayModel.indepFun_fst_add_one_hist.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [StandardBorelSpace R] [Nonempty R]
  [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  (n : ℕ) :
  ProbabilityTheory.IndepFun
    (fun ω => Prod.fst ω (n + 1))
    (fun x => hist alg x n)
    (arrayMeasure ν)

Code

lemma indepFun_fst_add_one_hist [Countable 𝓐] (alg : Algorithm 𝓐 R)
    (ν : Kernel 𝓐 R) [IsMarkovKernel ν] (n : ℕ) :
    IndepFun (fun ω ↦ ω.1 (n + 1)) (hist alg · n) (arrayMeasure ν)

Type uses (5)

Body uses (3)

Used by (1)

hasCondDistrib_action'

Actions: Source · Open Issue

Proof

(indepFun_fst_add_one_aux ν n).of_measurable_right (measurable_hist_comap alg n)

`indepFun_snd_apply_aux`🔗

LemmaBandits.ArrayModel.indepFun_snd_apply_aux

Details

No docstring.

theorem

Bandits.ArrayModel.indepFun_snd_apply_aux.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [DecidableEq 𝓐] [Nonempty R] (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (m : ℕ) :
  ProbabilityTheory.IndepFun (fun ω => Prod.snd ω m a)
    (fun ω =>
      (Prod.fst ω, fun k b =>
        if b = a then
          if m ≠ 0 then Prod.snd ω (min k (m - 1)) b
          else Nonempty.some ⋯
        else Prod.snd ω k b))
    (arrayMeasure ν)
Bandits.ArrayModel.indepFun_snd_apply_aux.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [DecidableEq 𝓐]
  [Nonempty R]
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  (a : 𝓐) (m : ℕ) :
  ProbabilityTheory.IndepFun
    (fun ω => Prod.snd ω m a)
    (fun ω =>
      (Prod.fst ω, fun k b =>
        if b = a then
          if m ≠ 0 then
            Prod.snd ω (min k (m - 1)) b
          else Nonempty.some ⋯
        else Prod.snd ω k b))
    (arrayMeasure ν)

Code

lemma indepFun_snd_apply_aux (ν : Kernel 𝓐 R) [IsMarkovKernel ν] (a : 𝓐) (m : ℕ) :
    (fun ω ↦ ω.2 m a) ⟂ᵢ[arrayMeasure ν]
      (fun ω ↦ (ω.1, fun k b ↦ if b = a then if m ≠ 0 then ω.2 (min k (m - 1)) b
        else Nonempty.some inferInstance else ω.2 k b))

Type uses (3)

Body uses (2)

Used by (2)

Actions: Source · Open Issue

Proof

by
  unfold arrayMeasure
  let μ₁ : Measure (ℕ → I) := Measure.infinitePi fun _ ↦ volume
  let μ₂ : Measure (ℕ → 𝓐 → R) := Measure.infinitePi fun _ ↦ Measure.infinitePi ν
  -- Independence within μ₂: coordinates ω i are independent
  have h_indep₂ : iIndepFun (fun i (ω : ℕ → 𝓐 → R) ↦ ω i) μ₂ :=
    iIndepFun_infinitePi (fun _ ↦ measurable_id)
  -- Independence within each infinitePi ν: coordinates f b are independent
  have h_indep_inner : iIndepFun (fun (b : 𝓐) (f : 𝓐 → R) ↦ f b) (Measure.infinitePi ν) :=
    iIndepFun_infinitePi (fun _ ↦ measurable_id)
  rw [indepFun_iff_measure_inter_preimage_eq_mul]
  intro s t hs ht
  let X : (ℕ → I) × (ℕ → 𝓐 → R) → R := fun ω ↦ ω.2 m a
  let Y : (ℕ → I) × (ℕ → 𝓐 → R) → (ℕ → I) × (ℕ → 𝓐 → R) :=
    fun ω ↦ (ω.1, fun k b ↦ if b = a then if m ≠ 0 then ω.2 (min k (m - 1)) b
      else Nonempty.some inferInstance else ω.2 k b)
  have hX_meas : Measurable X :=
    (measurable_pi_apply a).comp ((measurable_pi_apply m).comp measurable_snd)
  have hY_meas : Measurable Y := by
    change Measurable (fun ω : (ℕ → I) × (ℕ → 𝓐 → R) ↦
      (ω.1, fun k b ↦ if b = a then if m ≠ 0 then ω.2 (min k (m - 1)) b
        else Nonempty.some inferInstance else ω.2 k b))
    refine Measurable.prod measurable_fst ?_
    refine measurable_pi_lambda _ (fun k ↦ ?_)
    refine measurable_pi_lambda _ (fun b ↦ ?_)
    by_cases hb : b = a
    · simp only [hb, ↓reduceIte]
      by_cases hm : m ≠ 0
      · simp only [ne_eq, hm, not_false_eq_true, ↓reduceIte]
        exact (measurable_pi_apply a).comp
          ((measurable_pi_apply (min k (m - 1))).comp measurable_snd)
      · simp only [hm, ↓reduceIte]
        exact measurable_const
    · simp only [hb, ↓reduceIte]
      exact (measurable_pi_apply b).comp ((measurable_pi_apply k).comp measurable_snd)
  change (μ₁.prod μ₂) (X ⁻¹' s ∩ Y ⁻¹' t) = (μ₁.prod μ₂) (X ⁻¹' s) * (μ₁.prod μ₂) (Y ⁻¹' t)
  -- Use Fubini on μ₁.prod μ₂
  rw [Measure.prod_apply (hs.preimage hX_meas),
    Measure.prod_apply (ht.preimage hY_meas),
    Measure.prod_apply ((hs.preimage hX_meas).inter (ht.preimage hY_meas))]
  -- X only depends on ω₂, so its fiber is constant in ω₁
  have hX_fst : ∀ ω₁, μ₂ (Prod.mk ω₁ ⁻¹' (X ⁻¹' s)) = μ₂ ((fun ω₂ ↦ ω₂ m a) ⁻¹' s) := fun _ ↦ rfl
  simp_rw [hX_fst]
  -- The LHS integral: fiber of X ∩ Y at ω₁
  -- Key: X depends only on ω₂ m a, while Y's dependence on ω₂ avoids (m, a)
  -- Define the "truncation" map on ω₂
  let trunc : (ℕ → 𝓐 → R) → (ℕ → 𝓐 → R) :=
    fun ω₂ k b ↦ if b = a then if m ≠ 0 then ω₂ (min k (m - 1)) b
      else Nonempty.some inferInstance else ω₂ k b
  -- The fiber of Y at ω₁ only depends on trunc(ω₂)
  have hY_fiber : ∀ ω₁, Prod.mk ω₁ ⁻¹' (Y ⁻¹' t) = (fun ω₂ ↦ (ω₁, trunc ω₂)) ⁻¹' t := fun _ ↦ rfl
  -- The fiber of X ∩ Y factors
  have hXY_fiber : ∀ ω₁, Prod.mk ω₁ ⁻¹' (X ⁻¹' s ∩ Y ⁻¹' t) =
      ((fun ω₂ ↦ ω₂ m a) ⁻¹' s) ∩ ((fun ω₂ ↦ (ω₁, trunc ω₂)) ⁻¹' t) := fun _ ↦ rfl
  simp_rw [hXY_fiber, hY_fiber]
  -- Now we use independence in μ₂: (ω₂ m a) is independent of (trunc ω₂)
  -- because trunc only uses indices (k, a) with k < m, and (k, b) with b ≠ a
  have h_trunc_meas : Measurable trunc := by
    refine measurable_pi_lambda _ (fun k ↦ ?_)
    refine measurable_pi_lambda _ (fun b ↦ ?_)
    simp only [trunc]
    by_cases hb : b = a
    · simp only [hb, ↓reduceIte]
      by_cases hm : m = 0
      · simp only [hm]
        exact measurable_const
      · simp only [ne_eq, hm, not_false_eq_true, ↓reduceIte]
        exact (measurable_pi_apply a).comp (measurable_pi_apply (min k (m - 1)))
    · simp only [hb, ↓reduceIte]
      exact (measurable_pi_apply b).comp (measurable_pi_apply k)
  -- Key independence: (ω₂ m a) ⟂ trunc because trunc only uses coordinates ≠ (m, a)
  have h_indep_trunc : IndepFun (fun ω₂ ↦ ω₂ m a) trunc μ₂ := by
    -- Factor trunc through proj which extracts the relevant coordinates
    let proj : (ℕ → 𝓐 → R) → ((ℕ → R) × (ℕ → {b : 𝓐 // b ≠ a} → R)) := fun ω₂ ↦
      (fun k ↦ if m ≠ 0 then ω₂ (min k (m - 1)) a else Nonempty.some inferInstance,
      fun k ⟨b, _⟩ ↦ ω₂ k b)
    have h_trunc_proj : ∀ ω₂, trunc ω₂ = (fun p k b ↦
        if h : b = a then if m ≠ 0 then p.1 k
          else Nonempty.some inferInstance else p.2 k ⟨b, h⟩) (proj ω₂) := by
      intro ω₂; ext k b; simp only [trunc, proj]; by_cases hb : b = a <;> simp [hb]; grind
    have h_proj_meas : Measurable proj := by
      refine Measurable.prod ?_ ?_
      · refine measurable_pi_lambda _ fun k ↦ ?_
        by_cases hm : m ≠ 0
        · simp only [proj, ne_eq, hm, not_false_eq_true, ↓reduceIte]
          exact (measurable_pi_apply a).comp (measurable_pi_apply (min k (m - 1)))
        · simp [proj, hm]
      · exact measurable_pi_lambda _ (fun k ↦ measurable_pi_lambda _ (fun ⟨b, _⟩ ↦
          (measurable_pi_apply b).comp (measurable_pi_apply k)))
    have h_g_meas : Measurable (fun p : (ℕ → R) × (ℕ → {b : 𝓐 // b ≠ a} → R) ↦
        (fun k b ↦ if h : b = a then if m ≠ 0 then p.1 k else Nonempty.some inferInstance
          else p.2 k ⟨b, h⟩)) := by
      refine measurable_pi_lambda _ (fun k ↦ measurable_pi_lambda _ (fun b ↦ ?_))
      by_cases hb : b = a
      · simp only [hb, ↓reduceDIte]
        by_cases hm : m ≠ 0
        · simp only [ne_eq, hm, not_false_eq_true]
          exact (measurable_pi_apply k).comp measurable_fst
        · simp [hm]
      · simp only [hb, ↓reduceDIte]
        exact (measurable_pi_apply (⟨b, hb⟩ : {b : 𝓐 // b ≠ a})).comp
          ((measurable_pi_apply k).comp measurable_snd)
    -- Show (ω₂ m a) ⟂ proj: proj uses coordinates disjoint from (m, a)
    have h_indep_proj : IndepFun (fun ω₂ ↦ ω₂ m a) proj μ₂ := by
      have h_row_bound (hm : m ≠ 0) : ∀ k, min k (m - 1) < m := by
        intro k
        calc min k (m - 1) ≤ m - 1 := Nat.min_le_right k (m - 1)
          _ < m := Nat.sub_lt (by grind) Nat.one_pos
      rw [indepFun_iff_measure_inter_preimage_eq_mul]
      intro s t' hs ht'
      -- rows_lt_m extracts column a at rows < m, other_cols extracts columns ≠ a
      let rows_lt_m : (ℕ → 𝓐 → R) → (Iio m → R) := fun ω₂ ⟨j, _⟩ ↦ ω₂ j a
      let other_cols : (ℕ → 𝓐 → R) → (ℕ → {b : 𝓐 // b ≠ a} → R) := fun ω₂ k ⟨b, _⟩ ↦ ω₂ k b
      have h_proj_factor : ∀ ω₂, proj ω₂ =
          ((fun r k ↦ if hm : m ≠ 0 then r ⟨min k (m - 1), Finset.mem_Iio.mpr (h_row_bound hm k)⟩
            else Nonempty.some inferInstance) (rows_lt_m ω₂),
           other_cols ω₂) := by
        intro ω₂; ext1
        · ext k
          by_cases hm : m ≠ 0
          · simp [proj, rows_lt_m, hm]
          · simp [proj, hm]
        · rfl
      -- Use iIndepFun structure of the doubly-indexed infinite product
      have h_iindep : iIndepFun (fun (p : ℕ × 𝓐) ω ↦ ω p.1 p.2) μ₂ :=
        iIndepFun_uncurry_infinitePi' (X := fun _ _ ↦ id) (fun _ ↦ ν) (by fun_prop)
      have h_rows_meas : Measurable rows_lt_m :=
        measurable_pi_lambda _ (fun ⟨j, _⟩ ↦ (measurable_pi_apply a).comp (measurable_pi_apply j))
      have h_other_meas : Measurable other_cols :=
        measurable_pi_lambda _ (fun k ↦ measurable_pi_lambda _ (fun ⟨b, _⟩ ↦
          (measurable_pi_apply b).comp (measurable_pi_apply k)))
      -- Show (ω₂ m a) ⟂ (rows_lt_m, other_cols) via indep_iSup_of_disjoint
      have h_indep_combined : IndepFun (fun ω₂ ↦ ω₂ m a)
          (fun ω₂ ↦ (rows_lt_m ω₂, other_cols ω₂)) μ₂ := by
        rw [IndepFun_iff_Indep]
        have h_comap_le : (MeasurableSpace.pi.prod MeasurableSpace.pi).comap
            (fun ω₂ ↦ (rows_lt_m ω₂, other_cols ω₂)) ≤
            ⨆ (p : {p : ℕ × 𝓐 // p ≠ (m, a)}), mR.comap (fun ω ↦ ω p.val.1 p.val.2) := by
          rw [MeasurableSpace.comap_prodMk]
          refine sup_le ?_ ?_
          · rw [MeasurableSpace.comap_pi]
            refine iSup_le (fun ⟨j, hj⟩ ↦ ?_)
            have h_ne : (j, a) ≠ (m, a) := fun h ↦ (Finset.mem_Iio.mp hj).ne (Prod.mk.inj h).1
            exact le_iSup_of_le ⟨(j, a), h_ne⟩ le_rfl
          · rw [MeasurableSpace.comap_pi]
            refine iSup_le (fun k ↦ ?_)
            rw [MeasurableSpace.comap_pi]
            refine iSup_le (fun ⟨b, hb⟩ ↦ ?_)
            have h_ne : (k, b) ≠ (m, a) := fun h ↦ hb (Prod.mk.inj h).2
            exact le_iSup_of_le ⟨(k, b), h_ne⟩ le_rfl
        refine indep_of_indep_of_le_right ?_ h_comap_le
        have h_disjoint : Disjoint ({(m, a)} : Set (ℕ × 𝓐)) {p | p ≠ (m, a)} := by simp
        have h_le : ∀ p : ℕ × 𝓐, mR.comap (fun ω : ℕ → 𝓐 → R ↦ ω p.1 p.2) ≤
            MeasurableSpace.pi (m := fun _ ↦ MeasurableSpace.pi) := fun p ↦
          Measurable.comap_le ((measurable_pi_apply p.2).comp (measurable_pi_apply p.1))
        have h_iindep' : iIndep (fun p : ℕ × 𝓐 ↦ mR.comap (fun ω : ℕ → 𝓐 → R ↦ ω p.1 p.2)) μ₂ :=
          h_iindep.iIndep
        have h_indep := indep_iSup_of_disjoint h_le h_iindep' h_disjoint
        convert h_indep using 2
        · simp only [Set.mem_singleton_iff, iSup_iSup_eq_left]
        · simp only [ne_eq, Set.mem_setOf_eq, iSup_subtype']
      have h_proj_preimage : proj ⁻¹' t' = (fun ω₂ ↦ (rows_lt_m ω₂, other_cols ω₂)) ⁻¹'
          {p | ((fun r k ↦ if hm : m ≠ 0 then
            r ⟨min k (m - 1), Finset.mem_Iio.mpr (h_row_bound hm k)⟩
            else Nonempty.some inferInstance) p.1, p.2) ∈ t'}
          := by ext ω₂; simp only [Set.mem_preimage, Set.mem_setOf_eq, h_proj_factor]
      rw [indepFun_iff_measure_inter_preimage_eq_mul] at h_indep_combined
      rw [h_proj_preimage]
      let T : Set ((Iio m → R) × (ℕ → {b : 𝓐 // b ≠ a} → R)) :=
        {p | ((fun r k ↦ if hm : m ≠ 0 then
          r ⟨min k (m - 1), Finset.mem_Iio.mpr (h_row_bound hm k)⟩
          else Nonempty.some inferInstance) p.1, p.2) ∈ t'}
      have hT_meas : MeasurableSet T := by
        refine ht'.preimage (Measurable.prod ?_ measurable_snd)
        refine measurable_pi_lambda _ (fun k ↦ ?_)
        by_cases hm : m ≠ 0
        · simp only [ne_eq, hm, not_false_eq_true, ↓reduceDIte]
          exact (measurable_pi_apply (⟨min k (m - 1), Finset.mem_Iio.mpr (h_row_bound hm k)⟩ :
            Iio m)).comp measurable_fst
        · simp [hm]
      change μ₂ ((fun ω₂ ↦ ω₂ m a) ⁻¹' s ∩ (fun ω₂ ↦ (rows_lt_m ω₂, other_cols ω₂)) ⁻¹' T) =
        μ₂ ((fun ω₂ ↦ ω₂ m a) ⁻¹' s) *
        μ₂ ((fun ω₂ ↦ (rows_lt_m ω₂, other_cols ω₂)) ⁻¹' T)
      exact h_indep_combined s T hs hT_meas
    have h_eq : trunc = (fun p k b ↦ if h : b = a then if m ≠ 0 then p.1 k
        else Nonempty.some inferInstance else p.2 k ⟨b, h⟩) ∘ proj := by
      funext ω₂; exact h_trunc_proj ω₂
    rw [h_eq]
    exact h_indep_proj.comp measurable_id h_g_meas
  rw [indepFun_iff_measure_inter_preimage_eq_mul] at h_indep_trunc
  have h_const : ∀ ω₁, μ₂ (((fun ω₂ ↦ ω₂ m a) ⁻¹' s) ∩ ((fun ω₂ ↦ (ω₁, trunc ω₂)) ⁻¹' t)) =
      μ₂ ((fun ω₂ ↦ ω₂ m a) ⁻¹' s) * μ₂ ((fun ω₂ ↦ (ω₁, trunc ω₂)) ⁻¹' t) := fun ω₁ ↦
    h_indep_trunc s _ hs (ht.preimage (by fun_prop))
  simp_rw [h_const]
  let c := μ₂ ((fun ω₂ ↦ ω₂ m a) ⁻¹' s)
  change ∫⁻ x, c * μ₂ ((fun ω₂ ↦ (x, trunc ω₂)) ⁻¹' t) ∂μ₁ =
    (∫⁻ _, c ∂μ₁) * ∫⁻ x, μ₂ ((fun ω₂ ↦ (x, trunc ω₂)) ⁻¹' t) ∂μ₁
  have h_preimage : ∀ x, (fun ω₂ ↦ (x, trunc ω₂)) ⁻¹' t = trunc ⁻¹' (Prod.mk x ⁻¹' t) := fun _ ↦ rfl
  simp_rw [h_preimage]
  have h_map : ∀ x, μ₂ (trunc ⁻¹' (Prod.mk x ⁻¹' t)) = (μ₂.map trunc) (Prod.mk x ⁻¹' t) := by
    intro x; rw [Measure.map_apply h_trunc_meas (ht.preimage (by fun_prop))]
  simp_rw [h_map]
  rw [lintegral_const_mul _ (measurable_measure_prodMk_left_finite ht),
    lintegral_const, measure_univ, mul_one]

`indepFun_snd_apply_pullCount_action`🔗

LemmaBandits.ArrayModel.indepFun_snd_apply_pullCount_action

Details

No docstring.

theorem

Bandits.ArrayModel.indepFun_snd_apply_pullCount_action.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐] [StandardBorelSpace 𝓐]
  [DecidableEq 𝓐] [Nonempty R] [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (m n : ℕ) :
  ProbabilityTheory.IndepFun (fun ω => Prod.snd ω m a)
    (Set.indicator
      {ω |
        action alg (n + 1) ω = a ∧
          Learning.pullCount (action alg) a (n + 1) ω = m}
      fun x => 1)
    (arrayMeasure ν)
Bandits.ArrayModel.indepFun_snd_apply_pullCount_action.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Nonempty R] [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  (a : 𝓐) (m n : ℕ) :
  ProbabilityTheory.IndepFun
    (fun ω => Prod.snd ω m a)
    (Set.indicator
      {ω |
        action alg (n + 1) ω = a ∧
          Learning.pullCount (action alg)
              a (n + 1) ω =
            m}
      fun x => 1)
    (arrayMeasure ν)

Code

lemma indepFun_snd_apply_pullCount_action [Countable 𝓐] (alg : Algorithm 𝓐 R)
    (ν : Kernel 𝓐 R) [IsMarkovKernel ν] (a : 𝓐) (m n : ℕ) :
    (fun ω ↦ ω.2 m a) ⟂ᵢ[arrayMeasure ν]
      ({ω | action alg (n + 1) ω = a ∧
        pullCount (action alg) a (n + 1) ω = m}).indicator (fun _ ↦ 1)

Type uses (6)

Body uses (3)

Used by (2)

Actions: Source · Open Issue

Proof

(indepFun_snd_apply_aux ν a m).of_measurable_right (measurable_stepsUntil alg a m n)

`indepFun_cond_comp`🔗

LemmaBandits.ArrayModel.indepFun_cond_comp

Details

No docstring.

theorem

Bandits.ArrayModel.indepFun_cond_comp.{u_3, u_4, u_5, u_6}
  {𝓐 : Type u_3} {β : Type u_4} {γ : Type u_5} {δ : Type u_6}
  {m𝓐 : MeasurableSpace 𝓐} {mβ : MeasurableSpace β}
  {mγ : MeasurableSpace γ} {mδ : MeasurableSpace δ}
  [MeasurableSingletonClass δ] {μ : MeasureTheory.Measure 𝓐} {X : 𝓐 → β}
  {Y : 𝓐 → γ} (hXY : ProbabilityTheory.IndepFun X Y μ)
  (hY : Measurable Y) {Z : γ → δ} (hZ : Measurable Z) (z : δ) :
  ProbabilityTheory.IndepFun X Y μ[|Z ∘ Y ⁻¹' {z}]
Bandits.ArrayModel.indepFun_cond_comp.{u_3,
    u_4, u_5, u_6}
  {𝓐 : Type u_3} {β : Type u_4}
  {γ : Type u_5} {δ : Type u_6}
  {m𝓐 : MeasurableSpace 𝓐}
  {mβ : MeasurableSpace β}
  {mγ : MeasurableSpace γ}
  {mδ : MeasurableSpace δ}
  [MeasurableSingletonClass δ]
  {μ : MeasureTheory.Measure 𝓐}
  {X : 𝓐 → β} {Y : 𝓐 → γ}
  (hXY : ProbabilityTheory.IndepFun X Y μ)
  (hY : Measurable Y) {Z : γ → δ}
  (hZ : Measurable Z) (z : δ) :
  ProbabilityTheory.IndepFun X Y
    μ[|Z ∘ Y ⁻¹' {z}]

Code

lemma indepFun_cond_comp {𝓐 β γ δ : Type*} {m𝓐 : MeasurableSpace 𝓐} {mβ : MeasurableSpace β}
    {mγ : MeasurableSpace γ} {mδ : MeasurableSpace δ} [MeasurableSingletonClass δ] {μ : Measure 𝓐}
    {X : 𝓐 → β} {Y : 𝓐 → γ} (hXY : X ⟂ᵢ[μ] Y) (hY : Measurable Y)
    {Z : γ → δ} (hZ : Measurable Z) (z : δ) :
    X ⟂ᵢ[μ[|(Z ∘ Y) ⁻¹' {z}]] Y

Body uses (1)

indepFun_cond_of_indepFun

Used by (1)

indepFun_snd_hist_cond

Actions: Source · Open Issue

Proof

by
  have h_preim : (Z ∘ Y) ⁻¹' {z} = Y ⁻¹' (Z ⁻¹' {z}) := by grind
  simp_rw [h_preim]
  exact indepFun_cond_of_indepFun hXY hY (hZ (measurableSet_singleton z))

`indepFun_snd_hist_cond`🔗

LemmaBandits.ArrayModel.indepFun_snd_hist_cond

Details

No docstring.

theorem

Bandits.ArrayModel.indepFun_snd_hist_cond.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [StandardBorelSpace R] [Nonempty R] [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (n m : ℕ) :
  ProbabilityTheory.IndepFun (fun ω => Prod.snd ω m a)
    (fun x => hist alg x n)
    (arrayMeasure
        ν)[|(fun ω =>
          (action alg (n + 1) ω,
            Learning.pullCount (action alg) (action alg (n + 1) ω)
              (n + 1) ω)) ⁻¹'
        {(a, m)}]
Bandits.ArrayModel.indepFun_snd_hist_cond.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [StandardBorelSpace R] [Nonempty R]
  [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  (a : 𝓐) (n m : ℕ) :
  ProbabilityTheory.IndepFun
    (fun ω => Prod.snd ω m a)
    (fun x => hist alg x n)
    (arrayMeasure
        ν)[|(fun ω =>
          (action alg (n + 1) ω,
            Learning.pullCount
              (action alg)
              (action alg (n + 1) ω)
              (n + 1) ω)) ⁻¹'
        {(a, m)}]

Code

lemma indepFun_snd_hist_cond [Countable 𝓐] (alg : Algorithm 𝓐 R)
    (ν : Kernel 𝓐 R) [IsMarkovKernel ν] (a : 𝓐) (n m : ℕ) :
    (fun ω ↦ ω.2 m a) ⟂ᵢ[(arrayMeasure ν)[|(fun ω ↦ (action alg (n + 1) ω,
      pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω)) ⁻¹' {(a, m)}]]
    (hist alg · n)

Type uses (7)

Body uses (10)

Used by (1)

hasCondDistrib_reward_hist_action_pullCount

Actions: Source · Open Issue

Proof

by
  have h_meas := measurable_hist_truePast alg a n
  refine IndepFun.of_measurable_right ?_ h_meas
  have h_ae_eq : truePast alg a n =ᵐ[(arrayMeasure ν)[|(fun ω ↦ (action alg (n + 1) ω,
        pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω)) ⁻¹' {(a, m)}]]
      (fun ω ↦ (ω.1, fun k b ↦ if b = a then if m ≠ 0 then ω.2 (min k (m - 1)) b
        else Nonempty.some inferInstance else ω.2 k b)) := by
    refine ae_cond_of_forall_mem ?_ fun x hx ↦ ?_
    · refine (measurableSet_singleton _).preimage ?_
      have h_meas_pc : Measurable fun ω ↦
          pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω := by
        change Measurable ((fun p : (probSpace 𝓐 R) × 𝓐 ↦ pullCount (action alg) p.2 (n + 1) p.1) ∘
          (fun ω : probSpace 𝓐 R ↦ (ω, action alg (n + 1) ω)))
        exact (measurable_uncurry_pullCount (by fun_prop) _).comp (by fun_prop)
      fun_prop
    simp only [Set.mem_preimage, Set.mem_singleton_iff, Prod.mk.injEq] at hx
    simp only [truePast]
    congr with i b
    by_cases hb : b = a
    · simp only [hb, ↓reduceIte]
      simp only [hx.1, true_and] at hx
      congr!
    · simp [hb]
  refine IndepFun.congr ?_ EventuallyEq.rfl h_ae_eq.symm
  suffices (fun ω ↦ ω.2 m a) ⟂ᵢ[(arrayMeasure ν)[|(({ω | action alg (n + 1) ω = a ∧
        pullCount (action alg) a (n + 1) ω = m}).indicator (fun _ ↦ 1)) ⁻¹' {1}]]
      fun ω ↦ (ω.1, fun k b ↦ if b = a then if m ≠ 0 then ω.2 (min k (m - 1)) b
        else Nonempty.some inferInstance else ω.2 k b) by
    convert this using 1
    · rfl
    · rfl
    · rfl
    congr with ω
    simp only [Set.mem_preimage, Set.mem_singleton_iff, Prod.mk.injEq, Set.indicator_apply,
      Set.mem_setOf_eq, ite_eq_left_iff, not_and, zero_ne_one, imp_false,
      Classical.not_imp, Decidable.not_not, and_congr_right_iff]
    intro ha
    simp [ha]
  have h_meas := measurable_stepsUntil alg a m n
  obtain ⟨f, hf, hf_eq⟩ := h_meas.exists_eq_measurable_comp
  simp_rw [hf_eq]
  refine indepFun_cond_comp (Z := f) (z := 1) ?_ ?_ hf
  · exact indepFun_snd_apply_aux ν a m
  · refine Measurable.prodMk (by fun_prop) ?_
    simp_rw [measurable_pi_iff]
    intro i b
    refine Measurable.ite (MeasurableSet.const _) ?_ (by fun_prop)
    refine Measurable.ite (MeasurableSet.const _) (by fun_prop) (by fun_prop)

`hasLaw_action_zero`🔗

LemmaBandits.ArrayModel.hasLaw_action_zero

Details

No docstring.

theorem

Bandits.ArrayModel.hasLaw_action_zero.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐] [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] :
  ProbabilityTheory.HasLaw (action alg 0) (Learning.Algorithm.p0 alg)
    (arrayMeasure ν)
Bandits.ArrayModel.hasLaw_action_zero.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] :
  ProbabilityTheory.HasLaw (action alg 0)
    (Learning.Algorithm.p0 alg)
    (arrayMeasure ν)

Code

lemma hasLaw_action_zero (alg : Algorithm 𝓐 R) (ν : Kernel 𝓐 R) [IsMarkovKernel ν] :
    HasLaw (action alg 0) alg.p0 (arrayMeasure ν) where
  map_eq

Type uses (5)

Body uses (6)

Used by (1)

isAlgEnvSeq_arrayMeasure

Actions: Source · Open Issue

Proof

by
    calc (arrayMeasure ν).map (fun ω ↦ initAlgFunction alg (ω.1 0))
    _ = ((arrayMeasure ν).fst.map (Function.eval 0)).map (initAlgFunction alg) := by
      rw [Measure.fst, Measure.map_map (by fun_prop) (by fun_prop),
        Measure.map_map (by fun_prop) (by fun_prop)]
      rfl
    _ = (volume : Measure I).map (initAlgFunction alg) := by
      simp only [arrayMeasure, Measure.fst_prod]
      rw [(measurePreserving_eval_infinitePi (fun _ ↦ volume) 0).map_eq]
    _ = alg.p0 := initAlgFunction_map alg

`hasCondDistrib_reward_zero`🔗

LemmaBandits.ArrayModel.hasCondDistrib_reward_zero

Details

No docstring.

theorem

Bandits.ArrayModel.hasCondDistrib_reward_zero.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐] [Countable 𝓐]
  [StandardBorelSpace R] [Nonempty R] (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] :
  ProbabilityTheory.HasCondDistrib (reward alg 0) (action alg 0) ν
    (arrayMeasure ν)
Bandits.ArrayModel.hasCondDistrib_reward_zero.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Countable 𝓐] [StandardBorelSpace R]
  [Nonempty R]
  (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] :
  ProbabilityTheory.HasCondDistrib
    (reward alg 0) (action alg 0) ν
    (arrayMeasure ν)

Code

lemma hasCondDistrib_reward_zero (alg : Algorithm 𝓐 R) (ν : Kernel 𝓐 R) [IsMarkovKernel ν] :
    HasCondDistrib (reward alg 0) (action alg 0) ν (arrayMeasure ν)

Type uses (6)

Body uses (13)

Used by (1)

isAlgEnvSeq_arrayMeasure

Actions: Source · Open Issue

Proof

by
  refine hasCondDistrib_of_condDistrib_eq (by fun_prop) (by fun_prop) ?_
  refine (condDistrib_ae_eq_cond (by fun_prop) (by fun_prop)).trans ?_
  rw [Filter.EventuallyEq, ae_iff_of_countable]
  intro a ha
  simp only [reward_zero]
  calc ((arrayMeasure ν)[|action alg 0 ⁻¹' {a}]).map (fun ω ↦ ω.2 0 (action alg 0 ω))
  _ = ((arrayMeasure ν)[|action alg 0 ⁻¹' {a}]).map (fun ω ↦ ω.2 0 a) := by
    refine Measure.map_congr
      (ae_cond_of_forall_mem ((measurableSet_singleton _).preimage (by fun_prop)) ?_)
    intro x hx
    simp only [Set.mem_preimage, Set.mem_singleton_iff] at hx
    simp [hx]
  _ = ν a := by
    rw [cond_of_indepFun]
    · exact map_snd_apply_arrayMeasure 0 a
    · have : (fun ω ↦ ω.1 0) ⟂ᵢ[arrayMeasure ν] fun ω ↦ ω.2 0 a :=
        indepFun_fst_zero_snd_zero_action ν a
      rw [action_zero]
      exact this.comp (φ := initAlgFunction alg) (by fun_prop) measurable_id
    · fun_prop
    · fun_prop
    · simp
    · rwa [Measure.map_apply (by fun_prop) (by simp)] at ha

`hasCondDistrib_action'`🔗

LemmaBandits.ArrayModel.hasCondDistrib_action'

Details

No docstring.

theorem

Bandits.ArrayModel.hasCondDistrib_action'.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐] [Countable 𝓐]
  [StandardBorelSpace R] [Nonempty R] (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
  ProbabilityTheory.HasCondDistrib (action alg (n + 1))
    (fun x => hist alg x n) (Learning.Algorithm.policy alg n)
    (arrayMeasure ν)
Bandits.ArrayModel.hasCondDistrib_action'.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Countable 𝓐] [StandardBorelSpace R]
  [Nonempty R]
  (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  (n : ℕ) :
  ProbabilityTheory.HasCondDistrib
    (action alg (n + 1))
    (fun x => hist alg x n)
    (Learning.Algorithm.policy alg n)
    (arrayMeasure ν)

Code

lemma hasCondDistrib_action' (alg : Algorithm 𝓐 R) (ν : Kernel 𝓐 R) [IsMarkovKernel ν] (n : ℕ) :
    HasCondDistrib (action alg (n + 1)) (hist alg · n) (alg.policy n) (arrayMeasure ν)

Type uses (6)

Body uses (11)

Used by (1)

hasCondDistrib_action

Actions: Source · Open Issue

Proof

by
  rw [action_add_one_eq]
  have h_fun ω := algFunction_map alg n (hist alg ω n)
  refine ⟨by fun_prop, ?_⟩
  have h_indep : (arrayMeasure ν).map (fun ω ↦ (ω.1 (n + 1), hist alg ω n)) =
      (ℙ).prod ((arrayMeasure ν).map (hist alg · n)) := by
    have h_indep' := indepFun_fst_add_one_hist alg ν n
    rw [indepFun_iff_map_prod_eq_prod_map_map (by fun_prop) (by fun_prop)] at h_indep'
    rw [h_indep']
    congr
    simp only [arrayMeasure]
    calc ((Measure.infinitePi fun x ↦ ℙ).prod (streamMeasure ν)).map (fun ω ↦ ω.1 (n + 1))
    _ = (Measure.infinitePi fun x ↦ ℙ).map (Function.eval (n + 1)) := by
      nth_rw 2 [← Measure.fst_prod (μ := Measure.infinitePi fun x ↦ ℙ)
        (ν := streamMeasure ν)]
      rw [Measure.fst, Measure.map_map (by fun_prop) (by fun_prop)]
      rfl
    _ = ℙ := by rw [Measure.infinitePi_map_eval]
  have : (fun x ↦ (hist alg x n, algFunction alg n (hist alg x n) (x.1 (n + 1)))) =
      (fun p ↦ (p.2, algFunction alg n (p.2) (p.1))) ∘ (fun x ↦ (x.1 (n + 1), hist alg x n)) := rfl
  rw [this, ← Measure.map_map (by fun_prop) (by fun_prop), h_indep]
  have : (ℙ : Measure I).prod ((arrayMeasure ν).map (hist alg · n)) =
      ((Kernel.const _ ℙ) ×ₖ Kernel.id) ∘ₘ ((arrayMeasure ν).map (hist alg · n)) := by
    have h := Measure.compProd_const (μ := (arrayMeasure ν).map (hist alg · n))
      (ν := (ℙ : Measure I))
    rw [Measure.compProd_eq_comp_prod] at h
    rw [← Measure.prod_swap, ← h, ← Measure.deterministic_comp_eq_map (by fun_prop),
      Measure.comp_assoc, ← Kernel.swap, Kernel.swap_prod]
  rw [this, ← Measure.deterministic_comp_eq_map (by fun_prop),
    ← Measure.deterministic_comp_eq_map (by fun_prop), Measure.compProd_eq_comp_prod,
    Measure.comp_assoc, Measure.comp_assoc, Measure.comp_assoc]
  congr 2
  ext ω : 1
  simp only [Kernel.deterministic_comp_eq_map, Kernel.comp_deterministic_eq_comap, Kernel.coe_comap,
    Function.comp_apply]
  rw [Kernel.map_apply _ (by fun_prop), Kernel.prod_apply, Kernel.const_apply, Kernel.id_apply,
    Kernel.prod_apply, Kernel.id_apply, ← h_fun]
  calc (((ℙ).prod (Measure.dirac (hist alg ω n)))).map (fun p ↦ (p.2, algFunction alg n p.2 p.1))
  _ = (((ℙ).prod (Measure.dirac (hist alg ω n))).map Prod.swap).map
      (fun p ↦ (p.1, algFunction alg n p.1 p.2)) := by
    rw [Measure.map_map (by fun_prop) (by fun_prop)]
    rfl
  _ = ((Measure.dirac (hist alg ω n)).prod ℙ).map (fun p ↦ (p.1, algFunction alg n p.1 p.2)) := by
    rw [Measure.prod_swap]
  _ = (Measure.dirac (hist alg ω n)).prod ((ℙ).map (algFunction alg n (hist alg ω n))) := by
    ext s hs
    rw [Measure.map_apply (by fun_prop) hs, Measure.prod_apply, lintegral_dirac, Measure.prod_apply,
      lintegral_dirac, Measure.map_apply (by fun_prop)]
    · congr
    · exact hs.preimage (by fun_prop)
    · exact hs
    · exact hs.preimage (by fun_prop)

`hasCondDistrib_reward_pullCount_action`🔗

LemmaBandits.ArrayModel.hasCondDistrib_reward_pullCount_action

Details

The conditional distribution of the reward at time n + 1, given the action at time n + 1 and the number of times that action has been pulled before time n + 1, is equal to the kernel ν.

theorem

Bandits.ArrayModel.hasCondDistrib_reward_pullCount_action.{u_1, u_2}
  {𝓐 : Type u_1} {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐] [StandardBorelSpace 𝓐]
  [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace R] [Nonempty R]
  (alg : Learning.Algorithm 𝓐 R) (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
  ProbabilityTheory.HasCondDistrib (reward alg (n + 1))
    (fun ω =>
      (action alg (n + 1) ω,
        Learning.pullCount (action alg) (action alg (n + 1) ω) (n + 1)
          ω))
    (ProbabilityTheory.Kernel.prodMkRight ℕ ν) (arrayMeasure ν)
Bandits.ArrayModel.hasCondDistrib_reward_pullCount_action.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Countable 𝓐] [StandardBorelSpace R]
  [Nonempty R]
  (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  (n : ℕ) :
  ProbabilityTheory.HasCondDistrib
    (reward alg (n + 1))
    (fun ω =>
      (action alg (n + 1) ω,
        Learning.pullCount (action alg)
          (action alg (n + 1) ω) (n + 1)
          ω))
    (ProbabilityTheory.Kernel.prodMkRight
      ℕ ν)
    (arrayMeasure ν)

Code

lemma hasCondDistrib_reward_pullCount_action
    (alg : Algorithm 𝓐 R) (ν : Kernel 𝓐 R) [IsMarkovKernel ν] (n : ℕ) :
    HasCondDistrib (reward alg (n + 1))
      (fun ω ↦ (action alg (n + 1) ω, pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω))
      (ν.prodMkRight _) (arrayMeasure ν)

Type uses (7)

Body uses (14)

Used by (1)

hasCondDistrib_reward'

Actions: Source · Open Issue

Proof

by
  have h_meas : Measurable fun ω ↦ pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω := by
    change Measurable ((fun p : (probSpace 𝓐 R) × 𝓐 ↦ pullCount (action alg) p.2 (n + 1) p.1) ∘
      (fun ω : probSpace 𝓐 R ↦ (ω, action alg (n + 1) ω)))
    exact (measurable_uncurry_pullCount (by fun_prop) _).comp (by fun_prop)
  refine hasCondDistrib_of_condDistrib_eq (by fun_prop) (by fun_prop) ?_
  refine (condDistrib_ae_eq_cond
    (Measurable.prodMk (by fun_prop) (by fun_prop)) (by fun_prop)).trans ?_
  rw [Filter.EventuallyEq, ae_iff_of_countable]
  intro ⟨a, m⟩ ham
  simp only [Kernel.prodMkRight_apply]
  calc
    Measure.map (reward alg (n + 1))
      (arrayMeasure ν)[|(fun ω ↦ (action alg (n + 1) ω,
        pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω)) ⁻¹' {(a, m)}]
  _ = Measure.map (fun ω ↦ ω.2 m a)
      (arrayMeasure ν)[|(fun ω ↦ (action alg (n + 1) ω,
        pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω)) ⁻¹' {(a, m)}] := by
    rw [reward_eq]
    refine Measure.map_congr
      (ae_cond_of_forall_mem ((measurableSet_singleton _).preimage (by fun_prop)) (fun x hx ↦ ?_))
    simp only [Set.mem_preimage, Set.mem_singleton_iff, Prod.mk.injEq] at hx
    simp only [hx.1] at hx ⊢
    simp [hx.2]
  _ = Measure.map (fun ω ↦ ω.2 m a)
      (arrayMeasure ν)[|({ω | action alg (n + 1) ω = a ∧
        pullCount (action alg) a (n + 1) ω = m}).indicator 1 ⁻¹' {1}] := by
    congr with ω
    simp only [Set.mem_preimage, Set.mem_singleton_iff, Prod.mk.injEq, Set.indicator_apply,
      Set.mem_setOf_eq, Pi.one_apply, ite_eq_left_iff, not_and, zero_ne_one, imp_false,
      Classical.not_imp, Decidable.not_not, and_congr_right_iff]
    intro ha
    simp [ha]
  _ = ν a := by
    rw [cond_of_indepFun, map_snd_apply_arrayMeasure m a]
    · exact (indepFun_snd_apply_pullCount_action alg ν a m n).symm
    · refine Measurable.indicator (by fun_prop) ?_
      exact MeasurableSet.inter ((measurableSet_singleton _).preimage (by fun_prop))
        ((measurableSet_singleton _).preimage (by fun_prop))
    · fun_prop
    · simp
    · rw [Measure.map_apply (by fun_prop) (by simp)] at ham
      convert ham
      ext ω
      simp only [Set.mem_preimage, Set.indicator_apply, Set.mem_setOf_eq, Pi.one_apply,
        Set.mem_singleton_iff, ite_eq_left_iff, not_and, zero_ne_one, imp_false, Classical.not_imp,
        Decidable.not_not, Prod.mk.injEq, and_congr_right_iff]
      intro ha
      simp [ha]

`reward_ae_eq_cond`🔗

LemmaBandits.ArrayModel.reward_ae_eq_cond

Details

No docstring.

theorem

Bandits.ArrayModel.reward_ae_eq_cond.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐] [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R) (ν : ProbabilityTheory.Kernel 𝓐 R)
  (a : 𝓐) (n m : ℕ) :
  reward alg
      (n +
        1) =ᵐ[(arrayMeasure
        ν)[|(fun ω =>
          (action alg (n + 1) ω,
            Learning.pullCount (action alg) (action alg (n + 1) ω)
              (n + 1) ω)) ⁻¹'
        {(a, m)}]]
    fun ω => Prod.snd ω m a
Bandits.ArrayModel.reward_ae_eq_cond.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Countable 𝓐]
  (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  (a : 𝓐) (n m : ℕ) :
  reward alg
      (n +
        1) =ᵐ[(arrayMeasure
        ν)[|(fun ω =>
          (action alg (n + 1) ω,
            Learning.pullCount
              (action alg)
              (action alg (n + 1) ω)
              (n + 1) ω)) ⁻¹'
        {(a, m)}]]
    fun ω => Prod.snd ω m a

Code

lemma reward_ae_eq_cond (alg : Algorithm 𝓐 R) (ν : Kernel 𝓐 R) (a : 𝓐) (n m : ℕ) :
    reward alg (n + 1) =ᵐ[(arrayMeasure ν)[|(fun ω ↦ (action alg (n + 1) ω,
        pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω)) ⁻¹' {(a, m)}]]
      (fun ω ↦ ω.2 m a)

Type uses (7)

Body uses (5)

Used by (1)

hasCondDistrib_reward_hist_action_pullCount

Actions: Source · Open Issue

Proof

by
  rw [reward_eq]
  refine ae_cond_of_forall_mem ?_ ?_
  · have : Measurable fun ω ↦ pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω := by
      change Measurable ((fun p : (probSpace 𝓐 R) × 𝓐 ↦ pullCount (action alg) p.2 (n + 1) p.1) ∘
        (fun ω : probSpace 𝓐 R ↦ (ω, action alg (n + 1) ω)))
      exact (measurable_uncurry_pullCount (by fun_prop) _).comp (by fun_prop)
    exact (measurableSet_singleton _).preimage (by fun_prop)
  intro ω hω
  simp only [Set.mem_preimage, Set.mem_singleton_iff, Prod.mk.injEq] at hω
  simp only [hω.2]
  simp [hω.1]

`hasCondDistrib_reward_hist_action_pullCount`🔗

LemmaBandits.ArrayModel.hasCondDistrib_reward_hist_action_pullCount

Details

The conditional distribution of the reward at time n + 1, given the history up to time n, the action at time n + 1, and the number of times that action has been pulled before time n + 1, is equal to the kernel ν.

theorem

Bandits.ArrayModel.hasCondDistrib_reward_hist_action_pullCount.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐] [StandardBorelSpace 𝓐]
  [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace R] [Nonempty R]
  (alg : Learning.Algorithm 𝓐 R) (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
  ProbabilityTheory.HasCondDistrib (reward alg (n + 1))
    (fun ω =>
      (hist alg ω n, action alg (n + 1) ω,
        Learning.pullCount (action alg) (action alg (n + 1) ω) (n + 1)
          ω))
    (ProbabilityTheory.Kernel.prodMkLeft (↥(Finset.Iic n) → 𝓐 × R)
      (ProbabilityTheory.Kernel.prodMkRight ℕ ν))
    (arrayMeasure ν)
Bandits.ArrayModel.hasCondDistrib_reward_hist_action_pullCount.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Countable 𝓐] [StandardBorelSpace R]
  [Nonempty R]
  (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  (n : ℕ) :
  ProbabilityTheory.HasCondDistrib
    (reward alg (n + 1))
    (fun ω =>
      (hist alg ω n, action alg (n + 1) ω,
        Learning.pullCount (action alg)
          (action alg (n + 1) ω) (n + 1)
          ω))
    (ProbabilityTheory.Kernel.prodMkLeft
      (↥(Finset.Iic n) → 𝓐 × R)
      (ProbabilityTheory.Kernel.prodMkRight
        ℕ ν))
    (arrayMeasure ν)

Code

lemma hasCondDistrib_reward_hist_action_pullCount
    (alg : Algorithm 𝓐 R) (ν : Kernel 𝓐 R) [IsMarkovKernel ν] (n : ℕ) :
    HasCondDistrib (reward alg (n + 1))
      (fun ω ↦ (hist alg ω n, action alg (n + 1) ω,
        pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω))
      ((ν.prodMkRight _).prodMkLeft _) (arrayMeasure ν)

Type uses (8)

Body uses (17)

Used by (1)

condIndepFun_reward_hist

Actions: Source · Open Issue

Proof

by
  have h_meas : Measurable fun ω ↦ pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω := by
    change Measurable ((fun p : (probSpace 𝓐 R) × 𝓐 ↦ pullCount (action alg) p.2 (n + 1) p.1) ∘
      (fun ω : probSpace 𝓐 R ↦ (ω, action alg (n + 1) ω)))
    exact (measurable_uncurry_pullCount (by fun_prop) _).comp (by fun_prop)
  refine hasCondDistrib_of_condDistrib_eq (by fun_prop) (by fun_prop) ?_
  refine condDistrib_prod_of_forall_condDistrib_cond (by fun_prop) (by fun_prop) (by fun_prop) _ ?_
  intro (a, m) ham
  have h_eq : ((ν.prodMkRight _).prodMkLeft _).comap (fun ω : (Iic n → 𝓐 × R) ↦ (ω, a, m))
        (by fun_prop) =
      Kernel.const _ (ν a) := by ext; simp
  rw [h_eq, condDistrib_congr_left (reward_ae_eq_cond alg ν a n m)]
  refine (condDistrib_of_indepFun ?_ (by fun_prop) (by fun_prop)).trans (ae_of_all _ fun ω ↦ ?_)
  · exact (indepFun_snd_hist_cond alg ν a n m).symm
  · simp only [Kernel.const_apply]
    have : (fun ω ↦ (action alg (n + 1) ω,
          pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω)) ⁻¹' {(a, m)} =
        ({ω | action alg (n + 1) ω = a ∧
          pullCount (action alg) a (n + 1) ω = m}).indicator 1 ⁻¹' {1} := by
      ext ω
      simp [Set.indicator_apply]
      grind
    rw [this, cond_of_indepFun, map_snd_apply_arrayMeasure m a]
    · exact (indepFun_snd_apply_pullCount_action alg ν a m n).symm
    · refine Measurable.indicator (by fun_prop) ?_
      exact MeasurableSet.inter ((measurableSet_singleton _).preimage (by fun_prop))
        ((measurableSet_singleton _).preimage (by fun_prop))
    · fun_prop
    · simp
    · convert ham
      ext ω
      simp only [Set.mem_preimage, Set.indicator_apply, Set.mem_setOf_eq, Pi.one_apply,
        Set.mem_singleton_iff, ite_eq_left_iff, not_and, zero_ne_one, imp_false, Classical.not_imp,
        Decidable.not_not, Prod.mk.injEq, and_congr_right_iff]
      intro ha
      simp [ha]

`condIndepFun_reward_hist`🔗

LemmaBandits.ArrayModel.condIndepFun_reward_hist

Details

The reward at time n + 1 is conditionally independent of the history up to time n, given the action at time n + 1 and the number of times that action has been pulled before time n + 1.

theorem

Bandits.ArrayModel.condIndepFun_reward_hist.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐] [Countable 𝓐]
  [StandardBorelSpace R] [Nonempty R] (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
  ProbabilityTheory.CondIndepFun
    (MeasurableSpace.comap
      (fun ω =>
        (action alg (n + 1) ω,
          Learning.pullCount (action alg) (action alg (n + 1) ω) (n + 1)
            ω))
      inferInstance)
    ⋯ (reward alg (n + 1)) (fun x => hist alg x n) (arrayMeasure ν)
Bandits.ArrayModel.condIndepFun_reward_hist.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Countable 𝓐] [StandardBorelSpace R]
  [Nonempty R]
  (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  (n : ℕ) :
  ProbabilityTheory.CondIndepFun
    (MeasurableSpace.comap
      (fun ω =>
        (action alg (n + 1) ω,
          Learning.pullCount (action alg)
            (action alg (n + 1) ω) (n + 1)
            ω))
      inferInstance)
    ⋯ (reward alg (n + 1))
    (fun x => hist alg x n)
    (arrayMeasure ν)

Code

lemma condIndepFun_reward_hist (alg : Algorithm 𝓐 R) (ν : Kernel 𝓐 R) [IsMarkovKernel ν] (n : ℕ) :
    (reward alg (n + 1)) ⟂ᵢ[(fun ω ↦ (action alg (n + 1) ω,
          pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω)),
        Measurable.prodMk (by fun_prop) (measurable_pullCount_action_add_one alg n);
        arrayMeasure ν]
      (hist alg · n)

Type uses (12)

Body uses (5)

Used by (1)

hasCondDistrib_reward'

Actions: Source · Open Issue

Proof

by
  have h_cond := hasCondDistrib_reward_hist_action_pullCount alg ν n
  refine condIndepFun_of_exists_condDistrib_prod_ae_eq_prodMkLeft (by fun_prop) (by fun_prop) ?_
    h_cond.condDistrib_eq
  exact Measurable.prodMk (by fun_prop) (measurable_pullCount_action_add_one alg n)

`hasCondDistrib_reward'`🔗

LemmaBandits.ArrayModel.hasCondDistrib_reward'

Details

The conditional distribution of the reward at time n + 1, given the history up to time n and the action at time n + 1, is equal to the kernel ν.

theorem

Bandits.ArrayModel.hasCondDistrib_reward'.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐] [Countable 𝓐]
  [StandardBorelSpace R] [Nonempty R] (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
  ProbabilityTheory.HasCondDistrib (reward alg (n + 1))
    (fun ω => (hist alg ω n, action alg (n + 1) ω))
    (ProbabilityTheory.Kernel.prodMkLeft (↥(Finset.Iic n) → 𝓐 × R) ν)
    (arrayMeasure ν)
Bandits.ArrayModel.hasCondDistrib_reward'.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Countable 𝓐] [StandardBorelSpace R]
  [Nonempty R]
  (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  (n : ℕ) :
  ProbabilityTheory.HasCondDistrib
    (reward alg (n + 1))
    (fun ω =>
      (hist alg ω n,
        action alg (n + 1) ω))
    (ProbabilityTheory.Kernel.prodMkLeft
      (↥(Finset.Iic n) → 𝓐 × R) ν)
    (arrayMeasure ν)

Code

lemma hasCondDistrib_reward' (alg : Algorithm 𝓐 R) (ν : Kernel 𝓐 R) [IsMarkovKernel ν] (n : ℕ) :
    HasCondDistrib (reward alg (n + 1)) (fun ω ↦ (hist alg ω n, action alg (n + 1) ω))
      (ν.prodMkLeft _) (arrayMeasure ν)

Type uses (7)

Body uses (16)

Used by (1)

hasCondDistrib_reward

Actions: Source · Open Issue

Proof

by
  let R' := reward alg (n + 1)
  let H := (hist alg · n)
  let A := action alg (n + 1)
  let P := fun ω ↦ pullCount (action alg) (action alg (n + 1) ω) (n + 1) ω
  have hP : Measurable P := measurable_pullCount_action_add_one alg n
  change HasCondDistrib R' (fun ω ↦ (H ω, A ω)) (ν.prodMkLeft _) _
  suffices HasCondDistrib R' (fun ω ↦ (A ω, H ω)) (ν.prodMkRight _) (arrayMeasure ν) by
    have h_eq : (fun ω ↦ (H ω, A ω)) = MeasurableEquiv.prodComm ∘ (fun ω ↦ (A ω, H ω)) := rfl
    rw [h_eq]
    exact this.measurableEquiv_comp_right (κ := ν.prodMkRight _) _
  suffices HasCondDistrib R' (fun ω ↦ ((A ω, H ω), P ω))
      ((ν.prodMkRight _).prodMkRight _) (arrayMeasure ν) by
    -- use that `P` is measurable wrt `(A, H)` to drop it from the conditioning
    have hP_meas :
        Measurable[MeasurableSpace.comap (fun ω ↦ (A ω, H ω)) inferInstance] P :=
      measurable_pullCount_action_add_one_hist alg n
    obtain ⟨f, hf_meas, hf_eq⟩ := hP_meas.exists_eq_measurable_comp
    simp only [hf_eq, Function.comp_apply] at this
    rwa [hasCondDistrib_prod_right_iff _ _ hf_meas] at this
  suffices HasCondDistrib R' (fun ω ↦ ((A ω, P ω), H ω))
      ((ν.prodMkRight _).prodMkRight _) (arrayMeasure ν) by
    let e : ((𝓐 × ℕ) × (Iic n → 𝓐 × R)) ≃ᵐ ((𝓐 × (Iic n → 𝓐 × R)) × ℕ) :=
    { toFun := fun x ↦ ((x.1.1, x.2), x.1.2)
      invFun := fun x ↦ ((x.1.1, x.2), x.1.2)
      measurable_toFun := by simp only [Equiv.coe_fn_mk]; fun_prop
      measurable_invFun := by simp only [Equiv.symm_mk, Equiv.coe_fn_mk]; fun_prop }
    exact this.measurableEquiv_comp_right e
  suffices HasCondDistrib R' (fun ω ↦ (A ω, P ω)) (ν.prodMkRight _) (arrayMeasure ν) by
    have h_indep : H ⟂ᵢ[(fun ω ↦ (A ω, P ω)), (by fun_prop); arrayMeasure ν] R' :=
      (condIndepFun_reward_hist alg ν n).symm
    have h_condDistrib := this.condDistrib_eq
    rw [condIndepFun_iff_condDistrib_prod_ae_eq_prodMkRight (by fun_prop) (by fun_prop)
      (by fun_prop)] at h_indep
    refine hasCondDistrib_of_condDistrib_eq (by fun_prop) (by fun_prop) ?_
    refine h_indep.trans ?_
    rw [Filter.EventuallyEq, ae_map_iff] at h_condDistrib ⊢
    · simpa only [Kernel.prodMkRight_apply]
    · fun_prop
    · exact Kernel.measurableSet_eq _ _
    · fun_prop
    · exact Kernel.measurableSet_eq _ _
  exact hasCondDistrib_reward_pullCount_action alg ν n

`hasCondDistrib_action`🔗

LemmaBandits.ArrayModel.hasCondDistrib_action

Details

No docstring.

theorem

Bandits.ArrayModel.hasCondDistrib_action.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐] [Countable 𝓐]
  [StandardBorelSpace R] [Nonempty R] (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
  ProbabilityTheory.HasCondDistrib (action alg (n + 1))
    (fun ω i => (action alg (↑i) ω, reward alg (↑i) ω))
    (Learning.Algorithm.policy alg n) (arrayMeasure ν)
Bandits.ArrayModel.hasCondDistrib_action.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Countable 𝓐] [StandardBorelSpace R]
  [Nonempty R]
  (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  (n : ℕ) :
  ProbabilityTheory.HasCondDistrib
    (action alg (n + 1))
    (fun ω i =>
      (action alg (↑i) ω,
        reward alg (↑i) ω))
    (Learning.Algorithm.policy alg n)
    (arrayMeasure ν)

Code

lemma hasCondDistrib_action (alg : Algorithm 𝓐 R) (ν : Kernel 𝓐 R) [IsMarkovKernel ν] (n : ℕ) :
    HasCondDistrib (action alg (n + 1))
      (fun ω (i : Iic n) ↦ (action alg i ω, reward alg i ω))
      (alg.policy n) (arrayMeasure ν)

Type uses (6)

Body uses (3)

Used by (1)

isAlgEnvSeq_arrayMeasure

Actions: Source · Open Issue

Proof

by
  convert hasCondDistrib_action' alg ν n with ω i
  · simp only [action]
    rw [hist_eq _ _ n]
  · simp only [reward]
    rw [hist_eq _ _ n]

`hasCondDistrib_reward`🔗

LemmaBandits.ArrayModel.hasCondDistrib_reward

Details

No docstring.

theorem

Bandits.ArrayModel.hasCondDistrib_reward.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐] [Countable 𝓐]
  [StandardBorelSpace R] [Nonempty R] (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] (n : ℕ) :
  ProbabilityTheory.HasCondDistrib (reward alg (n + 1))
    (fun ω =>
      (fun i => (action alg (↑i) ω, reward alg (↑i) ω),
        action alg (n + 1) ω))
    (Learning.Environment.feedback (Learning.stationaryEnv ν) n)
    (arrayMeasure ν)
Bandits.ArrayModel.hasCondDistrib_reward.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Countable 𝓐] [StandardBorelSpace R]
  [Nonempty R]
  (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν]
  (n : ℕ) :
  ProbabilityTheory.HasCondDistrib
    (reward alg (n + 1))
    (fun ω =>
      (fun i =>
        (action alg (↑i) ω,
          reward alg (↑i) ω),
        action alg (n + 1) ω))
    (Learning.Environment.feedback
      (Learning.stationaryEnv ν) n)
    (arrayMeasure ν)

Code

lemma hasCondDistrib_reward (alg : Algorithm 𝓐 R) (ν : Kernel 𝓐 R) [IsMarkovKernel ν]
    (n : ℕ) :
    HasCondDistrib (reward alg (n + 1))
      (fun ω ↦ (fun (i : Iic n) ↦ (action alg i ω, reward alg i ω), action alg (n + 1) ω))
      ((stationaryEnv ν).feedback n) (arrayMeasure ν)

Type uses (8)

Body uses (3)

Used by (1)

isAlgEnvSeq_arrayMeasure

Actions: Source · Open Issue

Proof

by
  convert hasCondDistrib_reward' alg ν n with ω i
  · simp only [action]
    rw [hist_eq _ _ n]
  · simp only [reward]
    rw [hist_eq _ _ n]
  · rfl

`isAlgEnvSeq_arrayMeasure`🔗

LemmaBandits.ArrayModel.isAlgEnvSeq_arrayMeasure

Details

No docstring.

theorem

Bandits.ArrayModel.isAlgEnvSeq_arrayMeasure.{u_1, u_2} {𝓐 : Type u_1}
  {R : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mR : MeasurableSpace R}
  [Nonempty 𝓐] [StandardBorelSpace 𝓐] [DecidableEq 𝓐] [Countable 𝓐]
  [StandardBorelSpace R] [Nonempty R] (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] :
  Learning.IsAlgEnvSeq (action alg) (reward alg) alg
    (Learning.stationaryEnv ν) (arrayMeasure ν)
Bandits.ArrayModel.isAlgEnvSeq_arrayMeasure.{u_1,
    u_2}
  {𝓐 : Type u_1} {R : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mR : MeasurableSpace R} [Nonempty 𝓐]
  [StandardBorelSpace 𝓐] [DecidableEq 𝓐]
  [Countable 𝓐] [StandardBorelSpace R]
  [Nonempty R]
  (alg : Learning.Algorithm 𝓐 R)
  (ν : ProbabilityTheory.Kernel 𝓐 R)
  [ProbabilityTheory.IsMarkovKernel ν] :
  Learning.IsAlgEnvSeq (action alg)
    (reward alg) alg
    (Learning.stationaryEnv ν)
    (arrayMeasure ν)

Code

lemma isAlgEnvSeq_arrayMeasure (alg : Algorithm 𝓐 R) (ν : Kernel 𝓐 R) [IsMarkovKernel ν] :
    IsAlgEnvSeq (action alg) (reward alg) alg (stationaryEnv ν) (arrayMeasure ν) where
  hasLaw_action_zero

Type uses (9)

Body uses (8)

Used by (3)

Actions: Source · Open Issue

Proof

hasLaw_action_zero alg ν
  hasCondDistrib_feedback_zero := hasCondDistrib_reward_zero alg ν
  hasCondDistrib_action := hasCondDistrib_action alg ν
  hasCondDistrib_feedback := hasCondDistrib_reward alg ν

2.3. Online.Bandit.ArrayProbSpace🔗

streamMeasure🔗

instIsProbabilityMeasureForallNatForallStreamMeasureOfIsMarkovKernel🔗

hasLaw_eval_infinitePi🔗

hasLaw_eval_streamMeasure🔗

hasLaw_eval_eval_streamMeasure🔗

identDistrib_eval_eval_id_streamMeasure🔗

integrable_eval_streamMeasure🔗

integral_eval_streamMeasure🔗

iIndepFun_eval_streamMeasure'🔗

iIndepFun_eval_streamMeasure''🔗

iIndepFun_eval_streamMeasure🔗

indepFun_eval_streamMeasure🔗

indepFun_eval_streamMeasure'🔗

probSpace🔗

instMeasurableSpaceProbSpace🔗

instStandardBorelSpaceProbSpaceOfCountable🔗

arrayMeasure🔗

instIsProbabilityMeasureProbSpaceArrayMeasureOfIsMarkovKernel🔗

initAlgFunction🔗

initAlgFunction_map🔗

measurable_initAlgFunction🔗

algFunction🔗

algFunction_map🔗

measurable_algFunction🔗

hist🔗

hist_zero🔗

hist_add_one🔗

hist_eq🔗

hist_add_one_eq_IicSuccProd'🔗

action🔗

action_zero🔗

action_add_one_eq🔗

reward🔗

reward_zero🔗

reward_add_one🔗

reward_eq🔗

sumRewards_eq🔗

measurable_action_add_one'🔗

measurable_pullCount'_action_add_one🔗

measurable_hist🔗

measurable_action🔗

measurable_reward🔗

hist_add_one_eq_IicSuccProd🔗

measurable_pullCount_action_add_one🔗

hist_congr🔗

stepsUntil_congr_aux🔗

stepsUntil_congr🔗

stepsUntil_indicator_congr🔗

measurable_hist_comap🔗

truePast🔗

truePast_eq_of_pullCount_eq🔗

truePast_eq_of_pullCount_eq_of_ne_zero🔗

measurable_hist_truePast🔗

measurable_action_add_one_truePast🔗

measurable_pullCount_add_one_truePast🔗

measurable_stepsUntil🔗

measurable_pullCount_action_add_one_hist🔗

map_snd_apply_arrayMeasure🔗

indepFun_fst_snd🔗

indepFun_fst_zero_snd_zero_action🔗

indepFun_fst_add_one_aux🔗

indepFun_fst_add_one_hist🔗

indepFun_snd_apply_aux🔗

indepFun_snd_apply_pullCount_action🔗

indepFun_cond_comp🔗

indepFun_snd_hist_cond🔗

hasLaw_action_zero🔗

hasCondDistrib_reward_zero🔗

hasCondDistrib_action'🔗

hasCondDistrib_reward_pullCount_action🔗

reward_ae_eq_cond🔗

hasCondDistrib_reward_hist_action_pullCount🔗

condIndepFun_reward_hist🔗

hasCondDistrib_reward'🔗

hasCondDistrib_action🔗

hasCondDistrib_reward🔗

isAlgEnvSeq_arrayMeasure🔗

`streamMeasure`🔗

`instIsProbabilityMeasureForallNatForallStreamMeasureOfIsMarkovKernel`🔗

`hasLaw_eval_infinitePi`🔗

`hasLaw_eval_streamMeasure`🔗

`hasLaw_eval_eval_streamMeasure`🔗

`identDistrib_eval_eval_id_streamMeasure`🔗

`integrable_eval_streamMeasure`🔗

`integral_eval_streamMeasure`🔗

`iIndepFun_eval_streamMeasure'`🔗

`iIndepFun_eval_streamMeasure''`🔗

`iIndepFun_eval_streamMeasure`🔗

`indepFun_eval_streamMeasure`🔗

`indepFun_eval_streamMeasure'`🔗

`probSpace`🔗

`instMeasurableSpaceProbSpace`🔗

`instStandardBorelSpaceProbSpaceOfCountable`🔗

`arrayMeasure`🔗

`instIsProbabilityMeasureProbSpaceArrayMeasureOfIsMarkovKernel`🔗

`initAlgFunction`🔗

`initAlgFunction_map`🔗

`measurable_initAlgFunction`🔗

`algFunction`🔗

`algFunction_map`🔗

`measurable_algFunction`🔗

`hist`🔗

`hist_zero`🔗

`hist_add_one`🔗

`hist_eq`🔗

`hist_add_one_eq_IicSuccProd'`🔗

`action`🔗

`action_zero`🔗

`action_add_one_eq`🔗

`reward`🔗

`reward_zero`🔗

`reward_add_one`🔗

`reward_eq`🔗

`sumRewards_eq`🔗

`measurable_action_add_one'`🔗

`measurable_pullCount'_action_add_one`🔗

`measurable_hist`🔗

`measurable_action`🔗

`measurable_reward`🔗

`hist_add_one_eq_IicSuccProd`🔗

`measurable_pullCount_action_add_one`🔗

`hist_congr`🔗

`stepsUntil_congr_aux`🔗

`stepsUntil_congr`🔗

`stepsUntil_indicator_congr`🔗

`measurable_hist_comap`🔗

`truePast`🔗

`truePast_eq_of_pullCount_eq`🔗

`truePast_eq_of_pullCount_eq_of_ne_zero`🔗

`measurable_hist_truePast`🔗

`measurable_action_add_one_truePast`🔗

`measurable_pullCount_add_one_truePast`🔗

`measurable_stepsUntil`🔗

`measurable_pullCount_action_add_one_hist`🔗

`map_snd_apply_arrayMeasure`🔗

`indepFun_fst_snd`🔗

`indepFun_fst_zero_snd_zero_action`🔗

`indepFun_fst_add_one_aux`🔗

`indepFun_fst_add_one_hist`🔗

`indepFun_snd_apply_aux`🔗

`indepFun_snd_apply_pullCount_action`🔗

`indepFun_cond_comp`🔗

`indepFun_snd_hist_cond`🔗

`hasLaw_action_zero`🔗

`hasCondDistrib_reward_zero`🔗

`hasCondDistrib_action'`🔗

`hasCondDistrib_reward_pullCount_action`🔗

`reward_ae_eq_cond`🔗

`hasCondDistrib_reward_hist_action_pullCount`🔗

`condIndepFun_reward_hist`🔗

`hasCondDistrib_reward'`🔗

`hasCondDistrib_action`🔗

`hasCondDistrib_reward`🔗

`isAlgEnvSeq_arrayMeasure`🔗