LeanMachineLearning exposition

2.2. Online.Bandit.SumRewards🔗

Law of the sum of rewards

Module LeanMachineLearning.Online.Bandit.SumRewards contains 32 exposed declarations.

identDistrib_sum_range_snd🔗

LemmaBandits.ArrayModel.identDistrib_sum_range_snd

No docstring.

🔗theorem
Bandits.ArrayModel.identDistrib_sum_range_snd.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [Countable 𝓐] {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (k : ) : ProbabilityTheory.IdentDistrib (fun ω => i Finset.range k, Prod.snd ω i a) (fun ω => i Finset.range k, ω i a) (arrayMeasure ν) (streamMeasure ν)
Bandits.ArrayModel.identDistrib_sum_range_snd.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [Countable 𝓐] {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (k : ) : ProbabilityTheory.IdentDistrib (fun ω => i Finset.range k, Prod.snd ω i a) (fun ω => i Finset.range k, ω i a) (arrayMeasure ν) (streamMeasure ν)

Code

lemma identDistrib_sum_range_snd (a : 𝓐) (k : ℕ) :
    IdentDistrib (fun ω ↦ ∑ i ∈ range k, ω.2 i a) (fun ω ↦ ∑ i ∈ range k, ω i a)
      𝔓 (streamMeasure ν) where
  aemeasurable_fst
Type uses (4)
Body uses (1)
Used by (2)

Actions: Source · Open Issue

Proof
by fun_prop
  aemeasurable_snd := (measurable_sum _ fun i _ ↦ by fun_prop).aemeasurable
  map_eq := by
    rw [← Measure.snd_prod (μ := (Measure.infinitePi fun (_ : ℕ) ↦ (volume : Measure unitInterval)))
      (ν := streamMeasure ν), Measure.snd, Measure.map_map (by fun_prop) (by fun_prop)]
    rfl

prob_pullCount_prod_sumRewards_mem_le🔗

LemmaBandits.ArrayModel.prob_pullCount_prod_sumRewards_mem_le

No docstring.

🔗theorem
Bandits.ArrayModel.prob_pullCount_prod_sumRewards_mem_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (n : ) {s : Set ( × )} [DecidablePred fun x => x Prod.fst '' s] (hs : MeasurableSet s) : (arrayMeasure ν) {ω | (Learning.pullCount (action alg) a n ω, Learning.sumRewards (action alg) (reward alg) a n ω) s} k Finset.range (n + 1) with k Prod.fst '' s, (streamMeasure ν) {ω | i Finset.range k, ω i a Prod.mk k ⁻¹' s}
Bandits.ArrayModel.prob_pullCount_prod_sumRewards_mem_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (n : ) {s : Set ( × )} [DecidablePred fun x => x Prod.fst '' s] (hs : MeasurableSet s) : (arrayMeasure ν) {ω | (Learning.pullCount (action alg) a n ω, Learning.sumRewards (action alg) (reward alg) a n ω) s} k Finset.range (n + 1) with k Prod.fst '' s, (streamMeasure ν) {ω | i Finset.range k, ω i a Prod.mk k ⁻¹' s}

Code

lemma prob_pullCount_prod_sumRewards_mem_le (a : 𝓐) (n : ℕ)
    {s : Set (ℕ × ℝ)} [DecidablePred (· ∈ Prod.fst '' s)] (hs : MeasurableSet s) :
    𝔓 {ω | (pullCount A a n ω, sumRewards A R a n ω) ∈ s} ≤
      ∑ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
        streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s}
Type uses (9)
Body uses (3)
Used by (2)

Actions: Source · Open Issue

Proof
by
  simp_rw [sumRewards_eq]
  calc 𝔓 ((fun ω ↦ (pullCount A a n ω, ∑ i ∈ range (pullCount A a n ω), ω.2 i a)) ⁻¹' s)
  _ ≤ 𝔓 {ω | ∃ k ≤ n, (k, ∑ i ∈ range k, ω.2 i a) ∈ s} := by
    refine measure_mono fun ω hω ↦ ?_
    simp only [Set.mem_setOf_eq] at hω ⊢
    exact ⟨pullCount A a n ω, pullCount_le _ _ _, hω⟩
  _ = 𝔓 (⋃ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
      {ω | (k, ∑ i ∈ range k, ω.2 i a) ∈ s}) := by congr 1; ext; simp; grind
  _ ≤ ∑ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
      𝔓 {ω | ∑ i ∈ range k, ω.2 i a ∈ Prod.mk k ⁻¹' s} := measure_biUnion_finset_le _ _
  _ = ∑ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
      (streamMeasure ν) {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s} := by
    congr with k
    have : (𝔓).map (fun ω ↦ ∑ i ∈ range k, ω.2 i a) =
        (streamMeasure ν).map (fun ω ↦ ∑ i ∈ range k, ω i a) :=
      (identDistrib_sum_range_snd a k).map_eq
    rw [Measure.ext_iff] at this
    specialize this (Prod.mk k ⁻¹' s) (hs.preimage (by fun_prop))
    rwa [Measure.map_apply (by fun_prop) (hs.preimage (by fun_prop)),
      Measure.map_apply (by fun_prop) (hs.preimage (by fun_prop))] at this

prob_pullCount_mem_and_sumRewards_mem_le🔗

LemmaBandits.ArrayModel.prob_pullCount_mem_and_sumRewards_mem_le

No docstring.

🔗theorem
Bandits.ArrayModel.prob_pullCount_mem_and_sumRewards_mem_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (n : ) {s : Set } [DecidablePred fun x => x s] (hs : MeasurableSet s) {B : Set } (hB : MeasurableSet B) : (arrayMeasure ν) {ω | Learning.pullCount (action alg) a n ω s Learning.sumRewards (action alg) (reward alg) a n ω B} k Finset.range (n + 1) with k s, (streamMeasure ν) {ω | i Finset.range k, ω i a B}
Bandits.ArrayModel.prob_pullCount_mem_and_sumRewards_mem_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (n : ) {s : Set } [DecidablePred fun x => x s] (hs : MeasurableSet s) {B : Set } (hB : MeasurableSet B) : (arrayMeasure ν) {ω | Learning.pullCount (action alg) a n ω s Learning.sumRewards (action alg) (reward alg) a n ω B} k Finset.range (n + 1) with k s, (streamMeasure ν) {ω | i Finset.range k, ω i a B}

Code

lemma prob_pullCount_mem_and_sumRewards_mem_le (a : 𝓐) (n : ℕ)
    {s : Set ℕ} [DecidablePred (· ∈ s)] (hs : MeasurableSet s) {B : Set ℝ} (hB : MeasurableSet B) :
    𝔓 {ω | pullCount A a n ω ∈ s ∧ sumRewards A R a n ω ∈ B} ≤
      ∑ k ∈ (range (n + 1)).filter (· ∈ s),
        streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ B}
Type uses (9)
Body uses (1)

Actions: Source · Open Issue

Proof
by
  classical
  rcases Set.eq_empty_or_nonempty B with h_empty | h_nonempty
  · simp [h_empty]
  convert prob_pullCount_prod_sumRewards_mem_le a n (hs.prod hB) (ν := ν) (alg := alg)
    with _ _ _ k hk
  · rfl
  · ext n
    have : ∃ x, x ∈ B := h_nonempty
    simp [this]
  · ext x
    simp only [Set.mem_image, Set.mem_prod, Prod.exists, exists_and_right, exists_and_left,
      exists_eq_right, mem_filter, mem_range] at hk
    simp [hk.2.1]

prob_exists_pullCount_eq_and_sumRewards_mem_le🔗

LemmaBandits.ArrayModel.prob_exists_pullCount_eq_and_sumRewards_mem_le

No docstring.

🔗theorem
Bandits.ArrayModel.prob_exists_pullCount_eq_and_sumRewards_mem_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (m : ) {B : Set } (hB : MeasurableSet B) : (arrayMeasure ν) {ω | n, Learning.pullCount (action alg) a n ω = m Learning.sumRewards (action alg) (reward alg) a n ω B} (streamMeasure ν) {ω | i Finset.range m, ω i a B}
Bandits.ArrayModel.prob_exists_pullCount_eq_and_sumRewards_mem_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (m : ) {B : Set } (hB : MeasurableSet B) : (arrayMeasure ν) {ω | n, Learning.pullCount (action alg) a n ω = m Learning.sumRewards (action alg) (reward alg) a n ω B} (streamMeasure ν) {ω | i Finset.range m, ω i a B}

Code

lemma prob_exists_pullCount_eq_and_sumRewards_mem_le (a : 𝓐) (m : ℕ) {B : Set ℝ}
    (hB : MeasurableSet B) : 𝔓 {ω | ∃ n, pullCount A a n ω = m ∧ sumRewards A R a n ω ∈ B} ≤
      streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B}
Type uses (9)
Body uses (2)
Used by (1)

Actions: Source · Open Issue

Proof
calc
    _ ≤ 𝔓 {ω | ∑ i ∈ range m, ω.2 i a ∈ B} := by
        apply measure_mono
        intro ω ⟨n, hp, hn⟩
        rwa [sumRewards_eq alg a n ω, hp] at hn
    _ = streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B} :=
        (identDistrib_sum_range_snd a m).measure_mem_eq hB

prob_sumRewards_le_sumRewards_le🔗

LemmaBandits.ArrayModel.prob_sumRewards_le_sumRewards_le

No docstring.

🔗theorem
Bandits.ArrayModel.prob_sumRewards_le_sumRewards_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] [Fintype 𝓐] (a : 𝓐) (n m₁ m₂ : ) : (arrayMeasure ν) {ω | Learning.pullCount (action alg) (bestArm ν) n ω = m₁ Learning.pullCount (action alg) a n ω = m₂ Learning.sumRewards (action alg) (reward alg) (bestArm ν) n ω Learning.sumRewards (action alg) (reward alg) a n ω} (streamMeasure ν) {ω | i Finset.range m₁, ω i (bestArm ν) i Finset.range m₂, ω i a}
Bandits.ArrayModel.prob_sumRewards_le_sumRewards_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] [Fintype 𝓐] (a : 𝓐) (n m₁ m₂ : ) : (arrayMeasure ν) {ω | Learning.pullCount (action alg) (bestArm ν) n ω = m₁ Learning.pullCount (action alg) a n ω = m₂ Learning.sumRewards (action alg) (reward alg) (bestArm ν) n ω Learning.sumRewards (action alg) (reward alg) a n ω} (streamMeasure ν) {ω | i Finset.range m₁, ω i (bestArm ν) i Finset.range m₂, ω i a}

Code

lemma prob_sumRewards_le_sumRewards_le [Fintype 𝓐] (a : 𝓐) (n m₁ m₂ : ℕ) :
    (𝔓) {ω | pullCount A (bestArm ν) n ω = m₁ ∧ pullCount A a n ω = m₂ ∧
        sumRewards A R (bestArm ν) n ω ≤ sumRewards A R a n ω} ≤
      streamMeasure ν
        {ω | ∑ i ∈ range m₁, ω i (bestArm ν) ≤ ∑ i ∈ range m₂, ω i a}
Type uses (10)
Body uses (2)
Used by (1)

Actions: Source · Open Issue

Proof
by
  simp_rw [sumRewards_eq]
  calc 𝔓 {ω | pullCount A (bestArm ν) n ω = m₁ ∧ pullCount A a n ω = m₂ ∧
      ∑ i ∈ range (pullCount A (bestArm ν) n ω), ω.2 i (bestArm ν) ≤
        ∑ i ∈ range (pullCount A a n ω), ω.2 i a}
  _ ≤ 𝔓 ((fun ω ↦ (∑ i ∈ range m₁, ω.2 i (bestArm ν), ∑ i ∈ range m₂, ω.2 i a)) ⁻¹'
        {p | p.1 ≤ p.2}) := by
      refine measure_mono fun ω hω ↦ ?_
      simp only [Set.preimage_setOf_eq, Set.mem_setOf_eq] at hω ⊢
      grind
  _ = streamMeasure ν
      {ω | ∑ i ∈ range m₁, ω i (bestArm ν) ≤ ∑ i ∈ range m₂, ω i a} := by
    rw [← Measure.snd_prod (μ := (Measure.infinitePi fun (_ : ℕ) ↦ (volume : Measure unitInterval)))
      (ν := streamMeasure ν), Measure.snd, Measure.map_apply (by fun_prop)]
    · rfl
    simp only [measurableSet_setOf]
    fun_prop

probReal_sumRewards_le_sumRewards_le🔗

LemmaBandits.ArrayModel.probReal_sumRewards_le_sumRewards_le

No docstring.

🔗theorem
Bandits.ArrayModel.probReal_sumRewards_le_sumRewards_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] [Fintype 𝓐] (a : 𝓐) (n m₁ m₂ : ) : MeasureTheory.Measure.real (arrayMeasure ν) {ω | Learning.pullCount (action alg) (bestArm ν) n ω = m₁ Learning.pullCount (action alg) a n ω = m₂ Learning.sumRewards (action alg) (reward alg) (bestArm ν) n ω Learning.sumRewards (action alg) (reward alg) a n ω} MeasureTheory.Measure.real (streamMeasure ν) {ω | i Finset.range m₁, ω i (bestArm ν) i Finset.range m₂, ω i a}
Bandits.ArrayModel.probReal_sumRewards_le_sumRewards_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] [Fintype 𝓐] (a : 𝓐) (n m₁ m₂ : ) : MeasureTheory.Measure.real (arrayMeasure ν) {ω | Learning.pullCount (action alg) (bestArm ν) n ω = m₁ Learning.pullCount (action alg) a n ω = m₂ Learning.sumRewards (action alg) (reward alg) (bestArm ν) n ω Learning.sumRewards (action alg) (reward alg) a n ω} MeasureTheory.Measure.real (streamMeasure ν) {ω | i Finset.range m₁, ω i (bestArm ν) i Finset.range m₂, ω i a}

Code

lemma probReal_sumRewards_le_sumRewards_le [Fintype 𝓐] (a : 𝓐) (n m₁ m₂ : ℕ) :
    (𝔓).real {ω | pullCount A (bestArm ν) n ω = m₁ ∧ pullCount A a n ω = m₂ ∧
        sumRewards A R (bestArm ν) n ω ≤ sumRewards A R a n ω} ≤
      (streamMeasure ν).real
        {ω | ∑ i ∈ range m₁, ω i (bestArm ν) ≤ ∑ i ∈ range m₂, ω i a}
Type uses (10)
Body uses (2)
Used by (1)

Actions: Source · Open Issue

Proof
by
  simp_rw [measureReal_def]
  gcongr
  · finiteness
  · exact prob_sumRewards_le_sumRewards_le a n m₁ m₂

sumRewards_eq_comp🔗

LemmaBandits.sumRewards_eq_comp

No docstring.

🔗theorem
Bandits.sumRewards_eq_comp.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {A : Ω 𝓐} {R : Ω } {n : } {a : 𝓐} : Learning.sumRewards A R a n = (fun p => i Finset.range n, if Prod.fst (p i) = a then Prod.snd (p i) else 0) Learning.trajectory A R
Bandits.sumRewards_eq_comp.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {A : Ω 𝓐} {R : Ω } {n : } {a : 𝓐} : Learning.sumRewards A R a n = (fun p => i Finset.range n, if Prod.fst (p i) = a then Prod.snd (p i) else 0) Learning.trajectory A R

Code

lemma sumRewards_eq_comp :
    sumRewards A R a n =
     (fun p ↦ ∑ i ∈ range n, if (p i).1 = a then (p i).2 else 0) ∘ (trajectory A R)
Type uses (2)
Used by (2)

Actions: Source · Open Issue

Proof
by
  ext
  simp [sumRewards, trajectory]

pullCount_eq_comp🔗

LemmaBandits.pullCount_eq_comp

No docstring.

🔗theorem
Bandits.pullCount_eq_comp.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {A : Ω 𝓐} {R : Ω } {n : } {a : 𝓐} : Learning.pullCount A a n = (fun p => i Finset.range n, if Prod.fst (p i) = a then 1 else 0) Learning.trajectory A R
Bandits.pullCount_eq_comp.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {A : Ω 𝓐} {R : Ω } {n : } {a : 𝓐} : Learning.pullCount A a n = (fun p => i Finset.range n, if Prod.fst (p i) = a then 1 else 0) Learning.trajectory A R

Code

lemma pullCount_eq_comp :
    pullCount A a n =
      (fun p ↦ ∑ i ∈ range n, if (p i).1 = a then 1 else 0) ∘ (trajectory A R)
Type uses (2)
Used by (1)

Actions: Source · Open Issue

Proof
by
  ext
  simp [pullCount, trajectory]

law_sumRewards_unique🔗

LemmaLearning.IsAlgEnvSeq.law_sumRewards_unique

No docstring.

🔗theorem
Learning.IsAlgEnvSeq.law_sumRewards_unique.{u_1, u_2, u_3} {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {A₂ : Ω' 𝓐} {R₂ : Ω' } {n : } {a : 𝓐} [MeasurableSingletonClass 𝓐] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') : MeasureTheory.Measure.map (sumRewards A R a n) P = MeasureTheory.Measure.map (sumRewards A₂ R₂ a n) P'
Learning.IsAlgEnvSeq.law_sumRewards_unique.{u_1, u_2, u_3} {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {A₂ : Ω' 𝓐} {R₂ : Ω' } {n : } {a : 𝓐} [MeasurableSingletonClass 𝓐] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') : MeasureTheory.Measure.map (sumRewards A R a n) P = MeasureTheory.Measure.map (sumRewards A₂ R₂ a n) P'

Code

lemma _root_.Learning.IsAlgEnvSeq.law_sumRewards_unique [MeasurableSingletonClass 𝓐]
    (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
    P.map (sumRewards A R a n) = P'.map (sumRewards A₂ R₂ a n)
Type uses (4)
Body uses (5)

Actions: Source · Open Issue

Proof
by
  have hA := h1.measurable_action
  have hR := h1.measurable_feedback
  have hA2 := h2.measurable_action
  have hR2 := h2.measurable_feedback
  have h_unique := isAlgEnvSeq_unique h1 h2
  rw [sumRewards_eq_comp, sumRewards_eq_comp, ← Measure.map_map, h_unique, Measure.map_map,
    ← sumRewards_eq_comp]
  · refine measurable_sum _ fun i hi ↦ Measurable.ite ?_ (by fun_prop) (by fun_prop)
    exact (measurableSet_singleton _).preimage (by fun_prop)
  · fun_prop
  · refine measurable_sum _ fun i hi ↦ Measurable.ite ?_ (by fun_prop) (by fun_prop)
    exact (measurableSet_singleton _).preimage (by fun_prop)
  · fun_prop

law_pullCount_sumRewards_unique'🔗

LemmaLearning.IsAlgEnvSeq.law_pullCount_sumRewards_unique'

No docstring.

🔗theorem
Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique'.{u_1, u_2, u_3} {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {A₂ : Ω' 𝓐} {R₂ : Ω' } {n : } [MeasurableSingletonClass 𝓐] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') : ProbabilityTheory.IdentDistrib (fun ω a => (pullCount A a n ω, sumRewards A R a n ω)) (fun ω a => (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P P'
Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique'.{u_1, u_2, u_3} {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {A₂ : Ω' 𝓐} {R₂ : Ω' } {n : } [MeasurableSingletonClass 𝓐] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') : ProbabilityTheory.IdentDistrib (fun ω a => (pullCount A a n ω, sumRewards A R a n ω)) (fun ω a => (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P P'

Code

lemma _root_.Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique' [MeasurableSingletonClass 𝓐]
    (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
    IdentDistrib (fun ω a ↦ (pullCount A a n ω, sumRewards A R a n ω))
      (fun ω a ↦ (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P P'
Type uses (5)
Body uses (7)
Used by (2)

Actions: Source · Open Issue

Proof
by
  have hA := h1.measurable_action
  have hR := h1.measurable_feedback
  have hA2 := h2.measurable_action
  have hR2 := h2.measurable_feedback
  constructor
  · refine Measurable.aemeasurable ?_
    rw [measurable_pi_iff]
    exact fun a ↦ Measurable.prod (by fun_prop) (measurable_sumRewards hA hR _ _)
  · refine Measurable.aemeasurable ?_
    rw [measurable_pi_iff]
    exact fun a ↦ Measurable.prod (by fun_prop) (measurable_sumRewards hA2 hR2 _ _)
  have h_unique := isAlgEnvSeq_unique h1 h2
  let f := fun (p : ℕ → 𝓐 × ℝ ) (a : 𝓐) ↦ (∑ i ∈ range n, if (p i).1 = a then 1 else 0,
    ∑ i ∈ range n, if (p i).1 = a then (p i).2 else 0)
  have hf : Measurable f := by
    rw [measurable_pi_iff]
    intro a
    refine Measurable.prod ?_ ?_
    · simp only [f]
      refine measurable_sum _ fun i hi ↦ Measurable.ite ?_ (by fun_prop) (by fun_prop)
      exact (measurableSet_singleton _).preimage (by fun_prop)
    · simp only [f]
      refine measurable_sum _ fun i hi ↦ Measurable.ite ?_ (by fun_prop) (by fun_prop)
      exact (measurableSet_singleton _).preimage (by fun_prop)
  have h_eq_comp : (fun ω a ↦ (pullCount A a n ω, sumRewards A R a n ω))
      = f ∘ (trajectory A R) := by
    ext ω a : 2
    rw [pullCount_eq_comp (R := R), sumRewards_eq_comp]
    grind
  have h_eq_comp2 : (fun ω a ↦ (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω))
      = f ∘ (trajectory A₂ R₂) := by
    ext ω a : 2
    rw [pullCount_eq_comp (R := R₂), sumRewards_eq_comp]
    grind
  rw [h_eq_comp, h_eq_comp2, ← Measure.map_map hf, h_unique, Measure.map_map hf,
    ← h_eq_comp2]
  · rw [measurable_pi_iff]
    exact fun n ↦ Measurable.prodMk (hA2 n) (hR2 n)
  · rw [measurable_pi_iff]
    exact fun n ↦ Measurable.prodMk (hA n) (hR n)

law_pullCount_sumRewards_unique🔗

LemmaLearning.IsAlgEnvSeq.law_pullCount_sumRewards_unique

No docstring.

🔗theorem
Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique.{u_1, u_2, u_3} {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {A₂ : Ω' 𝓐} {R₂ : Ω' } {n : } {a : 𝓐} [MeasurableSingletonClass 𝓐] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') : MeasureTheory.Measure.map (fun ω => (pullCount A a n ω, sumRewards A R a n ω)) P = MeasureTheory.Measure.map (fun ω => (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P'
Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique.{u_1, u_2, u_3} {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {A₂ : Ω' 𝓐} {R₂ : Ω' } {n : } {a : 𝓐} [MeasurableSingletonClass 𝓐] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') : MeasureTheory.Measure.map (fun ω => (pullCount A a n ω, sumRewards A R a n ω)) P = MeasureTheory.Measure.map (fun ω => (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P'

Code

lemma _root_.Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique [MeasurableSingletonClass 𝓐]
    (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
    P.map (fun ω ↦ (pullCount A a n ω, sumRewards A R a n ω)) =
      P'.map (fun ω ↦ (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω))
Type uses (5)
Body uses (1)
Used by (1)

Actions: Source · Open Issue

Proof
((h1.law_pullCount_sumRewards_unique' h2 (n := n)).comp (u := fun f ↦ f a) (by fun_prop)).map_eq

identDistrib_pullCount_sumRewards🔗

LemmaLearning.IsAlgEnvSeq.identDistrib_pullCount_sumRewards

No docstring.

🔗theorem
Learning.IsAlgEnvSeq.identDistrib_pullCount_sumRewards.{u_1, u_2, u_3} {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {A₂ : Ω' 𝓐} {R₂ : Ω' } [MeasurableSingletonClass 𝓐] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') : ProbabilityTheory.IdentDistrib (fun ω n a => (pullCount A a n ω, sumRewards A R a n ω)) (fun ω' n a => (pullCount A₂ a n ω', sumRewards A₂ R₂ a n ω')) P P'
Learning.IsAlgEnvSeq.identDistrib_pullCount_sumRewards.{u_1, u_2, u_3} {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {A₂ : Ω' 𝓐} {R₂ : Ω' } [MeasurableSingletonClass 𝓐] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') : ProbabilityTheory.IdentDistrib (fun ω n a => (pullCount A a n ω, sumRewards A R a n ω)) (fun ω' n a => (pullCount A₂ a n ω', sumRewards A₂ R₂ a n ω')) P P'

Code

lemma _root_.Learning.IsAlgEnvSeq.identDistrib_pullCount_sumRewards [MeasurableSingletonClass 𝓐]
    (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
    IdentDistrib (fun ω n a ↦ (pullCount A a n ω, sumRewards A R a n ω))
      (fun ω' n a ↦ (pullCount A₂ a n ω', sumRewards A₂ R₂ a n ω')) P P'
Type uses (5)
Body uses (2)
Used by (1)

Actions: Source · Open Issue

Proof
by
  let f (τ : ℕ → 𝓐 × ℝ) (n : ℕ) (a : 𝓐) : ℕ × ℝ :=
    (∑ i ∈ range n, if (τ i).1 = a then 1 else 0,
     ∑ i ∈ range n, if (τ i).1 = a then (τ i).2 else 0)
  have hc1 : (fun ω n a ↦ (pullCount A a n ω, sumRewards A R a n ω)) =
      f ∘ (trajectory A R) := by
    ext ω n a : 3
    simp_rw [Function.comp, f, pullCount, card_filter, sumRewards, trajectory]
    rfl
  have hc2 : (fun ω' n a ↦ (pullCount A₂ a n ω', sumRewards A₂ R₂ a n ω')) =
      f ∘ (trajectory A₂ R₂) := by
    ext ω' n a : 3
    simp_rw [Function.comp, f, pullCount, card_filter, sumRewards, trajectory]
    rfl
  have hf : Measurable f := by
    simp_rw [f, measurable_pi_iff]
    intro n a
    apply Measurable.prod
    · dsimp only
      exact measurable_sum _
        (fun _ _ ↦ Measurable.ite (by measurability) (by fun_prop) (by fun_prop))
    · dsimp only
      exact measurable_sum _
        (fun _ _ ↦ Measurable.ite (by measurability) (by fun_prop) (by fun_prop))
  rw [hc1, hc2]
  exact (h1.identDistrib_trajectory h2).comp hf

prob_pullCount_prod_sumRewards_mem_le🔗

LemmaBandits.prob_pullCount_prod_sumRewards_mem_le

No docstring.

🔗theorem
Bandits.prob_pullCount_prod_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {n : } {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {s : Set ( × )} [DecidablePred fun x => x Prod.fst '' s] (hs : MeasurableSet s) : P {ω | (Learning.pullCount A a n ω, Learning.sumRewards A R a n ω) s} k Finset.range (n + 1) with k Prod.fst '' s, (streamMeasure ν) {ω | i Finset.range k, ω i a Prod.mk k ⁻¹' s}
Bandits.prob_pullCount_prod_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {n : } {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {s : Set ( × )} [DecidablePred fun x => x Prod.fst '' s] (hs : MeasurableSet s) : P {ω | (Learning.pullCount A a n ω, Learning.sumRewards A R a n ω) s} k Finset.range (n + 1) with k Prod.fst '' s, (streamMeasure ν) {ω | i Finset.range k, ω i a Prod.mk k ⁻¹' s}

Code

lemma prob_pullCount_prod_sumRewards_mem_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    {s : Set (ℕ × ℝ)} [DecidablePred (· ∈ Prod.fst '' s)] (hs : MeasurableSet s) :
    P {ω | (pullCount A a n ω, sumRewards A R a n ω) ∈ s} ≤
      ∑ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
        streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s}
Type uses (6)
Body uses (14)
Used by (3)

Actions: Source · Open Issue

Proof
by
  have hA := h.measurable_action
  have hR := h.measurable_feedback
  calc P {ω | (pullCount A a n ω, sumRewards A R a n ω) ∈ s}
  _ = (P.map (fun ω ↦ (pullCount A a n ω, sumRewards A R a n ω))) s := by
      rw [Measure.map_apply (by fun_prop) hs]; rfl
  _ = ((ArrayModel.arrayMeasure ν).map
      (fun ω ↦ (pullCount (ArrayModel.action alg) a n ω,
        sumRewards (ArrayModel.action alg) (ArrayModel.reward alg) a n ω))) s := by
    rw [h.law_pullCount_sumRewards_unique (ArrayModel.isAlgEnvSeq_arrayMeasure alg ν)]
  _ = (ArrayModel.arrayMeasure ν) {ω | (pullCount (ArrayModel.action alg) a n ω,
      sumRewards (ArrayModel.action alg) (ArrayModel.reward alg) a n ω) ∈ s} := by
    rw [Measure.map_apply (by fun_prop) hs]; rfl
  _ ≤ ∑ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
      streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s} :=
    ArrayModel.prob_pullCount_prod_sumRewards_mem_le a n hs

prob_pullCount_mem_and_sumRewards_mem_le🔗

LemmaBandits.prob_pullCount_mem_and_sumRewards_mem_le

No docstring.

🔗theorem
Bandits.prob_pullCount_mem_and_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {n : } {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {s : Set } [DecidablePred fun x => x s] (hs : MeasurableSet s) {B : Set } (hB : MeasurableSet B) : P {ω | Learning.pullCount A a n ω s Learning.sumRewards A R a n ω B} k Finset.range (n + 1) with k s, (streamMeasure ν) {ω | i Finset.range k, ω i a B}
Bandits.prob_pullCount_mem_and_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {n : } {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {s : Set } [DecidablePred fun x => x s] (hs : MeasurableSet s) {B : Set } (hB : MeasurableSet B) : P {ω | Learning.pullCount A a n ω s Learning.sumRewards A R a n ω B} k Finset.range (n + 1) with k s, (streamMeasure ν) {ω | i Finset.range k, ω i a B}

Code

lemma prob_pullCount_mem_and_sumRewards_mem_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    {s : Set ℕ} [DecidablePred (· ∈ s)] (hs : MeasurableSet s) {B : Set ℝ} (hB : MeasurableSet B) :
    P {ω | pullCount A a n ω ∈ s ∧ sumRewards A R a n ω ∈ B} ≤
      ∑ k ∈ (range (n + 1)).filter (· ∈ s),
        streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ B}
Type uses (6)
Body uses (1)
Used by (2)

Actions: Source · Open Issue

Proof
by
  classical
  rcases Set.eq_empty_or_nonempty B with h_empty | h_nonempty
  · simp [h_empty]
  convert prob_pullCount_prod_sumRewards_mem_le h (hs.prod hB) (ν := ν) (alg := alg) with _ _ _ k hk
  · rfl
  · ext n
    have : ∃ x, x ∈ B := h_nonempty
    simp [this]
  · ext x
    simp only [Set.mem_image, Set.mem_prod, Prod.exists, exists_and_right, exists_and_left,
      exists_eq_right, mem_filter, mem_range] at hk
    simp [hk.2.1]

prob_sumRewards_mem_le🔗

LemmaBandits.prob_sumRewards_mem_le

No docstring.

🔗theorem
Bandits.prob_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {n : } {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {B : Set } (hB : MeasurableSet B) : P (Learning.sumRewards A R a n ⁻¹' B) k Finset.range (n + 1), (streamMeasure ν) {ω | i Finset.range k, ω i a B}
Bandits.prob_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {n : } {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {B : Set } (hB : MeasurableSet B) : P (Learning.sumRewards A R a n ⁻¹' B) k Finset.range (n + 1), (streamMeasure ν) {ω | i Finset.range k, ω i a B}

Code

lemma prob_sumRewards_mem_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    {B : Set ℝ} (hB : MeasurableSet B) :
    P (sumRewards A R a n ⁻¹' B) ≤
      ∑ k ∈ range (n + 1), streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ B}
Type uses (5)
Body uses (2)

Actions: Source · Open Issue

Proof
by
  classical
  have h_le := prob_pullCount_mem_and_sumRewards_mem_le h .univ hB (a := a) (n := n)
  simp only [Set.mem_univ, true_and, filter_true] at h_le
  convert h_le
  rfl

prob_pullCount_eq_and_sumRewards_mem_le🔗

LemmaBandits.prob_pullCount_eq_and_sumRewards_mem_le

No docstring.

🔗theorem
Bandits.prob_pullCount_eq_and_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {n : } {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {m : } (hm : m n) {B : Set } (hB : MeasurableSet B) : P {ω | Learning.pullCount A a n ω = m Learning.sumRewards A R a n ω B} (streamMeasure ν) {ω | i Finset.range m, ω i a B}
Bandits.prob_pullCount_eq_and_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {n : } {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {m : } (hm : m n) {B : Set } (hB : MeasurableSet B) : P {ω | Learning.pullCount A a n ω = m Learning.sumRewards A R a n ω B} (streamMeasure ν) {ω | i Finset.range m, ω i a B}

Code

lemma prob_pullCount_eq_and_sumRewards_mem_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    {m : ℕ} (hm : m ≤ n) {B : Set ℝ} (hB : MeasurableSet B) :
    P {ω | pullCount A a n ω = m ∧ sumRewards A R a n ω ∈ B} ≤
      streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B}
Type uses (6)
Body uses (1)

Actions: Source · Open Issue

Proof
by
  have h_le := prob_pullCount_mem_and_sumRewards_mem_le h (s := {m}) (by simp) hB (a := a) (n := n)
  have hm' : m < n + 1 := by lia
  simpa [hm'] using h_le

prob_exists_pullCount_eq_and_sumRewards_mem_le🔗

LemmaBandits.prob_exists_pullCount_eq_and_sumRewards_mem_le

No docstring.

🔗theorem
Bandits.prob_exists_pullCount_eq_and_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) (a : 𝓐) (m : ) {B : Set } (hB : MeasurableSet B) : P {ω | n, Learning.pullCount A a n ω = m Learning.sumRewards A R a n ω B} (streamMeasure ν) {ω | i Finset.range m, ω i a B}
Bandits.prob_exists_pullCount_eq_and_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) (a : 𝓐) (m : ) {B : Set } (hB : MeasurableSet B) : P {ω | n, Learning.pullCount A a n ω = m Learning.sumRewards A R a n ω B} (streamMeasure ν) {ω | i Finset.range m, ω i a B}

Code

lemma prob_exists_pullCount_eq_and_sumRewards_mem_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (a : 𝓐) (m : ℕ) {B : Set ℝ}
    (hB : MeasurableSet B) :
    P {ω | ∃ n, pullCount A a n ω = m ∧ sumRewards A R a n ω ∈ B} ≤
      streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B}
Type uses (6)
Body uses (9)
Used by (2)

Actions: Source · Open Issue

Proof
let s := {p : ℕ → 𝓐 → ℕ × ℝ | ∃ n, (p n a).1 = m ∧ (p n a).2 ∈ B}
  have : s = ⋃ n, (fun p ↦ p n a) ⁻¹' ({m} ×ˢ B) := by
    ext p
    simp [s]
  have hs : MeasurableSet s := by measurability
  calc P {ω | ∃ n, pullCount A a n ω = m ∧ sumRewards A R a n ω ∈ B}
    _ = (ArrayModel.arrayMeasure ν) {ω | ∃ n, pullCount (ArrayModel.action alg) a n ω = m ∧
            sumRewards (ArrayModel.action alg) (ArrayModel.reward alg) a n ω ∈ B} :=
        (h.identDistrib_pullCount_sumRewards
          (ArrayModel.isAlgEnvSeq_arrayMeasure alg ν)).measure_mem_eq hs
    _ ≤ _ := ArrayModel.prob_exists_pullCount_eq_and_sumRewards_mem_le a m hB

probReal_sumRewards_le_sumRewards_le🔗

LemmaBandits.probReal_sumRewards_le_sumRewards_le

No docstring.

🔗theorem
Bandits.probReal_sumRewards_le_sumRewards_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } [Nonempty 𝓐] [Fintype 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) (a : 𝓐) (n m₁ m₂ : ) : MeasureTheory.Measure.real P {ω | Learning.pullCount A (bestArm ν) n ω = m₁ Learning.pullCount A a n ω = m₂ Learning.sumRewards A R (bestArm ν) n ω Learning.sumRewards A R a n ω} MeasureTheory.Measure.real (streamMeasure ν) {ω | i Finset.range m₁, ω i (bestArm ν) i Finset.range m₂, ω i a}
Bandits.probReal_sumRewards_le_sumRewards_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } [Nonempty 𝓐] [Fintype 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) (a : 𝓐) (n m₁ m₂ : ) : MeasureTheory.Measure.real P {ω | Learning.pullCount A (bestArm ν) n ω = m₁ Learning.pullCount A a n ω = m₂ Learning.sumRewards A R (bestArm ν) n ω Learning.sumRewards A R a n ω} MeasureTheory.Measure.real (streamMeasure ν) {ω | i Finset.range m₁, ω i (bestArm ν) i Finset.range m₂, ω i a}

Code

lemma probReal_sumRewards_le_sumRewards_le [Fintype 𝓐] [MeasurableSingletonClass 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (a : 𝓐) (n m₁ m₂ : ℕ) :
    P.real {ω | pullCount A (bestArm ν) n ω = m₁ ∧ pullCount A a n ω = m₂ ∧
        sumRewards A R (bestArm ν) n ω ≤ sumRewards A R a n ω} ≤
      (streamMeasure ν).real
        {ω | ∑ i ∈ range m₁, ω i (bestArm ν) ≤ ∑ i ∈ range m₂, ω i a}
Type uses (7)
Body uses (14)
Used by (1)

Actions: Source · Open Issue

Proof
by
  have hA := h.measurable_action
  have hR := h.measurable_feedback
  refine le_trans (le_of_eq ?_)
    (ArrayModel.probReal_sumRewards_le_sumRewards_le (alg := alg) a n m₁ m₂)
  let s := {p : ℕ × ℕ × ℝ × ℝ | p.1 = m₁ ∧ p.2.1 = m₂ ∧ p.2.2.1 ≤ p.2.2.2}
  have hs : MeasurableSet s := by simp only [measurableSet_setOf, s]; fun_prop
  change P.real ((fun ω ↦ (pullCount A (bestArm ν) n ω,
      pullCount A a n ω, sumRewards A R (bestArm ν) n ω, sumRewards A R a n ω)) ⁻¹' s) =
    (ArrayModel.arrayMeasure ν).real
      ((fun ω ↦ (pullCount (ArrayModel.action alg) (bestArm ν) n ω,
        pullCount (ArrayModel.action alg) a n ω,
        sumRewards (ArrayModel.action alg) (ArrayModel.reward alg) (bestArm ν) n ω,
        sumRewards (ArrayModel.action alg) (ArrayModel.reward alg) a n ω)) ⁻¹' s)
  simp_rw [measureReal_def]
  congr 1
  rw [← Measure.map_apply ?_ hs, ← Measure.map_apply (by fun_prop) hs]
  swap
  · refine Measurable.prod (by fun_prop) (Measurable.prod (by fun_prop) ?_)
    exact (measurable_sumRewards hA hR _ _).prod (measurable_sumRewards hA hR _ _)
  congr 1
  refine IdentDistrib.map_eq ?_
  have h_eq := h.law_pullCount_sumRewards_unique' (ArrayModel.isAlgEnvSeq_arrayMeasure alg ν)
    (n := n)
  exact h_eq.comp (u := fun p ↦ ((p (bestArm ν)).1, (p a).1, (p (bestArm ν)).2, (p a).2))
    (by fun_prop)

prob_sum_range_sub_ge_le_of_HasSubgaussianMGF🔗

LemmaBandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF

No docstring.

🔗theorem
Bandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {a : 𝓐} {σ2 : NNReal} (h : ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) {ε : } ( : 0 ε) (n : ) : (streamMeasure ν) {ω | ε k Finset.range n, (ω k a - (x : ), id x ν a)} ENNReal.ofReal (Real.exp (-ε ^ 2 / (2 * n * σ2)))
Bandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {a : 𝓐} {σ2 : NNReal} (h : ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) {ε : } ( : 0 ε) (n : ) : (streamMeasure ν) {ω | ε k Finset.range n, (ω k a - (x : ), id x ν a)} ENNReal.ofReal (Real.exp (-ε ^ 2 / (2 * n * σ2)))

Code

lemma prob_sum_range_sub_ge_le_of_HasSubgaussianMGF {σ2 : ℝ≥0}
    (h : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) {ε : ℝ} (hε : 0 ≤ ε) (n : ℕ) :
    streamMeasure ν {ω | ε ≤ ∑ k ∈ range n, (ω k a - (ν a)[id])} ≤
      ENNReal.ofReal (Real.exp (-ε ^ 2 / (2 * n * σ2)))
Type uses (1)
Body uses (3)
Used by (1)

Actions: Source · Open Issue

Proof
by
  rw [← ofReal_measureReal]
  gcongr
  apply HasSubgaussianMGF.measure_sum_range_ge_le_of_iIndepFun _ _ hε
  · exact (iIndepFun_eval_streamMeasure'' ν a).comp (fun _ x ↦ x - (ν a)[id]) (by fun_prop)
  · intro _ _
    exact h.congr_identDistrib ((identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _)

prob_sum_range_sub_le_le_of_HasSubgaussianMGF🔗

LemmaBandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF

No docstring.

🔗theorem
Bandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {a : 𝓐} {σ2 : NNReal} (h : ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) {ε : } ( : 0 ε) (n : ) : (streamMeasure ν) {ω | k Finset.range n, (ω k a - (x : ), id x ν a) -ε} ENNReal.ofReal (Real.exp (-ε ^ 2 / (2 * n * σ2)))
Bandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {a : 𝓐} {σ2 : NNReal} (h : ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) {ε : } ( : 0 ε) (n : ) : (streamMeasure ν) {ω | k Finset.range n, (ω k a - (x : ), id x ν a) -ε} ENNReal.ofReal (Real.exp (-ε ^ 2 / (2 * n * σ2)))

Code

lemma prob_sum_range_sub_le_le_of_HasSubgaussianMGF {σ2 : ℝ≥0}
    (h : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) {ε : ℝ} (hε : 0 ≤ ε) (n : ℕ) :
    streamMeasure ν {ω | ∑ k ∈ range n, (ω k a - (ν a)[id]) ≤ -ε} ≤
      ENNReal.ofReal (Real.exp (-ε ^ 2 / (2 * n * σ2)))
Type uses (1)
Body uses (4)
Used by (1)

Actions: Source · Open Issue

Proof
by
  rw [← ofReal_measureReal]
  gcongr
  apply HasSubgaussianMGF.measure_sum_range_le_le_of_iIndepFun _ _ hε
  · exact (iIndepFun_eval_streamMeasure'' ν a).comp (fun _ x ↦ x - (ν a)[id]) (by fun_prop)
  · intro _ _
    exact h.congr_identDistrib ((identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _)

prob_sum_range_sub_ge_le_of_HasSubgaussianMGF'🔗

LemmaBandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF'

No docstring.

🔗theorem
Bandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF'.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {n : } {a : 𝓐} {σ2 : NNReal} (hσ2 : 0 < σ2) (h : ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) {δ : } ( : 0 < δ) (hn : 0 < n) : (streamMeasure ν) {ω | (2 * n * σ2 * Real.log (1 / δ)) k Finset.range n, (ω k a - (x : ), id x ν a)} ENNReal.ofReal δ
Bandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF'.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {n : } {a : 𝓐} {σ2 : NNReal} (hσ2 : 0 < σ2) (h : ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) {δ : } ( : 0 < δ) (hn : 0 < n) : (streamMeasure ν) {ω | (2 * n * σ2 * Real.log (1 / δ)) k Finset.range n, (ω k a - (x : ), id x ν a)} ENNReal.ofReal δ

Code

lemma prob_sum_range_sub_ge_le_of_HasSubgaussianMGF' {σ2 : ℝ≥0} (hσ2 : 0 < σ2)
    (h : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) {δ : ℝ} (hδ : 0 < δ) (hn : 0 < n) :
    streamMeasure ν {ω | √(2 * n * σ2 * Real.log (1 / δ)) ≤
      ∑ k ∈ range n, (ω k a - (ν a)[id])} ≤ ENNReal.ofReal δ
Type uses (1)
Body uses (1)
Used by (1)

Actions: Source · Open Issue

Proof
calc
  _ ≤ ENNReal.ofReal (Real.exp (-√(2 * n * σ2 * Real.log (1 / δ)) ^ 2 / (2 * n * σ2))) :=
    prob_sum_range_sub_ge_le_of_HasSubgaussianMGF h (by positivity) n
  _ ≤ ENNReal.ofReal δ := by
    gcongr
    exact exp_neg_sqrt_sq_div_le hσ2 hδ hn

prob_sum_range_sub_le_le_of_HasSubgaussianMGF'🔗

LemmaBandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF'

No docstring.

🔗theorem
Bandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF'.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {n : } {a : 𝓐} {σ2 : NNReal} (hσ2 : 0 < σ2) (h : ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) {δ : } ( : 0 < δ) (hn : 0 < n) : (streamMeasure ν) {ω | k Finset.range n, (ω k a - (x : ), id x ν a) -(2 * n * σ2 * Real.log (1 / δ))} ENNReal.ofReal δ
Bandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF'.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {n : } {a : 𝓐} {σ2 : NNReal} (hσ2 : 0 < σ2) (h : ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) {δ : } ( : 0 < δ) (hn : 0 < n) : (streamMeasure ν) {ω | k Finset.range n, (ω k a - (x : ), id x ν a) -(2 * n * σ2 * Real.log (1 / δ))} ENNReal.ofReal δ

Code

lemma prob_sum_range_sub_le_le_of_HasSubgaussianMGF' {σ2 : ℝ≥0} (hσ2 : 0 < σ2)
    (h : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) {δ : ℝ} (hδ : 0 < δ) (hn : 0 < n) :
    streamMeasure ν {ω | ∑ k ∈ range n, (ω k a - (ν a)[id]) ≤
      -√(2 * n * σ2 * Real.log (1 / δ))} ≤ ENNReal.ofReal δ
Type uses (1)
Body uses (1)
Used by (1)

Actions: Source · Open Issue

Proof
calc
  _ ≤ ENNReal.ofReal (Real.exp (-√(2 * n * σ2 * Real.log (1 / δ)) ^ 2 / (2 * n * σ2))) :=
    prob_sum_range_sub_le_le_of_HasSubgaussianMGF h (by positivity) n
  _ ≤ ENNReal.ofReal δ := by
    gcongr
    exact exp_neg_sqrt_sq_div_le hσ2 hδ hn

prob_sumRewards_sub_pullCount_mul_ge_le🔗

LemmaBandits.prob_sumRewards_sub_pullCount_mul_ge_le

No docstring.

🔗theorem
Bandits.prob_sumRewards_sub_pullCount_mul_ge_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {n : } {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] {σ2 : NNReal} (hσ2 : 0 < σ2) (ha : ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {δ : } ( : 0 < δ) : P {ω | t < n, Learning.pullCount A a t ω 0 (2 * (Learning.pullCount A a t ω) * σ2 * Real.log (1 / δ)) Learning.sumRewards A R a t ω - (Learning.pullCount A a t ω) * (x : ), id x ν a} ENNReal.ofReal ((n - 1) * δ)
Bandits.prob_sumRewards_sub_pullCount_mul_ge_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {n : } {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] {σ2 : NNReal} (hσ2 : 0 < σ2) (ha : ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {δ : } ( : 0 < δ) : P {ω | t < n, Learning.pullCount A a t ω 0 (2 * (Learning.pullCount A a t ω) * σ2 * Real.log (1 / δ)) Learning.sumRewards A R a t ω - (Learning.pullCount A a t ω) * (x : ), id x ν a} ENNReal.ofReal ((n - 1) * δ)

Code

lemma prob_sumRewards_sub_pullCount_mul_ge_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
    {σ2 : ℝ≥0} (hσ2 : 0 < σ2) (ha : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) {δ : ℝ} (hδ : 0 < δ) :
    P {ω | ∃ t < n, pullCount A a t ω ≠ 0 ∧ √(2 * pullCount A a t ω * σ2 * Real.log (1 / δ)) ≤
      sumRewards A R a t ω - pullCount A a t ω * (ν a)[id]} ≤ ENNReal.ofReal ((n - 1) * δ)
Type uses (5)
Body uses (4)
Used by (1)

Actions: Source · Open Issue

Proof
let B (m : ℕ) := {x : ℝ | √(2 * m * σ2 * Real.log (1 / δ)) ≤ x - m * (ν a)[id]}
  calc
    _ ≤ P (⋃ m ∈ Icc 1 (n - 1), {ω | ∃ t, t < n ∧ pullCount A a t ω = m ∧
            sumRewards A R a t ω ∈ B m}) := by
        apply measure_mono
        intro ω ⟨t, ht, hp, hb⟩
        have hm : pullCount A a t ω ∈ Icc 1 (n - 1) := mem_Icc.mpr ⟨Nat.one_le_iff_ne_zero.mpr hp,
          (pullCount_le a t ω).trans (Nat.le_sub_one_of_lt ht)⟩
        exact Set.mem_biUnion hm ⟨t, ht, rfl, hb⟩
    _ ≤ ∑ m ∈ Icc 1 (n - 1), P {ω | ∃ t, t < n ∧ pullCount A a t ω = m ∧
          sumRewards A R a t ω ∈ B m} :=
        measure_biUnion_finset_le _ _
    _ ≤ ∑ m ∈ Icc 1 (n - 1), P {ω | ∃ t, pullCount A a t ω = m ∧ sumRewards A R a t ω ∈ B m} :=
        sum_le_sum (fun _ _ ↦ measure_mono (fun _ ⟨t, _, hps⟩ ↦ ⟨t, hps⟩))
    _ ≤ ∑ m ∈ Icc 1 (n - 1), streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B m} := by
        apply sum_le_sum
        exact (fun m _ ↦ prob_exists_pullCount_eq_and_sumRewards_mem_le h a m (by measurability))
    _ ≤ ∑ m ∈ Icc 1 (n - 1), ENNReal.ofReal δ := by
      apply sum_le_sum
      intro m hm
      exact le_of_eq_of_le (by simp [B])
        (StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF' hσ2 ha hδ (mem_Icc.mp hm).1)
    _ = ENNReal.ofReal ((n - 1) * δ) := by
      by_cases hn : n = 0
      · simp [hn, hδ.le]
      · rw [sum_const, Nat.card_Icc, add_tsub_cancel_right, ← ENNReal.ofReal_nsmul, nsmul_eq_mul,
          Nat.cast_sub (Nat.one_le_iff_ne_zero.mpr hn)]
        ring_nf

prob_sumRewards_sub_pullCount_mul_le_le🔗

LemmaBandits.prob_sumRewards_sub_pullCount_mul_le_le

No docstring.

🔗theorem
Bandits.prob_sumRewards_sub_pullCount_mul_le_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {n : } {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] {σ2 : NNReal} (hσ2 : 0 < σ2) (ha : ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {δ : } ( : 0 < δ) : P {ω | t < n, Learning.pullCount A a t ω 0 Learning.sumRewards A R a t ω - (Learning.pullCount A a t ω) * (x : ), id x ν a -(2 * (Learning.pullCount A a t ω) * σ2 * Real.log (1 / δ))} ENNReal.ofReal ((n - 1) * δ)
Bandits.prob_sumRewards_sub_pullCount_mul_le_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {n : } {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] {σ2 : NNReal} (hσ2 : 0 < σ2) (ha : ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {δ : } ( : 0 < δ) : P {ω | t < n, Learning.pullCount A a t ω 0 Learning.sumRewards A R a t ω - (Learning.pullCount A a t ω) * (x : ), id x ν a -(2 * (Learning.pullCount A a t ω) * σ2 * Real.log (1 / δ))} ENNReal.ofReal ((n - 1) * δ)

Code

lemma prob_sumRewards_sub_pullCount_mul_le_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
    {σ2 : ℝ≥0} (hσ2 : 0 < σ2) (ha : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) {δ : ℝ} (hδ : 0 < δ) :
    P {ω | ∃ t < n, pullCount A a t ω ≠ 0 ∧
      sumRewards A R a t ω - pullCount A a t ω * (ν a)[id] ≤
        -√(2 * pullCount A a t ω * σ2 * Real.log (1 / δ))} ≤ ENNReal.ofReal ((n - 1) * δ)
Type uses (5)
Body uses (4)
Used by (1)

Actions: Source · Open Issue

Proof
let B (m : ℕ) := {x : ℝ | x - m * (ν a)[id] ≤ -√(2 * m * σ2 * Real.log (1 / δ))}
  calc
    _ ≤ P (⋃ m ∈ Icc 1 (n - 1), {ω | ∃ t, t < n ∧ pullCount A a t ω = m ∧
            sumRewards A R a t ω ∈ B m}) := by
        apply measure_mono
        intro ω ⟨t, ht, hp, hb⟩
        have hm : pullCount A a t ω ∈ Icc 1 (n - 1) := mem_Icc.mpr ⟨Nat.one_le_iff_ne_zero.mpr hp,
          (pullCount_le a t ω).trans (Nat.le_sub_one_of_lt ht)⟩
        exact Set.mem_biUnion hm ⟨t, ht, rfl, hb⟩
    _ ≤ ∑ m ∈ Icc 1 (n - 1), P {ω | ∃ t, t < n ∧ pullCount A a t ω = m ∧
          sumRewards A R a t ω ∈ B m} :=
        measure_biUnion_finset_le _ _
    _ ≤ ∑ m ∈ Icc 1 (n - 1), P {ω | ∃ t, pullCount A a t ω = m ∧ sumRewards A R a t ω ∈ B m} :=
        sum_le_sum (fun _ _ ↦ measure_mono (fun _ ⟨t, _, hps⟩ ↦ ⟨t, hps⟩))
    _ ≤ ∑ m ∈ Icc 1 (n - 1), streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B m} := by
        apply sum_le_sum
        exact (fun m _ ↦ prob_exists_pullCount_eq_and_sumRewards_mem_le h a m (by measurability))
    _ ≤ ∑ m ∈ Icc 1 (n - 1), ENNReal.ofReal δ := by
      apply sum_le_sum
      intro m hm
      exact le_of_eq_of_le (by simp [B])
        (StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF' hσ2 ha hδ (mem_Icc.mp hm).1)
    _ = ENNReal.ofReal ((n - 1) * δ) := by
      by_cases hn : n = 0
      · simp [hn, hδ.le]
      · rw [sum_const, Nat.card_Icc, add_tsub_cancel_right, ← ENNReal.ofReal_nsmul, nsmul_eq_mul,
          Nat.cast_sub (Nat.one_le_iff_ne_zero.mpr hn)]
        ring_nf

prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype🔗

LemmaBandits.prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype

No docstring.

🔗theorem
Bandits.prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {n : } [Nonempty 𝓐] [Fintype 𝓐] [MeasurableSingletonClass 𝓐] {σ2 : NNReal} (hσ2 : 0 < σ2) ( : (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {δ : } ( : 0 < δ) : P {ω | a, t < n, Learning.pullCount A a t ω 0 (2 * (Learning.pullCount A a t ω) * σ2 * Real.log (1 / δ)) Learning.sumRewards A R a t ω - (Learning.pullCount A a t ω) * (x : ), id x ν a} ENNReal.ofReal ((Fintype.card 𝓐) * (n - 1) * δ)
Bandits.prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 } {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {A : Ω 𝓐} {R : Ω } {n : } [Nonempty 𝓐] [Fintype 𝓐] [MeasurableSingletonClass 𝓐] {σ2 : NNReal} (hσ2 : 0 < σ2) ( : (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {δ : } ( : 0 < δ) : P {ω | a, t < n, Learning.pullCount A a t ω 0 (2 * (Learning.pullCount A a t ω) * σ2 * Real.log (1 / δ)) Learning.sumRewards A R a t ω - (Learning.pullCount A a t ω) * (x : ), id x ν a} ENNReal.ofReal ((Fintype.card 𝓐) * (n - 1) * δ)

Code

lemma prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype [Fintype 𝓐] [MeasurableSingletonClass 𝓐]
    {σ2 : ℝ≥0} (hσ2 : 0 < σ2) (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) {δ : ℝ} (hδ : 0 < δ) :
    P {ω | ∃ a, ∃ t < n, pullCount A a t ω ≠ 0 ∧
        √(2 * pullCount A a t ω * σ2 * Real.log (1 / δ)) ≤
          sumRewards A R a t ω - pullCount A a t ω * (ν a)[id]} ≤
            ENNReal.ofReal (Fintype.card 𝓐 * (n - 1) * δ)
Type uses (5)
Body uses (1)
Used by (1)

Actions: Source · Open Issue

Proof
calc
    _ ≤ ∑ a, P {ω | ∃ t < n, pullCount A a t ω ≠ 0 ∧
                √(2 * pullCount A a t ω * σ2 * Real.log (1 / δ)) ≤
                  sumRewards A R a t ω - pullCount A a t ω * (ν a)[id]} := by
        rw [Set.setOf_exists]
        exact measure_iUnion_fintype_le _ _
    _ ≤ ∑ a, ENNReal.ofReal ((n - 1) * δ) :=
        sum_le_sum fun a _ ↦ prob_sumRewards_sub_pullCount_mul_ge_le hσ2 (hν a) h hδ
    _ = ENNReal.ofReal (Fintype.card 𝓐 * (n - 1) * δ) := by
      rw [sum_const, Finset.card_univ, ← ENNReal.ofReal_nsmul, nsmul_eq_mul]
      ring_nf

probReal_sum_le_sum_streamMeasure🔗

LemmaBandits.probReal_sum_le_sum_streamMeasure

No docstring.

🔗theorem
Bandits.probReal_sum_le_sum_streamMeasure.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] [Nonempty 𝓐] [Fintype 𝓐] {c : NNReal} ( : (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) c (ν a)) (a : 𝓐) (m : ) : MeasureTheory.Measure.real (streamMeasure ν) {ω | s Finset.range m, ω s (bestArm ν) s Finset.range m, ω s a} Real.exp (-m * gap ν a ^ 2 / (4 * c))
Bandits.probReal_sum_le_sum_streamMeasure.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] [Nonempty 𝓐] [Fintype 𝓐] {c : NNReal} ( : (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) c (ν a)) (a : 𝓐) (m : ) : MeasureTheory.Measure.real (streamMeasure ν) {ω | s Finset.range m, ω s (bestArm ν) s Finset.range m, ω s a} Real.exp (-m * gap ν a ^ 2 / (4 * c))

Code

lemma probReal_sum_le_sum_streamMeasure [Fintype 𝓐] {c : ℝ≥0}
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) c (ν a)) (a : 𝓐) (m : ℕ) :
    (streamMeasure ν).real
        {ω | ∑ s ∈ range m, ω s (bestArm ν) ≤ ∑ s ∈ range m, ω s a} ≤
      Real.exp (-↑m * gap ν a ^ 2 / (4 * c))
Type uses (3)
Body uses (9)
Used by (1)

Actions: Source · Open Issue

Proof
by
  by_cases ha : a = bestArm ν
  · simp [ha]
  refine (HasSubgaussianMGF.measure_sum_le_sum_le' (cX := fun _ ↦ c) (cY := fun _ ↦ c)
    ?_ ?_ ?_ ?_ ?_ ?_).trans_eq ?_
  · exact iIndepFun_eval_streamMeasure'' ν (bestArm ν)
  · exact iIndepFun_eval_streamMeasure'' ν a
  · intro i him
    simp_rw [integral_eval_streamMeasure]
    refine (hν (bestArm ν)).congr_identDistrib ?_
    exact (identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _
  · intro i him
    simp_rw [integral_eval_streamMeasure]
    refine (hν a).congr_identDistrib ?_
    exact (identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _
  · exact indepFun_eval_streamMeasure' ν (Ne.symm ha)
  · gcongr 1 with i him
    simp_rw [integral_eval_streamMeasure]
    exact le_bestArm a
  · congr 1
    simp_rw [integral_eval_streamMeasure]
    simp only [id_eq, sum_const, card_range, nsmul_eq_mul, NNReal.coe_mul, NNReal.coe_natCast,
      gap_eq_bestArm_sub, neg_mul]
    field_simp
    ring

prob_sum_le_sqrt_log🔗

LemmaBandits.prob_sum_le_sqrt_log

No docstring.

🔗theorem
Bandits.prob_sum_le_sqrt_log.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {n : } {σ2 : NNReal} ( : (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) {c : } (hc : 0 c) (a : 𝓐) (k : ) (hk : k 0) : (streamMeasure ν) {ω | s Finset.range k, (ω s a - (x : ), id x ν a) -(2 * c * k * σ2 * Real.log (n + 1))} 1 / (n + 1) ^ c
Bandits.prob_sum_le_sqrt_log.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {n : } {σ2 : NNReal} ( : (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) {c : } (hc : 0 c) (a : 𝓐) (k : ) (hk : k 0) : (streamMeasure ν) {ω | s Finset.range k, (ω s a - (x : ), id x ν a) -(2 * c * k * σ2 * Real.log (n + 1))} 1 / (n + 1) ^ c

Code

lemma prob_sum_le_sqrt_log {σ2 : ℝ≥0}
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) {c : ℝ} (hc : 0 ≤ c) (a : 𝓐) (k : ℕ) (hk : k ≠ 0) :
    streamMeasure ν
        {ω | (∑ s ∈ range k, (ω s a - (ν a)[id])) ≤ - √(2 * c * k * σ2 * Real.log (n + 1))} ≤
      1 / (n + 1) ^ c
Type uses (1)
Body uses (4)
Used by (1)

Actions: Source · Open Issue

Proof
by
  calc
    streamMeasure ν
      {ω | (∑ s ∈ range k, (ω s a - (ν a)[id])) ≤ - √(2 * c * k * σ2 * Real.log (n + 1))}
  _ ≤ ENNReal.ofReal (Real.exp (-(√(2 * c * k * σ2 * Real.log (n + 1))) ^ 2 / (2 * k * σ2))) := by
    rw [← ofReal_measureReal]
    gcongr
    refine (HasSubgaussianMGF.measure_sum_range_le_le_of_iIndepFun (c := σ2) ?_ ?_ (by positivity))
    · exact (iIndepFun_eval_streamMeasure'' ν a).comp (fun i ω ↦ ω - (ν a)[id])
        (fun _ ↦ by fun_prop)
    · intro i him
      refine (hν a).congr_identDistrib ?_
      exact (identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _
  _ = 1 / (n + 1) ^ c := by
    rw [Real.sq_sqrt]
    swap; · exact mul_nonneg (by positivity) (Real.log_nonneg (by simp))
    field_simp
    rw [← Real.log_rpow (by positivity), ← Real.log_inv,
      Real.exp_log (by positivity), one_div, ENNReal.ofReal_inv_of_pos (by positivity),
      ← ENNReal.ofReal_rpow_of_nonneg (by positivity) (by positivity)]
    norm_cast

prob_sum_ge_sqrt_log🔗

LemmaBandits.prob_sum_ge_sqrt_log

No docstring.

🔗theorem
Bandits.prob_sum_ge_sqrt_log.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {n : } {σ2 : NNReal} ( : (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) {c : } (hc : 0 c) (a : 𝓐) (k : ) (hk : k 0) : (streamMeasure ν) {ω | (2 * c * k * σ2 * Real.log (n + 1)) s Finset.range k, (ω s a - (x : ), id x ν a)} 1 / (n + 1) ^ c
Bandits.prob_sum_ge_sqrt_log.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {n : } {σ2 : NNReal} ( : (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) {c : } (hc : 0 c) (a : 𝓐) (k : ) (hk : k 0) : (streamMeasure ν) {ω | (2 * c * k * σ2 * Real.log (n + 1)) s Finset.range k, (ω s a - (x : ), id x ν a)} 1 / (n + 1) ^ c

Code

lemma prob_sum_ge_sqrt_log {σ2 : ℝ≥0}
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) {c : ℝ} (hc : 0 ≤ c) (a : 𝓐) (k : ℕ) (hk : k ≠ 0) :
    streamMeasure ν
        {ω | √(2 * c * k * σ2 * Real.log (n + 1)) ≤ (∑ s ∈ range k, (ω s a - (ν a)[id]))} ≤
      1 / (n + 1) ^ c
Type uses (1)
Body uses (3)
Used by (1)

Actions: Source · Open Issue

Proof
by
  calc
    streamMeasure ν
      {ω | √(2 * c * k * σ2 * Real.log (n + 1)) ≤ (∑ s ∈ range k, (ω s a - (ν a)[id]))}
  _ ≤ ENNReal.ofReal (Real.exp (-(√(2 * c * k * σ2 * Real.log (n + 1))) ^ 2 / (2 * k * σ2))) := by
    rw [← ofReal_measureReal]
    gcongr
    refine (HasSubgaussianMGF.measure_sum_range_ge_le_of_iIndepFun (c := σ2) ?_ ?_ (by positivity))
    · exact (iIndepFun_eval_streamMeasure'' ν a).comp (fun i ω ↦ ω - (ν a)[id])
        (fun _ ↦ by fun_prop)
    · intro i him
      refine (hν a).congr_identDistrib ?_
      exact (identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _
  _ = 1 / (n + 1) ^ c := by
    rw [Real.sq_sqrt]
    swap; · exact mul_nonneg (by positivity) (Real.log_nonneg (by simp))
    field_simp
    rw [← Real.log_rpow (by positivity), ← Real.log_inv,
      Real.exp_log (by positivity), one_div, ENNReal.ofReal_inv_of_pos (by positivity),
      ← ENNReal.ofReal_rpow_of_nonneg (by positivity) (by positivity)]
    norm_cast

prob_avg_add_sqrt_log_le🔗

LemmaBandits.prob_avg_add_sqrt_log_le

No docstring.

🔗theorem
Bandits.prob_avg_add_sqrt_log_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {σ2 : NNReal} {c : } ( : (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 c) (a : 𝓐) (n k : ) (hk : k 0) : (streamMeasure ν) {ω | (∑ m Finset.range k, ω m a) / k + (2 * c * σ2 * Real.log (n + 1) / k) (x : ), id x ν a} 1 / (n + 1) ^ c
Bandits.prob_avg_add_sqrt_log_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {σ2 : NNReal} {c : } ( : (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 c) (a : 𝓐) (n k : ) (hk : k 0) : (streamMeasure ν) {ω | (∑ m Finset.range k, ω m a) / k + (2 * c * σ2 * Real.log (n + 1) / k) (x : ), id x ν a} 1 / (n + 1) ^ c

Code

lemma prob_avg_add_sqrt_log_le {σ2 : ℝ≥0} {c : ℝ}
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) (hσ2 : σ2 ≠ 0)
    (hc : 0 ≤ c) (a : 𝓐) (n k : ℕ) (hk : k ≠ 0) :
    streamMeasure ν {ω | (∑ m ∈ range k, ω m a) / k + √(2 * c * σ2 * log (n + 1) / k) ≤ (ν a)[id]} ≤
      1 / (n + 1) ^ c
Type uses (1)
Body uses (1)
Used by (1)

Actions: Source · Open Issue

Proof
by
  have h_log_nonneg : 0 ≤ log (n + 1) := log_nonneg (by simp)
  calc
    streamMeasure ν {ω | (∑ m ∈ range k, ω m a) / k + √(2 * c * σ2 * log (n + 1) / k) ≤ (ν a)[id]}
  _ = streamMeasure ν
      {ω | (∑ s ∈ range k, (ω s a - (ν a)[id])) / k ≤ - √(2 * c * σ2 * log (n + 1) / k)} := by
    congr with ω
    field_simp
    rw [Finset.sum_sub_distrib]
    simp
    grind
  _ = streamMeasure ν
      {ω | (∑ s ∈ range k, (ω s a - (ν a)[id])) ≤ - √(2 * c * k * σ2 * log (n + 1))} := by
    congr with ω
    field_simp
    congr! 2
    rw [sqrt_div (by positivity), ← mul_div_assoc, mul_comm, mul_div_assoc, div_sqrt,
      mul_assoc (k : ℝ), mul_assoc (k : ℝ), mul_assoc (k : ℝ),
      sqrt_mul (x := (k : ℝ)) (by positivity), mul_comm]
  _ ≤ 1 / (n + 1) ^ c := prob_sum_le_sqrt_log hν hσ2 hc a k hk

prob_avg_sub_sqrt_log_ge🔗

LemmaBandits.prob_avg_sub_sqrt_log_ge

No docstring.

🔗theorem
Bandits.prob_avg_sub_sqrt_log_ge.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {σ2 : NNReal} {c : } ( : (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 c) (a : 𝓐) (n k : ) (hk : k 0) : (streamMeasure ν) {ω | (x : ), id x ν a (∑ m Finset.range k, ω m a) / k - (2 * c * σ2 * Real.log (n + 1) / k)} 1 / (n + 1) ^ c
Bandits.prob_avg_sub_sqrt_log_ge.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 } [ProbabilityTheory.IsMarkovKernel ν] {σ2 : NNReal} {c : } ( : (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 c) (a : 𝓐) (n k : ) (hk : k 0) : (streamMeasure ν) {ω | (x : ), id x ν a (∑ m Finset.range k, ω m a) / k - (2 * c * σ2 * Real.log (n + 1) / k)} 1 / (n + 1) ^ c

Code

lemma prob_avg_sub_sqrt_log_ge {σ2 : ℝ≥0} {c : ℝ}
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) (hσ2 : σ2 ≠ 0)
    (hc : 0 ≤ c) (a : 𝓐) (n k : ℕ) (hk : k ≠ 0) :
    streamMeasure ν
        {ω | (ν a)[id] ≤ (∑ m ∈ range k, ω m a) / k - √(2 * c * σ2 *log (n + 1) / k)} ≤
      1 / (n + 1) ^ c
Type uses (1)
Body uses (1)
Used by (1)

Actions: Source · Open Issue

Proof
by
  have h_log_nonneg : 0 ≤ log (n + 1) := log_nonneg (by simp)
  calc
    streamMeasure ν {ω | (ν a)[id] ≤ (∑ m ∈ range k, ω m a) / k - √(2 * c * σ2 * log (n + 1) / k)}
  _ = streamMeasure ν
      {ω | √(2 * c * σ2 * log (n + 1) / k) ≤ (∑ s ∈ range k, (ω s a - (ν a)[id])) / k} := by
    congr with ω
    field_simp
    rw [Finset.sum_sub_distrib]
    simp
    grind
  _ = streamMeasure ν
      {ω | √(2 * c * k * σ2 * log (n + 1)) ≤ (∑ s ∈ range k, (ω s a - (ν a)[id]))} := by
    congr with ω
    field_simp
    congr! 1
    rw [sqrt_div (by positivity), ← mul_div_assoc, mul_comm, mul_div_assoc, div_sqrt,
      mul_comm _ (k : ℝ), sqrt_mul (x := (k : ℝ)) (by positivity), mul_comm]
  _ ≤ 1 / (n + 1) ^ c := prob_sum_ge_sqrt_log hν hσ2 hc a k hk

prob_empMean_sub_actionMean_ge_le🔗

LemmaLearning.IsBayesAlgEnvSeq.prob_empMean_sub_actionMean_ge_le

No docstring.

🔗theorem
Learning.IsBayesAlgEnvSeq.prob_empMean_sub_actionMean_ge_le.{u_1, u_2} {𝓔 : Type u_1} {Ω : Type u_2} [MeasurableSpace 𝓔] [MeasurableSpace Ω] {K : } [Nonempty (Fin K)] {Q : MeasureTheory.Measure 𝓔} {κ : ProbabilityTheory.Kernel (𝓔 × Fin K) } [ProbabilityTheory.IsMarkovKernel κ] {alg : Algorithm (Fin K) } {E : Ω 𝓔} {A : Ω Fin K} {R : Ω } {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] (h : IsBayesAlgEnvSeq Q κ alg E A R P) {σ2 : NNReal} (hσ2 : 0 < σ2) (hs : (e : 𝓔) (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x κ (e, a)) σ2 (κ (e, a))) {δ : } ( : 0 < δ) (n : ) : P {ω | t < n, a, pullCount A a t ω 0 (2 * σ2 * Real.log (1 / δ) / (pullCount A a t ω)) empMean A R a t ω - actionMean κ E a ω} ENNReal.ofReal (K * (n - 1) * δ)
Learning.IsBayesAlgEnvSeq.prob_empMean_sub_actionMean_ge_le.{u_1, u_2} {𝓔 : Type u_1} {Ω : Type u_2} [MeasurableSpace 𝓔] [MeasurableSpace Ω] {K : } [Nonempty (Fin K)] {Q : MeasureTheory.Measure 𝓔} {κ : ProbabilityTheory.Kernel (𝓔 × Fin K) } [ProbabilityTheory.IsMarkovKernel κ] {alg : Algorithm (Fin K) } {E : Ω 𝓔} {A : Ω Fin K} {R : Ω } {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] (h : IsBayesAlgEnvSeq Q κ alg E A R P) {σ2 : NNReal} (hσ2 : 0 < σ2) (hs : (e : 𝓔) (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x κ (e, a)) σ2 (κ (e, a))) {δ : } ( : 0 < δ) (n : ) : P {ω | t < n, a, pullCount A a t ω 0 (2 * σ2 * Real.log (1 / δ) / (pullCount A a t ω)) empMean A R a t ω - actionMean κ E a ω} ENNReal.ofReal (K * (n - 1) * δ)

Code

lemma prob_empMean_sub_actionMean_ge_le (h : IsBayesAlgEnvSeq Q κ alg E A R P) {σ2 : ℝ≥0}
    (hσ2 : 0 < σ2) (hs : ∀ e a, HasSubgaussianMGF (fun x ↦ x - (κ (e, a))[id]) σ2 (κ (e, a)))
    {δ : ℝ} (hδ : 0 < δ) (n : ℕ) :
    P {ω | ∃ t < n, ∃ a, pullCount A a t ω ≠ 0 ∧
      √(2 * σ2 * Real.log (1 / δ) / pullCount A a t ω) ≤ empMean A R a t ω - actionMean κ E a ω}
      ≤ ENNReal.ofReal (K * (n - 1) * δ)
Type uses (5)
Body uses (14)
Used by (1)

Actions: Source · Open Issue

Proof
by
  have := h.measurable_param
  have := h.measurable_action
  have := h.measurable_feedback
  let S := {(e, τ) | ∃ a, ∃ t < n, pullCount IT.action a t τ ≠ 0 ∧
    √(2 * pullCount IT.action a t τ * σ2 * Real.log (1 / δ)) ≤
      sumRewards IT.action IT.feedback a t τ - pullCount IT.action a t τ * actionMean κ id a e}
  calc
    _ ≤ (P.map (fun ω ↦ (E ω, trajectory A R ω))) S := by
        rw [Measure.map_apply (by fun_prop) (by measurability)]
        apply measure_mono
        intro ω ⟨t, ht, a, hpc, hle⟩
        rw [empMean] at hle
        exact ⟨a, t, ht, hpc, sqrt_two_mul_le_sub hpc hle⟩
    _ = (P.map E ⊗ₘ condDistrib (trajectory A R) E P) S := by
        rw [← compProd_map_condDistrib (by fun_prop)]
    _ = ∫⁻ e, condDistrib (trajectory A R) E P e (Prod.mk e ⁻¹' S) ∂(P.map E) :=
        Measure.compProd_apply (by measurability)
    _ ≤ ∫⁻ e, ENNReal.ofReal (Fintype.card (Fin K) * (n - 1) * δ) ∂(P.map E) := by
        apply lintegral_mono_ae
        rw [h.hasLaw_env.map_eq]
        filter_upwards [h.ae_IsAlgEnvSeq] with e he
        exact Bandits.prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype hσ2 (hs e) he hδ
    _ = ENNReal.ofReal (K * (n - 1) * δ) := by
      simp [Measure.map_apply h.measurable_param]

prob_empMean_bestAction_sub_actionMean_le_le🔗

LemmaLearning.IsBayesAlgEnvSeq.prob_empMean_bestAction_sub_actionMean_le_le

No docstring.

🔗theorem
Learning.IsBayesAlgEnvSeq.prob_empMean_bestAction_sub_actionMean_le_le.{u_1, u_2} {𝓔 : Type u_1} {Ω : Type u_2} [MeasurableSpace 𝓔] [MeasurableSpace Ω] {K : } [Nonempty (Fin K)] {Q : MeasureTheory.Measure 𝓔} {κ : ProbabilityTheory.Kernel (𝓔 × Fin K) } [ProbabilityTheory.IsMarkovKernel κ] {alg : Algorithm (Fin K) } {E : Ω 𝓔} {A : Ω Fin K} {R : Ω } {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] (h : IsBayesAlgEnvSeq Q κ alg E A R P) {σ2 : NNReal} (hσ2 : 0 < σ2) (hs : (e : 𝓔) (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x κ (e, a)) σ2 (κ (e, a))) {δ : } ( : 0 < δ) (n : ) : P {ω | t < n, pullCount A (bestAction κ E ω) t ω 0 empMean A R (bestAction κ E ω) t ω - actionMean κ E (bestAction κ E ω) ω -(2 * σ2 * Real.log (1 / δ) / (pullCount A (bestAction κ E ω) t ω))} ENNReal.ofReal ((n - 1) * δ)
Learning.IsBayesAlgEnvSeq.prob_empMean_bestAction_sub_actionMean_le_le.{u_1, u_2} {𝓔 : Type u_1} {Ω : Type u_2} [MeasurableSpace 𝓔] [MeasurableSpace Ω] {K : } [Nonempty (Fin K)] {Q : MeasureTheory.Measure 𝓔} {κ : ProbabilityTheory.Kernel (𝓔 × Fin K) } [ProbabilityTheory.IsMarkovKernel κ] {alg : Algorithm (Fin K) } {E : Ω 𝓔} {A : Ω Fin K} {R : Ω } {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] (h : IsBayesAlgEnvSeq Q κ alg E A R P) {σ2 : NNReal} (hσ2 : 0 < σ2) (hs : (e : 𝓔) (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x κ (e, a)) σ2 (κ (e, a))) {δ : } ( : 0 < δ) (n : ) : P {ω | t < n, pullCount A (bestAction κ E ω) t ω 0 empMean A R (bestAction κ E ω) t ω - actionMean κ E (bestAction κ E ω) ω -(2 * σ2 * Real.log (1 / δ) / (pullCount A (bestAction κ E ω) t ω))} ENNReal.ofReal ((n - 1) * δ)

Code

lemma prob_empMean_bestAction_sub_actionMean_le_le (h : IsBayesAlgEnvSeq Q κ alg E A R P)
    {σ2 : ℝ≥0} (hσ2 : 0 < σ2)
    (hs : ∀ e a, HasSubgaussianMGF (fun x ↦ x - (κ (e, a))[id]) σ2 (κ (e, a)))
    {δ : ℝ} (hδ : 0 < δ) (n : ℕ) :
    P {ω | ∃ t < n, pullCount A (bestAction κ E ω) t ω ≠ 0 ∧
        empMean A R (bestAction κ E ω) t ω - actionMean κ E (bestAction κ E ω) ω ≤
          -√(2 * σ2 * Real.log (1 / δ) / (pullCount A (bestAction κ E ω) t ω))}
      ≤ ENNReal.ofReal ((n - 1) * δ)
Type uses (6)
Body uses (15)
Used by (1)

Actions: Source · Open Issue

Proof
by
  have := h.measurable_param
  have := h.measurable_action
  have := h.measurable_feedback
  let S := {(e, τ) | ∃ t < n, pullCount IT.action (bestAction κ id e) t τ ≠ 0 ∧
    sumRewards IT.action IT.feedback (bestAction κ id e) t τ -
        pullCount IT.action (bestAction κ id e) t τ * actionMean κ id (bestAction κ id e) e ≤
          -√(2 * pullCount IT.action (bestAction κ id e) t τ * σ2 * Real.log (1 / δ))}
  calc
    _ ≤ (P.map (fun ω ↦ (E ω, trajectory A R ω))) S := by
        rw [Measure.map_apply (by fun_prop) (by measurability)]
        apply measure_mono
        intro ω ⟨t, ht, hpc, hle⟩
        rw [empMean] at hle
        exact ⟨t, ht, hpc, sub_le_neg_sqrt_two_mul hpc hle⟩
    _ = (P.map E ⊗ₘ condDistrib (trajectory A R) E P) S := by
        rw [← compProd_map_condDistrib (by fun_prop)]
    _ = ∫⁻ e, condDistrib (trajectory A R) E P e (Prod.mk e ⁻¹' S) ∂(P.map E) :=
        Measure.compProd_apply (by measurability)
    _ ≤ ∫⁻ e, ENNReal.ofReal ((n - 1) * δ) ∂(P.map E) := by
        apply lintegral_mono_ae
        rw [h.hasLaw_env.map_eq]
        filter_upwards [h.ae_IsAlgEnvSeq] with e he
        exact Bandits.prob_sumRewards_sub_pullCount_mul_le_le (ν := κ.sectR e) hσ2 (hs e _) he
          hδ
    _ = ENNReal.ofReal ((n - 1) * δ) := by
      simp [Measure.map_apply h.measurable_param]
  1. Bandits.ArrayModel.identDistrib_sum_range_snd
  2. Bandits.ArrayModel.prob_pullCount_prod_sumRewards_mem_le
  3. Bandits.ArrayModel.prob_pullCount_mem_and_sumRewards_mem_le
  4. Bandits.ArrayModel.prob_exists_pullCount_eq_and_sumRewards_mem_le
  5. Bandits.ArrayModel.prob_sumRewards_le_sumRewards_le
  6. Bandits.ArrayModel.probReal_sumRewards_le_sumRewards_le
  7. Bandits.sumRewards_eq_comp
  8. Bandits.pullCount_eq_comp
  9. Learning.IsAlgEnvSeq.law_sumRewards_unique
  10. Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique'
  11. Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique
  12. Learning.IsAlgEnvSeq.identDistrib_pullCount_sumRewards
  13. Bandits.prob_pullCount_prod_sumRewards_mem_le
  14. Bandits.prob_pullCount_mem_and_sumRewards_mem_le
  15. Bandits.prob_sumRewards_mem_le
  16. Bandits.prob_pullCount_eq_and_sumRewards_mem_le
  17. Bandits.prob_exists_pullCount_eq_and_sumRewards_mem_le
  18. Bandits.probReal_sumRewards_le_sumRewards_le
  19. Bandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF
  20. Bandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF
  21. Bandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF'
  22. Bandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF'
  23. Bandits.prob_sumRewards_sub_pullCount_mul_ge_le
  24. Bandits.prob_sumRewards_sub_pullCount_mul_le_le
  25. Bandits.prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype
  26. Bandits.probReal_sum_le_sum_streamMeasure
  27. Bandits.prob_sum_le_sqrt_log
  28. Bandits.prob_sum_ge_sqrt_log
  29. Bandits.prob_avg_add_sqrt_log_le
  30. Bandits.prob_avg_sub_sqrt_log_ge
  31. Learning.IsBayesAlgEnvSeq.prob_empMean_sub_actionMean_ge_le
  32. Learning.IsBayesAlgEnvSeq.prob_empMean_bestAction_sub_actionMean_le_le