2.2. Online.Bandit.SumRewards
Law of the sum of rewards
Module LeanMachineLearning.Online.Bandit.SumRewards contains 32 exposed declarations.
identDistrib_sum_range_snd🔗
Bandits.ArrayModel.identDistrib_sum_range_sndNo docstring.
Bandits.ArrayModel.identDistrib_sum_range_snd.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [Countable 𝓐] {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (k : ℕ) : ProbabilityTheory.IdentDistrib (fun ω => ∑ i ∈ Finset.range k, Prod.snd ω i a) (fun ω => ∑ i ∈ Finset.range k, ω i a) (arrayMeasure ν) (streamMeasure ν)Bandits.ArrayModel.identDistrib_sum_range_snd.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [Countable 𝓐] {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (k : ℕ) : ProbabilityTheory.IdentDistrib (fun ω => ∑ i ∈ Finset.range k, Prod.snd ω i a) (fun ω => ∑ i ∈ Finset.range k, ω i a) (arrayMeasure ν) (streamMeasure ν)
Code
lemma identDistrib_sum_range_snd (a : 𝓐) (k : ℕ) :
IdentDistrib (fun ω ↦ ∑ i ∈ range k, ω.2 i a) (fun ω ↦ ∑ i ∈ range k, ω i a)
𝔓 (streamMeasure ν) where
aemeasurable_fstType uses (4)
Actions: Source · Open Issue
Proof
by fun_prop
aemeasurable_snd := (measurable_sum _ fun i _ ↦ by fun_prop).aemeasurable
map_eq := by
rw [← Measure.snd_prod (μ := (Measure.infinitePi fun (_ : ℕ) ↦ (volume : Measure unitInterval)))
(ν := streamMeasure ν), Measure.snd, Measure.map_map (by fun_prop) (by fun_prop)]
rfl
prob_pullCount_prod_sumRewards_mem_le🔗
Bandits.ArrayModel.prob_pullCount_prod_sumRewards_mem_leNo docstring.
Bandits.ArrayModel.prob_pullCount_prod_sumRewards_mem_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (n : ℕ) {s : Set (ℕ × ℝ)} [DecidablePred fun x => x ∈ Prod.fst '' s] (hs : MeasurableSet s) : (arrayMeasure ν) {ω | (Learning.pullCount (action alg) a n ω, Learning.sumRewards (action alg) (reward alg) a n ω) ∈ s} ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ Prod.fst '' s, (streamMeasure ν) {ω | ∑ i ∈ Finset.range k, ω i a ∈ Prod.mk k ⁻¹' s}Bandits.ArrayModel.prob_pullCount_prod_sumRewards_mem_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (n : ℕ) {s : Set (ℕ × ℝ)} [DecidablePred fun x => x ∈ Prod.fst '' s] (hs : MeasurableSet s) : (arrayMeasure ν) {ω | (Learning.pullCount (action alg) a n ω, Learning.sumRewards (action alg) (reward alg) a n ω) ∈ s} ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ Prod.fst '' s, (streamMeasure ν) {ω | ∑ i ∈ Finset.range k, ω i a ∈ Prod.mk k ⁻¹' s}
Code
lemma prob_pullCount_prod_sumRewards_mem_le (a : 𝓐) (n : ℕ)
{s : Set (ℕ × ℝ)} [DecidablePred (· ∈ Prod.fst '' s)] (hs : MeasurableSet s) :
𝔓 {ω | (pullCount A a n ω, sumRewards A R a n ω) ∈ s} ≤
∑ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s}Type uses (9)
Body uses (3)
Actions: Source · Open Issue
Proof
by
simp_rw [sumRewards_eq]
calc 𝔓 ((fun ω ↦ (pullCount A a n ω, ∑ i ∈ range (pullCount A a n ω), ω.2 i a)) ⁻¹' s)
_ ≤ 𝔓 {ω | ∃ k ≤ n, (k, ∑ i ∈ range k, ω.2 i a) ∈ s} := by
refine measure_mono fun ω hω ↦ ?_
simp only [Set.mem_setOf_eq] at hω ⊢
exact ⟨pullCount A a n ω, pullCount_le _ _ _, hω⟩
_ = 𝔓 (⋃ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
{ω | (k, ∑ i ∈ range k, ω.2 i a) ∈ s}) := by congr 1; ext; simp; grind
_ ≤ ∑ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
𝔓 {ω | ∑ i ∈ range k, ω.2 i a ∈ Prod.mk k ⁻¹' s} := measure_biUnion_finset_le _ _
_ = ∑ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
(streamMeasure ν) {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s} := by
congr with k
have : (𝔓).map (fun ω ↦ ∑ i ∈ range k, ω.2 i a) =
(streamMeasure ν).map (fun ω ↦ ∑ i ∈ range k, ω i a) :=
(identDistrib_sum_range_snd a k).map_eq
rw [Measure.ext_iff] at this
specialize this (Prod.mk k ⁻¹' s) (hs.preimage (by fun_prop))
rwa [Measure.map_apply (by fun_prop) (hs.preimage (by fun_prop)),
Measure.map_apply (by fun_prop) (hs.preimage (by fun_prop))] at this
prob_pullCount_mem_and_sumRewards_mem_le🔗
Bandits.ArrayModel.prob_pullCount_mem_and_sumRewards_mem_leNo docstring.
Bandits.ArrayModel.prob_pullCount_mem_and_sumRewards_mem_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (n : ℕ) {s : Set ℕ} [DecidablePred fun x => x ∈ s] (hs : MeasurableSet s) {B : Set ℝ} (hB : MeasurableSet B) : (arrayMeasure ν) {ω | Learning.pullCount (action alg) a n ω ∈ s ∧ Learning.sumRewards (action alg) (reward alg) a n ω ∈ B} ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ s, (streamMeasure ν) {ω | ∑ i ∈ Finset.range k, ω i a ∈ B}Bandits.ArrayModel.prob_pullCount_mem_and_sumRewards_mem_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (n : ℕ) {s : Set ℕ} [DecidablePred fun x => x ∈ s] (hs : MeasurableSet s) {B : Set ℝ} (hB : MeasurableSet B) : (arrayMeasure ν) {ω | Learning.pullCount (action alg) a n ω ∈ s ∧ Learning.sumRewards (action alg) (reward alg) a n ω ∈ B} ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ s, (streamMeasure ν) {ω | ∑ i ∈ Finset.range k, ω i a ∈ B}
Code
lemma prob_pullCount_mem_and_sumRewards_mem_le (a : 𝓐) (n : ℕ)
{s : Set ℕ} [DecidablePred (· ∈ s)] (hs : MeasurableSet s) {B : Set ℝ} (hB : MeasurableSet B) :
𝔓 {ω | pullCount A a n ω ∈ s ∧ sumRewards A R a n ω ∈ B} ≤
∑ k ∈ (range (n + 1)).filter (· ∈ s),
streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ B}Type uses (9)
Body uses (1)
Actions: Source · Open Issue
Proof
by
classical
rcases Set.eq_empty_or_nonempty B with h_empty | h_nonempty
· simp [h_empty]
convert prob_pullCount_prod_sumRewards_mem_le a n (hs.prod hB) (ν := ν) (alg := alg)
with _ _ _ k hk
· rfl
· ext n
have : ∃ x, x ∈ B := h_nonempty
simp [this]
· ext x
simp only [Set.mem_image, Set.mem_prod, Prod.exists, exists_and_right, exists_and_left,
exists_eq_right, mem_filter, mem_range] at hk
simp [hk.2.1]
prob_exists_pullCount_eq_and_sumRewards_mem_le🔗
Bandits.ArrayModel.prob_exists_pullCount_eq_and_sumRewards_mem_leNo docstring.
Bandits.ArrayModel.prob_exists_pullCount_eq_and_sumRewards_mem_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (m : ℕ) {B : Set ℝ} (hB : MeasurableSet B) : (arrayMeasure ν) {ω | ∃ n, Learning.pullCount (action alg) a n ω = m ∧ Learning.sumRewards (action alg) (reward alg) a n ω ∈ B} ≤ (streamMeasure ν) {ω | ∑ i ∈ Finset.range m, ω i a ∈ B}Bandits.ArrayModel.prob_exists_pullCount_eq_and_sumRewards_mem_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (m : ℕ) {B : Set ℝ} (hB : MeasurableSet B) : (arrayMeasure ν) {ω | ∃ n, Learning.pullCount (action alg) a n ω = m ∧ Learning.sumRewards (action alg) (reward alg) a n ω ∈ B} ≤ (streamMeasure ν) {ω | ∑ i ∈ Finset.range m, ω i a ∈ B}
Code
lemma prob_exists_pullCount_eq_and_sumRewards_mem_le (a : 𝓐) (m : ℕ) {B : Set ℝ}
(hB : MeasurableSet B) : 𝔓 {ω | ∃ n, pullCount A a n ω = m ∧ sumRewards A R a n ω ∈ B} ≤
streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B}Type uses (9)
Body uses (2)
Used by (1)
Actions: Source · Open Issue
Proof
calc
_ ≤ 𝔓 {ω | ∑ i ∈ range m, ω.2 i a ∈ B} := by
apply measure_mono
intro ω ⟨n, hp, hn⟩
rwa [sumRewards_eq alg a n ω, hp] at hn
_ = streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B} :=
(identDistrib_sum_range_snd a m).measure_mem_eq hB
prob_sumRewards_le_sumRewards_le🔗
Bandits.ArrayModel.prob_sumRewards_le_sumRewards_leNo docstring.
Bandits.ArrayModel.prob_sumRewards_le_sumRewards_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Fintype 𝓐] (a : 𝓐) (n m₁ m₂ : ℕ) : (arrayMeasure ν) {ω | Learning.pullCount (action alg) (bestArm ν) n ω = m₁ ∧ Learning.pullCount (action alg) a n ω = m₂ ∧ Learning.sumRewards (action alg) (reward alg) (bestArm ν) n ω ≤ Learning.sumRewards (action alg) (reward alg) a n ω} ≤ (streamMeasure ν) {ω | ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤ ∑ i ∈ Finset.range m₂, ω i a}Bandits.ArrayModel.prob_sumRewards_le_sumRewards_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Fintype 𝓐] (a : 𝓐) (n m₁ m₂ : ℕ) : (arrayMeasure ν) {ω | Learning.pullCount (action alg) (bestArm ν) n ω = m₁ ∧ Learning.pullCount (action alg) a n ω = m₂ ∧ Learning.sumRewards (action alg) (reward alg) (bestArm ν) n ω ≤ Learning.sumRewards (action alg) (reward alg) a n ω} ≤ (streamMeasure ν) {ω | ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤ ∑ i ∈ Finset.range m₂, ω i a}
Code
lemma prob_sumRewards_le_sumRewards_le [Fintype 𝓐] (a : 𝓐) (n m₁ m₂ : ℕ) :
(𝔓) {ω | pullCount A (bestArm ν) n ω = m₁ ∧ pullCount A a n ω = m₂ ∧
sumRewards A R (bestArm ν) n ω ≤ sumRewards A R a n ω} ≤
streamMeasure ν
{ω | ∑ i ∈ range m₁, ω i (bestArm ν) ≤ ∑ i ∈ range m₂, ω i a}Type uses (10)
Used by (1)
Actions: Source · Open Issue
Proof
by
simp_rw [sumRewards_eq]
calc 𝔓 {ω | pullCount A (bestArm ν) n ω = m₁ ∧ pullCount A a n ω = m₂ ∧
∑ i ∈ range (pullCount A (bestArm ν) n ω), ω.2 i (bestArm ν) ≤
∑ i ∈ range (pullCount A a n ω), ω.2 i a}
_ ≤ 𝔓 ((fun ω ↦ (∑ i ∈ range m₁, ω.2 i (bestArm ν), ∑ i ∈ range m₂, ω.2 i a)) ⁻¹'
{p | p.1 ≤ p.2}) := by
refine measure_mono fun ω hω ↦ ?_
simp only [Set.preimage_setOf_eq, Set.mem_setOf_eq] at hω ⊢
grind
_ = streamMeasure ν
{ω | ∑ i ∈ range m₁, ω i (bestArm ν) ≤ ∑ i ∈ range m₂, ω i a} := by
rw [← Measure.snd_prod (μ := (Measure.infinitePi fun (_ : ℕ) ↦ (volume : Measure unitInterval)))
(ν := streamMeasure ν), Measure.snd, Measure.map_apply (by fun_prop)]
· rfl
simp only [measurableSet_setOf]
fun_prop
probReal_sumRewards_le_sumRewards_le🔗
Bandits.ArrayModel.probReal_sumRewards_le_sumRewards_leNo docstring.
Bandits.ArrayModel.probReal_sumRewards_le_sumRewards_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Fintype 𝓐] (a : 𝓐) (n m₁ m₂ : ℕ) : MeasureTheory.Measure.real (arrayMeasure ν) {ω | Learning.pullCount (action alg) (bestArm ν) n ω = m₁ ∧ Learning.pullCount (action alg) a n ω = m₂ ∧ Learning.sumRewards (action alg) (reward alg) (bestArm ν) n ω ≤ Learning.sumRewards (action alg) (reward alg) a n ω} ≤ MeasureTheory.Measure.real (streamMeasure ν) {ω | ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤ ∑ i ∈ Finset.range m₂, ω i a}Bandits.ArrayModel.probReal_sumRewards_le_sumRewards_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐] [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Fintype 𝓐] (a : 𝓐) (n m₁ m₂ : ℕ) : MeasureTheory.Measure.real (arrayMeasure ν) {ω | Learning.pullCount (action alg) (bestArm ν) n ω = m₁ ∧ Learning.pullCount (action alg) a n ω = m₂ ∧ Learning.sumRewards (action alg) (reward alg) (bestArm ν) n ω ≤ Learning.sumRewards (action alg) (reward alg) a n ω} ≤ MeasureTheory.Measure.real (streamMeasure ν) {ω | ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤ ∑ i ∈ Finset.range m₂, ω i a}
Code
lemma probReal_sumRewards_le_sumRewards_le [Fintype 𝓐] (a : 𝓐) (n m₁ m₂ : ℕ) :
(𝔓).real {ω | pullCount A (bestArm ν) n ω = m₁ ∧ pullCount A a n ω = m₂ ∧
sumRewards A R (bestArm ν) n ω ≤ sumRewards A R a n ω} ≤
(streamMeasure ν).real
{ω | ∑ i ∈ range m₁, ω i (bestArm ν) ≤ ∑ i ∈ range m₂, ω i a}Type uses (10)
Body uses (2)
Used by (1)
Actions: Source · Open Issue
Proof
by simp_rw [measureReal_def] gcongr · finiteness · exact prob_sumRewards_le_sumRewards_le a n m₁ m₂
sumRewards_eq_comp🔗
Bandits.sumRewards_eq_compNo docstring.
Bandits.sumRewards_eq_comp.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} : Learning.sumRewards A R a n = (fun p => ∑ i ∈ Finset.range n, if Prod.fst (p i) = a then Prod.snd (p i) else 0) ∘ Learning.trajectory A RBandits.sumRewards_eq_comp.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} : Learning.sumRewards A R a n = (fun p => ∑ i ∈ Finset.range n, if Prod.fst (p i) = a then Prod.snd (p i) else 0) ∘ Learning.trajectory A R
Code
lemma sumRewards_eq_comp :
sumRewards A R a n =
(fun p ↦ ∑ i ∈ range n, if (p i).1 = a then (p i).2 else 0) ∘ (trajectory A R)Type uses (2)
Actions: Source · Open Issue
Proof
by ext simp [sumRewards, trajectory]
pullCount_eq_comp🔗
Bandits.pullCount_eq_compNo docstring.
Bandits.pullCount_eq_comp.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} : Learning.pullCount A a n = (fun p => ∑ i ∈ Finset.range n, if Prod.fst (p i) = a then 1 else 0) ∘ Learning.trajectory A RBandits.pullCount_eq_comp.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} : Learning.pullCount A a n = (fun p => ∑ i ∈ Finset.range n, if Prod.fst (p i) = a then 1 else 0) ∘ Learning.trajectory A R
Code
lemma pullCount_eq_comp :
pullCount A a n =
(fun p ↦ ∑ i ∈ range n, if (p i).1 = a then 1 else 0) ∘ (trajectory A R)Type uses (2)
Used by (1)
Actions: Source · Open Issue
Proof
by ext simp [pullCount, trajectory]
law_sumRewards_unique🔗
Learning.IsAlgEnvSeq.law_sumRewards_uniqueNo docstring.
Learning.IsAlgEnvSeq.law_sumRewards_unique.{u_1, u_2, u_3} {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {A₂ : ℕ → Ω' → 𝓐} {R₂ : ℕ → Ω' → ℝ} {n : ℕ} {a : 𝓐} [MeasurableSingletonClass 𝓐] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') : MeasureTheory.Measure.map (sumRewards A R a n) P = MeasureTheory.Measure.map (sumRewards A₂ R₂ a n) P'Learning.IsAlgEnvSeq.law_sumRewards_unique.{u_1, u_2, u_3} {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {A₂ : ℕ → Ω' → 𝓐} {R₂ : ℕ → Ω' → ℝ} {n : ℕ} {a : 𝓐} [MeasurableSingletonClass 𝓐] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') : MeasureTheory.Measure.map (sumRewards A R a n) P = MeasureTheory.Measure.map (sumRewards A₂ R₂ a n) P'
Code
lemma _root_.Learning.IsAlgEnvSeq.law_sumRewards_unique [MeasurableSingletonClass 𝓐]
(h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
(h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
P.map (sumRewards A R a n) = P'.map (sumRewards A₂ R₂ a n)Type uses (4)
Actions: Source · Open Issue
Proof
by
have hA := h1.measurable_action
have hR := h1.measurable_feedback
have hA2 := h2.measurable_action
have hR2 := h2.measurable_feedback
have h_unique := isAlgEnvSeq_unique h1 h2
rw [sumRewards_eq_comp, sumRewards_eq_comp, ← Measure.map_map, h_unique, Measure.map_map,
← sumRewards_eq_comp]
· refine measurable_sum _ fun i hi ↦ Measurable.ite ?_ (by fun_prop) (by fun_prop)
exact (measurableSet_singleton _).preimage (by fun_prop)
· fun_prop
· refine measurable_sum _ fun i hi ↦ Measurable.ite ?_ (by fun_prop) (by fun_prop)
exact (measurableSet_singleton _).preimage (by fun_prop)
· fun_prop
law_pullCount_sumRewards_unique'🔗
Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique'No docstring.
Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique'.{u_1, u_2, u_3} {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {A₂ : ℕ → Ω' → 𝓐} {R₂ : ℕ → Ω' → ℝ} {n : ℕ} [MeasurableSingletonClass 𝓐] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') : ProbabilityTheory.IdentDistrib (fun ω a => (pullCount A a n ω, sumRewards A R a n ω)) (fun ω a => (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P P'Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique'.{u_1, u_2, u_3} {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {A₂ : ℕ → Ω' → 𝓐} {R₂ : ℕ → Ω' → ℝ} {n : ℕ} [MeasurableSingletonClass 𝓐] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') : ProbabilityTheory.IdentDistrib (fun ω a => (pullCount A a n ω, sumRewards A R a n ω)) (fun ω a => (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P P'
Code
lemma _root_.Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique' [MeasurableSingletonClass 𝓐]
(h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
(h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
IdentDistrib (fun ω a ↦ (pullCount A a n ω, sumRewards A R a n ω))
(fun ω a ↦ (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P P'Type uses (5)
Body uses (7)
Actions: Source · Open Issue
Proof
by
have hA := h1.measurable_action
have hR := h1.measurable_feedback
have hA2 := h2.measurable_action
have hR2 := h2.measurable_feedback
constructor
· refine Measurable.aemeasurable ?_
rw [measurable_pi_iff]
exact fun a ↦ Measurable.prod (by fun_prop) (measurable_sumRewards hA hR _ _)
· refine Measurable.aemeasurable ?_
rw [measurable_pi_iff]
exact fun a ↦ Measurable.prod (by fun_prop) (measurable_sumRewards hA2 hR2 _ _)
have h_unique := isAlgEnvSeq_unique h1 h2
let f := fun (p : ℕ → 𝓐 × ℝ ) (a : 𝓐) ↦ (∑ i ∈ range n, if (p i).1 = a then 1 else 0,
∑ i ∈ range n, if (p i).1 = a then (p i).2 else 0)
have hf : Measurable f := by
rw [measurable_pi_iff]
intro a
refine Measurable.prod ?_ ?_
· simp only [f]
refine measurable_sum _ fun i hi ↦ Measurable.ite ?_ (by fun_prop) (by fun_prop)
exact (measurableSet_singleton _).preimage (by fun_prop)
· simp only [f]
refine measurable_sum _ fun i hi ↦ Measurable.ite ?_ (by fun_prop) (by fun_prop)
exact (measurableSet_singleton _).preimage (by fun_prop)
have h_eq_comp : (fun ω a ↦ (pullCount A a n ω, sumRewards A R a n ω))
= f ∘ (trajectory A R) := by
ext ω a : 2
rw [pullCount_eq_comp (R := R), sumRewards_eq_comp]
grind
have h_eq_comp2 : (fun ω a ↦ (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω))
= f ∘ (trajectory A₂ R₂) := by
ext ω a : 2
rw [pullCount_eq_comp (R := R₂), sumRewards_eq_comp]
grind
rw [h_eq_comp, h_eq_comp2, ← Measure.map_map hf, h_unique, Measure.map_map hf,
← h_eq_comp2]
· rw [measurable_pi_iff]
exact fun n ↦ Measurable.prodMk (hA2 n) (hR2 n)
· rw [measurable_pi_iff]
exact fun n ↦ Measurable.prodMk (hA n) (hR n)
law_pullCount_sumRewards_unique🔗
Learning.IsAlgEnvSeq.law_pullCount_sumRewards_uniqueNo docstring.
Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique.{u_1, u_2, u_3} {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {A₂ : ℕ → Ω' → 𝓐} {R₂ : ℕ → Ω' → ℝ} {n : ℕ} {a : 𝓐} [MeasurableSingletonClass 𝓐] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') : MeasureTheory.Measure.map (fun ω => (pullCount A a n ω, sumRewards A R a n ω)) P = MeasureTheory.Measure.map (fun ω => (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P'Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique.{u_1, u_2, u_3} {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {A₂ : ℕ → Ω' → 𝓐} {R₂ : ℕ → Ω' → ℝ} {n : ℕ} {a : 𝓐} [MeasurableSingletonClass 𝓐] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') : MeasureTheory.Measure.map (fun ω => (pullCount A a n ω, sumRewards A R a n ω)) P = MeasureTheory.Measure.map (fun ω => (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P'
Code
lemma _root_.Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique [MeasurableSingletonClass 𝓐]
(h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
(h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
P.map (fun ω ↦ (pullCount A a n ω, sumRewards A R a n ω)) =
P'.map (fun ω ↦ (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω))Type uses (5)
Body uses (1)
Used by (1)
Actions: Source · Open Issue
Proof
((h1.law_pullCount_sumRewards_unique' h2 (n := n)).comp (u := fun f ↦ f a) (by fun_prop)).map_eq
identDistrib_pullCount_sumRewards🔗
Learning.IsAlgEnvSeq.identDistrib_pullCount_sumRewardsNo docstring.
Learning.IsAlgEnvSeq.identDistrib_pullCount_sumRewards.{u_1, u_2, u_3} {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {A₂ : ℕ → Ω' → 𝓐} {R₂ : ℕ → Ω' → ℝ} [MeasurableSingletonClass 𝓐] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') : ProbabilityTheory.IdentDistrib (fun ω n a => (pullCount A a n ω, sumRewards A R a n ω)) (fun ω' n a => (pullCount A₂ a n ω', sumRewards A₂ R₂ a n ω')) P P'Learning.IsAlgEnvSeq.identDistrib_pullCount_sumRewards.{u_1, u_2, u_3} {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'} [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {A₂ : ℕ → Ω' → 𝓐} {R₂ : ℕ → Ω' → ℝ} [MeasurableSingletonClass 𝓐] (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') : ProbabilityTheory.IdentDistrib (fun ω n a => (pullCount A a n ω, sumRewards A R a n ω)) (fun ω' n a => (pullCount A₂ a n ω', sumRewards A₂ R₂ a n ω')) P P'
Code
lemma _root_.Learning.IsAlgEnvSeq.identDistrib_pullCount_sumRewards [MeasurableSingletonClass 𝓐]
(h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
(h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
IdentDistrib (fun ω n a ↦ (pullCount A a n ω, sumRewards A R a n ω))
(fun ω' n a ↦ (pullCount A₂ a n ω', sumRewards A₂ R₂ a n ω')) P P'Type uses (5)
Body uses (2)
Used by (1)
Actions: Source · Open Issue
Proof
by
let f (τ : ℕ → 𝓐 × ℝ) (n : ℕ) (a : 𝓐) : ℕ × ℝ :=
(∑ i ∈ range n, if (τ i).1 = a then 1 else 0,
∑ i ∈ range n, if (τ i).1 = a then (τ i).2 else 0)
have hc1 : (fun ω n a ↦ (pullCount A a n ω, sumRewards A R a n ω)) =
f ∘ (trajectory A R) := by
ext ω n a : 3
simp_rw [Function.comp, f, pullCount, card_filter, sumRewards, trajectory]
rfl
have hc2 : (fun ω' n a ↦ (pullCount A₂ a n ω', sumRewards A₂ R₂ a n ω')) =
f ∘ (trajectory A₂ R₂) := by
ext ω' n a : 3
simp_rw [Function.comp, f, pullCount, card_filter, sumRewards, trajectory]
rfl
have hf : Measurable f := by
simp_rw [f, measurable_pi_iff]
intro n a
apply Measurable.prod
· dsimp only
exact measurable_sum _
(fun _ _ ↦ Measurable.ite (by measurability) (by fun_prop) (by fun_prop))
· dsimp only
exact measurable_sum _
(fun _ _ ↦ Measurable.ite (by measurability) (by fun_prop) (by fun_prop))
rw [hc1, hc2]
exact (h1.identDistrib_trajectory h2).comp hf
prob_pullCount_prod_sumRewards_mem_le🔗
Bandits.prob_pullCount_prod_sumRewards_mem_leNo docstring.
Bandits.prob_pullCount_prod_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {s : Set (ℕ × ℝ)} [DecidablePred fun x => x ∈ Prod.fst '' s] (hs : MeasurableSet s) : P {ω | (Learning.pullCount A a n ω, Learning.sumRewards A R a n ω) ∈ s} ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ Prod.fst '' s, (streamMeasure ν) {ω | ∑ i ∈ Finset.range k, ω i a ∈ Prod.mk k ⁻¹' s}Bandits.prob_pullCount_prod_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {s : Set (ℕ × ℝ)} [DecidablePred fun x => x ∈ Prod.fst '' s] (hs : MeasurableSet s) : P {ω | (Learning.pullCount A a n ω, Learning.sumRewards A R a n ω) ∈ s} ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ Prod.fst '' s, (streamMeasure ν) {ω | ∑ i ∈ Finset.range k, ω i a ∈ Prod.mk k ⁻¹' s}
Code
lemma prob_pullCount_prod_sumRewards_mem_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
(h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
{s : Set (ℕ × ℝ)} [DecidablePred (· ∈ Prod.fst '' s)] (hs : MeasurableSet s) :
P {ω | (pullCount A a n ω, sumRewards A R a n ω) ∈ s} ≤
∑ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s}Type uses (6)
Actions: Source · Open Issue
Proof
by
have hA := h.measurable_action
have hR := h.measurable_feedback
calc P {ω | (pullCount A a n ω, sumRewards A R a n ω) ∈ s}
_ = (P.map (fun ω ↦ (pullCount A a n ω, sumRewards A R a n ω))) s := by
rw [Measure.map_apply (by fun_prop) hs]; rfl
_ = ((ArrayModel.arrayMeasure ν).map
(fun ω ↦ (pullCount (ArrayModel.action alg) a n ω,
sumRewards (ArrayModel.action alg) (ArrayModel.reward alg) a n ω))) s := by
rw [h.law_pullCount_sumRewards_unique (ArrayModel.isAlgEnvSeq_arrayMeasure alg ν)]
_ = (ArrayModel.arrayMeasure ν) {ω | (pullCount (ArrayModel.action alg) a n ω,
sumRewards (ArrayModel.action alg) (ArrayModel.reward alg) a n ω) ∈ s} := by
rw [Measure.map_apply (by fun_prop) hs]; rfl
_ ≤ ∑ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s} :=
ArrayModel.prob_pullCount_prod_sumRewards_mem_le a n hs
prob_pullCount_mem_and_sumRewards_mem_le🔗
Bandits.prob_pullCount_mem_and_sumRewards_mem_leNo docstring.
Bandits.prob_pullCount_mem_and_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {s : Set ℕ} [DecidablePred fun x => x ∈ s] (hs : MeasurableSet s) {B : Set ℝ} (hB : MeasurableSet B) : P {ω | Learning.pullCount A a n ω ∈ s ∧ Learning.sumRewards A R a n ω ∈ B} ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ s, (streamMeasure ν) {ω | ∑ i ∈ Finset.range k, ω i a ∈ B}Bandits.prob_pullCount_mem_and_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {s : Set ℕ} [DecidablePred fun x => x ∈ s] (hs : MeasurableSet s) {B : Set ℝ} (hB : MeasurableSet B) : P {ω | Learning.pullCount A a n ω ∈ s ∧ Learning.sumRewards A R a n ω ∈ B} ≤ ∑ k ∈ Finset.range (n + 1) with k ∈ s, (streamMeasure ν) {ω | ∑ i ∈ Finset.range k, ω i a ∈ B}
Code
lemma prob_pullCount_mem_and_sumRewards_mem_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
(h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
{s : Set ℕ} [DecidablePred (· ∈ s)] (hs : MeasurableSet s) {B : Set ℝ} (hB : MeasurableSet B) :
P {ω | pullCount A a n ω ∈ s ∧ sumRewards A R a n ω ∈ B} ≤
∑ k ∈ (range (n + 1)).filter (· ∈ s),
streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ B}Type uses (6)
Body uses (1)
Actions: Source · Open Issue
Proof
by
classical
rcases Set.eq_empty_or_nonempty B with h_empty | h_nonempty
· simp [h_empty]
convert prob_pullCount_prod_sumRewards_mem_le h (hs.prod hB) (ν := ν) (alg := alg) with _ _ _ k hk
· rfl
· ext n
have : ∃ x, x ∈ B := h_nonempty
simp [this]
· ext x
simp only [Set.mem_image, Set.mem_prod, Prod.exists, exists_and_right, exists_and_left,
exists_eq_right, mem_filter, mem_range] at hk
simp [hk.2.1]
prob_sumRewards_mem_le🔗
Bandits.prob_sumRewards_mem_leNo docstring.
Bandits.prob_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {B : Set ℝ} (hB : MeasurableSet B) : P (Learning.sumRewards A R a n ⁻¹' B) ≤ ∑ k ∈ Finset.range (n + 1), (streamMeasure ν) {ω | ∑ i ∈ Finset.range k, ω i a ∈ B}Bandits.prob_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {B : Set ℝ} (hB : MeasurableSet B) : P (Learning.sumRewards A R a n ⁻¹' B) ≤ ∑ k ∈ Finset.range (n + 1), (streamMeasure ν) {ω | ∑ i ∈ Finset.range k, ω i a ∈ B}
Code
lemma prob_sumRewards_mem_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
(h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
{B : Set ℝ} (hB : MeasurableSet B) :
P (sumRewards A R a n ⁻¹' B) ≤
∑ k ∈ range (n + 1), streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ B}Type uses (5)
Body uses (2)
Actions: Source · Open Issue
Proof
by classical have h_le := prob_pullCount_mem_and_sumRewards_mem_le h .univ hB (a := a) (n := n) simp only [Set.mem_univ, true_and, filter_true] at h_le convert h_le rfl
prob_pullCount_eq_and_sumRewards_mem_le🔗
Bandits.prob_pullCount_eq_and_sumRewards_mem_leNo docstring.
Bandits.prob_pullCount_eq_and_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {m : ℕ} (hm : m ≤ n) {B : Set ℝ} (hB : MeasurableSet B) : P {ω | Learning.pullCount A a n ω = m ∧ Learning.sumRewards A R a n ω ∈ B} ≤ (streamMeasure ν) {ω | ∑ i ∈ Finset.range m, ω i a ∈ B}Bandits.prob_pullCount_eq_and_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {m : ℕ} (hm : m ≤ n) {B : Set ℝ} (hB : MeasurableSet B) : P {ω | Learning.pullCount A a n ω = m ∧ Learning.sumRewards A R a n ω ∈ B} ≤ (streamMeasure ν) {ω | ∑ i ∈ Finset.range m, ω i a ∈ B}
Code
lemma prob_pullCount_eq_and_sumRewards_mem_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
(h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
{m : ℕ} (hm : m ≤ n) {B : Set ℝ} (hB : MeasurableSet B) :
P {ω | pullCount A a n ω = m ∧ sumRewards A R a n ω ∈ B} ≤
streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B}Type uses (6)
Body uses (1)
Actions: Source · Open Issue
Proof
by
have h_le := prob_pullCount_mem_and_sumRewards_mem_le h (s := {m}) (by simp) hB (a := a) (n := n)
have hm' : m < n + 1 := by lia
simpa [hm'] using h_le
prob_exists_pullCount_eq_and_sumRewards_mem_le🔗
Bandits.prob_exists_pullCount_eq_and_sumRewards_mem_leNo docstring.
Bandits.prob_exists_pullCount_eq_and_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) (a : 𝓐) (m : ℕ) {B : Set ℝ} (hB : MeasurableSet B) : P {ω | ∃ n, Learning.pullCount A a n ω = m ∧ Learning.sumRewards A R a n ω ∈ B} ≤ (streamMeasure ν) {ω | ∑ i ∈ Finset.range m, ω i a ∈ B}Bandits.prob_exists_pullCount_eq_and_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) (a : 𝓐) (m : ℕ) {B : Set ℝ} (hB : MeasurableSet B) : P {ω | ∃ n, Learning.pullCount A a n ω = m ∧ Learning.sumRewards A R a n ω ∈ B} ≤ (streamMeasure ν) {ω | ∑ i ∈ Finset.range m, ω i a ∈ B}
Code
lemma prob_exists_pullCount_eq_and_sumRewards_mem_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
(h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (a : 𝓐) (m : ℕ) {B : Set ℝ}
(hB : MeasurableSet B) :
P {ω | ∃ n, pullCount A a n ω = m ∧ sumRewards A R a n ω ∈ B} ≤
streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B}Type uses (6)
Body uses (9)
Actions: Source · Open Issue
Proof
let s := {p : ℕ → 𝓐 → ℕ × ℝ | ∃ n, (p n a).1 = m ∧ (p n a).2 ∈ B}
have : s = ⋃ n, (fun p ↦ p n a) ⁻¹' ({m} ×ˢ B) := by
ext p
simp [s]
have hs : MeasurableSet s := by measurability
calc P {ω | ∃ n, pullCount A a n ω = m ∧ sumRewards A R a n ω ∈ B}
_ = (ArrayModel.arrayMeasure ν) {ω | ∃ n, pullCount (ArrayModel.action alg) a n ω = m ∧
sumRewards (ArrayModel.action alg) (ArrayModel.reward alg) a n ω ∈ B} :=
(h.identDistrib_pullCount_sumRewards
(ArrayModel.isAlgEnvSeq_arrayMeasure alg ν)).measure_mem_eq hs
_ ≤ _ := ArrayModel.prob_exists_pullCount_eq_and_sumRewards_mem_le a m hB
probReal_sumRewards_le_sumRewards_le🔗
Bandits.probReal_sumRewards_le_sumRewards_leNo docstring.
Bandits.probReal_sumRewards_le_sumRewards_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} [Nonempty 𝓐] [Fintype 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) (a : 𝓐) (n m₁ m₂ : ℕ) : MeasureTheory.Measure.real P {ω | Learning.pullCount A (bestArm ν) n ω = m₁ ∧ Learning.pullCount A a n ω = m₂ ∧ Learning.sumRewards A R (bestArm ν) n ω ≤ Learning.sumRewards A R a n ω} ≤ MeasureTheory.Measure.real (streamMeasure ν) {ω | ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤ ∑ i ∈ Finset.range m₂, ω i a}Bandits.probReal_sumRewards_le_sumRewards_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} [Nonempty 𝓐] [Fintype 𝓐] [MeasurableSingletonClass 𝓐] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) (a : 𝓐) (n m₁ m₂ : ℕ) : MeasureTheory.Measure.real P {ω | Learning.pullCount A (bestArm ν) n ω = m₁ ∧ Learning.pullCount A a n ω = m₂ ∧ Learning.sumRewards A R (bestArm ν) n ω ≤ Learning.sumRewards A R a n ω} ≤ MeasureTheory.Measure.real (streamMeasure ν) {ω | ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤ ∑ i ∈ Finset.range m₂, ω i a}
Code
lemma probReal_sumRewards_le_sumRewards_le [Fintype 𝓐] [MeasurableSingletonClass 𝓐]
(h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
(a : 𝓐) (n m₁ m₂ : ℕ) :
P.real {ω | pullCount A (bestArm ν) n ω = m₁ ∧ pullCount A a n ω = m₂ ∧
sumRewards A R (bestArm ν) n ω ≤ sumRewards A R a n ω} ≤
(streamMeasure ν).real
{ω | ∑ i ∈ range m₁, ω i (bestArm ν) ≤ ∑ i ∈ range m₂, ω i a}Type uses (7)
Used by (1)
Actions: Source · Open Issue
Proof
by
have hA := h.measurable_action
have hR := h.measurable_feedback
refine le_trans (le_of_eq ?_)
(ArrayModel.probReal_sumRewards_le_sumRewards_le (alg := alg) a n m₁ m₂)
let s := {p : ℕ × ℕ × ℝ × ℝ | p.1 = m₁ ∧ p.2.1 = m₂ ∧ p.2.2.1 ≤ p.2.2.2}
have hs : MeasurableSet s := by simp only [measurableSet_setOf, s]; fun_prop
change P.real ((fun ω ↦ (pullCount A (bestArm ν) n ω,
pullCount A a n ω, sumRewards A R (bestArm ν) n ω, sumRewards A R a n ω)) ⁻¹' s) =
(ArrayModel.arrayMeasure ν).real
((fun ω ↦ (pullCount (ArrayModel.action alg) (bestArm ν) n ω,
pullCount (ArrayModel.action alg) a n ω,
sumRewards (ArrayModel.action alg) (ArrayModel.reward alg) (bestArm ν) n ω,
sumRewards (ArrayModel.action alg) (ArrayModel.reward alg) a n ω)) ⁻¹' s)
simp_rw [measureReal_def]
congr 1
rw [← Measure.map_apply ?_ hs, ← Measure.map_apply (by fun_prop) hs]
swap
· refine Measurable.prod (by fun_prop) (Measurable.prod (by fun_prop) ?_)
exact (measurable_sumRewards hA hR _ _).prod (measurable_sumRewards hA hR _ _)
congr 1
refine IdentDistrib.map_eq ?_
have h_eq := h.law_pullCount_sumRewards_unique' (ArrayModel.isAlgEnvSeq_arrayMeasure alg ν)
(n := n)
exact h_eq.comp (u := fun p ↦ ((p (bestArm ν)).1, (p a).1, (p (bestArm ν)).2, (p a).2))
(by fun_prop)
prob_sum_range_sub_ge_le_of_HasSubgaussianMGF🔗
Bandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGFNo docstring.
Bandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {a : 𝓐} {σ2 : NNReal} (h : ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) {ε : ℝ} (hε : 0 ≤ ε) (n : ℕ) : (streamMeasure ν) {ω | ε ≤ ∑ k ∈ Finset.range n, (ω k a - ∫ (x : ℝ), id x ∂ν a)} ≤ ENNReal.ofReal (Real.exp (-ε ^ 2 / (2 * ↑n * ↑σ2)))Bandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {a : 𝓐} {σ2 : NNReal} (h : ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) {ε : ℝ} (hε : 0 ≤ ε) (n : ℕ) : (streamMeasure ν) {ω | ε ≤ ∑ k ∈ Finset.range n, (ω k a - ∫ (x : ℝ), id x ∂ν a)} ≤ ENNReal.ofReal (Real.exp (-ε ^ 2 / (2 * ↑n * ↑σ2)))
Code
lemma prob_sum_range_sub_ge_le_of_HasSubgaussianMGF {σ2 : ℝ≥0}
(h : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) {ε : ℝ} (hε : 0 ≤ ε) (n : ℕ) :
streamMeasure ν {ω | ε ≤ ∑ k ∈ range n, (ω k a - (ν a)[id])} ≤
ENNReal.ofReal (Real.exp (-ε ^ 2 / (2 * n * σ2)))Type uses (1)
Body uses (3)
Used by (1)
Actions: Source · Open Issue
Proof
by
rw [← ofReal_measureReal]
gcongr
apply HasSubgaussianMGF.measure_sum_range_ge_le_of_iIndepFun _ _ hε
· exact (iIndepFun_eval_streamMeasure'' ν a).comp (fun _ x ↦ x - (ν a)[id]) (by fun_prop)
· intro _ _
exact h.congr_identDistrib ((identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _)
prob_sum_range_sub_le_le_of_HasSubgaussianMGF🔗
Bandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGFNo docstring.
Bandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {a : 𝓐} {σ2 : NNReal} (h : ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) {ε : ℝ} (hε : 0 ≤ ε) (n : ℕ) : (streamMeasure ν) {ω | ∑ k ∈ Finset.range n, (ω k a - ∫ (x : ℝ), id x ∂ν a) ≤ -ε} ≤ ENNReal.ofReal (Real.exp (-ε ^ 2 / (2 * ↑n * ↑σ2)))Bandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {a : 𝓐} {σ2 : NNReal} (h : ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) {ε : ℝ} (hε : 0 ≤ ε) (n : ℕ) : (streamMeasure ν) {ω | ∑ k ∈ Finset.range n, (ω k a - ∫ (x : ℝ), id x ∂ν a) ≤ -ε} ≤ ENNReal.ofReal (Real.exp (-ε ^ 2 / (2 * ↑n * ↑σ2)))
Code
lemma prob_sum_range_sub_le_le_of_HasSubgaussianMGF {σ2 : ℝ≥0}
(h : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) {ε : ℝ} (hε : 0 ≤ ε) (n : ℕ) :
streamMeasure ν {ω | ∑ k ∈ range n, (ω k a - (ν a)[id]) ≤ -ε} ≤
ENNReal.ofReal (Real.exp (-ε ^ 2 / (2 * n * σ2)))Type uses (1)
Body uses (4)
Used by (1)
Actions: Source · Open Issue
Proof
by
rw [← ofReal_measureReal]
gcongr
apply HasSubgaussianMGF.measure_sum_range_le_le_of_iIndepFun _ _ hε
· exact (iIndepFun_eval_streamMeasure'' ν a).comp (fun _ x ↦ x - (ν a)[id]) (by fun_prop)
· intro _ _
exact h.congr_identDistrib ((identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _)
prob_sum_range_sub_ge_le_of_HasSubgaussianMGF'🔗
Bandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF'No docstring.
Bandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF'.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {n : ℕ} {a : 𝓐} {σ2 : NNReal} (hσ2 : 0 < σ2) (h : ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) {δ : ℝ} (hδ : 0 < δ) (hn : 0 < n) : (streamMeasure ν) {ω | √(2 * ↑n * ↑σ2 * Real.log (1 / δ)) ≤ ∑ k ∈ Finset.range n, (ω k a - ∫ (x : ℝ), id x ∂ν a)} ≤ ENNReal.ofReal δBandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF'.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {n : ℕ} {a : 𝓐} {σ2 : NNReal} (hσ2 : 0 < σ2) (h : ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) {δ : ℝ} (hδ : 0 < δ) (hn : 0 < n) : (streamMeasure ν) {ω | √(2 * ↑n * ↑σ2 * Real.log (1 / δ)) ≤ ∑ k ∈ Finset.range n, (ω k a - ∫ (x : ℝ), id x ∂ν a)} ≤ ENNReal.ofReal δ
Code
lemma prob_sum_range_sub_ge_le_of_HasSubgaussianMGF' {σ2 : ℝ≥0} (hσ2 : 0 < σ2)
(h : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) {δ : ℝ} (hδ : 0 < δ) (hn : 0 < n) :
streamMeasure ν {ω | √(2 * n * σ2 * Real.log (1 / δ)) ≤
∑ k ∈ range n, (ω k a - (ν a)[id])} ≤ ENNReal.ofReal δType uses (1)
Body uses (1)
Used by (1)
Actions: Source · Open Issue
Proof
calc
_ ≤ ENNReal.ofReal (Real.exp (-√(2 * n * σ2 * Real.log (1 / δ)) ^ 2 / (2 * n * σ2))) :=
prob_sum_range_sub_ge_le_of_HasSubgaussianMGF h (by positivity) n
_ ≤ ENNReal.ofReal δ := by
gcongr
exact exp_neg_sqrt_sq_div_le hσ2 hδ hn
prob_sum_range_sub_le_le_of_HasSubgaussianMGF'🔗
Bandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF'No docstring.
Bandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF'.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {n : ℕ} {a : 𝓐} {σ2 : NNReal} (hσ2 : 0 < σ2) (h : ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) {δ : ℝ} (hδ : 0 < δ) (hn : 0 < n) : (streamMeasure ν) {ω | ∑ k ∈ Finset.range n, (ω k a - ∫ (x : ℝ), id x ∂ν a) ≤ -√(2 * ↑n * ↑σ2 * Real.log (1 / δ))} ≤ ENNReal.ofReal δBandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF'.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {n : ℕ} {a : 𝓐} {σ2 : NNReal} (hσ2 : 0 < σ2) (h : ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) {δ : ℝ} (hδ : 0 < δ) (hn : 0 < n) : (streamMeasure ν) {ω | ∑ k ∈ Finset.range n, (ω k a - ∫ (x : ℝ), id x ∂ν a) ≤ -√(2 * ↑n * ↑σ2 * Real.log (1 / δ))} ≤ ENNReal.ofReal δ
Code
lemma prob_sum_range_sub_le_le_of_HasSubgaussianMGF' {σ2 : ℝ≥0} (hσ2 : 0 < σ2)
(h : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) {δ : ℝ} (hδ : 0 < δ) (hn : 0 < n) :
streamMeasure ν {ω | ∑ k ∈ range n, (ω k a - (ν a)[id]) ≤
-√(2 * n * σ2 * Real.log (1 / δ))} ≤ ENNReal.ofReal δType uses (1)
Body uses (1)
Used by (1)
Actions: Source · Open Issue
Proof
calc
_ ≤ ENNReal.ofReal (Real.exp (-√(2 * n * σ2 * Real.log (1 / δ)) ^ 2 / (2 * n * σ2))) :=
prob_sum_range_sub_le_le_of_HasSubgaussianMGF h (by positivity) n
_ ≤ ENNReal.ofReal δ := by
gcongr
exact exp_neg_sqrt_sq_div_le hσ2 hδ hn
prob_sumRewards_sub_pullCount_mul_ge_le🔗
Bandits.prob_sumRewards_sub_pullCount_mul_ge_leNo docstring.
Bandits.prob_sumRewards_sub_pullCount_mul_ge_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] {σ2 : NNReal} (hσ2 : 0 < σ2) (ha : ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {δ : ℝ} (hδ : 0 < δ) : P {ω | ∃ t < n, Learning.pullCount A a t ω ≠ 0 ∧ √(2 * ↑(Learning.pullCount A a t ω) * ↑σ2 * Real.log (1 / δ)) ≤ Learning.sumRewards A R a t ω - ↑(Learning.pullCount A a t ω) * ∫ (x : ℝ), id x ∂ν a} ≤ ENNReal.ofReal ((↑n - 1) * δ)Bandits.prob_sumRewards_sub_pullCount_mul_ge_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] {σ2 : NNReal} (hσ2 : 0 < σ2) (ha : ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {δ : ℝ} (hδ : 0 < δ) : P {ω | ∃ t < n, Learning.pullCount A a t ω ≠ 0 ∧ √(2 * ↑(Learning.pullCount A a t ω) * ↑σ2 * Real.log (1 / δ)) ≤ Learning.sumRewards A R a t ω - ↑(Learning.pullCount A a t ω) * ∫ (x : ℝ), id x ∂ν a} ≤ ENNReal.ofReal ((↑n - 1) * δ)
Code
lemma prob_sumRewards_sub_pullCount_mul_ge_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
{σ2 : ℝ≥0} (hσ2 : 0 < σ2) (ha : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
(h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) {δ : ℝ} (hδ : 0 < δ) :
P {ω | ∃ t < n, pullCount A a t ω ≠ 0 ∧ √(2 * pullCount A a t ω * σ2 * Real.log (1 / δ)) ≤
sumRewards A R a t ω - pullCount A a t ω * (ν a)[id]} ≤ ENNReal.ofReal ((n - 1) * δ)Type uses (5)
Body uses (4)
Actions: Source · Open Issue
Proof
let B (m : ℕ) := {x : ℝ | √(2 * m * σ2 * Real.log (1 / δ)) ≤ x - m * (ν a)[id]}
calc
_ ≤ P (⋃ m ∈ Icc 1 (n - 1), {ω | ∃ t, t < n ∧ pullCount A a t ω = m ∧
sumRewards A R a t ω ∈ B m}) := by
apply measure_mono
intro ω ⟨t, ht, hp, hb⟩
have hm : pullCount A a t ω ∈ Icc 1 (n - 1) := mem_Icc.mpr ⟨Nat.one_le_iff_ne_zero.mpr hp,
(pullCount_le a t ω).trans (Nat.le_sub_one_of_lt ht)⟩
exact Set.mem_biUnion hm ⟨t, ht, rfl, hb⟩
_ ≤ ∑ m ∈ Icc 1 (n - 1), P {ω | ∃ t, t < n ∧ pullCount A a t ω = m ∧
sumRewards A R a t ω ∈ B m} :=
measure_biUnion_finset_le _ _
_ ≤ ∑ m ∈ Icc 1 (n - 1), P {ω | ∃ t, pullCount A a t ω = m ∧ sumRewards A R a t ω ∈ B m} :=
sum_le_sum (fun _ _ ↦ measure_mono (fun _ ⟨t, _, hps⟩ ↦ ⟨t, hps⟩))
_ ≤ ∑ m ∈ Icc 1 (n - 1), streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B m} := by
apply sum_le_sum
exact (fun m _ ↦ prob_exists_pullCount_eq_and_sumRewards_mem_le h a m (by measurability))
_ ≤ ∑ m ∈ Icc 1 (n - 1), ENNReal.ofReal δ := by
apply sum_le_sum
intro m hm
exact le_of_eq_of_le (by simp [B])
(StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF' hσ2 ha hδ (mem_Icc.mp hm).1)
_ = ENNReal.ofReal ((n - 1) * δ) := by
by_cases hn : n = 0
· simp [hn, hδ.le]
· rw [sum_const, Nat.card_Icc, add_tsub_cancel_right, ← ENNReal.ofReal_nsmul, nsmul_eq_mul,
Nat.cast_sub (Nat.one_le_iff_ne_zero.mpr hn)]
ring_nf
prob_sumRewards_sub_pullCount_mul_le_le🔗
Bandits.prob_sumRewards_sub_pullCount_mul_le_leNo docstring.
Bandits.prob_sumRewards_sub_pullCount_mul_le_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] {σ2 : NNReal} (hσ2 : 0 < σ2) (ha : ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {δ : ℝ} (hδ : 0 < δ) : P {ω | ∃ t < n, Learning.pullCount A a t ω ≠ 0 ∧ Learning.sumRewards A R a t ω - ↑(Learning.pullCount A a t ω) * ∫ (x : ℝ), id x ∂ν a ≤ -√(2 * ↑(Learning.pullCount A a t ω) * ↑σ2 * Real.log (1 / δ))} ≤ ENNReal.ofReal ((↑n - 1) * δ)Bandits.prob_sumRewards_sub_pullCount_mul_le_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐] {σ2 : NNReal} (hσ2 : 0 < σ2) (ha : ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {δ : ℝ} (hδ : 0 < δ) : P {ω | ∃ t < n, Learning.pullCount A a t ω ≠ 0 ∧ Learning.sumRewards A R a t ω - ↑(Learning.pullCount A a t ω) * ∫ (x : ℝ), id x ∂ν a ≤ -√(2 * ↑(Learning.pullCount A a t ω) * ↑σ2 * Real.log (1 / δ))} ≤ ENNReal.ofReal ((↑n - 1) * δ)
Code
lemma prob_sumRewards_sub_pullCount_mul_le_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
{σ2 : ℝ≥0} (hσ2 : 0 < σ2) (ha : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
(h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) {δ : ℝ} (hδ : 0 < δ) :
P {ω | ∃ t < n, pullCount A a t ω ≠ 0 ∧
sumRewards A R a t ω - pullCount A a t ω * (ν a)[id] ≤
-√(2 * pullCount A a t ω * σ2 * Real.log (1 / δ))} ≤ ENNReal.ofReal ((n - 1) * δ)Type uses (5)
Body uses (4)
Used by (1)
Actions: Source · Open Issue
Proof
let B (m : ℕ) := {x : ℝ | x - m * (ν a)[id] ≤ -√(2 * m * σ2 * Real.log (1 / δ))}
calc
_ ≤ P (⋃ m ∈ Icc 1 (n - 1), {ω | ∃ t, t < n ∧ pullCount A a t ω = m ∧
sumRewards A R a t ω ∈ B m}) := by
apply measure_mono
intro ω ⟨t, ht, hp, hb⟩
have hm : pullCount A a t ω ∈ Icc 1 (n - 1) := mem_Icc.mpr ⟨Nat.one_le_iff_ne_zero.mpr hp,
(pullCount_le a t ω).trans (Nat.le_sub_one_of_lt ht)⟩
exact Set.mem_biUnion hm ⟨t, ht, rfl, hb⟩
_ ≤ ∑ m ∈ Icc 1 (n - 1), P {ω | ∃ t, t < n ∧ pullCount A a t ω = m ∧
sumRewards A R a t ω ∈ B m} :=
measure_biUnion_finset_le _ _
_ ≤ ∑ m ∈ Icc 1 (n - 1), P {ω | ∃ t, pullCount A a t ω = m ∧ sumRewards A R a t ω ∈ B m} :=
sum_le_sum (fun _ _ ↦ measure_mono (fun _ ⟨t, _, hps⟩ ↦ ⟨t, hps⟩))
_ ≤ ∑ m ∈ Icc 1 (n - 1), streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B m} := by
apply sum_le_sum
exact (fun m _ ↦ prob_exists_pullCount_eq_and_sumRewards_mem_le h a m (by measurability))
_ ≤ ∑ m ∈ Icc 1 (n - 1), ENNReal.ofReal δ := by
apply sum_le_sum
intro m hm
exact le_of_eq_of_le (by simp [B])
(StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF' hσ2 ha hδ (mem_Icc.mp hm).1)
_ = ENNReal.ofReal ((n - 1) * δ) := by
by_cases hn : n = 0
· simp [hn, hδ.le]
· rw [sum_const, Nat.card_Icc, add_tsub_cancel_right, ← ENNReal.ofReal_nsmul, nsmul_eq_mul,
Nat.cast_sub (Nat.one_le_iff_ne_zero.mpr hn)]
ring_nf
prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype🔗
Bandits.prob_sumRewards_sub_pullCount_mul_ge_le_of_FintypeNo docstring.
Bandits.prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} [Nonempty 𝓐] [Fintype 𝓐] [MeasurableSingletonClass 𝓐] {σ2 : NNReal} (hσ2 : 0 < σ2) (hν : ∀ (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {δ : ℝ} (hδ : 0 < δ) : P {ω | ∃ a, ∃ t < n, Learning.pullCount A a t ω ≠ 0 ∧ √(2 * ↑(Learning.pullCount A a t ω) * ↑σ2 * Real.log (1 / δ)) ≤ Learning.sumRewards A R a t ω - ↑(Learning.pullCount A a t ω) * ∫ (x : ℝ), id x ∂ν a} ≤ ENNReal.ofReal (↑(Fintype.card 𝓐) * (↑n - 1) * δ)Bandits.prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} [Nonempty 𝓐] [Fintype 𝓐] [MeasurableSingletonClass 𝓐] {σ2 : NNReal} (hσ2 : 0 < σ2) (hν : ∀ (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) {δ : ℝ} (hδ : 0 < δ) : P {ω | ∃ a, ∃ t < n, Learning.pullCount A a t ω ≠ 0 ∧ √(2 * ↑(Learning.pullCount A a t ω) * ↑σ2 * Real.log (1 / δ)) ≤ Learning.sumRewards A R a t ω - ↑(Learning.pullCount A a t ω) * ∫ (x : ℝ), id x ∂ν a} ≤ ENNReal.ofReal (↑(Fintype.card 𝓐) * (↑n - 1) * δ)
Code
lemma prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype [Fintype 𝓐] [MeasurableSingletonClass 𝓐]
{σ2 : ℝ≥0} (hσ2 : 0 < σ2) (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
(h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) {δ : ℝ} (hδ : 0 < δ) :
P {ω | ∃ a, ∃ t < n, pullCount A a t ω ≠ 0 ∧
√(2 * pullCount A a t ω * σ2 * Real.log (1 / δ)) ≤
sumRewards A R a t ω - pullCount A a t ω * (ν a)[id]} ≤
ENNReal.ofReal (Fintype.card 𝓐 * (n - 1) * δ)Type uses (5)
Body uses (1)
Used by (1)
Actions: Source · Open Issue
Proof
calc
_ ≤ ∑ a, P {ω | ∃ t < n, pullCount A a t ω ≠ 0 ∧
√(2 * pullCount A a t ω * σ2 * Real.log (1 / δ)) ≤
sumRewards A R a t ω - pullCount A a t ω * (ν a)[id]} := by
rw [Set.setOf_exists]
exact measure_iUnion_fintype_le _ _
_ ≤ ∑ a, ENNReal.ofReal ((n - 1) * δ) :=
sum_le_sum fun a _ ↦ prob_sumRewards_sub_pullCount_mul_ge_le hσ2 (hν a) h hδ
_ = ENNReal.ofReal (Fintype.card 𝓐 * (n - 1) * δ) := by
rw [sum_const, Finset.card_univ, ← ENNReal.ofReal_nsmul, nsmul_eq_mul]
ring_nf
probReal_sum_le_sum_streamMeasure🔗
Bandits.probReal_sum_le_sum_streamMeasureNo docstring.
Bandits.probReal_sum_le_sum_streamMeasure.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Nonempty 𝓐] [Fintype 𝓐] {c : NNReal} (hν : ∀ (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) c (ν a)) (a : 𝓐) (m : ℕ) : MeasureTheory.Measure.real (streamMeasure ν) {ω | ∑ s ∈ Finset.range m, ω s (bestArm ν) ≤ ∑ s ∈ Finset.range m, ω s a} ≤ Real.exp (-↑m * gap ν a ^ 2 / (4 * ↑c))Bandits.probReal_sum_le_sum_streamMeasure.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] [Nonempty 𝓐] [Fintype 𝓐] {c : NNReal} (hν : ∀ (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) c (ν a)) (a : 𝓐) (m : ℕ) : MeasureTheory.Measure.real (streamMeasure ν) {ω | ∑ s ∈ Finset.range m, ω s (bestArm ν) ≤ ∑ s ∈ Finset.range m, ω s a} ≤ Real.exp (-↑m * gap ν a ^ 2 / (4 * ↑c))
Code
lemma probReal_sum_le_sum_streamMeasure [Fintype 𝓐] {c : ℝ≥0}
(hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) c (ν a)) (a : 𝓐) (m : ℕ) :
(streamMeasure ν).real
{ω | ∑ s ∈ range m, ω s (bestArm ν) ≤ ∑ s ∈ range m, ω s a} ≤
Real.exp (-↑m * gap ν a ^ 2 / (4 * c))Type uses (3)
Body uses (9)
Used by (1)
Actions: Source · Open Issue
Proof
by
by_cases ha : a = bestArm ν
· simp [ha]
refine (HasSubgaussianMGF.measure_sum_le_sum_le' (cX := fun _ ↦ c) (cY := fun _ ↦ c)
?_ ?_ ?_ ?_ ?_ ?_).trans_eq ?_
· exact iIndepFun_eval_streamMeasure'' ν (bestArm ν)
· exact iIndepFun_eval_streamMeasure'' ν a
· intro i him
simp_rw [integral_eval_streamMeasure]
refine (hν (bestArm ν)).congr_identDistrib ?_
exact (identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _
· intro i him
simp_rw [integral_eval_streamMeasure]
refine (hν a).congr_identDistrib ?_
exact (identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _
· exact indepFun_eval_streamMeasure' ν (Ne.symm ha)
· gcongr 1 with i him
simp_rw [integral_eval_streamMeasure]
exact le_bestArm a
· congr 1
simp_rw [integral_eval_streamMeasure]
simp only [id_eq, sum_const, card_range, nsmul_eq_mul, NNReal.coe_mul, NNReal.coe_natCast,
gap_eq_bestArm_sub, neg_mul]
field_simp
ring
prob_sum_le_sqrt_log🔗
Bandits.prob_sum_le_sqrt_logNo docstring.
Bandits.prob_sum_le_sqrt_log.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {n : ℕ} {σ2 : NNReal} (hν : ∀ (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (hσ2 : σ2 ≠ 0) {c : ℝ} (hc : 0 ≤ c) (a : 𝓐) (k : ℕ) (hk : k ≠ 0) : (streamMeasure ν) {ω | ∑ s ∈ Finset.range k, (ω s a - ∫ (x : ℝ), id x ∂ν a) ≤ -√(2 * c * ↑k * ↑σ2 * Real.log (↑n + 1))} ≤ 1 / (↑n + 1) ^ cBandits.prob_sum_le_sqrt_log.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {n : ℕ} {σ2 : NNReal} (hν : ∀ (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (hσ2 : σ2 ≠ 0) {c : ℝ} (hc : 0 ≤ c) (a : 𝓐) (k : ℕ) (hk : k ≠ 0) : (streamMeasure ν) {ω | ∑ s ∈ Finset.range k, (ω s a - ∫ (x : ℝ), id x ∂ν a) ≤ -√(2 * c * ↑k * ↑σ2 * Real.log (↑n + 1))} ≤ 1 / (↑n + 1) ^ c
Code
lemma prob_sum_le_sqrt_log {σ2 : ℝ≥0}
(hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
(hσ2 : σ2 ≠ 0) {c : ℝ} (hc : 0 ≤ c) (a : 𝓐) (k : ℕ) (hk : k ≠ 0) :
streamMeasure ν
{ω | (∑ s ∈ range k, (ω s a - (ν a)[id])) ≤ - √(2 * c * k * σ2 * Real.log (n + 1))} ≤
1 / (n + 1) ^ cType uses (1)
Body uses (4)
Used by (1)
Actions: Source · Open Issue
Proof
by
calc
streamMeasure ν
{ω | (∑ s ∈ range k, (ω s a - (ν a)[id])) ≤ - √(2 * c * k * σ2 * Real.log (n + 1))}
_ ≤ ENNReal.ofReal (Real.exp (-(√(2 * c * k * σ2 * Real.log (n + 1))) ^ 2 / (2 * k * σ2))) := by
rw [← ofReal_measureReal]
gcongr
refine (HasSubgaussianMGF.measure_sum_range_le_le_of_iIndepFun (c := σ2) ?_ ?_ (by positivity))
· exact (iIndepFun_eval_streamMeasure'' ν a).comp (fun i ω ↦ ω - (ν a)[id])
(fun _ ↦ by fun_prop)
· intro i him
refine (hν a).congr_identDistrib ?_
exact (identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _
_ = 1 / (n + 1) ^ c := by
rw [Real.sq_sqrt]
swap; · exact mul_nonneg (by positivity) (Real.log_nonneg (by simp))
field_simp
rw [← Real.log_rpow (by positivity), ← Real.log_inv,
Real.exp_log (by positivity), one_div, ENNReal.ofReal_inv_of_pos (by positivity),
← ENNReal.ofReal_rpow_of_nonneg (by positivity) (by positivity)]
norm_cast
prob_sum_ge_sqrt_log🔗
Bandits.prob_sum_ge_sqrt_logNo docstring.
Bandits.prob_sum_ge_sqrt_log.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {n : ℕ} {σ2 : NNReal} (hν : ∀ (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (hσ2 : σ2 ≠ 0) {c : ℝ} (hc : 0 ≤ c) (a : 𝓐) (k : ℕ) (hk : k ≠ 0) : (streamMeasure ν) {ω | √(2 * c * ↑k * ↑σ2 * Real.log (↑n + 1)) ≤ ∑ s ∈ Finset.range k, (ω s a - ∫ (x : ℝ), id x ∂ν a)} ≤ 1 / (↑n + 1) ^ cBandits.prob_sum_ge_sqrt_log.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {n : ℕ} {σ2 : NNReal} (hν : ∀ (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (hσ2 : σ2 ≠ 0) {c : ℝ} (hc : 0 ≤ c) (a : 𝓐) (k : ℕ) (hk : k ≠ 0) : (streamMeasure ν) {ω | √(2 * c * ↑k * ↑σ2 * Real.log (↑n + 1)) ≤ ∑ s ∈ Finset.range k, (ω s a - ∫ (x : ℝ), id x ∂ν a)} ≤ 1 / (↑n + 1) ^ c
Code
lemma prob_sum_ge_sqrt_log {σ2 : ℝ≥0}
(hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
(hσ2 : σ2 ≠ 0) {c : ℝ} (hc : 0 ≤ c) (a : 𝓐) (k : ℕ) (hk : k ≠ 0) :
streamMeasure ν
{ω | √(2 * c * k * σ2 * Real.log (n + 1)) ≤ (∑ s ∈ range k, (ω s a - (ν a)[id]))} ≤
1 / (n + 1) ^ cType uses (1)
Body uses (3)
Used by (1)
Actions: Source · Open Issue
Proof
by
calc
streamMeasure ν
{ω | √(2 * c * k * σ2 * Real.log (n + 1)) ≤ (∑ s ∈ range k, (ω s a - (ν a)[id]))}
_ ≤ ENNReal.ofReal (Real.exp (-(√(2 * c * k * σ2 * Real.log (n + 1))) ^ 2 / (2 * k * σ2))) := by
rw [← ofReal_measureReal]
gcongr
refine (HasSubgaussianMGF.measure_sum_range_ge_le_of_iIndepFun (c := σ2) ?_ ?_ (by positivity))
· exact (iIndepFun_eval_streamMeasure'' ν a).comp (fun i ω ↦ ω - (ν a)[id])
(fun _ ↦ by fun_prop)
· intro i him
refine (hν a).congr_identDistrib ?_
exact (identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _
_ = 1 / (n + 1) ^ c := by
rw [Real.sq_sqrt]
swap; · exact mul_nonneg (by positivity) (Real.log_nonneg (by simp))
field_simp
rw [← Real.log_rpow (by positivity), ← Real.log_inv,
Real.exp_log (by positivity), one_div, ENNReal.ofReal_inv_of_pos (by positivity),
← ENNReal.ofReal_rpow_of_nonneg (by positivity) (by positivity)]
norm_cast
prob_avg_add_sqrt_log_le🔗
Bandits.prob_avg_add_sqrt_log_leNo docstring.
Bandits.prob_avg_add_sqrt_log_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {σ2 : NNReal} {c : ℝ} (hν : ∀ (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : 𝓐) (n k : ℕ) (hk : k ≠ 0) : (streamMeasure ν) {ω | (∑ m ∈ Finset.range k, ω m a) / ↑k + √(2 * c * ↑σ2 * Real.log (↑n + 1) / ↑k) ≤ ∫ (x : ℝ), id x ∂ν a} ≤ 1 / (↑n + 1) ^ cBandits.prob_avg_add_sqrt_log_le.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {σ2 : NNReal} {c : ℝ} (hν : ∀ (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : 𝓐) (n k : ℕ) (hk : k ≠ 0) : (streamMeasure ν) {ω | (∑ m ∈ Finset.range k, ω m a) / ↑k + √(2 * c * ↑σ2 * Real.log (↑n + 1) / ↑k) ≤ ∫ (x : ℝ), id x ∂ν a} ≤ 1 / (↑n + 1) ^ c
Code
lemma prob_avg_add_sqrt_log_le {σ2 : ℝ≥0} {c : ℝ}
(hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) (hσ2 : σ2 ≠ 0)
(hc : 0 ≤ c) (a : 𝓐) (n k : ℕ) (hk : k ≠ 0) :
streamMeasure ν {ω | (∑ m ∈ range k, ω m a) / k + √(2 * c * σ2 * log (n + 1) / k) ≤ (ν a)[id]} ≤
1 / (n + 1) ^ cType uses (1)
Body uses (1)
Used by (1)
Actions: Source · Open Issue
Proof
by
have h_log_nonneg : 0 ≤ log (n + 1) := log_nonneg (by simp)
calc
streamMeasure ν {ω | (∑ m ∈ range k, ω m a) / k + √(2 * c * σ2 * log (n + 1) / k) ≤ (ν a)[id]}
_ = streamMeasure ν
{ω | (∑ s ∈ range k, (ω s a - (ν a)[id])) / k ≤ - √(2 * c * σ2 * log (n + 1) / k)} := by
congr with ω
field_simp
rw [Finset.sum_sub_distrib]
simp
grind
_ = streamMeasure ν
{ω | (∑ s ∈ range k, (ω s a - (ν a)[id])) ≤ - √(2 * c * k * σ2 * log (n + 1))} := by
congr with ω
field_simp
congr! 2
rw [sqrt_div (by positivity), ← mul_div_assoc, mul_comm, mul_div_assoc, div_sqrt,
mul_assoc (k : ℝ), mul_assoc (k : ℝ), mul_assoc (k : ℝ),
sqrt_mul (x := (k : ℝ)) (by positivity), mul_comm]
_ ≤ 1 / (n + 1) ^ c := prob_sum_le_sqrt_log hν hσ2 hc a k hk
prob_avg_sub_sqrt_log_ge🔗
Bandits.prob_avg_sub_sqrt_log_geNo docstring.
Bandits.prob_avg_sub_sqrt_log_ge.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {σ2 : NNReal} {c : ℝ} (hν : ∀ (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : 𝓐) (n k : ℕ) (hk : k ≠ 0) : (streamMeasure ν) {ω | ∫ (x : ℝ), id x ∂ν a ≤ (∑ m ∈ Finset.range k, ω m a) / ↑k - √(2 * c * ↑σ2 * Real.log (↑n + 1) / ↑k)} ≤ 1 / (↑n + 1) ^ cBandits.prob_avg_sub_sqrt_log_ge.{u_1} {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ} [ProbabilityTheory.IsMarkovKernel ν] {σ2 : NNReal} {c : ℝ} (hν : ∀ (a : 𝓐), ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a)) (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : 𝓐) (n k : ℕ) (hk : k ≠ 0) : (streamMeasure ν) {ω | ∫ (x : ℝ), id x ∂ν a ≤ (∑ m ∈ Finset.range k, ω m a) / ↑k - √(2 * c * ↑σ2 * Real.log (↑n + 1) / ↑k)} ≤ 1 / (↑n + 1) ^ c
Code
lemma prob_avg_sub_sqrt_log_ge {σ2 : ℝ≥0} {c : ℝ}
(hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) (hσ2 : σ2 ≠ 0)
(hc : 0 ≤ c) (a : 𝓐) (n k : ℕ) (hk : k ≠ 0) :
streamMeasure ν
{ω | (ν a)[id] ≤ (∑ m ∈ range k, ω m a) / k - √(2 * c * σ2 *log (n + 1) / k)} ≤
1 / (n + 1) ^ cType uses (1)
Body uses (1)
Used by (1)
Actions: Source · Open Issue
Proof
by
have h_log_nonneg : 0 ≤ log (n + 1) := log_nonneg (by simp)
calc
streamMeasure ν {ω | (ν a)[id] ≤ (∑ m ∈ range k, ω m a) / k - √(2 * c * σ2 * log (n + 1) / k)}
_ = streamMeasure ν
{ω | √(2 * c * σ2 * log (n + 1) / k) ≤ (∑ s ∈ range k, (ω s a - (ν a)[id])) / k} := by
congr with ω
field_simp
rw [Finset.sum_sub_distrib]
simp
grind
_ = streamMeasure ν
{ω | √(2 * c * k * σ2 * log (n + 1)) ≤ (∑ s ∈ range k, (ω s a - (ν a)[id]))} := by
congr with ω
field_simp
congr! 1
rw [sqrt_div (by positivity), ← mul_div_assoc, mul_comm, mul_div_assoc, div_sqrt,
mul_comm _ (k : ℝ), sqrt_mul (x := (k : ℝ)) (by positivity), mul_comm]
_ ≤ 1 / (n + 1) ^ c := prob_sum_ge_sqrt_log hν hσ2 hc a k hk
prob_empMean_sub_actionMean_ge_le🔗
Learning.IsBayesAlgEnvSeq.prob_empMean_sub_actionMean_ge_leNo docstring.
Learning.IsBayesAlgEnvSeq.prob_empMean_sub_actionMean_ge_le.{u_1, u_2} {𝓔 : Type u_1} {Ω : Type u_2} [MeasurableSpace 𝓔] [MeasurableSpace Ω] {K : ℕ} [Nonempty (Fin K)] {Q : MeasureTheory.Measure 𝓔} {κ : ProbabilityTheory.Kernel (𝓔 × Fin K) ℝ} [ProbabilityTheory.IsMarkovKernel κ] {alg : Algorithm (Fin K) ℝ} {E : Ω → 𝓔} {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] (h : IsBayesAlgEnvSeq Q κ alg E A R P) {σ2 : NNReal} (hσ2 : 0 < σ2) (hs : ∀ (e : 𝓔) (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂κ (e, a)) σ2 (κ (e, a))) {δ : ℝ} (hδ : 0 < δ) (n : ℕ) : P {ω | ∃ t < n, ∃ a, pullCount A a t ω ≠ 0 ∧ √(2 * ↑σ2 * Real.log (1 / δ) / ↑(pullCount A a t ω)) ≤ empMean A R a t ω - actionMean κ E a ω} ≤ ENNReal.ofReal (↑K * (↑n - 1) * δ)Learning.IsBayesAlgEnvSeq.prob_empMean_sub_actionMean_ge_le.{u_1, u_2} {𝓔 : Type u_1} {Ω : Type u_2} [MeasurableSpace 𝓔] [MeasurableSpace Ω] {K : ℕ} [Nonempty (Fin K)] {Q : MeasureTheory.Measure 𝓔} {κ : ProbabilityTheory.Kernel (𝓔 × Fin K) ℝ} [ProbabilityTheory.IsMarkovKernel κ] {alg : Algorithm (Fin K) ℝ} {E : Ω → 𝓔} {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] (h : IsBayesAlgEnvSeq Q κ alg E A R P) {σ2 : NNReal} (hσ2 : 0 < σ2) (hs : ∀ (e : 𝓔) (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂κ (e, a)) σ2 (κ (e, a))) {δ : ℝ} (hδ : 0 < δ) (n : ℕ) : P {ω | ∃ t < n, ∃ a, pullCount A a t ω ≠ 0 ∧ √(2 * ↑σ2 * Real.log (1 / δ) / ↑(pullCount A a t ω)) ≤ empMean A R a t ω - actionMean κ E a ω} ≤ ENNReal.ofReal (↑K * (↑n - 1) * δ)
Code
lemma prob_empMean_sub_actionMean_ge_le (h : IsBayesAlgEnvSeq Q κ alg E A R P) {σ2 : ℝ≥0}
(hσ2 : 0 < σ2) (hs : ∀ e a, HasSubgaussianMGF (fun x ↦ x - (κ (e, a))[id]) σ2 (κ (e, a)))
{δ : ℝ} (hδ : 0 < δ) (n : ℕ) :
P {ω | ∃ t < n, ∃ a, pullCount A a t ω ≠ 0 ∧
√(2 * σ2 * Real.log (1 / δ) / pullCount A a t ω) ≤ empMean A R a t ω - actionMean κ E a ω}
≤ ENNReal.ofReal (K * (n - 1) * δ)Type uses (5)
Body uses (14)
Actions: Source · Open Issue
Proof
by
have := h.measurable_param
have := h.measurable_action
have := h.measurable_feedback
let S := {(e, τ) | ∃ a, ∃ t < n, pullCount IT.action a t τ ≠ 0 ∧
√(2 * pullCount IT.action a t τ * σ2 * Real.log (1 / δ)) ≤
sumRewards IT.action IT.feedback a t τ - pullCount IT.action a t τ * actionMean κ id a e}
calc
_ ≤ (P.map (fun ω ↦ (E ω, trajectory A R ω))) S := by
rw [Measure.map_apply (by fun_prop) (by measurability)]
apply measure_mono
intro ω ⟨t, ht, a, hpc, hle⟩
rw [empMean] at hle
exact ⟨a, t, ht, hpc, sqrt_two_mul_le_sub hpc hle⟩
_ = (P.map E ⊗ₘ condDistrib (trajectory A R) E P) S := by
rw [← compProd_map_condDistrib (by fun_prop)]
_ = ∫⁻ e, condDistrib (trajectory A R) E P e (Prod.mk e ⁻¹' S) ∂(P.map E) :=
Measure.compProd_apply (by measurability)
_ ≤ ∫⁻ e, ENNReal.ofReal (Fintype.card (Fin K) * (n - 1) * δ) ∂(P.map E) := by
apply lintegral_mono_ae
rw [h.hasLaw_env.map_eq]
filter_upwards [h.ae_IsAlgEnvSeq] with e he
exact Bandits.prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype hσ2 (hs e) he hδ
_ = ENNReal.ofReal (K * (n - 1) * δ) := by
simp [Measure.map_apply h.measurable_param]
prob_empMean_bestAction_sub_actionMean_le_le🔗
Learning.IsBayesAlgEnvSeq.prob_empMean_bestAction_sub_actionMean_le_leNo docstring.
Learning.IsBayesAlgEnvSeq.prob_empMean_bestAction_sub_actionMean_le_le.{u_1, u_2} {𝓔 : Type u_1} {Ω : Type u_2} [MeasurableSpace 𝓔] [MeasurableSpace Ω] {K : ℕ} [Nonempty (Fin K)] {Q : MeasureTheory.Measure 𝓔} {κ : ProbabilityTheory.Kernel (𝓔 × Fin K) ℝ} [ProbabilityTheory.IsMarkovKernel κ] {alg : Algorithm (Fin K) ℝ} {E : Ω → 𝓔} {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] (h : IsBayesAlgEnvSeq Q κ alg E A R P) {σ2 : NNReal} (hσ2 : 0 < σ2) (hs : ∀ (e : 𝓔) (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂κ (e, a)) σ2 (κ (e, a))) {δ : ℝ} (hδ : 0 < δ) (n : ℕ) : P {ω | ∃ t < n, pullCount A (bestAction κ E ω) t ω ≠ 0 ∧ empMean A R (bestAction κ E ω) t ω - actionMean κ E (bestAction κ E ω) ω ≤ -√(2 * ↑σ2 * Real.log (1 / δ) / ↑(pullCount A (bestAction κ E ω) t ω))} ≤ ENNReal.ofReal ((↑n - 1) * δ)Learning.IsBayesAlgEnvSeq.prob_empMean_bestAction_sub_actionMean_le_le.{u_1, u_2} {𝓔 : Type u_1} {Ω : Type u_2} [MeasurableSpace 𝓔] [MeasurableSpace Ω] {K : ℕ} [Nonempty (Fin K)] {Q : MeasureTheory.Measure 𝓔} {κ : ProbabilityTheory.Kernel (𝓔 × Fin K) ℝ} [ProbabilityTheory.IsMarkovKernel κ] {alg : Algorithm (Fin K) ℝ} {E : Ω → 𝓔} {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] (h : IsBayesAlgEnvSeq Q κ alg E A R P) {σ2 : NNReal} (hσ2 : 0 < σ2) (hs : ∀ (e : 𝓔) (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - ∫ (x : ℝ), id x ∂κ (e, a)) σ2 (κ (e, a))) {δ : ℝ} (hδ : 0 < δ) (n : ℕ) : P {ω | ∃ t < n, pullCount A (bestAction κ E ω) t ω ≠ 0 ∧ empMean A R (bestAction κ E ω) t ω - actionMean κ E (bestAction κ E ω) ω ≤ -√(2 * ↑σ2 * Real.log (1 / δ) / ↑(pullCount A (bestAction κ E ω) t ω))} ≤ ENNReal.ofReal ((↑n - 1) * δ)
Code
lemma prob_empMean_bestAction_sub_actionMean_le_le (h : IsBayesAlgEnvSeq Q κ alg E A R P)
{σ2 : ℝ≥0} (hσ2 : 0 < σ2)
(hs : ∀ e a, HasSubgaussianMGF (fun x ↦ x - (κ (e, a))[id]) σ2 (κ (e, a)))
{δ : ℝ} (hδ : 0 < δ) (n : ℕ) :
P {ω | ∃ t < n, pullCount A (bestAction κ E ω) t ω ≠ 0 ∧
empMean A R (bestAction κ E ω) t ω - actionMean κ E (bestAction κ E ω) ω ≤
-√(2 * σ2 * Real.log (1 / δ) / (pullCount A (bestAction κ E ω) t ω))}
≤ ENNReal.ofReal ((n - 1) * δ)Type uses (6)
Body uses (15)
Actions: Source · Open Issue
Proof
by
have := h.measurable_param
have := h.measurable_action
have := h.measurable_feedback
let S := {(e, τ) | ∃ t < n, pullCount IT.action (bestAction κ id e) t τ ≠ 0 ∧
sumRewards IT.action IT.feedback (bestAction κ id e) t τ -
pullCount IT.action (bestAction κ id e) t τ * actionMean κ id (bestAction κ id e) e ≤
-√(2 * pullCount IT.action (bestAction κ id e) t τ * σ2 * Real.log (1 / δ))}
calc
_ ≤ (P.map (fun ω ↦ (E ω, trajectory A R ω))) S := by
rw [Measure.map_apply (by fun_prop) (by measurability)]
apply measure_mono
intro ω ⟨t, ht, hpc, hle⟩
rw [empMean] at hle
exact ⟨t, ht, hpc, sub_le_neg_sqrt_two_mul hpc hle⟩
_ = (P.map E ⊗ₘ condDistrib (trajectory A R) E P) S := by
rw [← compProd_map_condDistrib (by fun_prop)]
_ = ∫⁻ e, condDistrib (trajectory A R) E P e (Prod.mk e ⁻¹' S) ∂(P.map E) :=
Measure.compProd_apply (by measurability)
_ ≤ ∫⁻ e, ENNReal.ofReal ((n - 1) * δ) ∂(P.map E) := by
apply lintegral_mono_ae
rw [h.hasLaw_env.map_eq]
filter_upwards [h.ae_IsAlgEnvSeq] with e he
exact Bandits.prob_sumRewards_sub_pullCount_mul_le_le (ν := κ.sectR e) hσ2 (hs e _) he
hδ
_ = ENNReal.ofReal ((n - 1) * δ) := by
simp [Measure.map_apply h.measurable_param]-
Bandits.ArrayModel.identDistrib_sum_range_snd -
Bandits.ArrayModel.prob_pullCount_prod_sumRewards_mem_le -
Bandits.ArrayModel.prob_pullCount_mem_and_sumRewards_mem_le -
Bandits.ArrayModel.prob_exists_pullCount_eq_and_sumRewards_mem_le -
Bandits.ArrayModel.prob_sumRewards_le_sumRewards_le -
Bandits.ArrayModel.probReal_sumRewards_le_sumRewards_le -
Bandits.sumRewards_eq_comp -
Bandits.pullCount_eq_comp -
Learning.IsAlgEnvSeq.law_sumRewards_unique -
Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique' -
Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique -
Learning.IsAlgEnvSeq.identDistrib_pullCount_sumRewards -
Bandits.prob_pullCount_prod_sumRewards_mem_le -
Bandits.prob_pullCount_mem_and_sumRewards_mem_le -
Bandits.prob_sumRewards_mem_le -
Bandits.prob_pullCount_eq_and_sumRewards_mem_le -
Bandits.prob_exists_pullCount_eq_and_sumRewards_mem_le -
Bandits.probReal_sumRewards_le_sumRewards_le -
Bandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF -
Bandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF -
Bandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF' -
Bandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF' -
Bandits.prob_sumRewards_sub_pullCount_mul_ge_le -
Bandits.prob_sumRewards_sub_pullCount_mul_le_le -
Bandits.prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype -
Bandits.probReal_sum_le_sum_streamMeasure -
Bandits.prob_sum_le_sqrt_log -
Bandits.prob_sum_ge_sqrt_log -
Bandits.prob_avg_add_sqrt_log_le -
Bandits.prob_avg_sub_sqrt_log_ge -
Learning.IsBayesAlgEnvSeq.prob_empMean_sub_actionMean_ge_le -
Learning.IsBayesAlgEnvSeq.prob_empMean_bestAction_sub_actionMean_le_le