Online.Bandit.SumRewards

`identDistrib_sum_range_snd`🔗

LemmaBandits.ArrayModel.identDistrib_sum_range_snd

Details

No docstring.

theorem

Bandits.ArrayModel.identDistrib_sum_range_snd.{u_1} {𝓐 : Type u_1}
  {m𝓐 : MeasurableSpace 𝓐} [Countable 𝓐]
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (k : ℕ) :
  ProbabilityTheory.IdentDistrib
    (fun ω => ∑ i ∈ Finset.range k, Prod.snd ω i a)
    (fun ω => ∑ i ∈ Finset.range k, ω i a) (arrayMeasure ν)
    (streamMeasure ν)
Bandits.ArrayModel.identDistrib_sum_range_snd.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  [Countable 𝓐]
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  (a : 𝓐) (k : ℕ) :
  ProbabilityTheory.IdentDistrib
    (fun ω =>
      ∑ i ∈ Finset.range k,
        Prod.snd ω i a)
    (fun ω => ∑ i ∈ Finset.range k, ω i a)
    (arrayMeasure ν) (streamMeasure ν)

Code

lemma identDistrib_sum_range_snd (a : 𝓐) (k : ℕ) :
    IdentDistrib (fun ω ↦ ∑ i ∈ range k, ω.2 i a) (fun ω ↦ ∑ i ∈ range k, ω i a)
      𝔓 (streamMeasure ν) where
  aemeasurable_fst

Type uses (4)

Body uses (1)

instIsProbabilityMeasureForallNatForallStreamMeasureOfIsMarkovKernel

Used by (2)

Actions: Source · Open Issue

Proof

by fun_prop
  aemeasurable_snd := (measurable_sum _ fun i _ ↦ by fun_prop).aemeasurable
  map_eq := by
    rw [← Measure.snd_prod (μ := (Measure.infinitePi fun (_ : ℕ) ↦ (volume : Measure unitInterval)))
      (ν := streamMeasure ν), Measure.snd, Measure.map_map (by fun_prop) (by fun_prop)]
    rfl

`prob_pullCount_prod_sumRewards_mem_le`🔗

LemmaBandits.ArrayModel.prob_pullCount_prod_sumRewards_mem_le

Details

No docstring.

theorem

Bandits.ArrayModel.prob_pullCount_prod_sumRewards_mem_le.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (n : ℕ) {s : Set (ℕ × ℝ)}
  [DecidablePred fun x => x ∈ Prod.fst '' s] (hs : MeasurableSet s) :
  (arrayMeasure ν)
      {ω |
        (Learning.pullCount (action alg) a n ω,
            Learning.sumRewards (action alg) (reward alg) a n ω) ∈
          s} ≤
    ∑ k ∈ Finset.range (n + 1) with k ∈ Prod.fst '' s,
      (streamMeasure ν)
        {ω | ∑ i ∈ Finset.range k, ω i a ∈ Prod.mk k ⁻¹' s}
Bandits.ArrayModel.prob_pullCount_prod_sumRewards_mem_le.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  [DecidableEq 𝓐] [Countable 𝓐]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  (a : 𝓐) (n : ℕ) {s : Set (ℕ × ℝ)}
  [DecidablePred fun x =>
      x ∈ Prod.fst '' s]
  (hs : MeasurableSet s) :
  (arrayMeasure ν)
      {ω |
        (Learning.pullCount (action alg) a
              n ω,
            Learning.sumRewards
              (action alg) (reward alg) a
              n ω) ∈
          s} ≤
    ∑ k ∈ Finset.range (n + 1) with
      k ∈ Prod.fst '' s,
      (streamMeasure ν)
        {ω |
          ∑ i ∈ Finset.range k, ω i a ∈
            Prod.mk k ⁻¹' s}

Code

lemma prob_pullCount_prod_sumRewards_mem_le (a : 𝓐) (n : ℕ)
    {s : Set (ℕ × ℝ)} [DecidablePred (· ∈ Prod.fst '' s)] (hs : MeasurableSet s) :
    𝔓 {ω | (pullCount A a n ω, sumRewards A R a n ω) ∈ s} ≤
      ∑ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
        streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s}

Type uses (9)

Body uses (3)

Used by (2)

Actions: Source · Open Issue

Proof

by
  simp_rw [sumRewards_eq]
  calc 𝔓 ((fun ω ↦ (pullCount A a n ω, ∑ i ∈ range (pullCount A a n ω), ω.2 i a)) ⁻¹' s)
  _ ≤ 𝔓 {ω | ∃ k ≤ n, (k, ∑ i ∈ range k, ω.2 i a) ∈ s} := by
    refine measure_mono fun ω hω ↦ ?_
    simp only [Set.mem_setOf_eq] at hω ⊢
    exact ⟨pullCount A a n ω, pullCount_le _ _ _, hω⟩
  _ = 𝔓 (⋃ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
      {ω | (k, ∑ i ∈ range k, ω.2 i a) ∈ s}) := by congr 1; ext; simp; grind
  _ ≤ ∑ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
      𝔓 {ω | ∑ i ∈ range k, ω.2 i a ∈ Prod.mk k ⁻¹' s} := measure_biUnion_finset_le _ _
  _ = ∑ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
      (streamMeasure ν) {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s} := by
    congr with k
    have : (𝔓).map (fun ω ↦ ∑ i ∈ range k, ω.2 i a) =
        (streamMeasure ν).map (fun ω ↦ ∑ i ∈ range k, ω i a) :=
      (identDistrib_sum_range_snd a k).map_eq
    rw [Measure.ext_iff] at this
    specialize this (Prod.mk k ⁻¹' s) (hs.preimage (by fun_prop))
    rwa [Measure.map_apply (by fun_prop) (hs.preimage (by fun_prop)),
      Measure.map_apply (by fun_prop) (hs.preimage (by fun_prop))] at this

`prob_pullCount_mem_and_sumRewards_mem_le`🔗

LemmaBandits.ArrayModel.prob_pullCount_mem_and_sumRewards_mem_le

Details

No docstring.

theorem

Bandits.ArrayModel.prob_pullCount_mem_and_sumRewards_mem_le.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (n : ℕ) {s : Set ℕ}
  [DecidablePred fun x => x ∈ s] (hs : MeasurableSet s) {B : Set ℝ}
  (hB : MeasurableSet B) :
  (arrayMeasure ν)
      {ω |
        Learning.pullCount (action alg) a n ω ∈ s ∧
          Learning.sumRewards (action alg) (reward alg) a n ω ∈ B} ≤
    ∑ k ∈ Finset.range (n + 1) with k ∈ s,
      (streamMeasure ν) {ω | ∑ i ∈ Finset.range k, ω i a ∈ B}
Bandits.ArrayModel.prob_pullCount_mem_and_sumRewards_mem_le.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  [DecidableEq 𝓐] [Countable 𝓐]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  (a : 𝓐) (n : ℕ) {s : Set ℕ}
  [DecidablePred fun x => x ∈ s]
  (hs : MeasurableSet s) {B : Set ℝ}
  (hB : MeasurableSet B) :
  (arrayMeasure ν)
      {ω |
        Learning.pullCount (action alg) a
              n ω ∈
            s ∧
          Learning.sumRewards (action alg)
              (reward alg) a n ω ∈
            B} ≤
    ∑ k ∈ Finset.range (n + 1) with k ∈ s,
      (streamMeasure ν)
        {ω |
          ∑ i ∈ Finset.range k, ω i a ∈ B}

Code

lemma prob_pullCount_mem_and_sumRewards_mem_le (a : 𝓐) (n : ℕ)
    {s : Set ℕ} [DecidablePred (· ∈ s)] (hs : MeasurableSet s) {B : Set ℝ} (hB : MeasurableSet B) :
    𝔓 {ω | pullCount A a n ω ∈ s ∧ sumRewards A R a n ω ∈ B} ≤
      ∑ k ∈ (range (n + 1)).filter (· ∈ s),
        streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ B}

Type uses (9)

Body uses (1)

prob_pullCount_prod_sumRewards_mem_le

Actions: Source · Open Issue

Proof

by
  classical
  rcases Set.eq_empty_or_nonempty B with h_empty | h_nonempty
  · simp [h_empty]
  convert prob_pullCount_prod_sumRewards_mem_le a n (hs.prod hB) (ν := ν) (alg := alg)
    with _ _ _ k hk
  · rfl
  · ext n
    have : ∃ x, x ∈ B := h_nonempty
    simp [this]
  · ext x
    simp only [Set.mem_image, Set.mem_prod, Prod.exists, exists_and_right, exists_and_left,
      exists_eq_right, mem_filter, mem_range] at hk
    simp [hk.2.1]

`prob_exists_pullCount_eq_and_sumRewards_mem_le`🔗

LemmaBandits.ArrayModel.prob_exists_pullCount_eq_and_sumRewards_mem_le

Details

No docstring.

theorem

Bandits.ArrayModel.prob_exists_pullCount_eq_and_sumRewards_mem_le.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (m : ℕ) {B : Set ℝ}
  (hB : MeasurableSet B) :
  (arrayMeasure ν)
      {ω |
        ∃ n,
          Learning.pullCount (action alg) a n ω = m ∧
            Learning.sumRewards (action alg) (reward alg) a n ω ∈ B} ≤
    (streamMeasure ν) {ω | ∑ i ∈ Finset.range m, ω i a ∈ B}
Bandits.ArrayModel.prob_exists_pullCount_eq_and_sumRewards_mem_le.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  [DecidableEq 𝓐] [Countable 𝓐]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  (a : 𝓐) (m : ℕ) {B : Set ℝ}
  (hB : MeasurableSet B) :
  (arrayMeasure ν)
      {ω |
        ∃ n,
          Learning.pullCount (action alg)
                a n ω =
              m ∧
            Learning.sumRewards
                (action alg) (reward alg)
                a n ω ∈
              B} ≤
    (streamMeasure ν)
      {ω |
        ∑ i ∈ Finset.range m, ω i a ∈ B}

Code

lemma prob_exists_pullCount_eq_and_sumRewards_mem_le (a : 𝓐) (m : ℕ) {B : Set ℝ}
    (hB : MeasurableSet B) : 𝔓 {ω | ∃ n, pullCount A a n ω = m ∧ sumRewards A R a n ω ∈ B} ≤
      streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B}

Type uses (9)

Body uses (2)

Used by (1)

prob_exists_pullCount_eq_and_sumRewards_mem_le

Actions: Source · Open Issue

Proof

calc
    _ ≤ 𝔓 {ω | ∑ i ∈ range m, ω.2 i a ∈ B} := by
        apply measure_mono
        intro ω ⟨n, hp, hn⟩
        rwa [sumRewards_eq alg a n ω, hp] at hn
    _ = streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B} :=
        (identDistrib_sum_range_snd a m).measure_mem_eq hB

`prob_sumRewards_le_sumRewards_le`🔗

LemmaBandits.ArrayModel.prob_sumRewards_le_sumRewards_le

Details

No docstring.

theorem

Bandits.ArrayModel.prob_sumRewards_le_sumRewards_le.{u_1} {𝓐 : Type u_1}
  {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] [Fintype 𝓐] (a : 𝓐)
  (n m₁ m₂ : ℕ) :
  (arrayMeasure ν)
      {ω |
        Learning.pullCount (action alg) (bestArm ν) n ω = m₁ ∧
          Learning.pullCount (action alg) a n ω = m₂ ∧
            Learning.sumRewards (action alg) (reward alg) (bestArm ν) n
                ω ≤
              Learning.sumRewards (action alg) (reward alg) a n ω} ≤
    (streamMeasure ν)
      {ω |
        ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤
          ∑ i ∈ Finset.range m₂, ω i a}
Bandits.ArrayModel.prob_sumRewards_le_sumRewards_le.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  [DecidableEq 𝓐] [Countable 𝓐]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  [Fintype 𝓐] (a : 𝓐) (n m₁ m₂ : ℕ) :
  (arrayMeasure ν)
      {ω |
        Learning.pullCount (action alg)
              (bestArm ν) n ω =
            m₁ ∧
          Learning.pullCount (action alg)
                a n ω =
              m₂ ∧
            Learning.sumRewards
                (action alg) (reward alg)
                (bestArm ν) n ω ≤
              Learning.sumRewards
                (action alg) (reward alg)
                a n ω} ≤
    (streamMeasure ν)
      {ω |
        ∑ i ∈ Finset.range m₁,
            ω i (bestArm ν) ≤
          ∑ i ∈ Finset.range m₂, ω i a}

Code

lemma prob_sumRewards_le_sumRewards_le [Fintype 𝓐] (a : 𝓐) (n m₁ m₂ : ℕ) :
    (𝔓) {ω | pullCount A (bestArm ν) n ω = m₁ ∧ pullCount A a n ω = m₂ ∧
        sumRewards A R (bestArm ν) n ω ≤ sumRewards A R a n ω} ≤
      streamMeasure ν
        {ω | ∑ i ∈ range m₁, ω i (bestArm ν) ≤ ∑ i ∈ range m₂, ω i a}

Type uses (10)

Body uses (2)

Used by (1)

probReal_sumRewards_le_sumRewards_le

Actions: Source · Open Issue

Proof

by
  simp_rw [sumRewards_eq]
  calc 𝔓 {ω | pullCount A (bestArm ν) n ω = m₁ ∧ pullCount A a n ω = m₂ ∧
      ∑ i ∈ range (pullCount A (bestArm ν) n ω), ω.2 i (bestArm ν) ≤
        ∑ i ∈ range (pullCount A a n ω), ω.2 i a}
  _ ≤ 𝔓 ((fun ω ↦ (∑ i ∈ range m₁, ω.2 i (bestArm ν), ∑ i ∈ range m₂, ω.2 i a)) ⁻¹'
        {p | p.1 ≤ p.2}) := by
      refine measure_mono fun ω hω ↦ ?_
      simp only [Set.preimage_setOf_eq, Set.mem_setOf_eq] at hω ⊢
      grind
  _ = streamMeasure ν
      {ω | ∑ i ∈ range m₁, ω i (bestArm ν) ≤ ∑ i ∈ range m₂, ω i a} := by
    rw [← Measure.snd_prod (μ := (Measure.infinitePi fun (_ : ℕ) ↦ (volume : Measure unitInterval)))
      (ν := streamMeasure ν), Measure.snd, Measure.map_apply (by fun_prop)]
    · rfl
    simp only [measurableSet_setOf]
    fun_prop

`probReal_sumRewards_le_sumRewards_le`🔗

LemmaBandits.ArrayModel.probReal_sumRewards_le_sumRewards_le

Details

No docstring.

theorem

Bandits.ArrayModel.probReal_sumRewards_le_sumRewards_le.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐} [DecidableEq 𝓐] [Countable 𝓐]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐] {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] [Fintype 𝓐] (a : 𝓐)
  (n m₁ m₂ : ℕ) :
  MeasureTheory.Measure.real (arrayMeasure ν)
      {ω |
        Learning.pullCount (action alg) (bestArm ν) n ω = m₁ ∧
          Learning.pullCount (action alg) a n ω = m₂ ∧
            Learning.sumRewards (action alg) (reward alg) (bestArm ν) n
                ω ≤
              Learning.sumRewards (action alg) (reward alg) a n ω} ≤
    MeasureTheory.Measure.real (streamMeasure ν)
      {ω |
        ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤
          ∑ i ∈ Finset.range m₂, ω i a}
Bandits.ArrayModel.probReal_sumRewards_le_sumRewards_le.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  [DecidableEq 𝓐] [Countable 𝓐]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  [Fintype 𝓐] (a : 𝓐) (n m₁ m₂ : ℕ) :
  MeasureTheory.Measure.real
      (arrayMeasure ν)
      {ω |
        Learning.pullCount (action alg)
              (bestArm ν) n ω =
            m₁ ∧
          Learning.pullCount (action alg)
                a n ω =
              m₂ ∧
            Learning.sumRewards
                (action alg) (reward alg)
                (bestArm ν) n ω ≤
              Learning.sumRewards
                (action alg) (reward alg)
                a n ω} ≤
    MeasureTheory.Measure.real
      (streamMeasure ν)
      {ω |
        ∑ i ∈ Finset.range m₁,
            ω i (bestArm ν) ≤
          ∑ i ∈ Finset.range m₂, ω i a}

Code

lemma probReal_sumRewards_le_sumRewards_le [Fintype 𝓐] (a : 𝓐) (n m₁ m₂ : ℕ) :
    (𝔓).real {ω | pullCount A (bestArm ν) n ω = m₁ ∧ pullCount A a n ω = m₂ ∧
        sumRewards A R (bestArm ν) n ω ≤ sumRewards A R a n ω} ≤
      (streamMeasure ν).real
        {ω | ∑ i ∈ range m₁, ω i (bestArm ν) ≤ ∑ i ∈ range m₂, ω i a}

Type uses (10)

Body uses (2)

Used by (1)

probReal_sumRewards_le_sumRewards_le

Actions: Source · Open Issue

Proof

by
  simp_rw [measureReal_def]
  gcongr
  · finiteness
  · exact prob_sumRewards_le_sumRewards_le a n m₁ m₂

`sumRewards_eq_comp`🔗

LemmaBandits.sumRewards_eq_comp

Details

No docstring.

theorem

Bandits.sumRewards_eq_comp.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2}
  [DecidableEq 𝓐] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} :
  Learning.sumRewards A R a n =
    (fun p =>
        ∑ i ∈ Finset.range n,
          if Prod.fst (p i) = a then Prod.snd (p i) else 0) ∘
      Learning.trajectory A R
Bandits.sumRewards_eq_comp.{u_1, u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  [DecidableEq 𝓐] {A : ℕ → Ω → 𝓐}
  {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} :
  Learning.sumRewards A R a n =
    (fun p =>
        ∑ i ∈ Finset.range n,
          if Prod.fst (p i) = a then
            Prod.snd (p i)
          else 0) ∘
      Learning.trajectory A R

Code

lemma sumRewards_eq_comp :
    sumRewards A R a n =
     (fun p ↦ ∑ i ∈ range n, if (p i).1 = a then (p i).2 else 0) ∘ (trajectory A R)

Type uses (2)

Used by (2)

Actions: Source · Open Issue

Proof

by
  ext
  simp [sumRewards, trajectory]

`pullCount_eq_comp`🔗

LemmaBandits.pullCount_eq_comp

Details

No docstring.

theorem

Bandits.pullCount_eq_comp.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2}
  [DecidableEq 𝓐] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} :
  Learning.pullCount A a n =
    (fun p =>
        ∑ i ∈ Finset.range n, if Prod.fst (p i) = a then 1 else 0) ∘
      Learning.trajectory A R
Bandits.pullCount_eq_comp.{u_1, u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  [DecidableEq 𝓐] {A : ℕ → Ω → 𝓐}
  {R : ℕ → Ω → ℝ} {n : ℕ} {a : 𝓐} :
  Learning.pullCount A a n =
    (fun p =>
        ∑ i ∈ Finset.range n,
          if Prod.fst (p i) = a then 1
          else 0) ∘
      Learning.trajectory A R

Code

lemma pullCount_eq_comp :
    pullCount A a n =
      (fun p ↦ ∑ i ∈ range n, if (p i).1 = a then 1 else 0) ∘ (trajectory A R)

Type uses (2)

Used by (1)

law_pullCount_sumRewards_unique'

Actions: Source · Open Issue

Proof

by
  ext
  simp [pullCount, trajectory]

`law_sumRewards_unique`🔗

LemmaLearning.IsAlgEnvSeq.law_sumRewards_unique

Details

No docstring.

theorem

Learning.IsAlgEnvSeq.law_sumRewards_unique.{u_1, u_2, u_3}
  {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐]
  {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω}
  {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'}
  [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {A₂ : ℕ → Ω' → 𝓐} {R₂ : ℕ → Ω' → ℝ} {n : ℕ} {a : 𝓐}
  [MeasurableSingletonClass 𝓐]
  (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
  (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
  MeasureTheory.Measure.map (sumRewards A R a n) P =
    MeasureTheory.Measure.map (sumRewards A₂ R₂ a n) P'
Learning.IsAlgEnvSeq.law_sumRewards_unique.{u_1,
    u_2, u_3}
  {𝓐 : Type u_1} {Ω : Type u_2}
  {Ω' : Type u_3} [DecidableEq 𝓐]
  {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω}
  {mΩ' : MeasurableSpace Ω'}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {P' : MeasureTheory.Measure Ω'}
  [MeasureTheory.IsProbabilityMeasure P']
  {alg : Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {A₂ : ℕ → Ω' → 𝓐} {R₂ : ℕ → Ω' → ℝ}
  {n : ℕ} {a : 𝓐}
  [MeasurableSingletonClass 𝓐]
  (h1 :
    IsAlgEnvSeq A R alg (stationaryEnv ν)
      P)
  (h2 :
    IsAlgEnvSeq A₂ R₂ alg
      (stationaryEnv ν) P') :
  MeasureTheory.Measure.map
      (sumRewards A R a n) P =
    MeasureTheory.Measure.map
      (sumRewards A₂ R₂ a n) P'

Code

lemma _root_.Learning.IsAlgEnvSeq.law_sumRewards_unique [MeasurableSingletonClass 𝓐]
    (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
    P.map (sumRewards A R a n) = P'.map (sumRewards A₂ R₂ a n)

Type uses (4)

Body uses (5)

Actions: Source · Open Issue

Proof

by
  have hA := h1.measurable_action
  have hR := h1.measurable_feedback
  have hA2 := h2.measurable_action
  have hR2 := h2.measurable_feedback
  have h_unique := isAlgEnvSeq_unique h1 h2
  rw [sumRewards_eq_comp, sumRewards_eq_comp, ← Measure.map_map, h_unique, Measure.map_map,
    ← sumRewards_eq_comp]
  · refine measurable_sum _ fun i hi ↦ Measurable.ite ?_ (by fun_prop) (by fun_prop)
    exact (measurableSet_singleton _).preimage (by fun_prop)
  · fun_prop
  · refine measurable_sum _ fun i hi ↦ Measurable.ite ?_ (by fun_prop) (by fun_prop)
    exact (measurableSet_singleton _).preimage (by fun_prop)
  · fun_prop

`law_pullCount_sumRewards_unique'`🔗

LemmaLearning.IsAlgEnvSeq.law_pullCount_sumRewards_unique'

Details

No docstring.

theorem

Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique'.{u_1, u_2, u_3}
  {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐]
  {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω}
  {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'}
  [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {A₂ : ℕ → Ω' → 𝓐} {R₂ : ℕ → Ω' → ℝ} {n : ℕ}
  [MeasurableSingletonClass 𝓐]
  (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
  (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
  ProbabilityTheory.IdentDistrib
    (fun ω a => (pullCount A a n ω, sumRewards A R a n ω))
    (fun ω a => (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P P'
Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique'.{u_1,
    u_2, u_3}
  {𝓐 : Type u_1} {Ω : Type u_2}
  {Ω' : Type u_3} [DecidableEq 𝓐]
  {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω}
  {mΩ' : MeasurableSpace Ω'}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {P' : MeasureTheory.Measure Ω'}
  [MeasureTheory.IsProbabilityMeasure P']
  {alg : Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {A₂ : ℕ → Ω' → 𝓐} {R₂ : ℕ → Ω' → ℝ}
  {n : ℕ} [MeasurableSingletonClass 𝓐]
  (h1 :
    IsAlgEnvSeq A R alg (stationaryEnv ν)
      P)
  (h2 :
    IsAlgEnvSeq A₂ R₂ alg
      (stationaryEnv ν) P') :
  ProbabilityTheory.IdentDistrib
    (fun ω a =>
      (pullCount A a n ω,
        sumRewards A R a n ω))
    (fun ω a =>
      (pullCount A₂ a n ω,
        sumRewards A₂ R₂ a n ω))
    P P'

Code

lemma _root_.Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique' [MeasurableSingletonClass 𝓐]
    (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
    IdentDistrib (fun ω a ↦ (pullCount A a n ω, sumRewards A R a n ω))
      (fun ω a ↦ (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P P'

Type uses (5)

Body uses (7)

Used by (2)

Actions: Source · Open Issue

Proof

by
  have hA := h1.measurable_action
  have hR := h1.measurable_feedback
  have hA2 := h2.measurable_action
  have hR2 := h2.measurable_feedback
  constructor
  · refine Measurable.aemeasurable ?_
    rw [measurable_pi_iff]
    exact fun a ↦ Measurable.prod (by fun_prop) (measurable_sumRewards hA hR _ _)
  · refine Measurable.aemeasurable ?_
    rw [measurable_pi_iff]
    exact fun a ↦ Measurable.prod (by fun_prop) (measurable_sumRewards hA2 hR2 _ _)
  have h_unique := isAlgEnvSeq_unique h1 h2
  let f := fun (p : ℕ → 𝓐 × ℝ ) (a : 𝓐) ↦ (∑ i ∈ range n, if (p i).1 = a then 1 else 0,
    ∑ i ∈ range n, if (p i).1 = a then (p i).2 else 0)
  have hf : Measurable f := by
    rw [measurable_pi_iff]
    intro a
    refine Measurable.prod ?_ ?_
    · simp only [f]
      refine measurable_sum _ fun i hi ↦ Measurable.ite ?_ (by fun_prop) (by fun_prop)
      exact (measurableSet_singleton _).preimage (by fun_prop)
    · simp only [f]
      refine measurable_sum _ fun i hi ↦ Measurable.ite ?_ (by fun_prop) (by fun_prop)
      exact (measurableSet_singleton _).preimage (by fun_prop)
  have h_eq_comp : (fun ω a ↦ (pullCount A a n ω, sumRewards A R a n ω))
      = f ∘ (trajectory A R) := by
    ext ω a : 2
    rw [pullCount_eq_comp (R := R), sumRewards_eq_comp]
    grind
  have h_eq_comp2 : (fun ω a ↦ (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω))
      = f ∘ (trajectory A₂ R₂) := by
    ext ω a : 2
    rw [pullCount_eq_comp (R := R₂), sumRewards_eq_comp]
    grind
  rw [h_eq_comp, h_eq_comp2, ← Measure.map_map hf, h_unique, Measure.map_map hf,
    ← h_eq_comp2]
  · rw [measurable_pi_iff]
    exact fun n ↦ Measurable.prodMk (hA2 n) (hR2 n)
  · rw [measurable_pi_iff]
    exact fun n ↦ Measurable.prodMk (hA n) (hR n)

`law_pullCount_sumRewards_unique`🔗

LemmaLearning.IsAlgEnvSeq.law_pullCount_sumRewards_unique

Details

No docstring.

theorem

Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique.{u_1, u_2, u_3}
  {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐]
  {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω}
  {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'}
  [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {A₂ : ℕ → Ω' → 𝓐} {R₂ : ℕ → Ω' → ℝ} {n : ℕ} {a : 𝓐}
  [MeasurableSingletonClass 𝓐]
  (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
  (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
  MeasureTheory.Measure.map
      (fun ω => (pullCount A a n ω, sumRewards A R a n ω)) P =
    MeasureTheory.Measure.map
      (fun ω => (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω)) P'
Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique.{u_1,
    u_2, u_3}
  {𝓐 : Type u_1} {Ω : Type u_2}
  {Ω' : Type u_3} [DecidableEq 𝓐]
  {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω}
  {mΩ' : MeasurableSpace Ω'}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {P' : MeasureTheory.Measure Ω'}
  [MeasureTheory.IsProbabilityMeasure P']
  {alg : Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {A₂ : ℕ → Ω' → 𝓐} {R₂ : ℕ → Ω' → ℝ}
  {n : ℕ} {a : 𝓐}
  [MeasurableSingletonClass 𝓐]
  (h1 :
    IsAlgEnvSeq A R alg (stationaryEnv ν)
      P)
  (h2 :
    IsAlgEnvSeq A₂ R₂ alg
      (stationaryEnv ν) P') :
  MeasureTheory.Measure.map
      (fun ω =>
        (pullCount A a n ω,
          sumRewards A R a n ω))
      P =
    MeasureTheory.Measure.map
      (fun ω =>
        (pullCount A₂ a n ω,
          sumRewards A₂ R₂ a n ω))
      P'

Code

lemma _root_.Learning.IsAlgEnvSeq.law_pullCount_sumRewards_unique [MeasurableSingletonClass 𝓐]
    (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
    P.map (fun ω ↦ (pullCount A a n ω, sumRewards A R a n ω)) =
      P'.map (fun ω ↦ (pullCount A₂ a n ω, sumRewards A₂ R₂ a n ω))

Type uses (5)

Body uses (1)

law_pullCount_sumRewards_unique'

Used by (1)

prob_pullCount_prod_sumRewards_mem_le

Actions: Source · Open Issue

Proof

((h1.law_pullCount_sumRewards_unique' h2 (n := n)).comp (u := fun f ↦ f a) (by fun_prop)).map_eq

`identDistrib_pullCount_sumRewards`🔗

LemmaLearning.IsAlgEnvSeq.identDistrib_pullCount_sumRewards

Details

No docstring.

theorem

Learning.IsAlgEnvSeq.identDistrib_pullCount_sumRewards.{u_1, u_2, u_3}
  {𝓐 : Type u_1} {Ω : Type u_2} {Ω' : Type u_3} [DecidableEq 𝓐]
  {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω}
  {mΩ' : MeasurableSpace Ω'} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {P' : MeasureTheory.Measure Ω'}
  [MeasureTheory.IsProbabilityMeasure P'] {alg : Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {A₂ : ℕ → Ω' → 𝓐} {R₂ : ℕ → Ω' → ℝ} [MeasurableSingletonClass 𝓐]
  (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
  (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
  ProbabilityTheory.IdentDistrib
    (fun ω n a => (pullCount A a n ω, sumRewards A R a n ω))
    (fun ω' n a => (pullCount A₂ a n ω', sumRewards A₂ R₂ a n ω')) P P'
Learning.IsAlgEnvSeq.identDistrib_pullCount_sumRewards.{u_1,
    u_2, u_3}
  {𝓐 : Type u_1} {Ω : Type u_2}
  {Ω' : Type u_3} [DecidableEq 𝓐]
  {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω}
  {mΩ' : MeasurableSpace Ω'}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {P' : MeasureTheory.Measure Ω'}
  [MeasureTheory.IsProbabilityMeasure P']
  {alg : Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {A₂ : ℕ → Ω' → 𝓐} {R₂ : ℕ → Ω' → ℝ}
  [MeasurableSingletonClass 𝓐]
  (h1 :
    IsAlgEnvSeq A R alg (stationaryEnv ν)
      P)
  (h2 :
    IsAlgEnvSeq A₂ R₂ alg
      (stationaryEnv ν) P') :
  ProbabilityTheory.IdentDistrib
    (fun ω n a =>
      (pullCount A a n ω,
        sumRewards A R a n ω))
    (fun ω' n a =>
      (pullCount A₂ a n ω',
        sumRewards A₂ R₂ a n ω'))
    P P'

Code

lemma _root_.Learning.IsAlgEnvSeq.identDistrib_pullCount_sumRewards [MeasurableSingletonClass 𝓐]
    (h1 : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (h2 : IsAlgEnvSeq A₂ R₂ alg (stationaryEnv ν) P') :
    IdentDistrib (fun ω n a ↦ (pullCount A a n ω, sumRewards A R a n ω))
      (fun ω' n a ↦ (pullCount A₂ a n ω', sumRewards A₂ R₂ a n ω')) P P'

Type uses (5)

Body uses (2)

Used by (1)

prob_exists_pullCount_eq_and_sumRewards_mem_le

Actions: Source · Open Issue

Proof

by
  let f (τ : ℕ → 𝓐 × ℝ) (n : ℕ) (a : 𝓐) : ℕ × ℝ :=
    (∑ i ∈ range n, if (τ i).1 = a then 1 else 0,
     ∑ i ∈ range n, if (τ i).1 = a then (τ i).2 else 0)
  have hc1 : (fun ω n a ↦ (pullCount A a n ω, sumRewards A R a n ω)) =
      f ∘ (trajectory A R) := by
    ext ω n a : 3
    simp_rw [Function.comp, f, pullCount, card_filter, sumRewards, trajectory]
    rfl
  have hc2 : (fun ω' n a ↦ (pullCount A₂ a n ω', sumRewards A₂ R₂ a n ω')) =
      f ∘ (trajectory A₂ R₂) := by
    ext ω' n a : 3
    simp_rw [Function.comp, f, pullCount, card_filter, sumRewards, trajectory]
    rfl
  have hf : Measurable f := by
    simp_rw [f, measurable_pi_iff]
    intro n a
    apply Measurable.prod
    · dsimp only
      exact measurable_sum _
        (fun _ _ ↦ Measurable.ite (by measurability) (by fun_prop) (by fun_prop))
    · dsimp only
      exact measurable_sum _
        (fun _ _ ↦ Measurable.ite (by measurability) (by fun_prop) (by fun_prop))
  rw [hc1, hc2]
  exact (h1.identDistrib_trajectory h2).comp hf

`prob_pullCount_prod_sumRewards_mem_le`🔗

LemmaBandits.prob_pullCount_prod_sumRewards_mem_le

Details

No docstring.

theorem

Bandits.prob_pullCount_prod_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1}
  {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐]
  [MeasurableSingletonClass 𝓐]
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  {s : Set (ℕ × ℝ)} [DecidablePred fun x => x ∈ Prod.fst '' s]
  (hs : MeasurableSet s) :
  P
      {ω |
        (Learning.pullCount A a n ω, Learning.sumRewards A R a n ω) ∈
          s} ≤
    ∑ k ∈ Finset.range (n + 1) with k ∈ Prod.fst '' s,
      (streamMeasure ν)
        {ω | ∑ i ∈ Finset.range k, ω i a ∈ Prod.mk k ⁻¹' s}
Bandits.prob_pullCount_prod_sumRewards_mem_le.{u_1,
    u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ}
  {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐]
  [MeasurableSingletonClass 𝓐]
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  {s : Set (ℕ × ℝ)}
  [DecidablePred fun x =>
      x ∈ Prod.fst '' s]
  (hs : MeasurableSet s) :
  P
      {ω |
        (Learning.pullCount A a n ω,
            Learning.sumRewards A R a n
              ω) ∈
          s} ≤
    ∑ k ∈ Finset.range (n + 1) with
      k ∈ Prod.fst '' s,
      (streamMeasure ν)
        {ω |
          ∑ i ∈ Finset.range k, ω i a ∈
            Prod.mk k ⁻¹' s}

Code

lemma prob_pullCount_prod_sumRewards_mem_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    {s : Set (ℕ × ℝ)} [DecidablePred (· ∈ Prod.fst '' s)] (hs : MeasurableSet s) :
    P {ω | (pullCount A a n ω, sumRewards A R a n ω) ∈ s} ≤
      ∑ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
        streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s}

Type uses (6)

Body uses (14)

Used by (3)

Actions: Source · Open Issue

Proof

by
  have hA := h.measurable_action
  have hR := h.measurable_feedback
  calc P {ω | (pullCount A a n ω, sumRewards A R a n ω) ∈ s}
  _ = (P.map (fun ω ↦ (pullCount A a n ω, sumRewards A R a n ω))) s := by
      rw [Measure.map_apply (by fun_prop) hs]; rfl
  _ = ((ArrayModel.arrayMeasure ν).map
      (fun ω ↦ (pullCount (ArrayModel.action alg) a n ω,
        sumRewards (ArrayModel.action alg) (ArrayModel.reward alg) a n ω))) s := by
    rw [h.law_pullCount_sumRewards_unique (ArrayModel.isAlgEnvSeq_arrayMeasure alg ν)]
  _ = (ArrayModel.arrayMeasure ν) {ω | (pullCount (ArrayModel.action alg) a n ω,
      sumRewards (ArrayModel.action alg) (ArrayModel.reward alg) a n ω) ∈ s} := by
    rw [Measure.map_apply (by fun_prop) hs]; rfl
  _ ≤ ∑ k ∈ (range (n + 1)).filter (· ∈ Prod.fst '' s),
      streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s} :=
    ArrayModel.prob_pullCount_prod_sumRewards_mem_le a n hs

`prob_pullCount_mem_and_sumRewards_mem_le`🔗

LemmaBandits.prob_pullCount_mem_and_sumRewards_mem_le

Details

No docstring.

theorem

Bandits.prob_pullCount_mem_and_sumRewards_mem_le.{u_1, u_2}
  {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐]
  [MeasurableSingletonClass 𝓐]
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  {s : Set ℕ} [DecidablePred fun x => x ∈ s] (hs : MeasurableSet s)
  {B : Set ℝ} (hB : MeasurableSet B) :
  P
      {ω |
        Learning.pullCount A a n ω ∈ s ∧
          Learning.sumRewards A R a n ω ∈ B} ≤
    ∑ k ∈ Finset.range (n + 1) with k ∈ s,
      (streamMeasure ν) {ω | ∑ i ∈ Finset.range k, ω i a ∈ B}
Bandits.prob_pullCount_mem_and_sumRewards_mem_le.{u_1,
    u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ}
  {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐]
  [MeasurableSingletonClass 𝓐]
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  {s : Set ℕ}
  [DecidablePred fun x => x ∈ s]
  (hs : MeasurableSet s) {B : Set ℝ}
  (hB : MeasurableSet B) :
  P
      {ω |
        Learning.pullCount A a n ω ∈ s ∧
          Learning.sumRewards A R a n ω ∈
            B} ≤
    ∑ k ∈ Finset.range (n + 1) with k ∈ s,
      (streamMeasure ν)
        {ω |
          ∑ i ∈ Finset.range k, ω i a ∈ B}

Code

lemma prob_pullCount_mem_and_sumRewards_mem_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    {s : Set ℕ} [DecidablePred (· ∈ s)] (hs : MeasurableSet s) {B : Set ℝ} (hB : MeasurableSet B) :
    P {ω | pullCount A a n ω ∈ s ∧ sumRewards A R a n ω ∈ B} ≤
      ∑ k ∈ (range (n + 1)).filter (· ∈ s),
        streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ B}

Type uses (6)

Body uses (1)

prob_pullCount_prod_sumRewards_mem_le

Used by (2)

Actions: Source · Open Issue

Proof

by
  classical
  rcases Set.eq_empty_or_nonempty B with h_empty | h_nonempty
  · simp [h_empty]
  convert prob_pullCount_prod_sumRewards_mem_le h (hs.prod hB) (ν := ν) (alg := alg) with _ _ _ k hk
  · rfl
  · ext n
    have : ∃ x, x ∈ B := h_nonempty
    simp [this]
  · ext x
    simp only [Set.mem_image, Set.mem_prod, Prod.exists, exists_and_right, exists_and_left,
      exists_eq_right, mem_filter, mem_range] at hk
    simp [hk.2.1]

`prob_sumRewards_mem_le`🔗

LemmaBandits.prob_sumRewards_mem_le

Details

No docstring.

theorem

Bandits.prob_sumRewards_mem_le.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2}
  [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐]
  [MeasurableSingletonClass 𝓐]
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  {B : Set ℝ} (hB : MeasurableSet B) :
  P (Learning.sumRewards A R a n ⁻¹' B) ≤
    ∑ k ∈ Finset.range (n + 1),
      (streamMeasure ν) {ω | ∑ i ∈ Finset.range k, ω i a ∈ B}
Bandits.prob_sumRewards_mem_le.{u_1, u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ}
  {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐]
  [MeasurableSingletonClass 𝓐]
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  {B : Set ℝ} (hB : MeasurableSet B) :
  P (Learning.sumRewards A R a n ⁻¹' B) ≤
    ∑ k ∈ Finset.range (n + 1),
      (streamMeasure ν)
        {ω |
          ∑ i ∈ Finset.range k, ω i a ∈ B}

Code

lemma prob_sumRewards_mem_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    {B : Set ℝ} (hB : MeasurableSet B) :
    P (sumRewards A R a n ⁻¹' B) ≤
      ∑ k ∈ range (n + 1), streamMeasure ν {ω | ∑ i ∈ range k, ω i a ∈ B}

Type uses (5)

Body uses (2)

Actions: Source · Open Issue

Proof

by
  classical
  have h_le := prob_pullCount_mem_and_sumRewards_mem_le h .univ hB (a := a) (n := n)
  simp only [Set.mem_univ, true_and, filter_true] at h_le
  convert h_le
  rfl

`prob_pullCount_eq_and_sumRewards_mem_le`🔗

LemmaBandits.prob_pullCount_eq_and_sumRewards_mem_le

Details

No docstring.

theorem

Bandits.prob_pullCount_eq_and_sumRewards_mem_le.{u_1, u_2}
  {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐]
  [MeasurableSingletonClass 𝓐]
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  {m : ℕ} (hm : m ≤ n) {B : Set ℝ} (hB : MeasurableSet B) :
  P
      {ω |
        Learning.pullCount A a n ω = m ∧
          Learning.sumRewards A R a n ω ∈ B} ≤
    (streamMeasure ν) {ω | ∑ i ∈ Finset.range m, ω i a ∈ B}
Bandits.prob_pullCount_eq_and_sumRewards_mem_le.{u_1,
    u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ}
  {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐]
  [MeasurableSingletonClass 𝓐]
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  {m : ℕ} (hm : m ≤ n) {B : Set ℝ}
  (hB : MeasurableSet B) :
  P
      {ω |
        Learning.pullCount A a n ω = m ∧
          Learning.sumRewards A R a n ω ∈
            B} ≤
    (streamMeasure ν)
      {ω |
        ∑ i ∈ Finset.range m, ω i a ∈ B}

Code

lemma prob_pullCount_eq_and_sumRewards_mem_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    {m : ℕ} (hm : m ≤ n) {B : Set ℝ} (hB : MeasurableSet B) :
    P {ω | pullCount A a n ω = m ∧ sumRewards A R a n ω ∈ B} ≤
      streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B}

Type uses (6)

Body uses (1)

prob_pullCount_mem_and_sumRewards_mem_le

Actions: Source · Open Issue

Proof

by
  have h_le := prob_pullCount_mem_and_sumRewards_mem_le h (s := {m}) (by simp) hB (a := a) (n := n)
  have hm' : m < n + 1 := by lia
  simpa [hm'] using h_le

`prob_exists_pullCount_eq_and_sumRewards_mem_le`🔗

LemmaBandits.prob_exists_pullCount_eq_and_sumRewards_mem_le

Details

No docstring.

theorem

Bandits.prob_exists_pullCount_eq_and_sumRewards_mem_le.{u_1, u_2}
  {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  [Nonempty 𝓐] [Countable 𝓐] [MeasurableSingletonClass 𝓐]
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  (a : 𝓐) (m : ℕ) {B : Set ℝ} (hB : MeasurableSet B) :
  P
      {ω |
        ∃ n,
          Learning.pullCount A a n ω = m ∧
            Learning.sumRewards A R a n ω ∈ B} ≤
    (streamMeasure ν) {ω | ∑ i ∈ Finset.range m, ω i a ∈ B}
Bandits.prob_exists_pullCount_eq_and_sumRewards_mem_le.{u_1,
    u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  [Nonempty 𝓐] [Countable 𝓐]
  [MeasurableSingletonClass 𝓐]
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  (a : 𝓐) (m : ℕ) {B : Set ℝ}
  (hB : MeasurableSet B) :
  P
      {ω |
        ∃ n,
          Learning.pullCount A a n ω = m ∧
            Learning.sumRewards A R a n
                ω ∈
              B} ≤
    (streamMeasure ν)
      {ω |
        ∑ i ∈ Finset.range m, ω i a ∈ B}

Code

lemma prob_exists_pullCount_eq_and_sumRewards_mem_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (a : 𝓐) (m : ℕ) {B : Set ℝ}
    (hB : MeasurableSet B) :
    P {ω | ∃ n, pullCount A a n ω = m ∧ sumRewards A R a n ω ∈ B} ≤
      streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B}

Type uses (6)

Body uses (9)

Used by (2)

Actions: Source · Open Issue

Proof

let s := {p : ℕ → 𝓐 → ℕ × ℝ | ∃ n, (p n a).1 = m ∧ (p n a).2 ∈ B}
  have : s = ⋃ n, (fun p ↦ p n a) ⁻¹' ({m} ×ˢ B) := by
    ext p
    simp [s]
  have hs : MeasurableSet s := by measurability
  calc P {ω | ∃ n, pullCount A a n ω = m ∧ sumRewards A R a n ω ∈ B}
    _ = (ArrayModel.arrayMeasure ν) {ω | ∃ n, pullCount (ArrayModel.action alg) a n ω = m ∧
            sumRewards (ArrayModel.action alg) (ArrayModel.reward alg) a n ω ∈ B} :=
        (h.identDistrib_pullCount_sumRewards
          (ArrayModel.isAlgEnvSeq_arrayMeasure alg ν)).measure_mem_eq hs
    _ ≤ _ := ArrayModel.prob_exists_pullCount_eq_and_sumRewards_mem_le a m hB

`probReal_sumRewards_le_sumRewards_le`🔗

LemmaBandits.probReal_sumRewards_le_sumRewards_le

Details

No docstring.

theorem

Bandits.probReal_sumRewards_le_sumRewards_le.{u_1, u_2} {𝓐 : Type u_1}
  {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  [Nonempty 𝓐] [Fintype 𝓐] [MeasurableSingletonClass 𝓐]
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  (a : 𝓐) (n m₁ m₂ : ℕ) :
  MeasureTheory.Measure.real P
      {ω |
        Learning.pullCount A (bestArm ν) n ω = m₁ ∧
          Learning.pullCount A a n ω = m₂ ∧
            Learning.sumRewards A R (bestArm ν) n ω ≤
              Learning.sumRewards A R a n ω} ≤
    MeasureTheory.Measure.real (streamMeasure ν)
      {ω |
        ∑ i ∈ Finset.range m₁, ω i (bestArm ν) ≤
          ∑ i ∈ Finset.range m₂, ω i a}
Bandits.probReal_sumRewards_le_sumRewards_le.{u_1,
    u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  [Nonempty 𝓐] [Fintype 𝓐]
  [MeasurableSingletonClass 𝓐]
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  (a : 𝓐) (n m₁ m₂ : ℕ) :
  MeasureTheory.Measure.real P
      {ω |
        Learning.pullCount A (bestArm ν) n
              ω =
            m₁ ∧
          Learning.pullCount A a n ω =
              m₂ ∧
            Learning.sumRewards A R
                (bestArm ν) n ω ≤
              Learning.sumRewards A R a n
                ω} ≤
    MeasureTheory.Measure.real
      (streamMeasure ν)
      {ω |
        ∑ i ∈ Finset.range m₁,
            ω i (bestArm ν) ≤
          ∑ i ∈ Finset.range m₂, ω i a}

Code

lemma probReal_sumRewards_le_sumRewards_le [Fintype 𝓐] [MeasurableSingletonClass 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (a : 𝓐) (n m₁ m₂ : ℕ) :
    P.real {ω | pullCount A (bestArm ν) n ω = m₁ ∧ pullCount A a n ω = m₂ ∧
        sumRewards A R (bestArm ν) n ω ≤ sumRewards A R a n ω} ≤
      (streamMeasure ν).real
        {ω | ∑ i ∈ range m₁, ω i (bestArm ν) ≤ ∑ i ∈ range m₂, ω i a}

Type uses (7)

Body uses (14)

Used by (1)

probReal_sumRewards_le_sumRewards_le

Actions: Source · Open Issue

Proof

by
  have hA := h.measurable_action
  have hR := h.measurable_feedback
  refine le_trans (le_of_eq ?_)
    (ArrayModel.probReal_sumRewards_le_sumRewards_le (alg := alg) a n m₁ m₂)
  let s := {p : ℕ × ℕ × ℝ × ℝ | p.1 = m₁ ∧ p.2.1 = m₂ ∧ p.2.2.1 ≤ p.2.2.2}
  have hs : MeasurableSet s := by simp only [measurableSet_setOf, s]; fun_prop
  change P.real ((fun ω ↦ (pullCount A (bestArm ν) n ω,
      pullCount A a n ω, sumRewards A R (bestArm ν) n ω, sumRewards A R a n ω)) ⁻¹' s) =
    (ArrayModel.arrayMeasure ν).real
      ((fun ω ↦ (pullCount (ArrayModel.action alg) (bestArm ν) n ω,
        pullCount (ArrayModel.action alg) a n ω,
        sumRewards (ArrayModel.action alg) (ArrayModel.reward alg) (bestArm ν) n ω,
        sumRewards (ArrayModel.action alg) (ArrayModel.reward alg) a n ω)) ⁻¹' s)
  simp_rw [measureReal_def]
  congr 1
  rw [← Measure.map_apply ?_ hs, ← Measure.map_apply (by fun_prop) hs]
  swap
  · refine Measurable.prod (by fun_prop) (Measurable.prod (by fun_prop) ?_)
    exact (measurable_sumRewards hA hR _ _).prod (measurable_sumRewards hA hR _ _)
  congr 1
  refine IdentDistrib.map_eq ?_
  have h_eq := h.law_pullCount_sumRewards_unique' (ArrayModel.isAlgEnvSeq_arrayMeasure alg ν)
    (n := n)
  exact h_eq.comp (u := fun p ↦ ((p (bestArm ν)).1, (p a).1, (p (bestArm ν)).2, (p a).2))
    (by fun_prop)

`prob_sum_range_sub_ge_le_of_HasSubgaussianMGF`🔗

LemmaBandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF

Details

No docstring.

theorem

Bandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {a : 𝓐} {σ2 : NNReal}
  (h :
    ProbabilityTheory.HasSubgaussianMGF
      (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  {ε : ℝ} (hε : 0 ≤ ε) (n : ℕ) :
  (streamMeasure ν)
      {ω | ε ≤ ∑ k ∈ Finset.range n, (ω k a - ∫ (x : ℝ), id x ∂ν a)} ≤
    ENNReal.ofReal (Real.exp (-ε ^ 2 / (2 * ↑n * ↑σ2)))
Bandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {a : 𝓐} {σ2 : NNReal}
  (h :
    ProbabilityTheory.HasSubgaussianMGF
      (fun x => x - ∫ (x : ℝ), id x ∂ν a)
      σ2 (ν a))
  {ε : ℝ} (hε : 0 ≤ ε) (n : ℕ) :
  (streamMeasure ν)
      {ω |
        ε ≤
          ∑ k ∈ Finset.range n,
            (ω k a -
              ∫ (x : ℝ), id x ∂ν a)} ≤
    ENNReal.ofReal
      (Real.exp (-ε ^ 2 / (2 * ↑n * ↑σ2)))

Code

lemma prob_sum_range_sub_ge_le_of_HasSubgaussianMGF {σ2 : ℝ≥0}
    (h : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) {ε : ℝ} (hε : 0 ≤ ε) (n : ℕ) :
    streamMeasure ν {ω | ε ≤ ∑ k ∈ range n, (ω k a - (ν a)[id])} ≤
      ENNReal.ofReal (Real.exp (-ε ^ 2 / (2 * n * σ2)))

Type uses (1)

streamMeasure

Body uses (3)

Used by (1)

prob_sum_range_sub_ge_le_of_HasSubgaussianMGF'

Actions: Source · Open Issue

Proof

by
  rw [← ofReal_measureReal]
  gcongr
  apply HasSubgaussianMGF.measure_sum_range_ge_le_of_iIndepFun _ _ hε
  · exact (iIndepFun_eval_streamMeasure'' ν a).comp (fun _ x ↦ x - (ν a)[id]) (by fun_prop)
  · intro _ _
    exact h.congr_identDistrib ((identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _)

`prob_sum_range_sub_le_le_of_HasSubgaussianMGF`🔗

LemmaBandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF

Details

No docstring.

theorem

Bandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {a : 𝓐} {σ2 : NNReal}
  (h :
    ProbabilityTheory.HasSubgaussianMGF
      (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  {ε : ℝ} (hε : 0 ≤ ε) (n : ℕ) :
  (streamMeasure ν)
      {ω | ∑ k ∈ Finset.range n, (ω k a - ∫ (x : ℝ), id x ∂ν a) ≤ -ε} ≤
    ENNReal.ofReal (Real.exp (-ε ^ 2 / (2 * ↑n * ↑σ2)))
Bandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {a : 𝓐} {σ2 : NNReal}
  (h :
    ProbabilityTheory.HasSubgaussianMGF
      (fun x => x - ∫ (x : ℝ), id x ∂ν a)
      σ2 (ν a))
  {ε : ℝ} (hε : 0 ≤ ε) (n : ℕ) :
  (streamMeasure ν)
      {ω |
        ∑ k ∈ Finset.range n,
            (ω k a -
              ∫ (x : ℝ), id x ∂ν a) ≤
          -ε} ≤
    ENNReal.ofReal
      (Real.exp (-ε ^ 2 / (2 * ↑n * ↑σ2)))

Code

lemma prob_sum_range_sub_le_le_of_HasSubgaussianMGF {σ2 : ℝ≥0}
    (h : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) {ε : ℝ} (hε : 0 ≤ ε) (n : ℕ) :
    streamMeasure ν {ω | ∑ k ∈ range n, (ω k a - (ν a)[id]) ≤ -ε} ≤
      ENNReal.ofReal (Real.exp (-ε ^ 2 / (2 * n * σ2)))

Type uses (1)

streamMeasure

Body uses (4)

Used by (1)

prob_sum_range_sub_le_le_of_HasSubgaussianMGF'

Actions: Source · Open Issue

Proof

by
  rw [← ofReal_measureReal]
  gcongr
  apply HasSubgaussianMGF.measure_sum_range_le_le_of_iIndepFun _ _ hε
  · exact (iIndepFun_eval_streamMeasure'' ν a).comp (fun _ x ↦ x - (ν a)[id]) (by fun_prop)
  · intro _ _
    exact h.congr_identDistrib ((identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _)

`prob_sum_range_sub_ge_le_of_HasSubgaussianMGF'`🔗

LemmaBandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF'

Details

No docstring.

theorem

Bandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF'.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {n : ℕ} {a : 𝓐} {σ2 : NNReal}
  (hσ2 : 0 < σ2)
  (h :
    ProbabilityTheory.HasSubgaussianMGF
      (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  {δ : ℝ} (hδ : 0 < δ) (hn : 0 < n) :
  (streamMeasure ν)
      {ω |
        √(2 * ↑n * ↑σ2 * Real.log (1 / δ)) ≤
          ∑ k ∈ Finset.range n, (ω k a - ∫ (x : ℝ), id x ∂ν a)} ≤
    ENNReal.ofReal δ
Bandits.StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF'.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {n : ℕ} {a : 𝓐} {σ2 : NNReal}
  (hσ2 : 0 < σ2)
  (h :
    ProbabilityTheory.HasSubgaussianMGF
      (fun x => x - ∫ (x : ℝ), id x ∂ν a)
      σ2 (ν a))
  {δ : ℝ} (hδ : 0 < δ) (hn : 0 < n) :
  (streamMeasure ν)
      {ω |
        √(2 * ↑n * ↑σ2 *
              Real.log (1 / δ)) ≤
          ∑ k ∈ Finset.range n,
            (ω k a -
              ∫ (x : ℝ), id x ∂ν a)} ≤
    ENNReal.ofReal δ

Code

lemma prob_sum_range_sub_ge_le_of_HasSubgaussianMGF' {σ2 : ℝ≥0} (hσ2 : 0 < σ2)
    (h : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) {δ : ℝ} (hδ : 0 < δ) (hn : 0 < n) :
    streamMeasure ν {ω | √(2 * n * σ2 * Real.log (1 / δ)) ≤
      ∑ k ∈ range n, (ω k a - (ν a)[id])} ≤ ENNReal.ofReal δ

Type uses (1)

streamMeasure

Body uses (1)

prob_sum_range_sub_ge_le_of_HasSubgaussianMGF

Used by (1)

prob_sumRewards_sub_pullCount_mul_ge_le

Actions: Source · Open Issue

Proof

calc
  _ ≤ ENNReal.ofReal (Real.exp (-√(2 * n * σ2 * Real.log (1 / δ)) ^ 2 / (2 * n * σ2))) :=
    prob_sum_range_sub_ge_le_of_HasSubgaussianMGF h (by positivity) n
  _ ≤ ENNReal.ofReal δ := by
    gcongr
    exact exp_neg_sqrt_sq_div_le hσ2 hδ hn

`prob_sum_range_sub_le_le_of_HasSubgaussianMGF'`🔗

LemmaBandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF'

Details

No docstring.

theorem

Bandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF'.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {n : ℕ} {a : 𝓐} {σ2 : NNReal}
  (hσ2 : 0 < σ2)
  (h :
    ProbabilityTheory.HasSubgaussianMGF
      (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  {δ : ℝ} (hδ : 0 < δ) (hn : 0 < n) :
  (streamMeasure ν)
      {ω |
        ∑ k ∈ Finset.range n, (ω k a - ∫ (x : ℝ), id x ∂ν a) ≤
          -√(2 * ↑n * ↑σ2 * Real.log (1 / δ))} ≤
    ENNReal.ofReal δ
Bandits.StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF'.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {n : ℕ} {a : 𝓐} {σ2 : NNReal}
  (hσ2 : 0 < σ2)
  (h :
    ProbabilityTheory.HasSubgaussianMGF
      (fun x => x - ∫ (x : ℝ), id x ∂ν a)
      σ2 (ν a))
  {δ : ℝ} (hδ : 0 < δ) (hn : 0 < n) :
  (streamMeasure ν)
      {ω |
        ∑ k ∈ Finset.range n,
            (ω k a -
              ∫ (x : ℝ), id x ∂ν a) ≤
          -√(2 * ↑n * ↑σ2 *
                Real.log (1 / δ))} ≤
    ENNReal.ofReal δ

Code

lemma prob_sum_range_sub_le_le_of_HasSubgaussianMGF' {σ2 : ℝ≥0} (hσ2 : 0 < σ2)
    (h : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) {δ : ℝ} (hδ : 0 < δ) (hn : 0 < n) :
    streamMeasure ν {ω | ∑ k ∈ range n, (ω k a - (ν a)[id]) ≤
      -√(2 * n * σ2 * Real.log (1 / δ))} ≤ ENNReal.ofReal δ

Type uses (1)

streamMeasure

Body uses (1)

prob_sum_range_sub_le_le_of_HasSubgaussianMGF

Used by (1)

prob_sumRewards_sub_pullCount_mul_le_le

Actions: Source · Open Issue

Proof

calc
  _ ≤ ENNReal.ofReal (Real.exp (-√(2 * n * σ2 * Real.log (1 / δ)) ^ 2 / (2 * n * σ2))) :=
    prob_sum_range_sub_le_le_of_HasSubgaussianMGF h (by positivity) n
  _ ≤ ENNReal.ofReal δ := by
    gcongr
    exact exp_neg_sqrt_sq_div_le hσ2 hδ hn

`prob_sumRewards_sub_pullCount_mul_ge_le`🔗

LemmaBandits.prob_sumRewards_sub_pullCount_mul_ge_le

Details

No docstring.

theorem

Bandits.prob_sumRewards_sub_pullCount_mul_ge_le.{u_1, u_2}
  {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐]
  [MeasurableSingletonClass 𝓐] {σ2 : NNReal} (hσ2 : 0 < σ2)
  (ha :
    ProbabilityTheory.HasSubgaussianMGF
      (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  {δ : ℝ} (hδ : 0 < δ) :
  P
      {ω |
        ∃ t < n,
          Learning.pullCount A a t ω ≠ 0 ∧
            √(2 * ↑(Learning.pullCount A a t ω) * ↑σ2 *
                  Real.log (1 / δ)) ≤
              Learning.sumRewards A R a t ω -
                ↑(Learning.pullCount A a t ω) * ∫ (x : ℝ), id x ∂ν a} ≤
    ENNReal.ofReal ((↑n - 1) * δ)
Bandits.prob_sumRewards_sub_pullCount_mul_ge_le.{u_1,
    u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ}
  {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐]
  [MeasurableSingletonClass 𝓐]
  {σ2 : NNReal} (hσ2 : 0 < σ2)
  (ha :
    ProbabilityTheory.HasSubgaussianMGF
      (fun x => x - ∫ (x : ℝ), id x ∂ν a)
      σ2 (ν a))
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  {δ : ℝ} (hδ : 0 < δ) :
  P
      {ω |
        ∃ t < n,
          Learning.pullCount A a t ω ≠ 0 ∧
            √(2 *
                      ↑(Learning.pullCount
                          A a t ω) *
                    ↑σ2 *
                  Real.log (1 / δ)) ≤
              Learning.sumRewards A R a t
                  ω -
                ↑(Learning.pullCount A a t
                      ω) *
                  ∫ (x : ℝ), id x ∂ν a} ≤
    ENNReal.ofReal ((↑n - 1) * δ)

Code

lemma prob_sumRewards_sub_pullCount_mul_ge_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
    {σ2 : ℝ≥0} (hσ2 : 0 < σ2) (ha : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) {δ : ℝ} (hδ : 0 < δ) :
    P {ω | ∃ t < n, pullCount A a t ω ≠ 0 ∧ √(2 * pullCount A a t ω * σ2 * Real.log (1 / δ)) ≤
      sumRewards A R a t ω - pullCount A a t ω * (ν a)[id]} ≤ ENNReal.ofReal ((n - 1) * δ)

Type uses (5)

Body uses (4)

Used by (1)

prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype

Actions: Source · Open Issue

Proof

let B (m : ℕ) := {x : ℝ | √(2 * m * σ2 * Real.log (1 / δ)) ≤ x - m * (ν a)[id]}
  calc
    _ ≤ P (⋃ m ∈ Icc 1 (n - 1), {ω | ∃ t, t < n ∧ pullCount A a t ω = m ∧
            sumRewards A R a t ω ∈ B m}) := by
        apply measure_mono
        intro ω ⟨t, ht, hp, hb⟩
        have hm : pullCount A a t ω ∈ Icc 1 (n - 1) := mem_Icc.mpr ⟨Nat.one_le_iff_ne_zero.mpr hp,
          (pullCount_le a t ω).trans (Nat.le_sub_one_of_lt ht)⟩
        exact Set.mem_biUnion hm ⟨t, ht, rfl, hb⟩
    _ ≤ ∑ m ∈ Icc 1 (n - 1), P {ω | ∃ t, t < n ∧ pullCount A a t ω = m ∧
          sumRewards A R a t ω ∈ B m} :=
        measure_biUnion_finset_le _ _
    _ ≤ ∑ m ∈ Icc 1 (n - 1), P {ω | ∃ t, pullCount A a t ω = m ∧ sumRewards A R a t ω ∈ B m} :=
        sum_le_sum (fun _ _ ↦ measure_mono (fun _ ⟨t, _, hps⟩ ↦ ⟨t, hps⟩))
    _ ≤ ∑ m ∈ Icc 1 (n - 1), streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B m} := by
        apply sum_le_sum
        exact (fun m _ ↦ prob_exists_pullCount_eq_and_sumRewards_mem_le h a m (by measurability))
    _ ≤ ∑ m ∈ Icc 1 (n - 1), ENNReal.ofReal δ := by
      apply sum_le_sum
      intro m hm
      exact le_of_eq_of_le (by simp [B])
        (StreamMeasure.prob_sum_range_sub_ge_le_of_HasSubgaussianMGF' hσ2 ha hδ (mem_Icc.mp hm).1)
    _ = ENNReal.ofReal ((n - 1) * δ) := by
      by_cases hn : n = 0
      · simp [hn, hδ.le]
      · rw [sum_const, Nat.card_Icc, add_tsub_cancel_right, ← ENNReal.ofReal_nsmul, nsmul_eq_mul,
          Nat.cast_sub (Nat.one_le_iff_ne_zero.mpr hn)]
        ring_nf

`prob_sumRewards_sub_pullCount_mul_le_le`🔗

LemmaBandits.prob_sumRewards_sub_pullCount_mul_le_le

Details

No docstring.

theorem

Bandits.prob_sumRewards_sub_pullCount_mul_le_le.{u_1, u_2}
  {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {n : ℕ} {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐]
  [MeasurableSingletonClass 𝓐] {σ2 : NNReal} (hσ2 : 0 < σ2)
  (ha :
    ProbabilityTheory.HasSubgaussianMGF
      (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  {δ : ℝ} (hδ : 0 < δ) :
  P
      {ω |
        ∃ t < n,
          Learning.pullCount A a t ω ≠ 0 ∧
            Learning.sumRewards A R a t ω -
                ↑(Learning.pullCount A a t ω) * ∫ (x : ℝ), id x ∂ν a ≤
              -√(2 * ↑(Learning.pullCount A a t ω) * ↑σ2 *
                    Real.log (1 / δ))} ≤
    ENNReal.ofReal ((↑n - 1) * δ)
Bandits.prob_sumRewards_sub_pullCount_mul_le_le.{u_1,
    u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ}
  {a : 𝓐} [Nonempty 𝓐] [Countable 𝓐]
  [MeasurableSingletonClass 𝓐]
  {σ2 : NNReal} (hσ2 : 0 < σ2)
  (ha :
    ProbabilityTheory.HasSubgaussianMGF
      (fun x => x - ∫ (x : ℝ), id x ∂ν a)
      σ2 (ν a))
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  {δ : ℝ} (hδ : 0 < δ) :
  P
      {ω |
        ∃ t < n,
          Learning.pullCount A a t ω ≠ 0 ∧
            Learning.sumRewards A R a t
                  ω -
                ↑(Learning.pullCount A a t
                      ω) *
                  ∫ (x : ℝ), id x ∂ν a ≤
              -√(2 *
                        ↑(Learning.pullCount
                            A a t ω) *
                      ↑σ2 *
                    Real.log (1 / δ))} ≤
    ENNReal.ofReal ((↑n - 1) * δ)

Code

lemma prob_sumRewards_sub_pullCount_mul_le_le [Countable 𝓐] [MeasurableSingletonClass 𝓐]
    {σ2 : ℝ≥0} (hσ2 : 0 < σ2) (ha : HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) {δ : ℝ} (hδ : 0 < δ) :
    P {ω | ∃ t < n, pullCount A a t ω ≠ 0 ∧
      sumRewards A R a t ω - pullCount A a t ω * (ν a)[id] ≤
        -√(2 * pullCount A a t ω * σ2 * Real.log (1 / δ))} ≤ ENNReal.ofReal ((n - 1) * δ)

Type uses (5)

Body uses (4)

Used by (1)

prob_empMean_bestAction_sub_actionMean_le_le

Actions: Source · Open Issue

Proof

let B (m : ℕ) := {x : ℝ | x - m * (ν a)[id] ≤ -√(2 * m * σ2 * Real.log (1 / δ))}
  calc
    _ ≤ P (⋃ m ∈ Icc 1 (n - 1), {ω | ∃ t, t < n ∧ pullCount A a t ω = m ∧
            sumRewards A R a t ω ∈ B m}) := by
        apply measure_mono
        intro ω ⟨t, ht, hp, hb⟩
        have hm : pullCount A a t ω ∈ Icc 1 (n - 1) := mem_Icc.mpr ⟨Nat.one_le_iff_ne_zero.mpr hp,
          (pullCount_le a t ω).trans (Nat.le_sub_one_of_lt ht)⟩
        exact Set.mem_biUnion hm ⟨t, ht, rfl, hb⟩
    _ ≤ ∑ m ∈ Icc 1 (n - 1), P {ω | ∃ t, t < n ∧ pullCount A a t ω = m ∧
          sumRewards A R a t ω ∈ B m} :=
        measure_biUnion_finset_le _ _
    _ ≤ ∑ m ∈ Icc 1 (n - 1), P {ω | ∃ t, pullCount A a t ω = m ∧ sumRewards A R a t ω ∈ B m} :=
        sum_le_sum (fun _ _ ↦ measure_mono (fun _ ⟨t, _, hps⟩ ↦ ⟨t, hps⟩))
    _ ≤ ∑ m ∈ Icc 1 (n - 1), streamMeasure ν {ω | ∑ i ∈ range m, ω i a ∈ B m} := by
        apply sum_le_sum
        exact (fun m _ ↦ prob_exists_pullCount_eq_and_sumRewards_mem_le h a m (by measurability))
    _ ≤ ∑ m ∈ Icc 1 (n - 1), ENNReal.ofReal δ := by
      apply sum_le_sum
      intro m hm
      exact le_of_eq_of_le (by simp [B])
        (StreamMeasure.prob_sum_range_sub_le_le_of_HasSubgaussianMGF' hσ2 ha hδ (mem_Icc.mp hm).1)
    _ = ENNReal.ofReal ((n - 1) * δ) := by
      by_cases hn : n = 0
      · simp [hn, hδ.le]
      · rw [sum_const, Nat.card_Icc, add_tsub_cancel_right, ← ENNReal.ofReal_nsmul, nsmul_eq_mul,
          Nat.cast_sub (Nat.one_le_iff_ne_zero.mpr hn)]
        ring_nf

`prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype`🔗

LemmaBandits.prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype

Details

No docstring.

theorem

Bandits.prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype.{u_1, u_2}
  {𝓐 : Type u_1} {Ω : Type u_2} [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {n : ℕ} [Nonempty 𝓐] [Fintype 𝓐] [MeasurableSingletonClass 𝓐]
  {σ2 : NNReal} (hσ2 : 0 < σ2)
  (hν :
    ∀ (a : 𝓐),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  {δ : ℝ} (hδ : 0 < δ) :
  P
      {ω |
        ∃ a,
          ∃ t < n,
            Learning.pullCount A a t ω ≠ 0 ∧
              √(2 * ↑(Learning.pullCount A a t ω) * ↑σ2 *
                    Real.log (1 / δ)) ≤
                Learning.sumRewards A R a t ω -
                  ↑(Learning.pullCount A a t ω) *
                    ∫ (x : ℝ), id x ∂ν a} ≤
    ENNReal.ofReal (↑(Fintype.card 𝓐) * (↑n - 1) * δ)
Bandits.prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype.{u_1,
    u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  [DecidableEq 𝓐] {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {n : ℕ}
  [Nonempty 𝓐] [Fintype 𝓐]
  [MeasurableSingletonClass 𝓐]
  {σ2 : NNReal} (hσ2 : 0 < σ2)
  (hν :
    ∀ (a : 𝓐),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂ν a)
        σ2 (ν a))
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  {δ : ℝ} (hδ : 0 < δ) :
  P
      {ω |
        ∃ a,
          ∃ t < n,
            Learning.pullCount A a t ω ≠
                0 ∧
              √(2 *
                        ↑(Learning.pullCount
                            A a t ω) *
                      ↑σ2 *
                    Real.log (1 / δ)) ≤
                Learning.sumRewards A R a
                    t ω -
                  ↑(Learning.pullCount A a
                        t ω) *
                    ∫ (x : ℝ),
                      id x ∂ν a} ≤
    ENNReal.ofReal
      (↑(Fintype.card 𝓐) * (↑n - 1) * δ)

Code

lemma prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype [Fintype 𝓐] [MeasurableSingletonClass 𝓐]
    {σ2 : ℝ≥0} (hσ2 : 0 < σ2) (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) {δ : ℝ} (hδ : 0 < δ) :
    P {ω | ∃ a, ∃ t < n, pullCount A a t ω ≠ 0 ∧
        √(2 * pullCount A a t ω * σ2 * Real.log (1 / δ)) ≤
          sumRewards A R a t ω - pullCount A a t ω * (ν a)[id]} ≤
            ENNReal.ofReal (Fintype.card 𝓐 * (n - 1) * δ)

Type uses (5)

Body uses (1)

prob_sumRewards_sub_pullCount_mul_ge_le

Used by (1)

prob_empMean_sub_actionMean_ge_le

Actions: Source · Open Issue

Proof

calc
    _ ≤ ∑ a, P {ω | ∃ t < n, pullCount A a t ω ≠ 0 ∧
                √(2 * pullCount A a t ω * σ2 * Real.log (1 / δ)) ≤
                  sumRewards A R a t ω - pullCount A a t ω * (ν a)[id]} := by
        rw [Set.setOf_exists]
        exact measure_iUnion_fintype_le _ _
    _ ≤ ∑ a, ENNReal.ofReal ((n - 1) * δ) :=
        sum_le_sum fun a _ ↦ prob_sumRewards_sub_pullCount_mul_ge_le hσ2 (hν a) h hδ
    _ = ENNReal.ofReal (Fintype.card 𝓐 * (n - 1) * δ) := by
      rw [sum_const, Finset.card_univ, ← ENNReal.ofReal_nsmul, nsmul_eq_mul]
      ring_nf

`probReal_sum_le_sum_streamMeasure`🔗

LemmaBandits.probReal_sum_le_sum_streamMeasure

Details

No docstring.

theorem

Bandits.probReal_sum_le_sum_streamMeasure.{u_1} {𝓐 : Type u_1}
  {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] [Nonempty 𝓐] [Fintype 𝓐]
  {c : NNReal}
  (hν :
    ∀ (a : 𝓐),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂ν a) c (ν a))
  (a : 𝓐) (m : ℕ) :
  MeasureTheory.Measure.real (streamMeasure ν)
      {ω |
        ∑ s ∈ Finset.range m, ω s (bestArm ν) ≤
          ∑ s ∈ Finset.range m, ω s a} ≤
    Real.exp (-↑m * gap ν a ^ 2 / (4 * ↑c))
Bandits.probReal_sum_le_sum_streamMeasure.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  [Nonempty 𝓐] [Fintype 𝓐] {c : NNReal}
  (hν :
    ∀ (a : 𝓐),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂ν a)
        c (ν a))
  (a : 𝓐) (m : ℕ) :
  MeasureTheory.Measure.real
      (streamMeasure ν)
      {ω |
        ∑ s ∈ Finset.range m,
            ω s (bestArm ν) ≤
          ∑ s ∈ Finset.range m, ω s a} ≤
    Real.exp
      (-↑m * gap ν a ^ 2 / (4 * ↑c))

Code

lemma probReal_sum_le_sum_streamMeasure [Fintype 𝓐] {c : ℝ≥0}
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) c (ν a)) (a : 𝓐) (m : ℕ) :
    (streamMeasure ν).real
        {ω | ∑ s ∈ range m, ω s (bestArm ν) ≤ ∑ s ∈ range m, ω s a} ≤
      Real.exp (-↑m * gap ν a ^ 2 / (4 * c))

Type uses (3)

Body uses (9)

Used by (1)

probReal_sumRewards_le_sumRewards_le

Actions: Source · Open Issue

Proof

by
  by_cases ha : a = bestArm ν
  · simp [ha]
  refine (HasSubgaussianMGF.measure_sum_le_sum_le' (cX := fun _ ↦ c) (cY := fun _ ↦ c)
    ?_ ?_ ?_ ?_ ?_ ?_).trans_eq ?_
  · exact iIndepFun_eval_streamMeasure'' ν (bestArm ν)
  · exact iIndepFun_eval_streamMeasure'' ν a
  · intro i him
    simp_rw [integral_eval_streamMeasure]
    refine (hν (bestArm ν)).congr_identDistrib ?_
    exact (identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _
  · intro i him
    simp_rw [integral_eval_streamMeasure]
    refine (hν a).congr_identDistrib ?_
    exact (identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _
  · exact indepFun_eval_streamMeasure' ν (Ne.symm ha)
  · gcongr 1 with i him
    simp_rw [integral_eval_streamMeasure]
    exact le_bestArm a
  · congr 1
    simp_rw [integral_eval_streamMeasure]
    simp only [id_eq, sum_const, card_range, nsmul_eq_mul, NNReal.coe_mul, NNReal.coe_natCast,
      gap_eq_bestArm_sub, neg_mul]
    field_simp
    ring

`prob_sum_le_sqrt_log`🔗

LemmaBandits.prob_sum_le_sqrt_log

Details

No docstring.

theorem

Bandits.prob_sum_le_sqrt_log.{u_1} {𝓐 : Type u_1}
  {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {n : ℕ} {σ2 : NNReal}
  (hν :
    ∀ (a : 𝓐),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  (hσ2 : σ2 ≠ 0) {c : ℝ} (hc : 0 ≤ c) (a : 𝓐) (k : ℕ) (hk : k ≠ 0) :
  (streamMeasure ν)
      {ω |
        ∑ s ∈ Finset.range k, (ω s a - ∫ (x : ℝ), id x ∂ν a) ≤
          -√(2 * c * ↑k * ↑σ2 * Real.log (↑n + 1))} ≤
    1 / (↑n + 1) ^ c
Bandits.prob_sum_le_sqrt_log.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {n : ℕ} {σ2 : NNReal}
  (hν :
    ∀ (a : 𝓐),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂ν a)
        σ2 (ν a))
  (hσ2 : σ2 ≠ 0) {c : ℝ} (hc : 0 ≤ c)
  (a : 𝓐) (k : ℕ) (hk : k ≠ 0) :
  (streamMeasure ν)
      {ω |
        ∑ s ∈ Finset.range k,
            (ω s a -
              ∫ (x : ℝ), id x ∂ν a) ≤
          -√(2 * c * ↑k * ↑σ2 *
                Real.log (↑n + 1))} ≤
    1 / (↑n + 1) ^ c

Code

lemma prob_sum_le_sqrt_log {σ2 : ℝ≥0}
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) {c : ℝ} (hc : 0 ≤ c) (a : 𝓐) (k : ℕ) (hk : k ≠ 0) :
    streamMeasure ν
        {ω | (∑ s ∈ range k, (ω s a - (ν a)[id])) ≤ - √(2 * c * k * σ2 * Real.log (n + 1))} ≤
      1 / (n + 1) ^ c

Type uses (1)

streamMeasure

Body uses (4)

Used by (1)

prob_avg_add_sqrt_log_le

Actions: Source · Open Issue

Proof

by
  calc
    streamMeasure ν
      {ω | (∑ s ∈ range k, (ω s a - (ν a)[id])) ≤ - √(2 * c * k * σ2 * Real.log (n + 1))}
  _ ≤ ENNReal.ofReal (Real.exp (-(√(2 * c * k * σ2 * Real.log (n + 1))) ^ 2 / (2 * k * σ2))) := by
    rw [← ofReal_measureReal]
    gcongr
    refine (HasSubgaussianMGF.measure_sum_range_le_le_of_iIndepFun (c := σ2) ?_ ?_ (by positivity))
    · exact (iIndepFun_eval_streamMeasure'' ν a).comp (fun i ω ↦ ω - (ν a)[id])
        (fun _ ↦ by fun_prop)
    · intro i him
      refine (hν a).congr_identDistrib ?_
      exact (identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _
  _ = 1 / (n + 1) ^ c := by
    rw [Real.sq_sqrt]
    swap; · exact mul_nonneg (by positivity) (Real.log_nonneg (by simp))
    field_simp
    rw [← Real.log_rpow (by positivity), ← Real.log_inv,
      Real.exp_log (by positivity), one_div, ENNReal.ofReal_inv_of_pos (by positivity),
      ← ENNReal.ofReal_rpow_of_nonneg (by positivity) (by positivity)]
    norm_cast

`prob_sum_ge_sqrt_log`🔗

LemmaBandits.prob_sum_ge_sqrt_log

Details

No docstring.

theorem

Bandits.prob_sum_ge_sqrt_log.{u_1} {𝓐 : Type u_1}
  {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {n : ℕ} {σ2 : NNReal}
  (hν :
    ∀ (a : 𝓐),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  (hσ2 : σ2 ≠ 0) {c : ℝ} (hc : 0 ≤ c) (a : 𝓐) (k : ℕ) (hk : k ≠ 0) :
  (streamMeasure ν)
      {ω |
        √(2 * c * ↑k * ↑σ2 * Real.log (↑n + 1)) ≤
          ∑ s ∈ Finset.range k, (ω s a - ∫ (x : ℝ), id x ∂ν a)} ≤
    1 / (↑n + 1) ^ c
Bandits.prob_sum_ge_sqrt_log.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {n : ℕ} {σ2 : NNReal}
  (hν :
    ∀ (a : 𝓐),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂ν a)
        σ2 (ν a))
  (hσ2 : σ2 ≠ 0) {c : ℝ} (hc : 0 ≤ c)
  (a : 𝓐) (k : ℕ) (hk : k ≠ 0) :
  (streamMeasure ν)
      {ω |
        √(2 * c * ↑k * ↑σ2 *
              Real.log (↑n + 1)) ≤
          ∑ s ∈ Finset.range k,
            (ω s a -
              ∫ (x : ℝ), id x ∂ν a)} ≤
    1 / (↑n + 1) ^ c

Code

lemma prob_sum_ge_sqrt_log {σ2 : ℝ≥0}
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) {c : ℝ} (hc : 0 ≤ c) (a : 𝓐) (k : ℕ) (hk : k ≠ 0) :
    streamMeasure ν
        {ω | √(2 * c * k * σ2 * Real.log (n + 1)) ≤ (∑ s ∈ range k, (ω s a - (ν a)[id]))} ≤
      1 / (n + 1) ^ c

Type uses (1)

streamMeasure

Body uses (3)

Used by (1)

prob_avg_sub_sqrt_log_ge

Actions: Source · Open Issue

Proof

by
  calc
    streamMeasure ν
      {ω | √(2 * c * k * σ2 * Real.log (n + 1)) ≤ (∑ s ∈ range k, (ω s a - (ν a)[id]))}
  _ ≤ ENNReal.ofReal (Real.exp (-(√(2 * c * k * σ2 * Real.log (n + 1))) ^ 2 / (2 * k * σ2))) := by
    rw [← ofReal_measureReal]
    gcongr
    refine (HasSubgaussianMGF.measure_sum_range_ge_le_of_iIndepFun (c := σ2) ?_ ?_ (by positivity))
    · exact (iIndepFun_eval_streamMeasure'' ν a).comp (fun i ω ↦ ω - (ν a)[id])
        (fun _ ↦ by fun_prop)
    · intro i him
      refine (hν a).congr_identDistrib ?_
      exact (identDistrib_eval_eval_id_streamMeasure _ _ _).symm.sub_const _
  _ = 1 / (n + 1) ^ c := by
    rw [Real.sq_sqrt]
    swap; · exact mul_nonneg (by positivity) (Real.log_nonneg (by simp))
    field_simp
    rw [← Real.log_rpow (by positivity), ← Real.log_inv,
      Real.exp_log (by positivity), one_div, ENNReal.ofReal_inv_of_pos (by positivity),
      ← ENNReal.ofReal_rpow_of_nonneg (by positivity) (by positivity)]
    norm_cast

`prob_avg_add_sqrt_log_le`🔗

LemmaBandits.prob_avg_add_sqrt_log_le

Details

No docstring.

theorem

Bandits.prob_avg_add_sqrt_log_le.{u_1} {𝓐 : Type u_1}
  {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {σ2 : NNReal} {c : ℝ}
  (hν :
    ∀ (a : 𝓐),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : 𝓐) (n k : ℕ) (hk : k ≠ 0) :
  (streamMeasure ν)
      {ω |
        (∑ m ∈ Finset.range k, ω m a) / ↑k +
            √(2 * c * ↑σ2 * Real.log (↑n + 1) / ↑k) ≤
          ∫ (x : ℝ), id x ∂ν a} ≤
    1 / (↑n + 1) ^ c
Bandits.prob_avg_add_sqrt_log_le.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {σ2 : NNReal} {c : ℝ}
  (hν :
    ∀ (a : 𝓐),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂ν a)
        σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : 𝓐)
  (n k : ℕ) (hk : k ≠ 0) :
  (streamMeasure ν)
      {ω |
        (∑ m ∈ Finset.range k, ω m a) /
              ↑k +
            √(2 * c * ↑σ2 *
                  Real.log (↑n + 1) /
                ↑k) ≤
          ∫ (x : ℝ), id x ∂ν a} ≤
    1 / (↑n + 1) ^ c

Code

lemma prob_avg_add_sqrt_log_le {σ2 : ℝ≥0} {c : ℝ}
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) (hσ2 : σ2 ≠ 0)
    (hc : 0 ≤ c) (a : 𝓐) (n k : ℕ) (hk : k ≠ 0) :
    streamMeasure ν {ω | (∑ m ∈ range k, ω m a) / k + √(2 * c * σ2 * log (n + 1) / k) ≤ (ν a)[id]} ≤
      1 / (n + 1) ^ c

Type uses (1)

streamMeasure

Body uses (1)

prob_sum_le_sqrt_log

Used by (1)

prob_ucbIndex_le

Actions: Source · Open Issue

Proof

by
  have h_log_nonneg : 0 ≤ log (n + 1) := log_nonneg (by simp)
  calc
    streamMeasure ν {ω | (∑ m ∈ range k, ω m a) / k + √(2 * c * σ2 * log (n + 1) / k) ≤ (ν a)[id]}
  _ = streamMeasure ν
      {ω | (∑ s ∈ range k, (ω s a - (ν a)[id])) / k ≤ - √(2 * c * σ2 * log (n + 1) / k)} := by
    congr with ω
    field_simp
    rw [Finset.sum_sub_distrib]
    simp
    grind
  _ = streamMeasure ν
      {ω | (∑ s ∈ range k, (ω s a - (ν a)[id])) ≤ - √(2 * c * k * σ2 * log (n + 1))} := by
    congr with ω
    field_simp
    congr! 2
    rw [sqrt_div (by positivity), ← mul_div_assoc, mul_comm, mul_div_assoc, div_sqrt,
      mul_assoc (k : ℝ), mul_assoc (k : ℝ), mul_assoc (k : ℝ),
      sqrt_mul (x := (k : ℝ)) (by positivity), mul_comm]
  _ ≤ 1 / (n + 1) ^ c := prob_sum_le_sqrt_log hν hσ2 hc a k hk

`prob_avg_sub_sqrt_log_ge`🔗

LemmaBandits.prob_avg_sub_sqrt_log_ge

Details

No docstring.

theorem

Bandits.prob_avg_sub_sqrt_log_ge.{u_1} {𝓐 : Type u_1}
  {m𝓐 : MeasurableSpace 𝓐} {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {σ2 : NNReal} {c : ℝ}
  (hν :
    ∀ (a : 𝓐),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : 𝓐) (n k : ℕ) (hk : k ≠ 0) :
  (streamMeasure ν)
      {ω |
        ∫ (x : ℝ), id x ∂ν a ≤
          (∑ m ∈ Finset.range k, ω m a) / ↑k -
            √(2 * c * ↑σ2 * Real.log (↑n + 1) / ↑k)} ≤
    1 / (↑n + 1) ^ c
Bandits.prob_avg_sub_sqrt_log_ge.{u_1}
  {𝓐 : Type u_1} {m𝓐 : MeasurableSpace 𝓐}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {σ2 : NNReal} {c : ℝ}
  (hν :
    ∀ (a : 𝓐),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂ν a)
        σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : 𝓐)
  (n k : ℕ) (hk : k ≠ 0) :
  (streamMeasure ν)
      {ω |
        ∫ (x : ℝ), id x ∂ν a ≤
          (∑ m ∈ Finset.range k, ω m a) /
              ↑k -
            √(2 * c * ↑σ2 *
                  Real.log (↑n + 1) /
                ↑k)} ≤
    1 / (↑n + 1) ^ c

Code

lemma prob_avg_sub_sqrt_log_ge {σ2 : ℝ≥0} {c : ℝ}
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a)) (hσ2 : σ2 ≠ 0)
    (hc : 0 ≤ c) (a : 𝓐) (n k : ℕ) (hk : k ≠ 0) :
    streamMeasure ν
        {ω | (ν a)[id] ≤ (∑ m ∈ range k, ω m a) / k - √(2 * c * σ2 *log (n + 1) / k)} ≤
      1 / (n + 1) ^ c

Type uses (1)

streamMeasure

Body uses (1)

prob_sum_ge_sqrt_log

Used by (1)

prob_ucbIndex_ge

Actions: Source · Open Issue

Proof

by
  have h_log_nonneg : 0 ≤ log (n + 1) := log_nonneg (by simp)
  calc
    streamMeasure ν {ω | (ν a)[id] ≤ (∑ m ∈ range k, ω m a) / k - √(2 * c * σ2 * log (n + 1) / k)}
  _ = streamMeasure ν
      {ω | √(2 * c * σ2 * log (n + 1) / k) ≤ (∑ s ∈ range k, (ω s a - (ν a)[id])) / k} := by
    congr with ω
    field_simp
    rw [Finset.sum_sub_distrib]
    simp
    grind
  _ = streamMeasure ν
      {ω | √(2 * c * k * σ2 * log (n + 1)) ≤ (∑ s ∈ range k, (ω s a - (ν a)[id]))} := by
    congr with ω
    field_simp
    congr! 1
    rw [sqrt_div (by positivity), ← mul_div_assoc, mul_comm, mul_div_assoc, div_sqrt,
      mul_comm _ (k : ℝ), sqrt_mul (x := (k : ℝ)) (by positivity), mul_comm]
  _ ≤ 1 / (n + 1) ^ c := prob_sum_ge_sqrt_log hν hσ2 hc a k hk

`prob_empMean_sub_actionMean_ge_le`🔗

LemmaLearning.IsBayesAlgEnvSeq.prob_empMean_sub_actionMean_ge_le

Details

No docstring.

theorem

Learning.IsBayesAlgEnvSeq.prob_empMean_sub_actionMean_ge_le.{u_1, u_2}
  {𝓔 : Type u_1} {Ω : Type u_2} [MeasurableSpace 𝓔] [MeasurableSpace Ω]
  {K : ℕ} [Nonempty (Fin K)] {Q : MeasureTheory.Measure 𝓔}
  {κ : ProbabilityTheory.Kernel (𝓔 × Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel κ] {alg : Algorithm (Fin K) ℝ}
  {E : Ω → 𝓔} {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P]
  (h : IsBayesAlgEnvSeq Q κ alg E A R P) {σ2 : NNReal} (hσ2 : 0 < σ2)
  (hs :
    ∀ (e : 𝓔) (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂κ (e, a)) σ2 (κ (e, a)))
  {δ : ℝ} (hδ : 0 < δ) (n : ℕ) :
  P
      {ω |
        ∃ t < n,
          ∃ a,
            pullCount A a t ω ≠ 0 ∧
              √(2 * ↑σ2 * Real.log (1 / δ) / ↑(pullCount A a t ω)) ≤
                empMean A R a t ω - actionMean κ E a ω} ≤
    ENNReal.ofReal (↑K * (↑n - 1) * δ)
Learning.IsBayesAlgEnvSeq.prob_empMean_sub_actionMean_ge_le.{u_1,
    u_2}
  {𝓔 : Type u_1} {Ω : Type u_2}
  [MeasurableSpace 𝓔] [MeasurableSpace Ω]
  {K : ℕ} [Nonempty (Fin K)]
  {Q : MeasureTheory.Measure 𝓔}
  {κ :
    ProbabilityTheory.Kernel (𝓔 × Fin K)
      ℝ}
  [ProbabilityTheory.IsMarkovKernel κ]
  {alg : Algorithm (Fin K) ℝ} {E : Ω → 𝓔}
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  (h : IsBayesAlgEnvSeq Q κ alg E A R P)
  {σ2 : NNReal} (hσ2 : 0 < σ2)
  (hs :
    ∀ (e : 𝓔) (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂κ (e, a))
        σ2 (κ (e, a)))
  {δ : ℝ} (hδ : 0 < δ) (n : ℕ) :
  P
      {ω |
        ∃ t < n,
          ∃ a,
            pullCount A a t ω ≠ 0 ∧
              √(2 * ↑σ2 *
                      Real.log (1 / δ) /
                    ↑(pullCount A a t
                        ω)) ≤
                empMean A R a t ω -
                  actionMean κ E a ω} ≤
    ENNReal.ofReal (↑K * (↑n - 1) * δ)

Code

lemma prob_empMean_sub_actionMean_ge_le (h : IsBayesAlgEnvSeq Q κ alg E A R P) {σ2 : ℝ≥0}
    (hσ2 : 0 < σ2) (hs : ∀ e a, HasSubgaussianMGF (fun x ↦ x - (κ (e, a))[id]) σ2 (κ (e, a)))
    {δ : ℝ} (hδ : 0 < δ) (n : ℕ) :
    P {ω | ∃ t < n, ∃ a, pullCount A a t ω ≠ 0 ∧
      √(2 * σ2 * Real.log (1 / δ) / pullCount A a t ω) ≤ empMean A R a t ω - actionMean κ E a ω}
      ≤ ENNReal.ofReal (K * (n - 1) * δ)

Type uses (5)

Body uses (14)

Used by (1)

integral_sum_range_ucb_action_sub_actionMean_action_le

Actions: Source · Open Issue

Proof

by
  have := h.measurable_param
  have := h.measurable_action
  have := h.measurable_feedback
  let S := {(e, τ) | ∃ a, ∃ t < n, pullCount IT.action a t τ ≠ 0 ∧
    √(2 * pullCount IT.action a t τ * σ2 * Real.log (1 / δ)) ≤
      sumRewards IT.action IT.feedback a t τ - pullCount IT.action a t τ * actionMean κ id a e}
  calc
    _ ≤ (P.map (fun ω ↦ (E ω, trajectory A R ω))) S := by
        rw [Measure.map_apply (by fun_prop) (by measurability)]
        apply measure_mono
        intro ω ⟨t, ht, a, hpc, hle⟩
        rw [empMean] at hle
        exact ⟨a, t, ht, hpc, sqrt_two_mul_le_sub hpc hle⟩
    _ = (P.map E ⊗ₘ condDistrib (trajectory A R) E P) S := by
        rw [← compProd_map_condDistrib (by fun_prop)]
    _ = ∫⁻ e, condDistrib (trajectory A R) E P e (Prod.mk e ⁻¹' S) ∂(P.map E) :=
        Measure.compProd_apply (by measurability)
    _ ≤ ∫⁻ e, ENNReal.ofReal (Fintype.card (Fin K) * (n - 1) * δ) ∂(P.map E) := by
        apply lintegral_mono_ae
        rw [h.hasLaw_env.map_eq]
        filter_upwards [h.ae_IsAlgEnvSeq] with e he
        exact Bandits.prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype hσ2 (hs e) he hδ
    _ = ENNReal.ofReal (K * (n - 1) * δ) := by
      simp [Measure.map_apply h.measurable_param]

`prob_empMean_bestAction_sub_actionMean_le_le`🔗

LemmaLearning.IsBayesAlgEnvSeq.prob_empMean_bestAction_sub_actionMean_le_le

Details

No docstring.

theorem

Learning.IsBayesAlgEnvSeq.prob_empMean_bestAction_sub_actionMean_le_le.{u_1,
    u_2}
  {𝓔 : Type u_1} {Ω : Type u_2} [MeasurableSpace 𝓔] [MeasurableSpace Ω]
  {K : ℕ} [Nonempty (Fin K)] {Q : MeasureTheory.Measure 𝓔}
  {κ : ProbabilityTheory.Kernel (𝓔 × Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel κ] {alg : Algorithm (Fin K) ℝ}
  {E : Ω → 𝓔} {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P]
  (h : IsBayesAlgEnvSeq Q κ alg E A R P) {σ2 : NNReal} (hσ2 : 0 < σ2)
  (hs :
    ∀ (e : 𝓔) (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂κ (e, a)) σ2 (κ (e, a)))
  {δ : ℝ} (hδ : 0 < δ) (n : ℕ) :
  P
      {ω |
        ∃ t < n,
          pullCount A (bestAction κ E ω) t ω ≠ 0 ∧
            empMean A R (bestAction κ E ω) t ω -
                actionMean κ E (bestAction κ E ω) ω ≤
              -√(2 * ↑σ2 * Real.log (1 / δ) /
                    ↑(pullCount A (bestAction κ E ω) t ω))} ≤
    ENNReal.ofReal ((↑n - 1) * δ)
Learning.IsBayesAlgEnvSeq.prob_empMean_bestAction_sub_actionMean_le_le.{u_1,
    u_2}
  {𝓔 : Type u_1} {Ω : Type u_2}
  [MeasurableSpace 𝓔] [MeasurableSpace Ω]
  {K : ℕ} [Nonempty (Fin K)]
  {Q : MeasureTheory.Measure 𝓔}
  {κ :
    ProbabilityTheory.Kernel (𝓔 × Fin K)
      ℝ}
  [ProbabilityTheory.IsMarkovKernel κ]
  {alg : Algorithm (Fin K) ℝ} {E : Ω → 𝓔}
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  (h : IsBayesAlgEnvSeq Q κ alg E A R P)
  {σ2 : NNReal} (hσ2 : 0 < σ2)
  (hs :
    ∀ (e : 𝓔) (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂κ (e, a))
        σ2 (κ (e, a)))
  {δ : ℝ} (hδ : 0 < δ) (n : ℕ) :
  P
      {ω |
        ∃ t < n,
          pullCount A (bestAction κ E ω) t
                ω ≠
              0 ∧
            empMean A R (bestAction κ E ω)
                  t ω -
                actionMean κ E
                  (bestAction κ E ω) ω ≤
              -√(2 * ↑σ2 *
                      Real.log (1 / δ) /
                    ↑(pullCount A
                        (bestAction κ E ω)
                        t ω))} ≤
    ENNReal.ofReal ((↑n - 1) * δ)

Code

lemma prob_empMean_bestAction_sub_actionMean_le_le (h : IsBayesAlgEnvSeq Q κ alg E A R P)
    {σ2 : ℝ≥0} (hσ2 : 0 < σ2)
    (hs : ∀ e a, HasSubgaussianMGF (fun x ↦ x - (κ (e, a))[id]) σ2 (κ (e, a)))
    {δ : ℝ} (hδ : 0 < δ) (n : ℕ) :
    P {ω | ∃ t < n, pullCount A (bestAction κ E ω) t ω ≠ 0 ∧
        empMean A R (bestAction κ E ω) t ω - actionMean κ E (bestAction κ E ω) ω ≤
          -√(2 * σ2 * Real.log (1 / δ) / (pullCount A (bestAction κ E ω) t ω))}
      ≤ ENNReal.ofReal ((n - 1) * δ)

Type uses (6)

Body uses (15)

Used by (1)

integral_sum_range_actionMean_bestAction_sub_ucb_bestAction_le

Actions: Source · Open Issue

Proof

by
  have := h.measurable_param
  have := h.measurable_action
  have := h.measurable_feedback
  let S := {(e, τ) | ∃ t < n, pullCount IT.action (bestAction κ id e) t τ ≠ 0 ∧
    sumRewards IT.action IT.feedback (bestAction κ id e) t τ -
        pullCount IT.action (bestAction κ id e) t τ * actionMean κ id (bestAction κ id e) e ≤
          -√(2 * pullCount IT.action (bestAction κ id e) t τ * σ2 * Real.log (1 / δ))}
  calc
    _ ≤ (P.map (fun ω ↦ (E ω, trajectory A R ω))) S := by
        rw [Measure.map_apply (by fun_prop) (by measurability)]
        apply measure_mono
        intro ω ⟨t, ht, hpc, hle⟩
        rw [empMean] at hle
        exact ⟨t, ht, hpc, sub_le_neg_sqrt_two_mul hpc hle⟩
    _ = (P.map E ⊗ₘ condDistrib (trajectory A R) E P) S := by
        rw [← compProd_map_condDistrib (by fun_prop)]
    _ = ∫⁻ e, condDistrib (trajectory A R) E P e (Prod.mk e ⁻¹' S) ∂(P.map E) :=
        Measure.compProd_apply (by measurability)
    _ ≤ ∫⁻ e, ENNReal.ofReal ((n - 1) * δ) ∂(P.map E) := by
        apply lintegral_mono_ae
        rw [h.hasLaw_env.map_eq]
        filter_upwards [h.ae_IsAlgEnvSeq] with e he
        exact Bandits.prob_sumRewards_sub_pullCount_mul_le_le (ν := κ.sectR e) hσ2 (hs e _) he
          hδ
    _ = ENNReal.ofReal ((n - 1) * δ) := by
      simp [Measure.map_apply h.measurable_param]

LeanMachineLearning exposition

2.2. Online.Bandit.SumRewards🔗

`identDistrib_sum_range_snd`🔗

`prob_pullCount_prod_sumRewards_mem_le`🔗

`prob_pullCount_mem_and_sumRewards_mem_le`🔗

`prob_exists_pullCount_eq_and_sumRewards_mem_le`🔗

`prob_sumRewards_le_sumRewards_le`🔗

`probReal_sumRewards_le_sumRewards_le`🔗

`sumRewards_eq_comp`🔗

`pullCount_eq_comp`🔗

`law_sumRewards_unique`🔗

`law_pullCount_sumRewards_unique'`🔗

`law_pullCount_sumRewards_unique`🔗

`identDistrib_pullCount_sumRewards`🔗

`prob_pullCount_prod_sumRewards_mem_le`🔗

`prob_pullCount_mem_and_sumRewards_mem_le`🔗

`prob_sumRewards_mem_le`🔗

`prob_pullCount_eq_and_sumRewards_mem_le`🔗

`prob_exists_pullCount_eq_and_sumRewards_mem_le`🔗

`probReal_sumRewards_le_sumRewards_le`🔗

`prob_sum_range_sub_ge_le_of_HasSubgaussianMGF`🔗

`prob_sum_range_sub_le_le_of_HasSubgaussianMGF`🔗

`prob_sum_range_sub_ge_le_of_HasSubgaussianMGF'`🔗

`prob_sum_range_sub_le_le_of_HasSubgaussianMGF'`🔗

`prob_sumRewards_sub_pullCount_mul_ge_le`🔗

`prob_sumRewards_sub_pullCount_mul_le_le`🔗

`prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype`🔗

`probReal_sum_le_sum_streamMeasure`🔗

`prob_sum_le_sqrt_log`🔗

`prob_sum_ge_sqrt_log`🔗

`prob_avg_add_sqrt_log_le`🔗

`prob_avg_sub_sqrt_log_ge`🔗

`prob_empMean_sub_actionMean_ge_le`🔗

`prob_empMean_bestAction_sub_actionMean_le_le`🔗

2.2. Online.Bandit.SumRewards🔗

identDistrib_sum_range_snd🔗

prob_pullCount_prod_sumRewards_mem_le🔗

prob_pullCount_mem_and_sumRewards_mem_le🔗

prob_exists_pullCount_eq_and_sumRewards_mem_le🔗

prob_sumRewards_le_sumRewards_le🔗

probReal_sumRewards_le_sumRewards_le🔗

sumRewards_eq_comp🔗

pullCount_eq_comp🔗

law_sumRewards_unique🔗

law_pullCount_sumRewards_unique'🔗

law_pullCount_sumRewards_unique🔗

identDistrib_pullCount_sumRewards🔗

prob_pullCount_prod_sumRewards_mem_le🔗

prob_pullCount_mem_and_sumRewards_mem_le🔗

prob_sumRewards_mem_le🔗

prob_pullCount_eq_and_sumRewards_mem_le🔗

prob_exists_pullCount_eq_and_sumRewards_mem_le🔗

probReal_sumRewards_le_sumRewards_le🔗

prob_sum_range_sub_ge_le_of_HasSubgaussianMGF🔗

prob_sum_range_sub_le_le_of_HasSubgaussianMGF🔗

prob_sum_range_sub_ge_le_of_HasSubgaussianMGF'🔗

prob_sum_range_sub_le_le_of_HasSubgaussianMGF'🔗

prob_sumRewards_sub_pullCount_mul_ge_le🔗

prob_sumRewards_sub_pullCount_mul_le_le🔗

prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype🔗

probReal_sum_le_sum_streamMeasure🔗

prob_sum_le_sqrt_log🔗

prob_sum_ge_sqrt_log🔗

prob_avg_add_sqrt_log_le🔗

prob_avg_sub_sqrt_log_ge🔗

prob_empMean_sub_actionMean_ge_le🔗

prob_empMean_bestAction_sub_actionMean_le_le🔗

`identDistrib_sum_range_snd`🔗

`prob_pullCount_prod_sumRewards_mem_le`🔗

`prob_pullCount_mem_and_sumRewards_mem_le`🔗

`prob_exists_pullCount_eq_and_sumRewards_mem_le`🔗

`prob_sumRewards_le_sumRewards_le`🔗

`probReal_sumRewards_le_sumRewards_le`🔗

`sumRewards_eq_comp`🔗

`pullCount_eq_comp`🔗

`law_sumRewards_unique`🔗

`law_pullCount_sumRewards_unique'`🔗

`law_pullCount_sumRewards_unique`🔗

`identDistrib_pullCount_sumRewards`🔗

`prob_pullCount_prod_sumRewards_mem_le`🔗

`prob_pullCount_mem_and_sumRewards_mem_le`🔗

`prob_sumRewards_mem_le`🔗

`prob_pullCount_eq_and_sumRewards_mem_le`🔗

`prob_exists_pullCount_eq_and_sumRewards_mem_le`🔗

`probReal_sumRewards_le_sumRewards_le`🔗

`prob_sum_range_sub_ge_le_of_HasSubgaussianMGF`🔗

`prob_sum_range_sub_le_le_of_HasSubgaussianMGF`🔗

`prob_sum_range_sub_ge_le_of_HasSubgaussianMGF'`🔗

`prob_sum_range_sub_le_le_of_HasSubgaussianMGF'`🔗

`prob_sumRewards_sub_pullCount_mul_ge_le`🔗

`prob_sumRewards_sub_pullCount_mul_le_le`🔗

`prob_sumRewards_sub_pullCount_mul_ge_le_of_Fintype`🔗

`probReal_sum_le_sum_streamMeasure`🔗

`prob_sum_le_sqrt_log`🔗

`prob_sum_ge_sqrt_log`🔗

`prob_avg_add_sqrt_log_le`🔗

`prob_avg_sub_sqrt_log_ge`🔗

`prob_empMean_sub_actionMean_ge_le`🔗

`prob_empMean_bestAction_sub_actionMean_le_le`🔗