LeanMachineLearning exposition

2.8. Online.Bandit.Algorithms.UCB🔗

UCB algorithm

Module LeanMachineLearning.Online.Bandit.Algorithms.UCB contains 32 exposed declarations.

ucbWidth'🔗

DefinitionBandits.ucbWidth'

The exploration bonus of the UCB algorithm, which corresponds to the width of a confidence interval.

🔗def
Bandits.ucbWidth' {K : } (c : ) (n : ) (h : (Finset.Iic n) Fin K × ) (a : Fin K) :
Bandits.ucbWidth' {K : } (c : ) (n : ) (h : (Finset.Iic n) Fin K × ) (a : Fin K) :

Code

noncomputable def ucbWidth' (c : ℝ) (n : ℕ) (h : Iic n → Fin K × ℝ) (a : Fin K) : ℝ :=
  √(2 * c * log (n + 2) / pullCount' n h a)
Body uses (1)
Used by (6)

Actions: Source · Open Issue

nextArm🔗

DefinitionBandits.UCB.nextArm

Arm pulled by the UCB algorithm at time n + 1.

🔗def
Bandits.UCB.nextArm {K : } (hK : 0 < K) (c : ) (n : ) (h : (Finset.Iic n) Fin K × ) : Fin K
Bandits.UCB.nextArm {K : } (hK : 0 < K) (c : ) (n : ) (h : (Finset.Iic n) Fin K × ) : Fin K

Code

noncomputable
def UCB.nextArm (hK : 0 < K) (c : ℝ) (n : ℕ) (h : Iic n → Fin K × ℝ) : Fin K :=
  have : Nonempty (Fin K) := Fin.pos_iff_nonempty.mp hK
  if n < K - 1 then RoundRobin.nextAction hK n else
  argmax (fun a ↦ empMean' n h a + ucbWidth' c n h a)
Body uses (4)
Used by (7)

Actions: Source · Open Issue

measurable_nextArm🔗

LemmaBandits.UCB.measurable_nextArm

No docstring.

🔗theorem
Bandits.UCB.measurable_nextArm {K : } (hK : 0 < K) (c : ) (n : ) : Measurable (nextArm hK c n)
Bandits.UCB.measurable_nextArm {K : } (hK : 0 < K) (c : ) (n : ) : Measurable (nextArm hK c n)

Code

lemma UCB.measurable_nextArm (hK : 0 < K) (c : ℝ) (n : ℕ) : Measurable (nextArm hK c n)
Type uses (1)
Body uses (9)
Used by (3)

Actions: Source · Open Issue

Proof
by
  refine Measurable.ite (by simp) (by fun_prop) ?_
  have : Nonempty (Fin K) := Fin.pos_iff_nonempty.mp hK
  unfold ucbWidth'
  fun_prop

ucbAlgorithm🔗

DefinitionBandits.ucbAlgorithm

The UCB algorithm.

🔗def
Bandits.ucbAlgorithm {K : } (hK : 0 < K) (c : ) : Learning.Algorithm (Fin K)
Bandits.ucbAlgorithm {K : } (hK : 0 < K) (c : ) : Learning.Algorithm (Fin K)

Code

noncomputable
def ucbAlgorithm (hK : 0 < K) (c : ℝ) : Algorithm (Fin K) ℝ :=
  detAlgorithm (UCB.nextArm hK c) (by fun_prop) ⟨0, hK⟩
Type uses (1)
Body uses (3)
Used by (16)

Actions: Source · Open Issue

isAlgEnvSeqUntil_roundRobinAlgorithm🔗

LemmaBandits.UCB.isAlgEnvSeqUntil_roundRobinAlgorithm

Until round K - 1, the UCB algorithm behaves like the Round-Robin algorithm.

🔗theorem
Bandits.UCB.isAlgEnvSeqUntil_roundRobinAlgorithm.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) : Learning.IsAlgEnvSeqUntil A R (Learning.roundRobinAlgorithm hK) (Learning.stationaryEnv ν) P (K - 1)
Bandits.UCB.isAlgEnvSeqUntil_roundRobinAlgorithm.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) : Learning.IsAlgEnvSeqUntil A R (Learning.roundRobinAlgorithm hK) (Learning.stationaryEnv ν) P (K - 1)

Code

lemma isAlgEnvSeqUntil_roundRobinAlgorithm
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) :
    IsAlgEnvSeqUntil A R (roundRobinAlgorithm hK) (stationaryEnv ν) P (K - 1) where
  measurable_action
Type uses (5)
Body uses (10)
Used by (3)

Actions: Source · Open Issue

Proof
h.measurable_action
  measurable_feedback := h.measurable_feedback
  hasLaw_action_zero := h.hasLaw_action_zero
  hasCondDistrib_feedback_zero := h.hasCondDistrib_feedback_zero
  hasCondDistrib_action n hn := by
    convert h.hasCondDistrib_action n using 1
    simp only [roundRobinAlgorithm, detAlgorithm_policy, ucbAlgorithm]
    congr 1 with h
    simp [UCB.nextArm, hn]
  hasCondDistrib_feedback n _ := h.hasCondDistrib_feedback n

ucbWidth🔗

DefinitionBandits.UCB.ucbWidth

The exploration bonus of the UCB algorithm, which corresponds to the width of a confidence interval.

🔗def
Bandits.UCB.ucbWidth.{u_1} {K : } {Ω : Type u_1} (A : Ω Fin K) (c : ) (a : Fin K) (n : ) (ω : Ω) :
Bandits.UCB.ucbWidth.{u_1} {K : } {Ω : Type u_1} (A : Ω Fin K) (c : ) (a : Fin K) (n : ) (ω : Ω) :

Code

noncomputable def ucbWidth (A : ℕ → Ω → Fin K) (c : ℝ) (a : Fin K) (n : ℕ) (ω : Ω) : ℝ :=
  √(2 * c * log (n + 1) / pullCount A a n ω)
Body uses (1)
Used by (16)

Actions: Source · Open Issue

measurable_ucbWidth🔗

LemmaBandits.UCB.measurable_ucbWidth

No docstring.

🔗theorem
Bandits.UCB.measurable_ucbWidth.{u_1} {K : } {Ω : Type u_1} { : MeasurableSpace Ω} {A : Ω Fin K} {n : } (hA : (n : ), Measurable (A n)) (c : ) (a : Fin K) : Measurable (ucbWidth A c a n)
Bandits.UCB.measurable_ucbWidth.{u_1} {K : } {Ω : Type u_1} { : MeasurableSpace Ω} {A : Ω Fin K} {n : } (hA : (n : ), Measurable (A n)) (c : ) (a : Fin K) : Measurable (ucbWidth A c a n)

Code

lemma measurable_ucbWidth (hA : ∀ n, Measurable (A n)) (c : ℝ) (a : Fin K) :
    Measurable (ucbWidth A c a n)
Type uses (1)
Body uses (2)
Used by (1)

Actions: Source · Open Issue

Proof
by
  unfold ucbWidth
  fun_prop

ucbWidth_eq_ucbWidth'🔗

LemmaBandits.UCB.ucbWidth_eq_ucbWidth'

No docstring.

🔗theorem
Bandits.UCB.ucbWidth_eq_ucbWidth'.{u_1} {K : } {Ω : Type u_1} {A : Ω Fin K} {R : Ω } (c : ) (a : Fin K) (n : ) (ω : Ω) (hn : n 0) : ucbWidth A c a n ω = ucbWidth' c (n - 1) (Learning.history A R (n - 1) ω) a
Bandits.UCB.ucbWidth_eq_ucbWidth'.{u_1} {K : } {Ω : Type u_1} {A : Ω Fin K} {R : Ω } (c : ) (a : Fin K) (n : ) (ω : Ω) (hn : n 0) : ucbWidth A c a n ω = ucbWidth' c (n - 1) (Learning.history A R (n - 1) ω) a

Code

lemma ucbWidth_eq_ucbWidth' (c : ℝ) (a : Fin K) (n : ℕ) (ω : Ω) (hn : n ≠ 0) :
    ucbWidth A c a n ω = ucbWidth' c (n - 1) (history A R (n - 1) ω) a
Type uses (3)
Body uses (3)
Used by (1)

Actions: Source · Open Issue

Proof
by
  simp only [ucbWidth, pullCount_eq_pullCount' (A := A) (R' := R) hn, Nat.cast_nonneg, sqrt_div',
    ucbWidth']
  congr 4
  norm_cast
  grind

arm_zero🔗

LemmaBandits.UCB.arm_zero

No docstring.

🔗theorem
Bandits.UCB.arm_zero.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) : A 0 =ᵐ[P] fun x => 0, hK
Bandits.UCB.arm_zero.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) : A 0 =ᵐ[P] fun x => 0, hK

Code

lemma arm_zero (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) :
    A 0 =ᵐ[P] fun _ ↦ ⟨0, hK⟩
Type uses (3)
Body uses (4)
Used by (2)

Actions: Source · Open Issue

Proof
RoundRobin.action_zero ((isAlgEnvSeqUntil_roundRobinAlgorithm h).mono zero_le)

arm_ae_eq_ucbNextArm🔗

LemmaBandits.UCB.arm_ae_eq_ucbNextArm

No docstring.

🔗theorem
Bandits.UCB.arm_ae_eq_ucbNextArm.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) (n : ) : A (n + 1) =ᵐ[P] fun ω => nextArm hK c n (Learning.history A R n ω)
Bandits.UCB.arm_ae_eq_ucbNextArm.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) (n : ) : A (n + 1) =ᵐ[P] fun ω => nextArm hK c n (Learning.history A R n ω)

Code

lemma arm_ae_eq_ucbNextArm (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) (n : ℕ) :
    A (n + 1) =ᵐ[P] fun ω ↦ nextArm hK c n (history A R n ω)
Type uses (5)
Body uses (3)
Used by (3)

Actions: Source · Open Issue

Proof
by
  have : Nonempty (Fin K) := Fin.pos_iff_nonempty.mp hK
  exact h.action_detAlgorithm_ae_eq n

arm_ae_all_eq🔗

LemmaBandits.UCB.arm_ae_all_eq

No docstring.

🔗theorem
Bandits.UCB.arm_ae_all_eq.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) : ∀ᵐ (h : Ω) P, A 0 h = 0, hK (n : ), A (n + 1) h = nextArm hK c n (Learning.history A R n h)
Bandits.UCB.arm_ae_all_eq.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) : ∀ᵐ (h : Ω) P, A 0 h = 0, hK (n : ), A (n + 1) h = nextArm hK c n (Learning.history A R n h)

Code

lemma arm_ae_all_eq (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) :
    ∀ᵐ h ∂P, A 0 h = ⟨0, hK⟩ ∧ ∀ n, A (n + 1) h = nextArm hK c n (history A R n h)
Type uses (5)
Body uses (2)

Actions: Source · Open Issue

Proof
by
  rw [eventually_and, ae_all_iff]
  exact ⟨arm_zero h, arm_ae_eq_ucbNextArm h⟩

ucbIndex_le_ucbIndex_arm🔗

LemmaBandits.UCB.ucbIndex_le_ucbIndex_arm

No docstring.

🔗theorem
Bandits.UCB.ucbIndex_le_ucbIndex_arm.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {n : } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) (a : Fin K) (hn : K n) : ∀ᵐ (h : Ω) P, Learning.empMean A R a n h + ucbWidth A c a n h Learning.empMean A R (A n h) n h + ucbWidth A c (A n h) n h
Bandits.UCB.ucbIndex_le_ucbIndex_arm.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {n : } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) (a : Fin K) (hn : K n) : ∀ᵐ (h : Ω) P, Learning.empMean A R a n h + ucbWidth A c a n h Learning.empMean A R (A n h) n h + ucbWidth A c (A n h) n h

Code

lemma ucbIndex_le_ucbIndex_arm
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) (a : Fin K) (hn : K ≤ n) :
    ∀ᵐ h ∂P, empMean A R a n h + ucbWidth A c a n h ≤
      empMean A R (A n h) n h + ucbWidth A c (A n h) n h
Type uses (5)
Body uses (10)
Used by (1)

Actions: Source · Open Issue

Proof
by
  filter_upwards [arm_ae_eq_ucbNextArm h (n - 1)] with h h_arm
  have : n - 1 + 1 = n := by grind
  have h_not_lt : ¬ n - 1 < K - 1 := by grind
  simp only [this, nextArm, h_not_lt, ↓reduceIte] at h_arm
  have : Nonempty (Fin K) := Fin.pos_iff_nonempty.mp hK
  simp_rw [h_arm, empMean_eq_empMean' (by grind : n ≠ 0),
    ucbWidth_eq_ucbWidth' (A := A) (R := R) _ _ _ _ (by grind : n ≠ 0)]
  exact isMaxOn_argmax (fun a ↦ empMean' (n - 1) (history A R (n - 1) h) a
    + ucbWidth' c (n - 1) (history A R (n - 1) h) a) _

forall_arm_eq_mod_of_lt🔗

LemmaBandits.UCB.forall_arm_eq_mod_of_lt

No docstring.

🔗theorem
Bandits.UCB.forall_arm_eq_mod_of_lt.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) : ∀ᵐ (h : Ω) P, n < K, A n h = n % K,
Bandits.UCB.forall_arm_eq_mod_of_lt.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) : ∀ᵐ (h : Ω) P, n < K, A n h = n % K,

Code

lemma forall_arm_eq_mod_of_lt (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) :
    ∀ᵐ h ∂P, ∀ n < K, A n h = ⟨n % K, Nat.mod_lt _ hK⟩
Type uses (3)
Body uses (8)
Used by (1)

Actions: Source · Open Issue

Proof
by
  simp_rw [ae_all_iff]
  intro n hn
  induction n with
  | zero => exact arm_zero h
  | succ n _ =>
    filter_upwards [arm_ae_eq_ucbNextArm h n] with h h_eq
    rw [h_eq, nextArm, if_pos]
    · rfl
    · grind

forall_ucbIndex_le_ucbIndex_arm🔗

LemmaBandits.UCB.forall_ucbIndex_le_ucbIndex_arm

No docstring.

🔗theorem
Bandits.UCB.forall_ucbIndex_le_ucbIndex_arm.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) (a : Fin K) : ∀ᵐ (h : Ω) P, (n : ), K n Learning.empMean A R a n h + ucbWidth A c a n h Learning.empMean A R (A n h) n h + ucbWidth A c (A n h) n h
Bandits.UCB.forall_ucbIndex_le_ucbIndex_arm.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) (a : Fin K) : ∀ᵐ (h : Ω) P, (n : ), K n Learning.empMean A R a n h + ucbWidth A c a n h Learning.empMean A R (A n h) n h + ucbWidth A c (A n h) n h

Code

lemma forall_ucbIndex_le_ucbIndex_arm
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) (a : Fin K) :
    ∀ᵐ h ∂P, ∀ n, K ≤ n →
      empMean A R a n h + ucbWidth A c a n h ≤
        empMean A R (A n h) n h + ucbWidth A c (A n h) n h
Type uses (5)
Body uses (1)
Used by (2)

Actions: Source · Open Issue

Proof
by
  simp_rw [ae_all_iff]
  exact fun _ ↦ ucbIndex_le_ucbIndex_arm h a

forall_arm_prop🔗

LemmaBandits.UCB.forall_arm_prop

No docstring.

🔗theorem
Bandits.UCB.forall_arm_prop.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) : ∀ᵐ (h : Ω) P, (∀ n < K, A n h = n % K, ) (n : ), K n (a : Fin K), Learning.empMean A R a n h + ucbWidth A c a n h Learning.empMean A R (A n h) n h + ucbWidth A c (A n h) n h
Bandits.UCB.forall_arm_prop.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) : ∀ᵐ (h : Ω) P, (∀ n < K, A n h = n % K, ) (n : ), K n (a : Fin K), Learning.empMean A R a n h + ucbWidth A c a n h Learning.empMean A R (A n h) n h + ucbWidth A c (A n h) n h

Code

lemma forall_arm_prop
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) :
    ∀ᵐ h ∂P,
      (∀ n < K, A n h = ⟨n % K, Nat.mod_lt _ hK⟩) ∧
      (∀ n, K ≤ n → ∀ a, empMean A R a n h + ucbWidth A c a n h ≤
        empMean A R (A n h) n h + ucbWidth A c (A n h) n h)
Type uses (5)
Body uses (2)

Actions: Source · Open Issue

Proof
by
  simp only [eventually_and]
  constructor
  · exact forall_arm_eq_mod_of_lt h
  · simp_rw [ae_all_iff]
    intro n hn a
    have h_ae := forall_ucbIndex_le_ucbIndex_arm h a
    simp_rw [ae_all_iff] at h_ae
    exact h_ae n hn

time_gt_of_pullCount_gt_one🔗

LemmaBandits.UCB.time_gt_of_pullCount_gt_one

No docstring.

🔗theorem
Bandits.UCB.time_gt_of_pullCount_gt_one.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) (a : Fin K) : ∀ᵐ (ω : Ω) P, (n : ), 1 < Learning.pullCount A a n ω K < n
Bandits.UCB.time_gt_of_pullCount_gt_one.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) (a : Fin K) : ∀ᵐ (ω : Ω) P, (n : ), 1 < Learning.pullCount A a n ω K < n

Code

lemma time_gt_of_pullCount_gt_one
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) (a : Fin K) :
    ∀ᵐ ω ∂P, ∀ n, 1 < pullCount A a n ω → K < n
Type uses (4)
Body uses (2)
Used by (1)

Actions: Source · Open Issue

Proof
RoundRobin.time_gt_of_pullCount_gt_one (isAlgEnvSeqUntil_roundRobinAlgorithm h) a

pullCount_pos_of_pullCount_gt_one🔗

LemmaBandits.UCB.pullCount_pos_of_pullCount_gt_one

No docstring.

🔗theorem
Bandits.UCB.pullCount_pos_of_pullCount_gt_one.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) (a : Fin K) : ∀ᵐ (ω : Ω) P, (n : ), 1 < Learning.pullCount A a n ω (b : Fin K), 0 < Learning.pullCount A b n ω
Bandits.UCB.pullCount_pos_of_pullCount_gt_one.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) (a : Fin K) : ∀ᵐ (ω : Ω) P, (n : ), 1 < Learning.pullCount A a n ω (b : Fin K), 0 < Learning.pullCount A b n ω

Code

lemma pullCount_pos_of_pullCount_gt_one
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) (a : Fin K) :
    ∀ᵐ ω ∂P, ∀ n, 1 < pullCount A a n ω → ∀ b : Fin K, 0 < pullCount A b n ω
Type uses (4)
Body uses (2)
Used by (1)

Actions: Source · Open Issue

Proof
RoundRobin.pullCount_pos_of_pullCount_gt_one (isAlgEnvSeqUntil_roundRobinAlgorithm h) a

gap_arm_le_two_mul_ucbWidth🔗

LemmaBandits.UCB.gap_arm_le_two_mul_ucbWidth

No docstring.

🔗theorem
Bandits.UCB.gap_arm_le_two_mul_ucbWidth.{u_1} {K : } {c : } {ν : ProbabilityTheory.Kernel (Fin K) } {Ω : Type u_1} {A : Ω Fin K} {R : Ω } {n : } {ω : Ω} [Nonempty (Fin K)] (h_best : (x : ), id x ν (bestArm ν) Learning.empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω) (h_arm : Learning.empMean A R (A n ω) n ω - ucbWidth A c (A n ω) n ω (x : ), id x ν (A n ω)) (h_le : Learning.empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω Learning.empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω) : gap ν (A n ω) 2 * ucbWidth A c (A n ω) n ω
Bandits.UCB.gap_arm_le_two_mul_ucbWidth.{u_1} {K : } {c : } {ν : ProbabilityTheory.Kernel (Fin K) } {Ω : Type u_1} {A : Ω Fin K} {R : Ω } {n : } {ω : Ω} [Nonempty (Fin K)] (h_best : (x : ), id x ν (bestArm ν) Learning.empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω) (h_arm : Learning.empMean A R (A n ω) n ω - ucbWidth A c (A n ω) n ω (x : ), id x ν (A n ω)) (h_le : Learning.empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω Learning.empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω) : gap ν (A n ω) 2 * ucbWidth A c (A n ω) n ω

Code

lemma gap_arm_le_two_mul_ucbWidth [Nonempty (Fin K)]
    (h_best : (ν (bestArm ν))[id] ≤ empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω)
    (h_arm : empMean A R (A n ω) n ω - ucbWidth A c (A n ω) n ω ≤ (ν (A n ω))[id])
    (h_le : empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω ≤
      empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω) :
    gap ν (A n ω) ≤ 2 * ucbWidth A c (A n ω) n ω
Type uses (4)
Body uses (1)
Used by (1)

Actions: Source · Open Issue

Proof
by
  rw [gap_eq_bestArm_sub, sub_le_iff_le_add']
  calc (ν (bestArm ν))[id]
  _ ≤ empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω := h_best
  _ ≤ empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω := h_le
  _ ≤ (ν (A n ω))[id] + 2 * ucbWidth A c (A n ω) n ω := by
    rw [two_mul, ← add_assoc]
    gcongr
    rwa [sub_le_iff_le_add] at h_arm

pullCount_arm_le🔗

LemmaBandits.UCB.pullCount_arm_le

No docstring.

🔗theorem
Bandits.UCB.pullCount_arm_le.{u_1} {K : } {c : } {ν : ProbabilityTheory.Kernel (Fin K) } {Ω : Type u_1} {A : Ω Fin K} {R : Ω } {n : } {ω : Ω} [Nonempty (Fin K)] (hc : 0 c) (h_best : (x : ), id x ν (bestArm ν) Learning.empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω) (h_arm : Learning.empMean A R (A n ω) n ω - ucbWidth A c (A n ω) n ω (x : ), id x ν (A n ω)) (h_le : Learning.empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω Learning.empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω) (h_gap_pos : 0 < gap ν (A n ω)) (h_pull_pos : 0 < Learning.pullCount A (A n ω) n ω) : (Learning.pullCount A (A n ω) n ω) 8 * c * Real.log (n + 1) / gap ν (A n ω) ^ 2
Bandits.UCB.pullCount_arm_le.{u_1} {K : } {c : } {ν : ProbabilityTheory.Kernel (Fin K) } {Ω : Type u_1} {A : Ω Fin K} {R : Ω } {n : } {ω : Ω} [Nonempty (Fin K)] (hc : 0 c) (h_best : (x : ), id x ν (bestArm ν) Learning.empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω) (h_arm : Learning.empMean A R (A n ω) n ω - ucbWidth A c (A n ω) n ω (x : ), id x ν (A n ω)) (h_le : Learning.empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω Learning.empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω) (h_gap_pos : 0 < gap ν (A n ω)) (h_pull_pos : 0 < Learning.pullCount A (A n ω) n ω) : (Learning.pullCount A (A n ω) n ω) 8 * c * Real.log (n + 1) / gap ν (A n ω) ^ 2

Code

lemma pullCount_arm_le [Nonempty (Fin K)] (hc : 0 ≤ c)
    (h_best : (ν (bestArm ν))[id] ≤ empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω)
    (h_arm : empMean A R (A n ω) n ω - ucbWidth A c (A n ω) n ω ≤ (ν (A n ω))[id])
    (h_le : empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω ≤
      empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω)
    (h_gap_pos : 0 < gap ν (A n ω)) (h_pull_pos : 0 < pullCount A (A n ω) n ω) :
    pullCount A (A n ω) n ω ≤ 8 * c * log (n + 1) / gap ν (A n ω) ^ 2
Type uses (5)
Body uses (1)
Used by (1)

Actions: Source · Open Issue

Proof
by
  have h_gap_le := gap_arm_le_two_mul_ucbWidth h_best h_arm h_le
  rw [ucbWidth] at h_gap_le
  have h2 : (gap ν (A n ω)) ^ 2 ≤ (2 * √(2 * c * log (n + 1) / pullCount A (A n ω) n ω)) ^ 2 := by
    gcongr
  rw [mul_pow, sq_sqrt] at h2
  · have : (2 : ℝ) ^ 2 = 4 := by norm_num
    rw [this] at h2
    field_simp at h2 ⊢
    grind
  · have : 0 ≤ log (n + 1) := by simp [log_nonneg]
    positivity

prob_ucbIndex_le🔗

LemmaBandits.UCB.prob_ucbIndex_le

No docstring.

🔗theorem
Bandits.UCB.prob_ucbIndex_le.{u_1} {K : } {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} [Nonempty (Fin K)] {alg : Learning.Algorithm (Fin K) } (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) ( : (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 c) (a : Fin K) (n : ) : P {h | 0 < Learning.pullCount A a n h Learning.empMean A R a n h + ucbWidth A (c * σ2) a n h (x : ), id x ν a} 1 / (n + 1) ^ (c - 1)
Bandits.UCB.prob_ucbIndex_le.{u_1} {K : } {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} [Nonempty (Fin K)] {alg : Learning.Algorithm (Fin K) } (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) ( : (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 c) (a : Fin K) (n : ) : P {h | 0 < Learning.pullCount A a n h Learning.empMean A R a n h + ucbWidth A (c * σ2) a n h (x : ), id x ν a} 1 / (n + 1) ^ (c - 1)

Code

lemma prob_ucbIndex_le [Nonempty (Fin K)] {alg : Algorithm (Fin K) ℝ}
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : Fin K) (n : ℕ) :
    P {h | 0 < pullCount A a n h ∧ empMean A R a n h + ucbWidth A (c * σ2) a n h ≤ (ν a)[id]} ≤
      1 / (n + 1) ^ (c - 1)
Type uses (6)
Body uses (3)
Used by (2)

Actions: Source · Open Issue

Proof
by
  let s : Set (ℕ × ℝ) := {(m, x) | 0 < m ∧ x / m + √(2 * (c * σ2) * log (↑n + 1) / m) ≤ (ν a)[id]}
  have hs : MeasurableSet s := by
    simp only [Nat.cast_nonneg, sqrt_div', id_eq, measurableSet_setOf, s]
    fun_prop
  classical
  calc P {h | 0 < pullCount A a n h ∧ empMean A R a n h + ucbWidth A (c * σ2) a n h ≤ (ν a)[id]}
  _ ≤ ∑ k ∈ range (n + 1) with k ∈ Prod.fst '' s,
      (streamMeasure ν) {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s} :=
    prob_pullCount_prod_sumRewards_mem_le h hs
  _ ≤ ∑ k ∈ Icc 1 n,
      (streamMeasure ν) {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s} := by
    refine Finset.sum_le_sum_of_subset_of_nonneg (fun m ↦ ?_) fun _ _ _ ↦ by positivity
    simp [s]
    grind
  _ = ∑ k ∈ Icc 1 n,
      (streamMeasure ν) {ω | (∑ i ∈ range k, ω i a) / k + √(2 * c * σ2 * log (↑n + 1) / k) ≤
        (ν a)[id]} := by
    refine Finset.sum_congr rfl fun k hk ↦ ?_
    congr with ω
    have hk : 0 < k := by grind
    simp only [Nat.cast_nonneg, sqrt_div', id_eq, Set.preimage_setOf_eq, hk, true_and,
      Set.mem_setOf_eq, s]
    grind
  _ ≤ ∑ k ∈ Icc 1 n, (1 : ℝ≥0∞) / (n + 1) ^ c := by
    gcongr with k hk
    exact prob_avg_add_sqrt_log_le hν hσ2 hc a n k (by grind)
  _ ≤ (n + 1) * (1 : ℝ≥0∞) / (n + 1) ^ c := by
    simp only [one_div, sum_const, Nat.card_Icc, add_tsub_cancel_right, nsmul_eq_mul, mul_one]
    rw [div_eq_mul_inv ((n : ℝ≥0∞) + 1)]
    gcongr
    exact le_self_add
  _ = 1 / (n + 1) ^ (c - 1) := by
    simp only [mul_one, one_div]
    rw [ENNReal.rpow_sub _ _ (by simp) (by finiteness), ENNReal.rpow_one, div_eq_mul_inv,
      ENNReal.div_eq_inv_mul, ENNReal.mul_inv (by simp) (by simp), inv_inv]

prob_ucbIndex_ge🔗

LemmaBandits.UCB.prob_ucbIndex_ge

No docstring.

🔗theorem
Bandits.UCB.prob_ucbIndex_ge.{u_1} {K : } {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} [Nonempty (Fin K)] {alg : Learning.Algorithm (Fin K) } (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) ( : (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 c) (a : Fin K) (n : ) : P {h | 0 < Learning.pullCount A a n h (x : ), id x ν a Learning.empMean A R a n h - ucbWidth A (c * σ2) a n h} 1 / (n + 1) ^ (c - 1)
Bandits.UCB.prob_ucbIndex_ge.{u_1} {K : } {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} [Nonempty (Fin K)] {alg : Learning.Algorithm (Fin K) } (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) ( : (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 c) (a : Fin K) (n : ) : P {h | 0 < Learning.pullCount A a n h (x : ), id x ν a Learning.empMean A R a n h - ucbWidth A (c * σ2) a n h} 1 / (n + 1) ^ (c - 1)

Code

lemma prob_ucbIndex_ge [Nonempty (Fin K)] {alg : Algorithm (Fin K) ℝ}
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : Fin K) (n : ℕ) :
    P {h | 0 < pullCount A a n h ∧
      (ν a)[id] ≤ empMean A R a n h - ucbWidth A (c * σ2) a n h} ≤ 1 / (n + 1) ^ (c - 1)
Type uses (6)
Body uses (3)
Used by (2)

Actions: Source · Open Issue

Proof
by
  let s : Set (ℕ × ℝ) := {(m, x) | 0 < m ∧ (ν a)[id] ≤ x / m - √(2 * (c * σ2) * log (↑n + 1) / m)}
  have hs : MeasurableSet s := by
    simp only [Nat.cast_nonneg, sqrt_div', id_eq, measurableSet_setOf, s]
    fun_prop
  classical
  calc P {h | 0 < pullCount A a n h ∧ (ν a)[id] ≤ empMean A R a n h - ucbWidth A (c * σ2) a n h}
  _ ≤ ∑ k ∈ range (n + 1) with k ∈ Prod.fst '' s,
      (streamMeasure ν) {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s} :=
    prob_pullCount_prod_sumRewards_mem_le h hs
  _ ≤ ∑ k ∈ Icc 1 n,
      (streamMeasure ν) {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s} := by
    refine Finset.sum_le_sum_of_subset_of_nonneg (fun m ↦ ?_) fun _ _ _ ↦ by positivity
    simp [s]
    grind
  _ = ∑ k ∈ Icc 1 n,
      (streamMeasure ν)
        {ω | (ν a)[id] ≤ (∑ i ∈ range k, ω i a) / k - √(2 * c * σ2 * log (↑n + 1) / k)} := by
    refine Finset.sum_congr rfl fun k hk ↦ ?_
    congr with ω
    have hk : 0 < k := by grind
    simp only [id_eq, Nat.cast_nonneg, sqrt_div', Set.preimage_setOf_eq, hk, true_and,
      Set.mem_setOf_eq, s]
    grind
  _ ≤ ∑ k ∈ Icc 1 n, (1 : ℝ≥0∞) / (n + 1) ^ c := by
    gcongr with k hk
    exact prob_avg_sub_sqrt_log_ge hν hσ2 hc a n k (by grind)
  _ ≤ (n + 1) * (1 : ℝ≥0∞) / (n + 1) ^ c := by
    simp only [one_div, sum_const, Nat.card_Icc, add_tsub_cancel_right, nsmul_eq_mul, mul_one]
    rw [div_eq_mul_inv ((n : ℝ≥0∞) + 1)]
    gcongr
    exact le_self_add
  _ = 1 / (n + 1) ^ (c - 1) := by
    simp only [mul_one, one_div]
    rw [ENNReal.rpow_sub _ _ (by simp) (by finiteness), ENNReal.rpow_one, div_eq_mul_inv,
      ENNReal.div_eq_inv_mul, ENNReal.mul_inv (by simp) (by simp), inv_inv]

probReal_ucbIndex_le🔗

LemmaBandits.UCB.probReal_ucbIndex_le

No docstring.

🔗theorem
Bandits.UCB.probReal_ucbIndex_le.{u_1} {K : } {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} [Nonempty (Fin K)] {alg : Learning.Algorithm (Fin K) } (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) ( : (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 c) (a : Fin K) (n : ) : MeasureTheory.Measure.real P {h | 0 < Learning.pullCount A a n h Learning.empMean A R a n h + ucbWidth A (c * σ2) a n h (x : ), id x ν a} 1 / (n + 1) ^ (c - 1)
Bandits.UCB.probReal_ucbIndex_le.{u_1} {K : } {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} [Nonempty (Fin K)] {alg : Learning.Algorithm (Fin K) } (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) ( : (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 c) (a : Fin K) (n : ) : MeasureTheory.Measure.real P {h | 0 < Learning.pullCount A a n h Learning.empMean A R a n h + ucbWidth A (c * σ2) a n h (x : ), id x ν a} 1 / (n + 1) ^ (c - 1)

Code

lemma probReal_ucbIndex_le [Nonempty (Fin K)] {alg : Algorithm (Fin K) ℝ}
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : Fin K) (n : ℕ) :
    P.real {h | 0 < pullCount A a n h ∧ empMean A R a n h + ucbWidth A (c * σ2) a n h ≤ (ν a)[id]} ≤
      1 / (n + 1) ^ (c - 1)
Type uses (6)
Body uses (1)

Actions: Source · Open Issue

Proof
by
  rw [measureReal_def]
  grw [prob_ucbIndex_le h hν hσ2 hc a n]
  swap; · finiteness
  simp only [one_div, ENNReal.toReal_inv]
  rw [← ENNReal.toReal_rpow]
  norm_cast

probReal_ucbIndex_ge🔗

LemmaBandits.UCB.probReal_ucbIndex_ge

No docstring.

🔗theorem
Bandits.UCB.probReal_ucbIndex_ge.{u_1} {K : } {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} [Nonempty (Fin K)] {alg : Learning.Algorithm (Fin K) } (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) ( : (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 c) (a : Fin K) (n : ) : MeasureTheory.Measure.real P {h | 0 < Learning.pullCount A a n h (x : ), id x ν a Learning.empMean A R a n h - ucbWidth A (c * σ2) a n h} 1 / (n + 1) ^ (c - 1)
Bandits.UCB.probReal_ucbIndex_ge.{u_1} {K : } {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} [Nonempty (Fin K)] {alg : Learning.Algorithm (Fin K) } (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P) ( : (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 c) (a : Fin K) (n : ) : MeasureTheory.Measure.real P {h | 0 < Learning.pullCount A a n h (x : ), id x ν a Learning.empMean A R a n h - ucbWidth A (c * σ2) a n h} 1 / (n + 1) ^ (c - 1)

Code

lemma probReal_ucbIndex_ge [Nonempty (Fin K)] {alg : Algorithm (Fin K) ℝ}
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : Fin K) (n : ℕ) :
    P.real {h | 0 < pullCount A a n h ∧
      (ν a)[id] ≤ empMean A R a n h - ucbWidth A (c * σ2) a n h} ≤ 1 / (n + 1) ^ (c - 1)
Type uses (6)
Body uses (1)

Actions: Source · Open Issue

Proof
by
  rw [measureReal_def]
  grw [prob_ucbIndex_ge h hν hσ2 hc a n]
  swap; · finiteness
  simp only [one_div, ENNReal.toReal_inv]
  rw [← ENNReal.toReal_rpow]
  norm_cast

pullCount_le_add_three🔗

LemmaBandits.UCB.pullCount_le_add_three

No docstring.

🔗theorem
Bandits.UCB.pullCount_le_add_three.{u_1} {K : } {c : } {ν : ProbabilityTheory.Kernel (Fin K) } {Ω : Type u_1} {A : Ω Fin K} {R : Ω } [Nonempty (Fin K)] (a : Fin K) (n C : ) (ω : Ω) : Learning.pullCount A a n ω C + 1 + s Finset.range n, Set.indicator {s | A s ω = a C < Learning.pullCount A a s ω (x : ), id x ν (bestArm ν) Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω Learning.empMean A R (A s ω) s ω - ucbWidth A c (A s ω) s ω (x : ), id x ν (A s ω)} 1 s + s Finset.range n, Set.indicator {s | C < Learning.pullCount A a s ω Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω < (x : ), id x ν (bestArm ν)} 1 s + s Finset.range n, Set.indicator {s | C < Learning.pullCount A a s ω (x : ), id x ν a < Learning.empMean A R a s ω - ucbWidth A c a s ω} 1 s
Bandits.UCB.pullCount_le_add_three.{u_1} {K : } {c : } {ν : ProbabilityTheory.Kernel (Fin K) } {Ω : Type u_1} {A : Ω Fin K} {R : Ω } [Nonempty (Fin K)] (a : Fin K) (n C : ) (ω : Ω) : Learning.pullCount A a n ω C + 1 + s Finset.range n, Set.indicator {s | A s ω = a C < Learning.pullCount A a s ω (x : ), id x ν (bestArm ν) Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω Learning.empMean A R (A s ω) s ω - ucbWidth A c (A s ω) s ω (x : ), id x ν (A s ω)} 1 s + s Finset.range n, Set.indicator {s | C < Learning.pullCount A a s ω Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω < (x : ), id x ν (bestArm ν)} 1 s + s Finset.range n, Set.indicator {s | C < Learning.pullCount A a s ω (x : ), id x ν a < Learning.empMean A R a s ω - ucbWidth A c a s ω} 1 s

Code

lemma pullCount_le_add_three [Nonempty (Fin K)] (a : Fin K) (n C : ℕ) (ω : Ω) :
    pullCount A a n ω ≤ C + 1 +
      ∑ s ∈ range n, {s | A s ω = a ∧ C < pullCount A a s ω ∧
        (ν (bestArm ν))[id] ≤ empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω ∧
        empMean A R (A s ω) s ω - ucbWidth A c (A s ω) s ω ≤ (ν (A s ω))[id]}.indicator 1 s +
      ∑ s ∈ range n,
        {s | C < pullCount A a s ω ∧ empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω <
          (ν (bestArm ν))[id]}.indicator 1 s +
      ∑ s ∈ range n,
        {s | C < pullCount A a s ω ∧ (ν a)[id] <
          empMean A R a s ω - ucbWidth A c a s ω}.indicator 1 s
Type uses (4)
Body uses (1)
Used by (1)

Actions: Source · Open Issue

Proof
by
  refine (pullCount_le_add a n C ω).trans ?_
  simp_rw [add_assoc]
  gcongr
  simp_rw [← add_assoc]
  let A' := {s | A s ω = a ∧ C < pullCount A a s ω}
  let B := {s | A s ω = a ∧ C < pullCount A a s ω ∧
        (ν (bestArm ν))[id] ≤ empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω ∧
        empMean A R (A s ω) s ω - ucbWidth A c (A s ω) s ω ≤ (ν (A s ω))[id]}
  let C' := {s | C < pullCount A a s ω ∧
    empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω < (ν (bestArm ν))[id]}
  let D := {s | C < pullCount A a s ω ∧ (ν a)[id] < empMean A R a s ω - ucbWidth A c a s ω}
  change ∑ s ∈ range n, A'.indicator 1 s ≤
    ∑ s ∈ range n, B.indicator 1 s + ∑ s ∈ range n, C'.indicator 1 s +
      ∑ s ∈ range n, D.indicator 1 s
  have h_union : A' ⊆ B ∪ C' ∪ D := by simp [A', B, C', D]; grind
  calc
    (∑ s ∈ range n, A'.indicator 1 s)
    _ ≤ (∑ s ∈ range n, (B ∪ C' ∪ D).indicator (fun _ ↦ (1 : ℕ)) s) := by
      gcongr with n hn
      by_cases h : n ∈ A'
      · have : n ∈ B ∪ C' ∪ D := h_union h
        simp [h, this]
      · simp [h]
    _ ≤ ∑ s ∈ range n, (B.indicator 1 s + C'.indicator 1 s + D.indicator 1 s) := by
      gcongr with s
      simp [Set.indicator_apply]
      grind
    _ = ∑ s ∈ range n, B.indicator 1 s + ∑ s ∈ range n, C'.indicator 1 s +
          ∑ s ∈ range n, D.indicator 1 s := by
      rw [Finset.sum_add_distrib, Finset.sum_add_distrib]

pullCount_le_add_three_ae🔗

LemmaBandits.UCB.pullCount_le_add_three_ae

No docstring.

🔗theorem
Bandits.UCB.pullCount_le_add_three_ae.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } [Nonempty (Fin K)] (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) (a : Fin K) (n C : ) (hC : C 0) : ∀ᵐ (ω : Ω) P, Learning.pullCount A a n ω C + 1 + s Finset.range n, Set.indicator {s | A s ω = a C < Learning.pullCount A a s ω (x : ), id x ν (bestArm ν) Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω Learning.empMean A R (A s ω) s ω - ucbWidth A c (A s ω) s ω (x : ), id x ν (A s ω)} 1 s + s Finset.range n, Set.indicator {s | 0 < Learning.pullCount A (bestArm ν) s ω Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω < (x : ), id x ν (bestArm ν)} 1 s + s Finset.range n, Set.indicator {s | 0 < Learning.pullCount A a s ω (x : ), id x ν a < Learning.empMean A R a s ω - ucbWidth A c a s ω} 1 s
Bandits.UCB.pullCount_le_add_three_ae.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } [Nonempty (Fin K)] (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c) (Learning.stationaryEnv ν) P) (a : Fin K) (n C : ) (hC : C 0) : ∀ᵐ (ω : Ω) P, Learning.pullCount A a n ω C + 1 + s Finset.range n, Set.indicator {s | A s ω = a C < Learning.pullCount A a s ω (x : ), id x ν (bestArm ν) Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω Learning.empMean A R (A s ω) s ω - ucbWidth A c (A s ω) s ω (x : ), id x ν (A s ω)} 1 s + s Finset.range n, Set.indicator {s | 0 < Learning.pullCount A (bestArm ν) s ω Learning.empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω < (x : ), id x ν (bestArm ν)} 1 s + s Finset.range n, Set.indicator {s | 0 < Learning.pullCount A a s ω (x : ), id x ν a < Learning.empMean A R a s ω - ucbWidth A c a s ω} 1 s

Code

lemma pullCount_le_add_three_ae [Nonempty (Fin K)]
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P)
    (a : Fin K) (n C : ℕ) (hC : C ≠ 0) :
    ∀ᵐ ω ∂P,
    pullCount A a n ω ≤ C + 1 +
      ∑ s ∈ range n, {s | A s ω = a ∧ C < pullCount A a s ω ∧
        (ν (bestArm ν))[id] ≤ empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω ∧
        empMean A R (A s ω) s ω - ucbWidth A c (A s ω) s ω ≤ (ν (A s ω))[id]}.indicator 1 s +
      ∑ s ∈ range n,
        {s | 0 < pullCount A (bestArm ν) s ω ∧
          empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω <
            (ν (bestArm ν))[id]}.indicator 1 s +
      ∑ s ∈ range n,
        {s | 0 < pullCount A a s ω ∧ (ν a)[id] <
          empMean A R a s ω - ucbWidth A c a s ω}.indicator 1 s
Type uses (7)
Body uses (2)
Used by (1)

Actions: Source · Open Issue

Proof
by
  filter_upwards [pullCount_pos_of_pullCount_gt_one h a] with ω hω
  refine (pullCount_le_add_three (R := R) a n C ω (ν := ν) (c := c)).trans ?_
  gcongr 5 with k hk j k hk j
  · gcongr 1
    exact fun h_gt ↦ hω _ (lt_of_le_of_lt (by grind) h_gt) _
  · exact fun h_gt ↦ hω _ (lt_of_le_of_lt (by grind) h_gt) _

some_sum_eq_zero🔗

LemmaBandits.UCB.some_sum_eq_zero

No docstring.

🔗theorem
Bandits.UCB.some_sum_eq_zero.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} [Nonempty (Fin K)] (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (Learning.stationaryEnv ν) P) (hc : 0 c) (a : Fin K) (h_gap : 0 < gap ν a) (n C : ) (hC : C 0) (hC' : 8 * c * σ2 * Real.log (n + 1) / gap ν a ^ 2 C) : ∀ᵐ (ω : Ω) P, s Finset.range n, Set.indicator {s | A s ω = a C < Learning.pullCount A a s ω (x : ), id x ν (bestArm ν) Learning.empMean A R (bestArm ν) s ω + ucbWidth A (c * σ2) (bestArm ν) s ω Learning.empMean A R (A s ω) s ω - ucbWidth A (c * σ2) (A s ω) s ω (x : ), id x ν (A s ω)} 1 s = 0
Bandits.UCB.some_sum_eq_zero.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} [Nonempty (Fin K)] (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (Learning.stationaryEnv ν) P) (hc : 0 c) (a : Fin K) (h_gap : 0 < gap ν a) (n C : ) (hC : C 0) (hC' : 8 * c * σ2 * Real.log (n + 1) / gap ν a ^ 2 C) : ∀ᵐ (ω : Ω) P, s Finset.range n, Set.indicator {s | A s ω = a C < Learning.pullCount A a s ω (x : ), id x ν (bestArm ν) Learning.empMean A R (bestArm ν) s ω + ucbWidth A (c * σ2) (bestArm ν) s ω Learning.empMean A R (A s ω) s ω - ucbWidth A (c * σ2) (A s ω) s ω (x : ), id x ν (A s ω)} 1 s = 0

Code

lemma some_sum_eq_zero [Nonempty (Fin K)]
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (stationaryEnv ν) P)
    (hc : 0 ≤ c) (a : Fin K) (h_gap : 0 < gap ν a) (n C : ℕ)
    (hC : C ≠ 0) (hC' : 8 * c * σ2 * log (n + 1) / gap ν a ^ 2 ≤ C) :
    ∀ᵐ ω ∂P,
    ∑ s ∈ range n, {s | A s ω = a ∧ C < pullCount A a s ω ∧
      (ν (bestArm ν))[id] ≤ empMean A R (bestArm ν) s ω + ucbWidth A (c * σ2) (bestArm ν) s ω ∧
      empMean A R (A s ω) s ω - ucbWidth A (c * σ2) (A s ω) s ω
        ≤ (ν (A s ω))[id]}.indicator 1 s = 0
Type uses (8)
Body uses (3)
Used by (1)

Actions: Source · Open Issue

Proof
by
  have h_ae := forall_ucbIndex_le_ucbIndex_arm h (bestArm ν) (ν := ν) (c := c * σ2) (hK := hK)
  have h_gt := time_gt_of_pullCount_gt_one h a (ν := ν) (c := c * σ2) (hK := hK)
  filter_upwards [h_ae, h_gt] with ω h_le h_time_ge
  simp only [id_eq, tsub_le_iff_right, sum_eq_zero_iff, mem_range, Set.indicator_apply_eq_zero,
    Set.mem_setOf_eq, Pi.one_apply, one_ne_zero, imp_false, not_and, not_le]
  intro k hn h_arm hC_lt h_le_best
  by_contra! h_le_arm
  have h := pullCount_arm_le (by positivity : 0 ≤ c * σ2) h_le_best (by simpa) ?_ ?_ ?_
  rotate_left
  · refine h_le _ ?_
    refine (h_time_ge _ ?_).le
    refine lt_of_le_of_lt ?_ hC_lt
    grind
  · rwa [h_arm]
  · rw [h_arm]
    exact zero_le.trans_lt hC_lt
  refine lt_irrefl (8 * c * σ2 * log (n + 1) / gap ν a ^ 2) ?_
  refine hC'.trans_lt (lt_of_lt_of_le ?_ (h.trans ?_))
  · rw [h_arm]
    exact mod_cast hC_lt
  · rw [h_arm]
    simp_rw [← mul_assoc]
    gcongr

pullCount_ae_le_add_two🔗

LemmaBandits.UCB.pullCount_ae_le_add_two

No docstring.

🔗theorem
Bandits.UCB.pullCount_ae_le_add_two.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} [Nonempty (Fin K)] (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (Learning.stationaryEnv ν) P) (hc : 0 c) (a : Fin K) (h_gap : 0 < gap ν a) (n C : ) (hC : C 0) (hC' : 8 * c * σ2 * Real.log (n + 1) / gap ν a ^ 2 C) : ∀ᵐ (ω : Ω) P, Learning.pullCount A a n ω C + 1 + s Finset.range n, Set.indicator {s | 0 < Learning.pullCount A (bestArm ν) s ω Learning.empMean A R (bestArm ν) s ω + ucbWidth A (c * σ2) (bestArm ν) s ω < (x : ), id x ν (bestArm ν)} 1 s + s Finset.range n, Set.indicator {s | 0 < Learning.pullCount A a s ω (x : ), id x ν a < Learning.empMean A R a s ω - ucbWidth A (c * σ2) a s ω} 1 s
Bandits.UCB.pullCount_ae_le_add_two.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} [Nonempty (Fin K)] (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (Learning.stationaryEnv ν) P) (hc : 0 c) (a : Fin K) (h_gap : 0 < gap ν a) (n C : ) (hC : C 0) (hC' : 8 * c * σ2 * Real.log (n + 1) / gap ν a ^ 2 C) : ∀ᵐ (ω : Ω) P, Learning.pullCount A a n ω C + 1 + s Finset.range n, Set.indicator {s | 0 < Learning.pullCount A (bestArm ν) s ω Learning.empMean A R (bestArm ν) s ω + ucbWidth A (c * σ2) (bestArm ν) s ω < (x : ), id x ν (bestArm ν)} 1 s + s Finset.range n, Set.indicator {s | 0 < Learning.pullCount A a s ω (x : ), id x ν a < Learning.empMean A R a s ω - ucbWidth A (c * σ2) a s ω} 1 s

Code

lemma pullCount_ae_le_add_two [Nonempty (Fin K)]
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (stationaryEnv ν) P)
    (hc : 0 ≤ c) (a : Fin K) (h_gap : 0 < gap ν a)
    (n C : ℕ) (hC : C ≠ 0) (hC' : 8 * c * σ2 * log (n + 1) / gap ν a ^ 2 ≤ C) :
    ∀ᵐ ω ∂P,
    pullCount A a n ω ≤ C + 1 +
      ∑ s ∈ range n,
        {s | 0 < pullCount A (bestArm ν) s ω ∧
          empMean A R (bestArm ν) s ω + ucbWidth A (c * σ2) (bestArm ν) s ω <
            (ν (bestArm ν))[id]}.indicator 1 s +
      ∑ s ∈ range n,
        {s | 0 < pullCount A a s ω ∧ (ν a)[id] <
          empMean A R a s ω - ucbWidth A (c * σ2) a s ω}.indicator 1 s
Type uses (8)
Body uses (2)
Used by (1)

Actions: Source · Open Issue

Proof
by
  filter_upwards [some_sum_eq_zero h hc a h_gap n C hC hC',
    pullCount_le_add_three_ae h a n C hC] with ω hω_zero hω_le
  refine (hω_le).trans_eq ?_
  rw [hω_zero]

constSum🔗

DefinitionBandits.UCB.constSum

A sum that appears in the UCB regret upper bound.

🔗def
Bandits.UCB.constSum (c : ) (n : ) : ENNReal
Bandits.UCB.constSum (c : ) (n : ) : ENNReal

Code

noncomputable
def constSum (c : ℝ) (n : ℕ) : ℝ≥0∞ := ∑ s ∈ range n, 1 / ((s : ℝ≥0∞) + 1) ^ (c - 1)
Used by (4)

Actions: Source · Open Issue

constSum_lt_top🔗

LemmaBandits.UCB.constSum_lt_top

No docstring.

🔗theorem
Bandits.UCB.constSum_lt_top (c : ) (n : ) : constSum c n <
Bandits.UCB.constSum_lt_top (c : ) (n : ) : constSum c n <

Code

lemma constSum_lt_top (c : ℝ) (n : ℕ) : constSum c n < ∞
Type uses (1)
Used by (1)

Actions: Source · Open Issue

Proof
by
  rw [constSum, ENNReal.sum_lt_top]
  intro k hk
  simp only [one_div, ENNReal.inv_lt_top]
  positivity

expectation_pullCount_le'🔗

LemmaBandits.UCB.expectation_pullCount_le'

Bound on the expectation of the number of pulls of each arm by the UCB algorithm.

🔗theorem
Bandits.UCB.expectation_pullCount_le'.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (Learning.stationaryEnv ν) P) ( : (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 < c) (a : Fin K) (h_gap : 0 < gap ν a) (n : ) : ∫⁻ (ω : Ω), (Learning.pullCount A a n ω) P ENNReal.ofReal (8 * c * σ2 * Real.log (n + 1) / gap ν a ^ 2 + 1) + 1 + 2 * constSum c n
Bandits.UCB.expectation_pullCount_le'.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (Learning.stationaryEnv ν) P) ( : (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 < c) (a : Fin K) (h_gap : 0 < gap ν a) (n : ) : ∫⁻ (ω : Ω), (Learning.pullCount A a n ω) P ENNReal.ofReal (8 * c * σ2 * Real.log (n + 1) / gap ν a ^ 2 + 1) + 1 + 2 * constSum c n

Code

lemma expectation_pullCount_le'
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (stationaryEnv ν) P)
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) (hc : 0 < c) (a : Fin K) (h_gap : 0 < gap ν a) (n : ℕ) :
    ∫⁻ ω, pullCount A a n ω ∂P ≤
      ENNReal.ofReal (8 * c * σ2 * log (n + 1) / gap ν a ^ 2 + 1) + 1 + 2 * constSum c n
Type uses (6)
Body uses (12)
Used by (1)

Actions: Source · Open Issue

Proof
by
  have hA := h.measurable_action
  have hR := h.measurable_feedback
  by_cases hn_zero : n = 0
  · simp [hn_zero]
  let C a : ℕ := ⌈8 * c * σ2 * log (n + 1) / gap ν a ^ 2⌉₊
  have : Nonempty (Fin K) := Fin.pos_iff_nonempty.mp hK
  have h_set_1 b : MeasurableSet {a_1 | 0 < pullCount A a b a_1 ∧
      (ν a)[id] < empMean A R a b a_1 - ucbWidth A (c * σ2) a b a_1} := by
    change MeasurableSet ({a_1 | 0 < pullCount A a b a_1} ∩
      {a_1 | (ν a)[id] < empMean A R a b a_1 - ucbWidth A (c * σ2) a b a_1})
    exact (measurableSet_lt (by fun_prop) (by fun_prop)).inter
      (measurableSet_lt (by fun_prop) (by fun_prop))
  have h_set_2 b : MeasurableSet {a | 0 < pullCount A (bestArm ν) b a ∧
      empMean A R (bestArm ν) b a + ucbWidth A (c * σ2) (bestArm ν) b a < (ν (bestArm ν))[id]} := by
    change MeasurableSet ({a | 0 < pullCount A (bestArm ν) b a} ∩
      {a | empMean A R (bestArm ν) b a + ucbWidth A (c * σ2) (bestArm ν) b a < (ν (bestArm ν))[id]})
    exact (measurableSet_lt (by fun_prop) (by fun_prop)).inter
      (measurableSet_lt (by fun_prop) (by fun_prop))
  have h_meas_1 b : Measurable fun h ↦ {s | 0 < pullCount A a s h ∧ (ν a)[id] <
      empMean A R a s h - ucbWidth A (c * σ2) a s h}.indicator (1 : ℕ → ℕ) b := by
    simp only [id_eq, Set.indicator_apply, Set.mem_setOf_eq, Pi.one_apply]
    exact Measurable.ite (h_set_1 _) (by fun_prop) (by fun_prop)
  have h_meas_2 b : Measurable fun h ↦ {s | 0 < pullCount A (bestArm ν) s h ∧
      empMean A R (bestArm ν) s h + ucbWidth A (c * σ2) (bestArm ν) s h <
          (ν (bestArm ν))[id]}.indicator (1 : ℕ → ℕ) b := by
    simp only [id_eq, Set.indicator_apply, Set.mem_setOf_eq, Pi.one_apply]
    exact Measurable.ite (h_set_2 _) (by fun_prop) (by fun_prop)
  calc ∫⁻ ω, pullCount A a n ω ∂P
  _ ≤ ∫⁻ ω, C a + 1 +
      ∑ s ∈ range n,
        {s | 0 < pullCount A (bestArm ν) s ω ∧
          empMean A R (bestArm ν) s ω + ucbWidth A (c * σ2) (bestArm ν) s ω <
            (ν (bestArm ν))[id]}.indicator (1 : ℕ → ℕ) s +
      ∑ s ∈ range n,
        {s | 0 < pullCount A a s ω ∧ (ν a)[id] <
          empMean A R a s ω - ucbWidth A (c * σ2) a s ω}.indicator (1 : ℕ → ℕ) s ∂P := by
    refine lintegral_mono_ae ?_
    have hCa : C a ≠ 0 := by
      simp only [ne_eq, Nat.ceil_eq_zero, not_le, C]
      have : 0 < log (n + 1) := log_pos (by simp; grind)
      positivity
    filter_upwards [pullCount_ae_le_add_two h hc.le a h_gap n (C a) hCa (Nat.le_ceil _)] with ω hω
    simp only [id_eq, Nat.cast_sum]
    norm_cast
  _ ≤ (C a : ℝ≥0∞) + 1 +
      ∑ s ∈ range n,
        P {ω | 0 < pullCount A (bestArm ν) s ω ∧
          empMean A R (bestArm ν) s ω + ucbWidth A (c * σ2) (bestArm ν) s ω < (ν (bestArm ν))[id]} +
      ∑ s ∈ range n,
        P {ω | 0 < pullCount A a s ω ∧ (ν a)[id] <
          empMean A R a s ω - ucbWidth A (c * σ2) a s ω} := by
    simp only [id_eq, Nat.cast_sum]
    rw [lintegral_add_left (by fun_prop), lintegral_add_left (by fun_prop)]
    simp only [lintegral_const, measure_univ, mul_one]
    rw [lintegral_finsetSum _ (by fun_prop), lintegral_finsetSum _ (by fun_prop)]
    gcongr with k hk k hk
    · rw [← lintegral_indicator_one]
      swap; · exact h_set_2 _
      gcongr with h
      simp [Set.indicator_apply]
    · rw [← lintegral_indicator_one]
      swap; · exact h_set_1 _
      gcongr with h
      simp [Set.indicator_apply]
  _ ≤ (C a : ℝ≥0∞) + 1 +
      ∑ s ∈ range n, 1 / ((s : ℝ≥0∞) + 1) ^ (c - 1) +
      ∑ s ∈ range n, 1 / ((s : ℝ≥0∞) + 1) ^ (c - 1) := by
    gcongr with s hs s hs
    · refine (measure_mono ?_).trans (prob_ucbIndex_le h hν hσ2 (by positivity) (bestArm ν) s)
      grind
    · refine (measure_mono ?_).trans (prob_ucbIndex_ge h hν hσ2 (by positivity) a s)
      grind
  _ ≤ ENNReal.ofReal (8 * c * σ2 * log (n + 1) / gap ν a ^ 2 + 1) + 1 + 2 * constSum c n := by
    rw [two_mul, add_assoc, constSum]
    gcongr
    simp only [C]
    rw [← ENNReal.ofReal_natCast]
    refine ENNReal.ofReal_le_ofReal ?_
    refine (Nat.ceil_lt_add_one ?_).le
    have : 0 ≤ log (n + 1) := log_nonneg (by simp)
    positivity

expectation_pullCount_le🔗

LemmaBandits.UCB.expectation_pullCount_le

Bound on the expectation of the number of pulls of each arm by the UCB algorithm.

🔗theorem
Bandits.UCB.expectation_pullCount_le.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (Learning.stationaryEnv ν) P) ( : (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 < c) (a : Fin K) (h_gap : 0 < gap ν a) (n : ) : (x : Ω), (fun ω => (Learning.pullCount A a n ω)) x P 8 * c * σ2 * Real.log (n + 1) / gap ν a ^ 2 + 2 + 2 * ENNReal.toReal (constSum c n)
Bandits.UCB.expectation_pullCount_le.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (Learning.stationaryEnv ν) P) ( : (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 < c) (a : Fin K) (h_gap : 0 < gap ν a) (n : ) : (x : Ω), (fun ω => (Learning.pullCount A a n ω)) x P 8 * c * σ2 * Real.log (n + 1) / gap ν a ^ 2 + 2 + 2 * ENNReal.toReal (constSum c n)

Code

lemma expectation_pullCount_le
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (stationaryEnv ν) P)
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) (hc : 0 < c) (a : Fin K) (h_gap : 0 < gap ν a) (n : ℕ) :
    P[fun ω ↦ (pullCount A a n ω : ℝ)] ≤
      8 * c * σ2 * log (n + 1) / gap ν a ^ 2 + 2 + 2 * (constSum c n).toReal
Type uses (6)
Body uses (5)
Used by (1)

Actions: Source · Open Issue

Proof
by
  have hA := h.measurable_action
  have h := expectation_pullCount_le' h hν hσ2 hc a h_gap n (hK := hK)
  simp_rw [← ENNReal.ofReal_natCast] at h
  rw [← ofReal_integral_eq_lintegral_ofReal] at h
  rotate_left
  · exact integrable_pullCount hA _ _
  · exact ae_of_all _ fun _ ↦ by simp
  simp only
  have : 0 ≤ log (n + 1) := log_nonneg (by simp)
  rw [← ENNReal.ofReal_toReal (a := 2 * constSum c n), ← ENNReal.ofReal_one, ← ENNReal.ofReal_add,
    ← ENNReal.ofReal_add, ENNReal.ofReal_le_ofReal_iff] at h
  rotate_left
  · positivity
  · positivity
  · simp
  · have : constSum c n ≠ ∞ := (constSum_lt_top c n).ne
    finiteness
  · simp
  · have : constSum c n ≠ ∞ := (constSum_lt_top c n).ne
    finiteness
  refine h.trans_eq ?_
  simp only [ENNReal.toReal_mul, ENNReal.toReal_ofNat, add_left_inj]
  ring

regret_le🔗

TheoremBandits.UCB.regret_le

Regret bound for the UCB algorithm.

🔗theorem
Bandits.UCB.regret_le.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (Learning.stationaryEnv ν) P) ( : (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 < c) (n : ) : (x : Ω), regret ν A n x P a, (8 * c * σ2 * Real.log (n + 1) / gap ν a + gap ν a * (2 + 2 * ENNReal.toReal (constSum c n)))
Bandits.UCB.regret_le.{u_1} {K : } {hK : 0 < K} {c : } {ν : ProbabilityTheory.Kernel (Fin K) } [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1} { : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P] {A : Ω Fin K} {R : Ω } {σ2 : NNReal} (h : Learning.IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (Learning.stationaryEnv ν) P) ( : (a : Fin K), ProbabilityTheory.HasSubgaussianMGF (fun x => x - (x : ), id x ν a) σ2 (ν a)) (hσ2 : σ2 0) (hc : 0 < c) (n : ) : (x : Ω), regret ν A n x P a, (8 * c * σ2 * Real.log (n + 1) / gap ν a + gap ν a * (2 + 2 * ENNReal.toReal (constSum c n)))

Code

theorem regret_le (h : IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (stationaryEnv ν) P)
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) (hc : 0 < c) (n : ℕ) :
    P[regret ν A n] ≤
      ∑ a, (8 * c * σ2 * log (n + 1) / gap ν a + gap ν a * (2 + 2 * (constSum c n).toReal))
Type uses (6)
Body uses (3)

Actions: Source · Open Issue

Proof
by
  refine (integral_regret_le_of_forall_integral_pullCount_le h
    (fun a h_gap ↦ expectation_pullCount_le h hν hσ2 hc a
      (lt_of_le_of_ne' gap_nonneg h_gap) n)).trans_eq ?_
  congr with a
  by_cases h_gap : gap ν a = 0
  · simp [h_gap]
  · field
  1. Bandits.ucbWidth'
  2. Bandits.UCB.nextArm
  3. Bandits.UCB.measurable_nextArm
  4. Bandits.ucbAlgorithm
  5. Bandits.UCB.isAlgEnvSeqUntil_roundRobinAlgorithm
  6. Bandits.UCB.ucbWidth
  7. Bandits.UCB.measurable_ucbWidth
  8. Bandits.UCB.ucbWidth_eq_ucbWidth'
  9. Bandits.UCB.arm_zero
  10. Bandits.UCB.arm_ae_eq_ucbNextArm
  11. Bandits.UCB.arm_ae_all_eq
  12. Bandits.UCB.ucbIndex_le_ucbIndex_arm
  13. Bandits.UCB.forall_arm_eq_mod_of_lt
  14. Bandits.UCB.forall_ucbIndex_le_ucbIndex_arm
  15. Bandits.UCB.forall_arm_prop
  16. Bandits.UCB.time_gt_of_pullCount_gt_one
  17. Bandits.UCB.pullCount_pos_of_pullCount_gt_one
  18. Bandits.UCB.gap_arm_le_two_mul_ucbWidth
  19. Bandits.UCB.pullCount_arm_le
  20. Bandits.UCB.prob_ucbIndex_le
  21. Bandits.UCB.prob_ucbIndex_ge
  22. Bandits.UCB.probReal_ucbIndex_le
  23. Bandits.UCB.probReal_ucbIndex_ge
  24. Bandits.UCB.pullCount_le_add_three
  25. Bandits.UCB.pullCount_le_add_three_ae
  26. Bandits.UCB.some_sum_eq_zero
  27. Bandits.UCB.pullCount_ae_le_add_two
  28. Bandits.UCB.constSum
  29. Bandits.UCB.constSum_lt_top
  30. Bandits.UCB.expectation_pullCount_le'
  31. Bandits.UCB.expectation_pullCount_le
  32. Bandits.UCB.regret_le