Online.Bandit.Algorithms.UCB

`ucbWidth'`🔗

DefinitionBandits.ucbWidth'

Details

The exploration bonus of the UCB algorithm, which corresponds to the width of a confidence interval.

def

Bandits.ucbWidth' {K : ℕ} (c : ℝ) (n : ℕ)
  (h : ↥(Finset.Iic n) → Fin K × ℝ) (a : Fin K) : ℝ
Bandits.ucbWidth' {K : ℕ} (c : ℝ) (n : ℕ)
  (h : ↥(Finset.Iic n) → Fin K × ℝ)
  (a : Fin K) : ℝ

Code

noncomputable def ucbWidth' (c : ℝ) (n : ℕ) (h : Iic n → Fin K × ℝ) (a : Fin K) : ℝ :=
  √(2 * c * log (n + 2) / pullCount' n h a)

Body uses (1)

pullCount'

Used by (6)

Actions: Source · Open Issue

`nextArm`🔗

DefinitionBandits.UCB.nextArm

Details

Arm pulled by the UCB algorithm at time n + 1.

def

Bandits.UCB.nextArm {K : ℕ} (hK : 0 < K) (c : ℝ) (n : ℕ)
  (h : ↥(Finset.Iic n) → Fin K × ℝ) : Fin K
Bandits.UCB.nextArm {K : ℕ} (hK : 0 < K)
  (c : ℝ) (n : ℕ)
  (h : ↥(Finset.Iic n) → Fin K × ℝ) :
  Fin K

Code

noncomputable
def UCB.nextArm (hK : 0 < K) (c : ℝ) (n : ℕ) (h : Iic n → Fin K × ℝ) : Fin K :=
  have : Nonempty (Fin K) := Fin.pos_iff_nonempty.mp hK
  if n < K - 1 then RoundRobin.nextAction hK n else
  argmax (fun a ↦ empMean' n h a + ucbWidth' c n h a)

Body uses (4)

Used by (7)

Actions: Source · Open Issue

`measurable_nextArm`🔗

LemmaBandits.UCB.measurable_nextArm

Details

No docstring.

theorem

Bandits.UCB.measurable_nextArm {K : ℕ} (hK : 0 < K) (c : ℝ) (n : ℕ) :
  Measurable (nextArm hK c n)
Bandits.UCB.measurable_nextArm {K : ℕ}
  (hK : 0 < K) (c : ℝ) (n : ℕ) :
  Measurable (nextArm hK c n)

Code

lemma UCB.measurable_nextArm (hK : 0 < K) (c : ℝ) (n : ℕ) : Measurable (nextArm hK c n)

Type uses (1)

nextArm

Body uses (9)

Used by (3)

Actions: Source · Open Issue

Proof

by
  refine Measurable.ite (by simp) (by fun_prop) ?_
  have : Nonempty (Fin K) := Fin.pos_iff_nonempty.mp hK
  unfold ucbWidth'
  fun_prop

`ucbAlgorithm`🔗

DefinitionBandits.ucbAlgorithm

Details

The UCB algorithm.

def

Bandits.ucbAlgorithm {K : ℕ} (hK : 0 < K) (c : ℝ) :
  Learning.Algorithm (Fin K) ℝ
Bandits.ucbAlgorithm {K : ℕ} (hK : 0 < K)
  (c : ℝ) : Learning.Algorithm (Fin K) ℝ

Code

noncomputable
def ucbAlgorithm (hK : 0 < K) (c : ℝ) : Algorithm (Fin K) ℝ :=
  detAlgorithm (UCB.nextArm hK c) (by fun_prop) ⟨0, hK⟩

Type uses (1)

Algorithm

Body uses (3)

Used by (16)

Actions: Source · Open Issue

`isAlgEnvSeqUntil_roundRobinAlgorithm`🔗

LemmaBandits.UCB.isAlgEnvSeqUntil_roundRobinAlgorithm

Details

Until round K - 1, the UCB algorithm behaves like the Round-Robin algorithm.

theorem

Bandits.UCB.isAlgEnvSeqUntil_roundRobinAlgorithm.{u_1} {K : ℕ}
  {hK : 0 < K} {c : ℝ} {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P) :
  Learning.IsAlgEnvSeqUntil A R (Learning.roundRobinAlgorithm hK)
    (Learning.stationaryEnv ν) P (K - 1)
Bandits.UCB.isAlgEnvSeqUntil_roundRobinAlgorithm.{u_1}
  {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R
      (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P) :
  Learning.IsAlgEnvSeqUntil A R
    (Learning.roundRobinAlgorithm hK)
    (Learning.stationaryEnv ν) P (K - 1)

Code

lemma isAlgEnvSeqUntil_roundRobinAlgorithm
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) :
    IsAlgEnvSeqUntil A R (roundRobinAlgorithm hK) (stationaryEnv ν) P (K - 1) where
  measurable_action

Type uses (5)

Body uses (10)

Used by (3)

Actions: Source · Open Issue

Proof

h.measurable_action
  measurable_feedback := h.measurable_feedback
  hasLaw_action_zero := h.hasLaw_action_zero
  hasCondDistrib_feedback_zero := h.hasCondDistrib_feedback_zero
  hasCondDistrib_action n hn := by
    convert h.hasCondDistrib_action n using 1
    simp only [roundRobinAlgorithm, detAlgorithm_policy, ucbAlgorithm]
    congr 1 with h
    simp [UCB.nextArm, hn]
  hasCondDistrib_feedback n _ := h.hasCondDistrib_feedback n

`ucbWidth`🔗

DefinitionBandits.UCB.ucbWidth

Details

The exploration bonus of the UCB algorithm, which corresponds to the width of a confidence interval.

def

Bandits.UCB.ucbWidth.{u_1} {K : ℕ} {Ω : Type u_1} (A : ℕ → Ω → Fin K)
  (c : ℝ) (a : Fin K) (n : ℕ) (ω : Ω) : ℝ
Bandits.UCB.ucbWidth.{u_1} {K : ℕ}
  {Ω : Type u_1} (A : ℕ → Ω → Fin K)
  (c : ℝ) (a : Fin K) (n : ℕ) (ω : Ω) : ℝ

Code

noncomputable def ucbWidth (A : ℕ → Ω → Fin K) (c : ℝ) (a : Fin K) (n : ℕ) (ω : Ω) : ℝ :=
  √(2 * c * log (n + 1) / pullCount A a n ω)

Body uses (1)

pullCount

Used by (16)

Actions: Source · Open Issue

`measurable_ucbWidth`🔗

LemmaBandits.UCB.measurable_ucbWidth

Details

No docstring.

theorem

Bandits.UCB.measurable_ucbWidth.{u_1} {K : ℕ} {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {A : ℕ → Ω → Fin K} {n : ℕ}
  (hA : ∀ (n : ℕ), Measurable (A n)) (c : ℝ) (a : Fin K) :
  Measurable (ucbWidth A c a n)
Bandits.UCB.measurable_ucbWidth.{u_1}
  {K : ℕ} {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω}
  {A : ℕ → Ω → Fin K} {n : ℕ}
  (hA : ∀ (n : ℕ), Measurable (A n))
  (c : ℝ) (a : Fin K) :
  Measurable (ucbWidth A c a n)

Code

lemma measurable_ucbWidth (hA : ∀ n, Measurable (A n)) (c : ℝ) (a : Fin K) :
    Measurable (ucbWidth A c a n)

Type uses (1)

ucbWidth

Body uses (2)

Used by (1)

expectation_pullCount_le'

Actions: Source · Open Issue

Proof

by
  unfold ucbWidth
  fun_prop

`ucbWidth_eq_ucbWidth'`🔗

LemmaBandits.UCB.ucbWidth_eq_ucbWidth'

Details

No docstring.

theorem

Bandits.UCB.ucbWidth_eq_ucbWidth'.{u_1} {K : ℕ} {Ω : Type u_1}
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ} (c : ℝ) (a : Fin K) (n : ℕ)
  (ω : Ω) (hn : n ≠ 0) :
  ucbWidth A c a n ω =
    ucbWidth' c (n - 1) (Learning.history A R (n - 1) ω) a
Bandits.UCB.ucbWidth_eq_ucbWidth'.{u_1}
  {K : ℕ} {Ω : Type u_1}
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  (c : ℝ) (a : Fin K) (n : ℕ) (ω : Ω)
  (hn : n ≠ 0) :
  ucbWidth A c a n ω =
    ucbWidth' c (n - 1)
      (Learning.history A R (n - 1) ω) a

Code

lemma ucbWidth_eq_ucbWidth' (c : ℝ) (a : Fin K) (n : ℕ) (ω : Ω) (hn : n ≠ 0) :
    ucbWidth A c a n ω = ucbWidth' c (n - 1) (history A R (n - 1) ω) a

Type uses (3)

Body uses (3)

Used by (1)

ucbIndex_le_ucbIndex_arm

Actions: Source · Open Issue

Proof

by
  simp only [ucbWidth, pullCount_eq_pullCount' (A := A) (R' := R) hn, Nat.cast_nonneg, sqrt_div',
    ucbWidth']
  congr 4
  norm_cast
  grind

`arm_zero`🔗

LemmaBandits.UCB.arm_zero

Details

No docstring.

theorem

Bandits.UCB.arm_zero.{u_1} {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P) :
  A 0 =ᵐ[P] fun x => ⟨0, hK⟩
Bandits.UCB.arm_zero.{u_1} {K : ℕ}
  {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R
      (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P) :
  A 0 =ᵐ[P] fun x => ⟨0, hK⟩

Code

lemma arm_zero (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) :
    A 0 =ᵐ[P] fun _ ↦ ⟨0, hK⟩

Type uses (3)

Body uses (4)

Used by (2)

Actions: Source · Open Issue

Proof

RoundRobin.action_zero ((isAlgEnvSeqUntil_roundRobinAlgorithm h).mono zero_le)

`arm_ae_eq_ucbNextArm`🔗

LemmaBandits.UCB.arm_ae_eq_ucbNextArm

Details

No docstring.

theorem

Bandits.UCB.arm_ae_eq_ucbNextArm.{u_1} {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P)
  (n : ℕ) :
  A (n + 1) =ᵐ[P] fun ω => nextArm hK c n (Learning.history A R n ω)
Bandits.UCB.arm_ae_eq_ucbNextArm.{u_1}
  {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R
      (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P)
  (n : ℕ) :
  A (n + 1) =ᵐ[P] fun ω =>
    nextArm hK c n
      (Learning.history A R n ω)

Code

lemma arm_ae_eq_ucbNextArm (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) (n : ℕ) :
    A (n + 1) =ᵐ[P] fun ω ↦ nextArm hK c n (history A R n ω)

Type uses (5)

Body uses (3)

Used by (3)

Actions: Source · Open Issue

Proof

by
  have : Nonempty (Fin K) := Fin.pos_iff_nonempty.mp hK
  exact h.action_detAlgorithm_ae_eq n

`arm_ae_all_eq`🔗

LemmaBandits.UCB.arm_ae_all_eq

Details

No docstring.

theorem

Bandits.UCB.arm_ae_all_eq.{u_1} {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P) :
  ∀ᵐ (h : Ω) ∂P,
    A 0 h = ⟨0, hK⟩ ∧
      ∀ (n : ℕ), A (n + 1) h = nextArm hK c n (Learning.history A R n h)
Bandits.UCB.arm_ae_all_eq.{u_1} {K : ℕ}
  {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R
      (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P) :
  ∀ᵐ (h : Ω) ∂P,
    A 0 h = ⟨0, hK⟩ ∧
      ∀ (n : ℕ),
        A (n + 1) h =
          nextArm hK c n
            (Learning.history A R n h)

Code

lemma arm_ae_all_eq (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) :
    ∀ᵐ h ∂P, A 0 h = ⟨0, hK⟩ ∧ ∀ n, A (n + 1) h = nextArm hK c n (history A R n h)

Type uses (5)

Body uses (2)

Actions: Source · Open Issue

Proof

by
  rw [eventually_and, ae_all_iff]
  exact ⟨arm_zero h, arm_ae_eq_ucbNextArm h⟩

`ucbIndex_le_ucbIndex_arm`🔗

LemmaBandits.UCB.ucbIndex_le_ucbIndex_arm

Details

No docstring.

theorem

Bandits.UCB.ucbIndex_le_ucbIndex_arm.{u_1} {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} {n : ℕ}
  (h :
    Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P)
  (a : Fin K) (hn : K ≤ n) :
  ∀ᵐ (h : Ω) ∂P,
    Learning.empMean A R a n h + ucbWidth A c a n h ≤
      Learning.empMean A R (A n h) n h + ucbWidth A c (A n h) n h
Bandits.UCB.ucbIndex_le_ucbIndex_arm.{u_1}
  {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  {n : ℕ}
  (h :
    Learning.IsAlgEnvSeq A R
      (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P)
  (a : Fin K) (hn : K ≤ n) :
  ∀ᵐ (h : Ω) ∂P,
    Learning.empMean A R a n h +
        ucbWidth A c a n h ≤
      Learning.empMean A R (A n h) n h +
        ucbWidth A c (A n h) n h

Code

lemma ucbIndex_le_ucbIndex_arm
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) (a : Fin K) (hn : K ≤ n) :
    ∀ᵐ h ∂P, empMean A R a n h + ucbWidth A c a n h ≤
      empMean A R (A n h) n h + ucbWidth A c (A n h) n h

Type uses (5)

Body uses (10)

Used by (1)

forall_ucbIndex_le_ucbIndex_arm

Actions: Source · Open Issue

Proof

by
  filter_upwards [arm_ae_eq_ucbNextArm h (n - 1)] with h h_arm
  have : n - 1 + 1 = n := by grind
  have h_not_lt : ¬ n - 1 < K - 1 := by grind
  simp only [this, nextArm, h_not_lt, ↓reduceIte] at h_arm
  have : Nonempty (Fin K) := Fin.pos_iff_nonempty.mp hK
  simp_rw [h_arm, empMean_eq_empMean' (by grind : n ≠ 0),
    ucbWidth_eq_ucbWidth' (A := A) (R := R) _ _ _ _ (by grind : n ≠ 0)]
  exact isMaxOn_argmax (fun a ↦ empMean' (n - 1) (history A R (n - 1) h) a
    + ucbWidth' c (n - 1) (history A R (n - 1) h) a) _

`forall_arm_eq_mod_of_lt`🔗

LemmaBandits.UCB.forall_arm_eq_mod_of_lt

Details

No docstring.

theorem

Bandits.UCB.forall_arm_eq_mod_of_lt.{u_1} {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P) :
  ∀ᵐ (h : Ω) ∂P, ∀ n < K, A n h = ⟨n % K, ⋯⟩
Bandits.UCB.forall_arm_eq_mod_of_lt.{u_1}
  {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R
      (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P) :
  ∀ᵐ (h : Ω) ∂P,
    ∀ n < K, A n h = ⟨n % K, ⋯⟩

Code

lemma forall_arm_eq_mod_of_lt (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) :
    ∀ᵐ h ∂P, ∀ n < K, A n h = ⟨n % K, Nat.mod_lt _ hK⟩

Type uses (3)

Body uses (8)

Used by (1)

forall_arm_prop

Actions: Source · Open Issue

Proof

by
  simp_rw [ae_all_iff]
  intro n hn
  induction n with
  | zero => exact arm_zero h
  | succ n _ =>
    filter_upwards [arm_ae_eq_ucbNextArm h n] with h h_eq
    rw [h_eq, nextArm, if_pos]
    · rfl
    · grind

`forall_ucbIndex_le_ucbIndex_arm`🔗

LemmaBandits.UCB.forall_ucbIndex_le_ucbIndex_arm

Details

No docstring.

theorem

Bandits.UCB.forall_ucbIndex_le_ucbIndex_arm.{u_1} {K : ℕ} {hK : 0 < K}
  {c : ℝ} {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P)
  (a : Fin K) :
  ∀ᵐ (h : Ω) ∂P,
    ∀ (n : ℕ),
      K ≤ n →
        Learning.empMean A R a n h + ucbWidth A c a n h ≤
          Learning.empMean A R (A n h) n h + ucbWidth A c (A n h) n h
Bandits.UCB.forall_ucbIndex_le_ucbIndex_arm.{u_1}
  {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R
      (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P)
  (a : Fin K) :
  ∀ᵐ (h : Ω) ∂P,
    ∀ (n : ℕ),
      K ≤ n →
        Learning.empMean A R a n h +
            ucbWidth A c a n h ≤
          Learning.empMean A R (A n h) n
              h +
            ucbWidth A c (A n h) n h

Code

lemma forall_ucbIndex_le_ucbIndex_arm
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) (a : Fin K) :
    ∀ᵐ h ∂P, ∀ n, K ≤ n →
      empMean A R a n h + ucbWidth A c a n h ≤
        empMean A R (A n h) n h + ucbWidth A c (A n h) n h

Type uses (5)

Body uses (1)

ucbIndex_le_ucbIndex_arm

Used by (2)

Actions: Source · Open Issue

Proof

by
  simp_rw [ae_all_iff]
  exact fun _ ↦ ucbIndex_le_ucbIndex_arm h a

`forall_arm_prop`🔗

LemmaBandits.UCB.forall_arm_prop

Details

No docstring.

theorem

Bandits.UCB.forall_arm_prop.{u_1} {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P) :
  ∀ᵐ (h : Ω) ∂P,
    (∀ n < K, A n h = ⟨n % K, ⋯⟩) ∧
      ∀ (n : ℕ),
        K ≤ n →
          ∀ (a : Fin K),
            Learning.empMean A R a n h + ucbWidth A c a n h ≤
              Learning.empMean A R (A n h) n h +
                ucbWidth A c (A n h) n h
Bandits.UCB.forall_arm_prop.{u_1} {K : ℕ}
  {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R
      (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P) :
  ∀ᵐ (h : Ω) ∂P,
    (∀ n < K, A n h = ⟨n % K, ⋯⟩) ∧
      ∀ (n : ℕ),
        K ≤ n →
          ∀ (a : Fin K),
            Learning.empMean A R a n h +
                ucbWidth A c a n h ≤
              Learning.empMean A R (A n h)
                  n h +
                ucbWidth A c (A n h) n h

Code

lemma forall_arm_prop
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) :
    ∀ᵐ h ∂P,
      (∀ n < K, A n h = ⟨n % K, Nat.mod_lt _ hK⟩) ∧
      (∀ n, K ≤ n → ∀ a, empMean A R a n h + ucbWidth A c a n h ≤
        empMean A R (A n h) n h + ucbWidth A c (A n h) n h)

Type uses (5)

Body uses (2)

Actions: Source · Open Issue

Proof

by
  simp only [eventually_and]
  constructor
  · exact forall_arm_eq_mod_of_lt h
  · simp_rw [ae_all_iff]
    intro n hn a
    have h_ae := forall_ucbIndex_le_ucbIndex_arm h a
    simp_rw [ae_all_iff] at h_ae
    exact h_ae n hn

`time_gt_of_pullCount_gt_one`🔗

LemmaBandits.UCB.time_gt_of_pullCount_gt_one

Details

No docstring.

theorem

Bandits.UCB.time_gt_of_pullCount_gt_one.{u_1} {K : ℕ} {hK : 0 < K}
  {c : ℝ} {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P)
  (a : Fin K) :
  ∀ᵐ (ω : Ω) ∂P, ∀ (n : ℕ), 1 < Learning.pullCount A a n ω → K < n
Bandits.UCB.time_gt_of_pullCount_gt_one.{u_1}
  {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R
      (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P)
  (a : Fin K) :
  ∀ᵐ (ω : Ω) ∂P,
    ∀ (n : ℕ),
      1 < Learning.pullCount A a n ω →
        K < n

Code

lemma time_gt_of_pullCount_gt_one
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) (a : Fin K) :
    ∀ᵐ ω ∂P, ∀ n, 1 < pullCount A a n ω → K < n

Type uses (4)

Body uses (2)

Used by (1)

some_sum_eq_zero

Actions: Source · Open Issue

Proof

RoundRobin.time_gt_of_pullCount_gt_one (isAlgEnvSeqUntil_roundRobinAlgorithm h) a

`pullCount_pos_of_pullCount_gt_one`🔗

LemmaBandits.UCB.pullCount_pos_of_pullCount_gt_one

Details

No docstring.

theorem

Bandits.UCB.pullCount_pos_of_pullCount_gt_one.{u_1} {K : ℕ} {hK : 0 < K}
  {c : ℝ} {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P)
  (a : Fin K) :
  ∀ᵐ (ω : Ω) ∂P,
    ∀ (n : ℕ),
      1 < Learning.pullCount A a n ω →
        ∀ (b : Fin K), 0 < Learning.pullCount A b n ω
Bandits.UCB.pullCount_pos_of_pullCount_gt_one.{u_1}
  {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  (h :
    Learning.IsAlgEnvSeq A R
      (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P)
  (a : Fin K) :
  ∀ᵐ (ω : Ω) ∂P,
    ∀ (n : ℕ),
      1 < Learning.pullCount A a n ω →
        ∀ (b : Fin K),
          0 < Learning.pullCount A b n ω

Code

lemma pullCount_pos_of_pullCount_gt_one
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P) (a : Fin K) :
    ∀ᵐ ω ∂P, ∀ n, 1 < pullCount A a n ω → ∀ b : Fin K, 0 < pullCount A b n ω

Type uses (4)

Body uses (2)

Used by (1)

pullCount_le_add_three_ae

Actions: Source · Open Issue

Proof

RoundRobin.pullCount_pos_of_pullCount_gt_one (isAlgEnvSeqUntil_roundRobinAlgorithm h) a

`gap_arm_le_two_mul_ucbWidth`🔗

LemmaBandits.UCB.gap_arm_le_two_mul_ucbWidth

Details

No docstring.

theorem

Bandits.UCB.gap_arm_le_two_mul_ucbWidth.{u_1} {K : ℕ} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ} {Ω : Type u_1}
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ} {n : ℕ} {ω : Ω} [Nonempty (Fin K)]
  (h_best :
    ∫ (x : ℝ), id x ∂ν (bestArm ν) ≤
      Learning.empMean A R (bestArm ν) n ω +
        ucbWidth A c (bestArm ν) n ω)
  (h_arm :
    Learning.empMean A R (A n ω) n ω - ucbWidth A c (A n ω) n ω ≤
      ∫ (x : ℝ), id x ∂ν (A n ω))
  (h_le :
    Learning.empMean A R (bestArm ν) n ω +
        ucbWidth A c (bestArm ν) n ω ≤
      Learning.empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω) :
  gap ν (A n ω) ≤ 2 * ucbWidth A c (A n ω) n ω
Bandits.UCB.gap_arm_le_two_mul_ucbWidth.{u_1}
  {K : ℕ} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  {Ω : Type u_1} {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} {n : ℕ} {ω : Ω}
  [Nonempty (Fin K)]
  (h_best :
    ∫ (x : ℝ), id x ∂ν (bestArm ν) ≤
      Learning.empMean A R (bestArm ν) n
          ω +
        ucbWidth A c (bestArm ν) n ω)
  (h_arm :
    Learning.empMean A R (A n ω) n ω -
        ucbWidth A c (A n ω) n ω ≤
      ∫ (x : ℝ), id x ∂ν (A n ω))
  (h_le :
    Learning.empMean A R (bestArm ν) n ω +
        ucbWidth A c (bestArm ν) n ω ≤
      Learning.empMean A R (A n ω) n ω +
        ucbWidth A c (A n ω) n ω) :
  gap ν (A n ω) ≤
    2 * ucbWidth A c (A n ω) n ω

Code

lemma gap_arm_le_two_mul_ucbWidth [Nonempty (Fin K)]
    (h_best : (ν (bestArm ν))[id] ≤ empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω)
    (h_arm : empMean A R (A n ω) n ω - ucbWidth A c (A n ω) n ω ≤ (ν (A n ω))[id])
    (h_le : empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω ≤
      empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω) :
    gap ν (A n ω) ≤ 2 * ucbWidth A c (A n ω) n ω

Type uses (4)

Body uses (1)

gap_eq_bestArm_sub

Used by (1)

pullCount_arm_le

Actions: Source · Open Issue

Proof

by
  rw [gap_eq_bestArm_sub, sub_le_iff_le_add']
  calc (ν (bestArm ν))[id]
  _ ≤ empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω := h_best
  _ ≤ empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω := h_le
  _ ≤ (ν (A n ω))[id] + 2 * ucbWidth A c (A n ω) n ω := by
    rw [two_mul, ← add_assoc]
    gcongr
    rwa [sub_le_iff_le_add] at h_arm

`pullCount_arm_le`🔗

LemmaBandits.UCB.pullCount_arm_le

Details

No docstring.

theorem

Bandits.UCB.pullCount_arm_le.{u_1} {K : ℕ} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ} {Ω : Type u_1}
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ} {n : ℕ} {ω : Ω} [Nonempty (Fin K)]
  (hc : 0 ≤ c)
  (h_best :
    ∫ (x : ℝ), id x ∂ν (bestArm ν) ≤
      Learning.empMean A R (bestArm ν) n ω +
        ucbWidth A c (bestArm ν) n ω)
  (h_arm :
    Learning.empMean A R (A n ω) n ω - ucbWidth A c (A n ω) n ω ≤
      ∫ (x : ℝ), id x ∂ν (A n ω))
  (h_le :
    Learning.empMean A R (bestArm ν) n ω +
        ucbWidth A c (bestArm ν) n ω ≤
      Learning.empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω)
  (h_gap_pos : 0 < gap ν (A n ω))
  (h_pull_pos : 0 < Learning.pullCount A (A n ω) n ω) :
  ↑(Learning.pullCount A (A n ω) n ω) ≤
    8 * c * Real.log (↑n + 1) / gap ν (A n ω) ^ 2
Bandits.UCB.pullCount_arm_le.{u_1} {K : ℕ}
  {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  {Ω : Type u_1} {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} {n : ℕ} {ω : Ω}
  [Nonempty (Fin K)] (hc : 0 ≤ c)
  (h_best :
    ∫ (x : ℝ), id x ∂ν (bestArm ν) ≤
      Learning.empMean A R (bestArm ν) n
          ω +
        ucbWidth A c (bestArm ν) n ω)
  (h_arm :
    Learning.empMean A R (A n ω) n ω -
        ucbWidth A c (A n ω) n ω ≤
      ∫ (x : ℝ), id x ∂ν (A n ω))
  (h_le :
    Learning.empMean A R (bestArm ν) n ω +
        ucbWidth A c (bestArm ν) n ω ≤
      Learning.empMean A R (A n ω) n ω +
        ucbWidth A c (A n ω) n ω)
  (h_gap_pos : 0 < gap ν (A n ω))
  (h_pull_pos :
    0 <
      Learning.pullCount A (A n ω) n ω) :
  ↑(Learning.pullCount A (A n ω) n ω) ≤
    8 * c * Real.log (↑n + 1) /
      gap ν (A n ω) ^ 2

Code

lemma pullCount_arm_le [Nonempty (Fin K)] (hc : 0 ≤ c)
    (h_best : (ν (bestArm ν))[id] ≤ empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω)
    (h_arm : empMean A R (A n ω) n ω - ucbWidth A c (A n ω) n ω ≤ (ν (A n ω))[id])
    (h_le : empMean A R (bestArm ν) n ω + ucbWidth A c (bestArm ν) n ω ≤
      empMean A R (A n ω) n ω + ucbWidth A c (A n ω) n ω)
    (h_gap_pos : 0 < gap ν (A n ω)) (h_pull_pos : 0 < pullCount A (A n ω) n ω) :
    pullCount A (A n ω) n ω ≤ 8 * c * log (n + 1) / gap ν (A n ω) ^ 2

Type uses (5)

Body uses (1)

gap_arm_le_two_mul_ucbWidth

Used by (1)

some_sum_eq_zero

Actions: Source · Open Issue

Proof

by
  have h_gap_le := gap_arm_le_two_mul_ucbWidth h_best h_arm h_le
  rw [ucbWidth] at h_gap_le
  have h2 : (gap ν (A n ω)) ^ 2 ≤ (2 * √(2 * c * log (n + 1) / pullCount A (A n ω) n ω)) ^ 2 := by
    gcongr
  rw [mul_pow, sq_sqrt] at h2
  · have : (2 : ℝ) ^ 2 = 4 := by norm_num
    rw [this] at h2
    field_simp at h2 ⊢
    grind
  · have : 0 ≤ log (n + 1) := by simp [log_nonneg]
    positivity

`prob_ucbIndex_le`🔗

LemmaBandits.UCB.prob_ucbIndex_le

Details

No docstring.

theorem

Bandits.UCB.prob_ucbIndex_le.{u_1} {K : ℕ} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} {σ2 : NNReal} [Nonempty (Fin K)]
  {alg : Learning.Algorithm (Fin K) ℝ}
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  (hν :
    ∀ (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : Fin K) (n : ℕ) :
  P
      {h |
        0 < Learning.pullCount A a n h ∧
          Learning.empMean A R a n h + ucbWidth A (c * ↑σ2) a n h ≤
            ∫ (x : ℝ), id x ∂ν a} ≤
    1 / (↑n + 1) ^ (c - 1)
Bandits.UCB.prob_ucbIndex_le.{u_1} {K : ℕ}
  {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  {σ2 : NNReal} [Nonempty (Fin K)]
  {alg : Learning.Algorithm (Fin K) ℝ}
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  (hν :
    ∀ (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂ν a)
        σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : Fin K)
  (n : ℕ) :
  P
      {h |
        0 < Learning.pullCount A a n h ∧
          Learning.empMean A R a n h +
              ucbWidth A (c * ↑σ2) a n h ≤
            ∫ (x : ℝ), id x ∂ν a} ≤
    1 / (↑n + 1) ^ (c - 1)

Code

lemma prob_ucbIndex_le [Nonempty (Fin K)] {alg : Algorithm (Fin K) ℝ}
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : Fin K) (n : ℕ) :
    P {h | 0 < pullCount A a n h ∧ empMean A R a n h + ucbWidth A (c * σ2) a n h ≤ (ν a)[id]} ≤
      1 / (n + 1) ^ (c - 1)

Type uses (6)

Body uses (3)

Used by (2)

Actions: Source · Open Issue

Proof

by
  let s : Set (ℕ × ℝ) := {(m, x) | 0 < m ∧ x / m + √(2 * (c * σ2) * log (↑n + 1) / m) ≤ (ν a)[id]}
  have hs : MeasurableSet s := by
    simp only [Nat.cast_nonneg, sqrt_div', id_eq, measurableSet_setOf, s]
    fun_prop
  classical
  calc P {h | 0 < pullCount A a n h ∧ empMean A R a n h + ucbWidth A (c * σ2) a n h ≤ (ν a)[id]}
  _ ≤ ∑ k ∈ range (n + 1) with k ∈ Prod.fst '' s,
      (streamMeasure ν) {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s} :=
    prob_pullCount_prod_sumRewards_mem_le h hs
  _ ≤ ∑ k ∈ Icc 1 n,
      (streamMeasure ν) {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s} := by
    refine Finset.sum_le_sum_of_subset_of_nonneg (fun m ↦ ?_) fun _ _ _ ↦ by positivity
    simp [s]
    grind
  _ = ∑ k ∈ Icc 1 n,
      (streamMeasure ν) {ω | (∑ i ∈ range k, ω i a) / k + √(2 * c * σ2 * log (↑n + 1) / k) ≤
        (ν a)[id]} := by
    refine Finset.sum_congr rfl fun k hk ↦ ?_
    congr with ω
    have hk : 0 < k := by grind
    simp only [Nat.cast_nonneg, sqrt_div', id_eq, Set.preimage_setOf_eq, hk, true_and,
      Set.mem_setOf_eq, s]
    grind
  _ ≤ ∑ k ∈ Icc 1 n, (1 : ℝ≥0∞) / (n + 1) ^ c := by
    gcongr with k hk
    exact prob_avg_add_sqrt_log_le hν hσ2 hc a n k (by grind)
  _ ≤ (n + 1) * (1 : ℝ≥0∞) / (n + 1) ^ c := by
    simp only [one_div, sum_const, Nat.card_Icc, add_tsub_cancel_right, nsmul_eq_mul, mul_one]
    rw [div_eq_mul_inv ((n : ℝ≥0∞) + 1)]
    gcongr
    exact le_self_add
  _ = 1 / (n + 1) ^ (c - 1) := by
    simp only [mul_one, one_div]
    rw [ENNReal.rpow_sub _ _ (by simp) (by finiteness), ENNReal.rpow_one, div_eq_mul_inv,
      ENNReal.div_eq_inv_mul, ENNReal.mul_inv (by simp) (by simp), inv_inv]

`prob_ucbIndex_ge`🔗

LemmaBandits.UCB.prob_ucbIndex_ge

Details

No docstring.

theorem

Bandits.UCB.prob_ucbIndex_ge.{u_1} {K : ℕ} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} {σ2 : NNReal} [Nonempty (Fin K)]
  {alg : Learning.Algorithm (Fin K) ℝ}
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  (hν :
    ∀ (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : Fin K) (n : ℕ) :
  P
      {h |
        0 < Learning.pullCount A a n h ∧
          ∫ (x : ℝ), id x ∂ν a ≤
            Learning.empMean A R a n h - ucbWidth A (c * ↑σ2) a n h} ≤
    1 / (↑n + 1) ^ (c - 1)
Bandits.UCB.prob_ucbIndex_ge.{u_1} {K : ℕ}
  {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  {σ2 : NNReal} [Nonempty (Fin K)]
  {alg : Learning.Algorithm (Fin K) ℝ}
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  (hν :
    ∀ (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂ν a)
        σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : Fin K)
  (n : ℕ) :
  P
      {h |
        0 < Learning.pullCount A a n h ∧
          ∫ (x : ℝ), id x ∂ν a ≤
            Learning.empMean A R a n h -
              ucbWidth A (c * ↑σ2) a n
                h} ≤
    1 / (↑n + 1) ^ (c - 1)

Code

lemma prob_ucbIndex_ge [Nonempty (Fin K)] {alg : Algorithm (Fin K) ℝ}
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : Fin K) (n : ℕ) :
    P {h | 0 < pullCount A a n h ∧
      (ν a)[id] ≤ empMean A R a n h - ucbWidth A (c * σ2) a n h} ≤ 1 / (n + 1) ^ (c - 1)

Type uses (6)

Body uses (3)

Used by (2)

Actions: Source · Open Issue

Proof

by
  let s : Set (ℕ × ℝ) := {(m, x) | 0 < m ∧ (ν a)[id] ≤ x / m - √(2 * (c * σ2) * log (↑n + 1) / m)}
  have hs : MeasurableSet s := by
    simp only [Nat.cast_nonneg, sqrt_div', id_eq, measurableSet_setOf, s]
    fun_prop
  classical
  calc P {h | 0 < pullCount A a n h ∧ (ν a)[id] ≤ empMean A R a n h - ucbWidth A (c * σ2) a n h}
  _ ≤ ∑ k ∈ range (n + 1) with k ∈ Prod.fst '' s,
      (streamMeasure ν) {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s} :=
    prob_pullCount_prod_sumRewards_mem_le h hs
  _ ≤ ∑ k ∈ Icc 1 n,
      (streamMeasure ν) {ω | ∑ i ∈ range k, ω i a ∈ Prod.mk k ⁻¹' s} := by
    refine Finset.sum_le_sum_of_subset_of_nonneg (fun m ↦ ?_) fun _ _ _ ↦ by positivity
    simp [s]
    grind
  _ = ∑ k ∈ Icc 1 n,
      (streamMeasure ν)
        {ω | (ν a)[id] ≤ (∑ i ∈ range k, ω i a) / k - √(2 * c * σ2 * log (↑n + 1) / k)} := by
    refine Finset.sum_congr rfl fun k hk ↦ ?_
    congr with ω
    have hk : 0 < k := by grind
    simp only [id_eq, Nat.cast_nonneg, sqrt_div', Set.preimage_setOf_eq, hk, true_and,
      Set.mem_setOf_eq, s]
    grind
  _ ≤ ∑ k ∈ Icc 1 n, (1 : ℝ≥0∞) / (n + 1) ^ c := by
    gcongr with k hk
    exact prob_avg_sub_sqrt_log_ge hν hσ2 hc a n k (by grind)
  _ ≤ (n + 1) * (1 : ℝ≥0∞) / (n + 1) ^ c := by
    simp only [one_div, sum_const, Nat.card_Icc, add_tsub_cancel_right, nsmul_eq_mul, mul_one]
    rw [div_eq_mul_inv ((n : ℝ≥0∞) + 1)]
    gcongr
    exact le_self_add
  _ = 1 / (n + 1) ^ (c - 1) := by
    simp only [mul_one, one_div]
    rw [ENNReal.rpow_sub _ _ (by simp) (by finiteness), ENNReal.rpow_one, div_eq_mul_inv,
      ENNReal.div_eq_inv_mul, ENNReal.mul_inv (by simp) (by simp), inv_inv]

`probReal_ucbIndex_le`🔗

LemmaBandits.UCB.probReal_ucbIndex_le

Details

No docstring.

theorem

Bandits.UCB.probReal_ucbIndex_le.{u_1} {K : ℕ} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} {σ2 : NNReal} [Nonempty (Fin K)]
  {alg : Learning.Algorithm (Fin K) ℝ}
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  (hν :
    ∀ (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : Fin K) (n : ℕ) :
  MeasureTheory.Measure.real P
      {h |
        0 < Learning.pullCount A a n h ∧
          Learning.empMean A R a n h + ucbWidth A (c * ↑σ2) a n h ≤
            ∫ (x : ℝ), id x ∂ν a} ≤
    1 / (↑n + 1) ^ (c - 1)
Bandits.UCB.probReal_ucbIndex_le.{u_1}
  {K : ℕ} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  {σ2 : NNReal} [Nonempty (Fin K)]
  {alg : Learning.Algorithm (Fin K) ℝ}
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  (hν :
    ∀ (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂ν a)
        σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : Fin K)
  (n : ℕ) :
  MeasureTheory.Measure.real P
      {h |
        0 < Learning.pullCount A a n h ∧
          Learning.empMean A R a n h +
              ucbWidth A (c * ↑σ2) a n h ≤
            ∫ (x : ℝ), id x ∂ν a} ≤
    1 / (↑n + 1) ^ (c - 1)

Code

lemma probReal_ucbIndex_le [Nonempty (Fin K)] {alg : Algorithm (Fin K) ℝ}
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : Fin K) (n : ℕ) :
    P.real {h | 0 < pullCount A a n h ∧ empMean A R a n h + ucbWidth A (c * σ2) a n h ≤ (ν a)[id]} ≤
      1 / (n + 1) ^ (c - 1)

Type uses (6)

Body uses (1)

prob_ucbIndex_le

Actions: Source · Open Issue

Proof

by
  rw [measureReal_def]
  grw [prob_ucbIndex_le h hν hσ2 hc a n]
  swap; · finiteness
  simp only [one_div, ENNReal.toReal_inv]
  rw [← ENNReal.toReal_rpow]
  norm_cast

`probReal_ucbIndex_ge`🔗

LemmaBandits.UCB.probReal_ucbIndex_ge

Details

No docstring.

theorem

Bandits.UCB.probReal_ucbIndex_ge.{u_1} {K : ℕ} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} {σ2 : NNReal} [Nonempty (Fin K)]
  {alg : Learning.Algorithm (Fin K) ℝ}
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  (hν :
    ∀ (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : Fin K) (n : ℕ) :
  MeasureTheory.Measure.real P
      {h |
        0 < Learning.pullCount A a n h ∧
          ∫ (x : ℝ), id x ∂ν a ≤
            Learning.empMean A R a n h - ucbWidth A (c * ↑σ2) a n h} ≤
    1 / (↑n + 1) ^ (c - 1)
Bandits.UCB.probReal_ucbIndex_ge.{u_1}
  {K : ℕ} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  {σ2 : NNReal} [Nonempty (Fin K)]
  {alg : Learning.Algorithm (Fin K) ℝ}
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  (hν :
    ∀ (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂ν a)
        σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : Fin K)
  (n : ℕ) :
  MeasureTheory.Measure.real P
      {h |
        0 < Learning.pullCount A a n h ∧
          ∫ (x : ℝ), id x ∂ν a ≤
            Learning.empMean A R a n h -
              ucbWidth A (c * ↑σ2) a n
                h} ≤
    1 / (↑n + 1) ^ (c - 1)

Code

lemma probReal_ucbIndex_ge [Nonempty (Fin K)] {alg : Algorithm (Fin K) ℝ}
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) (hc : 0 ≤ c) (a : Fin K) (n : ℕ) :
    P.real {h | 0 < pullCount A a n h ∧
      (ν a)[id] ≤ empMean A R a n h - ucbWidth A (c * σ2) a n h} ≤ 1 / (n + 1) ^ (c - 1)

Type uses (6)

Body uses (1)

prob_ucbIndex_ge

Actions: Source · Open Issue

Proof

by
  rw [measureReal_def]
  grw [prob_ucbIndex_ge h hν hσ2 hc a n]
  swap; · finiteness
  simp only [one_div, ENNReal.toReal_inv]
  rw [← ENNReal.toReal_rpow]
  norm_cast

`pullCount_le_add_three`🔗

LemmaBandits.UCB.pullCount_le_add_three

Details

No docstring.

theorem

Bandits.UCB.pullCount_le_add_three.{u_1} {K : ℕ} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ} {Ω : Type u_1}
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ} [Nonempty (Fin K)] (a : Fin K)
  (n C : ℕ) (ω : Ω) :
  Learning.pullCount A a n ω ≤
    C + 1 +
          ∑ s ∈ Finset.range n,
            Set.indicator
              {s |
                A s ω = a ∧
                  C < Learning.pullCount A a s ω ∧
                    ∫ (x : ℝ), id x ∂ν (bestArm ν) ≤
                        Learning.empMean A R (bestArm ν) s ω +
                          ucbWidth A c (bestArm ν) s ω ∧
                      Learning.empMean A R (A s ω) s ω -
                          ucbWidth A c (A s ω) s ω ≤
                        ∫ (x : ℝ), id x ∂ν (A s ω)}
              1 s +
        ∑ s ∈ Finset.range n,
          Set.indicator
            {s |
              C < Learning.pullCount A a s ω ∧
                Learning.empMean A R (bestArm ν) s ω +
                    ucbWidth A c (bestArm ν) s ω <
                  ∫ (x : ℝ), id x ∂ν (bestArm ν)}
            1 s +
      ∑ s ∈ Finset.range n,
        Set.indicator
          {s |
            C < Learning.pullCount A a s ω ∧
              ∫ (x : ℝ), id x ∂ν a <
                Learning.empMean A R a s ω - ucbWidth A c a s ω}
          1 s
Bandits.UCB.pullCount_le_add_three.{u_1}
  {K : ℕ} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  {Ω : Type u_1} {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} [Nonempty (Fin K)]
  (a : Fin K) (n C : ℕ) (ω : Ω) :
  Learning.pullCount A a n ω ≤
    C + 1 +
          ∑ s ∈ Finset.range n,
            Set.indicator
              {s |
                A s ω = a ∧
                  C <
                      Learning.pullCount A
                        a s ω ∧
                    ∫ (x : ℝ),
                          id
                            x ∂ν
                            (bestArm ν) ≤
                        Learning.empMean A
                            R (bestArm ν)
                            s ω +
                          ucbWidth A c
                            (bestArm ν) s
                            ω ∧
                      Learning.empMean A R
                            (A s ω) s ω -
                          ucbWidth A c
                            (A s ω) s ω ≤
                        ∫ (x : ℝ),
                          id x ∂ν (A s ω)}
              1 s +
        ∑ s ∈ Finset.range n,
          Set.indicator
            {s |
              C <
                  Learning.pullCount A a s
                    ω ∧
                Learning.empMean A R
                      (bestArm ν) s ω +
                    ucbWidth A c
                      (bestArm ν) s ω <
                  ∫ (x : ℝ),
                    id x ∂ν (bestArm ν)}
            1 s +
      ∑ s ∈ Finset.range n,
        Set.indicator
          {s |
            C <
                Learning.pullCount A a s
                  ω ∧
              ∫ (x : ℝ), id x ∂ν a <
                Learning.empMean A R a s
                    ω -
                  ucbWidth A c a s ω}
          1 s

Code

lemma pullCount_le_add_three [Nonempty (Fin K)] (a : Fin K) (n C : ℕ) (ω : Ω) :
    pullCount A a n ω ≤ C + 1 +
      ∑ s ∈ range n, {s | A s ω = a ∧ C < pullCount A a s ω ∧
        (ν (bestArm ν))[id] ≤ empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω ∧
        empMean A R (A s ω) s ω - ucbWidth A c (A s ω) s ω ≤ (ν (A s ω))[id]}.indicator 1 s +
      ∑ s ∈ range n,
        {s | C < pullCount A a s ω ∧ empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω <
          (ν (bestArm ν))[id]}.indicator 1 s +
      ∑ s ∈ range n,
        {s | C < pullCount A a s ω ∧ (ν a)[id] <
          empMean A R a s ω - ucbWidth A c a s ω}.indicator 1 s

Type uses (4)

Body uses (1)

pullCount_le_add

Used by (1)

pullCount_le_add_three_ae

Actions: Source · Open Issue

Proof

by
  refine (pullCount_le_add a n C ω).trans ?_
  simp_rw [add_assoc]
  gcongr
  simp_rw [← add_assoc]
  let A' := {s | A s ω = a ∧ C < pullCount A a s ω}
  let B := {s | A s ω = a ∧ C < pullCount A a s ω ∧
        (ν (bestArm ν))[id] ≤ empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω ∧
        empMean A R (A s ω) s ω - ucbWidth A c (A s ω) s ω ≤ (ν (A s ω))[id]}
  let C' := {s | C < pullCount A a s ω ∧
    empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω < (ν (bestArm ν))[id]}
  let D := {s | C < pullCount A a s ω ∧ (ν a)[id] < empMean A R a s ω - ucbWidth A c a s ω}
  change ∑ s ∈ range n, A'.indicator 1 s ≤
    ∑ s ∈ range n, B.indicator 1 s + ∑ s ∈ range n, C'.indicator 1 s +
      ∑ s ∈ range n, D.indicator 1 s
  have h_union : A' ⊆ B ∪ C' ∪ D := by simp [A', B, C', D]; grind
  calc
    (∑ s ∈ range n, A'.indicator 1 s)
    _ ≤ (∑ s ∈ range n, (B ∪ C' ∪ D).indicator (fun _ ↦ (1 : ℕ)) s) := by
      gcongr with n hn
      by_cases h : n ∈ A'
      · have : n ∈ B ∪ C' ∪ D := h_union h
        simp [h, this]
      · simp [h]
    _ ≤ ∑ s ∈ range n, (B.indicator 1 s + C'.indicator 1 s + D.indicator 1 s) := by
      gcongr with s
      simp [Set.indicator_apply]
      grind
    _ = ∑ s ∈ range n, B.indicator 1 s + ∑ s ∈ range n, C'.indicator 1 s +
          ∑ s ∈ range n, D.indicator 1 s := by
      rw [Finset.sum_add_distrib, Finset.sum_add_distrib]

`pullCount_le_add_three_ae`🔗

LemmaBandits.UCB.pullCount_le_add_three_ae

Details

No docstring.

theorem

Bandits.UCB.pullCount_le_add_three_ae.{u_1} {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} [Nonempty (Fin K)]
  (h :
    Learning.IsAlgEnvSeq A R (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P)
  (a : Fin K) (n C : ℕ) (hC : C ≠ 0) :
  ∀ᵐ (ω : Ω) ∂P,
    Learning.pullCount A a n ω ≤
      C + 1 +
            ∑ s ∈ Finset.range n,
              Set.indicator
                {s |
                  A s ω = a ∧
                    C < Learning.pullCount A a s ω ∧
                      ∫ (x : ℝ), id x ∂ν (bestArm ν) ≤
                          Learning.empMean A R (bestArm ν) s ω +
                            ucbWidth A c (bestArm ν) s ω ∧
                        Learning.empMean A R (A s ω) s ω -
                            ucbWidth A c (A s ω) s ω ≤
                          ∫ (x : ℝ), id x ∂ν (A s ω)}
                1 s +
          ∑ s ∈ Finset.range n,
            Set.indicator
              {s |
                0 < Learning.pullCount A (bestArm ν) s ω ∧
                  Learning.empMean A R (bestArm ν) s ω +
                      ucbWidth A c (bestArm ν) s ω <
                    ∫ (x : ℝ), id x ∂ν (bestArm ν)}
              1 s +
        ∑ s ∈ Finset.range n,
          Set.indicator
            {s |
              0 < Learning.pullCount A a s ω ∧
                ∫ (x : ℝ), id x ∂ν a <
                  Learning.empMean A R a s ω - ucbWidth A c a s ω}
            1 s
Bandits.UCB.pullCount_le_add_three_ae.{u_1}
  {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  [Nonempty (Fin K)]
  (h :
    Learning.IsAlgEnvSeq A R
      (ucbAlgorithm hK c)
      (Learning.stationaryEnv ν) P)
  (a : Fin K) (n C : ℕ) (hC : C ≠ 0) :
  ∀ᵐ (ω : Ω) ∂P,
    Learning.pullCount A a n ω ≤
      C + 1 +
            ∑ s ∈ Finset.range n,
              Set.indicator
                {s |
                  A s ω = a ∧
                    C <
                        Learning.pullCount
                          A a s ω ∧
                      ∫ (x : ℝ),
                            id
                              x ∂ν
                              (bestArm
                                ν) ≤
                          Learning.empMean
                              A R
                              (bestArm ν)
                              s ω +
                            ucbWidth A c
                              (bestArm ν)
                              s ω ∧
                        Learning.empMean A
                              R (A s ω) s
                              ω -
                            ucbWidth A c
                              (A s ω) s
                              ω ≤
                          ∫ (x : ℝ),
                            id
                              x ∂ν
                              (A s ω)}
                1 s +
          ∑ s ∈ Finset.range n,
            Set.indicator
              {s |
                0 <
                    Learning.pullCount A
                      (bestArm ν) s ω ∧
                  Learning.empMean A R
                        (bestArm ν) s ω +
                      ucbWidth A c
                        (bestArm ν) s ω <
                    ∫ (x : ℝ),
                      id x ∂ν (bestArm ν)}
              1 s +
        ∑ s ∈ Finset.range n,
          Set.indicator
            {s |
              0 <
                  Learning.pullCount A a s
                    ω ∧
                ∫ (x : ℝ), id x ∂ν a <
                  Learning.empMean A R a s
                      ω -
                    ucbWidth A c a s ω}
            1 s

Code

lemma pullCount_le_add_three_ae [Nonempty (Fin K)]
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK c) (stationaryEnv ν) P)
    (a : Fin K) (n C : ℕ) (hC : C ≠ 0) :
    ∀ᵐ ω ∂P,
    pullCount A a n ω ≤ C + 1 +
      ∑ s ∈ range n, {s | A s ω = a ∧ C < pullCount A a s ω ∧
        (ν (bestArm ν))[id] ≤ empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω ∧
        empMean A R (A s ω) s ω - ucbWidth A c (A s ω) s ω ≤ (ν (A s ω))[id]}.indicator 1 s +
      ∑ s ∈ range n,
        {s | 0 < pullCount A (bestArm ν) s ω ∧
          empMean A R (bestArm ν) s ω + ucbWidth A c (bestArm ν) s ω <
            (ν (bestArm ν))[id]}.indicator 1 s +
      ∑ s ∈ range n,
        {s | 0 < pullCount A a s ω ∧ (ν a)[id] <
          empMean A R a s ω - ucbWidth A c a s ω}.indicator 1 s

Type uses (7)

Body uses (2)

Used by (1)

pullCount_ae_le_add_two

Actions: Source · Open Issue

Proof

by
  filter_upwards [pullCount_pos_of_pullCount_gt_one h a] with ω hω
  refine (pullCount_le_add_three (R := R) a n C ω (ν := ν) (c := c)).trans ?_
  gcongr 5 with k hk j k hk j
  · gcongr 1
    exact fun h_gt ↦ hω _ (lt_of_le_of_lt (by grind) h_gt) _
  · exact fun h_gt ↦ hω _ (lt_of_le_of_lt (by grind) h_gt) _

`some_sum_eq_zero`🔗

LemmaBandits.UCB.some_sum_eq_zero

Details

No docstring.

theorem

Bandits.UCB.some_sum_eq_zero.{u_1} {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} {σ2 : NNReal} [Nonempty (Fin K)]
  (h :
    Learning.IsAlgEnvSeq A R (ucbAlgorithm hK (c * ↑σ2))
      (Learning.stationaryEnv ν) P)
  (hc : 0 ≤ c) (a : Fin K) (h_gap : 0 < gap ν a) (n C : ℕ) (hC : C ≠ 0)
  (hC' : 8 * c * ↑σ2 * Real.log (↑n + 1) / gap ν a ^ 2 ≤ ↑C) :
  ∀ᵐ (ω : Ω) ∂P,
    ∑ s ∈ Finset.range n,
        Set.indicator
          {s |
            A s ω = a ∧
              C < Learning.pullCount A a s ω ∧
                ∫ (x : ℝ), id x ∂ν (bestArm ν) ≤
                    Learning.empMean A R (bestArm ν) s ω +
                      ucbWidth A (c * ↑σ2) (bestArm ν) s ω ∧
                  Learning.empMean A R (A s ω) s ω -
                      ucbWidth A (c * ↑σ2) (A s ω) s ω ≤
                    ∫ (x : ℝ), id x ∂ν (A s ω)}
          1 s =
      0
Bandits.UCB.some_sum_eq_zero.{u_1} {K : ℕ}
  {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  {σ2 : NNReal} [Nonempty (Fin K)]
  (h :
    Learning.IsAlgEnvSeq A R
      (ucbAlgorithm hK (c * ↑σ2))
      (Learning.stationaryEnv ν) P)
  (hc : 0 ≤ c) (a : Fin K)
  (h_gap : 0 < gap ν a) (n C : ℕ)
  (hC : C ≠ 0)
  (hC' :
    8 * c * ↑σ2 * Real.log (↑n + 1) /
        gap ν a ^ 2 ≤
      ↑C) :
  ∀ᵐ (ω : Ω) ∂P,
    ∑ s ∈ Finset.range n,
        Set.indicator
          {s |
            A s ω = a ∧
              C <
                  Learning.pullCount A a s
                    ω ∧
                ∫ (x : ℝ),
                      id
                        x ∂ν (bestArm ν) ≤
                    Learning.empMean A R
                        (bestArm ν) s ω +
                      ucbWidth A (c * ↑σ2)
                        (bestArm ν) s ω ∧
                  Learning.empMean A R
                        (A s ω) s ω -
                      ucbWidth A (c * ↑σ2)
                        (A s ω) s ω ≤
                    ∫ (x : ℝ),
                      id x ∂ν (A s ω)}
          1 s =
      0

Code

lemma some_sum_eq_zero [Nonempty (Fin K)]
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (stationaryEnv ν) P)
    (hc : 0 ≤ c) (a : Fin K) (h_gap : 0 < gap ν a) (n C : ℕ)
    (hC : C ≠ 0) (hC' : 8 * c * σ2 * log (n + 1) / gap ν a ^ 2 ≤ C) :
    ∀ᵐ ω ∂P,
    ∑ s ∈ range n, {s | A s ω = a ∧ C < pullCount A a s ω ∧
      (ν (bestArm ν))[id] ≤ empMean A R (bestArm ν) s ω + ucbWidth A (c * σ2) (bestArm ν) s ω ∧
      empMean A R (A s ω) s ω - ucbWidth A (c * σ2) (A s ω) s ω
        ≤ (ν (A s ω))[id]}.indicator 1 s = 0

Type uses (8)

Body uses (3)

Used by (1)

pullCount_ae_le_add_two

Actions: Source · Open Issue

Proof

by
  have h_ae := forall_ucbIndex_le_ucbIndex_arm h (bestArm ν) (ν := ν) (c := c * σ2) (hK := hK)
  have h_gt := time_gt_of_pullCount_gt_one h a (ν := ν) (c := c * σ2) (hK := hK)
  filter_upwards [h_ae, h_gt] with ω h_le h_time_ge
  simp only [id_eq, tsub_le_iff_right, sum_eq_zero_iff, mem_range, Set.indicator_apply_eq_zero,
    Set.mem_setOf_eq, Pi.one_apply, one_ne_zero, imp_false, not_and, not_le]
  intro k hn h_arm hC_lt h_le_best
  by_contra! h_le_arm
  have h := pullCount_arm_le (by positivity : 0 ≤ c * σ2) h_le_best (by simpa) ?_ ?_ ?_
  rotate_left
  · refine h_le _ ?_
    refine (h_time_ge _ ?_).le
    refine lt_of_le_of_lt ?_ hC_lt
    grind
  · rwa [h_arm]
  · rw [h_arm]
    exact zero_le.trans_lt hC_lt
  refine lt_irrefl (8 * c * σ2 * log (n + 1) / gap ν a ^ 2) ?_
  refine hC'.trans_lt (lt_of_lt_of_le ?_ (h.trans ?_))
  · rw [h_arm]
    exact mod_cast hC_lt
  · rw [h_arm]
    simp_rw [← mul_assoc]
    gcongr

`pullCount_ae_le_add_two`🔗

LemmaBandits.UCB.pullCount_ae_le_add_two

Details

No docstring.

theorem

Bandits.UCB.pullCount_ae_le_add_two.{u_1} {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} {σ2 : NNReal} [Nonempty (Fin K)]
  (h :
    Learning.IsAlgEnvSeq A R (ucbAlgorithm hK (c * ↑σ2))
      (Learning.stationaryEnv ν) P)
  (hc : 0 ≤ c) (a : Fin K) (h_gap : 0 < gap ν a) (n C : ℕ) (hC : C ≠ 0)
  (hC' : 8 * c * ↑σ2 * Real.log (↑n + 1) / gap ν a ^ 2 ≤ ↑C) :
  ∀ᵐ (ω : Ω) ∂P,
    Learning.pullCount A a n ω ≤
      C + 1 +
          ∑ s ∈ Finset.range n,
            Set.indicator
              {s |
                0 < Learning.pullCount A (bestArm ν) s ω ∧
                  Learning.empMean A R (bestArm ν) s ω +
                      ucbWidth A (c * ↑σ2) (bestArm ν) s ω <
                    ∫ (x : ℝ), id x ∂ν (bestArm ν)}
              1 s +
        ∑ s ∈ Finset.range n,
          Set.indicator
            {s |
              0 < Learning.pullCount A a s ω ∧
                ∫ (x : ℝ), id x ∂ν a <
                  Learning.empMean A R a s ω -
                    ucbWidth A (c * ↑σ2) a s ω}
            1 s
Bandits.UCB.pullCount_ae_le_add_two.{u_1}
  {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  {σ2 : NNReal} [Nonempty (Fin K)]
  (h :
    Learning.IsAlgEnvSeq A R
      (ucbAlgorithm hK (c * ↑σ2))
      (Learning.stationaryEnv ν) P)
  (hc : 0 ≤ c) (a : Fin K)
  (h_gap : 0 < gap ν a) (n C : ℕ)
  (hC : C ≠ 0)
  (hC' :
    8 * c * ↑σ2 * Real.log (↑n + 1) /
        gap ν a ^ 2 ≤
      ↑C) :
  ∀ᵐ (ω : Ω) ∂P,
    Learning.pullCount A a n ω ≤
      C + 1 +
          ∑ s ∈ Finset.range n,
            Set.indicator
              {s |
                0 <
                    Learning.pullCount A
                      (bestArm ν) s ω ∧
                  Learning.empMean A R
                        (bestArm ν) s ω +
                      ucbWidth A (c * ↑σ2)
                        (bestArm ν) s ω <
                    ∫ (x : ℝ),
                      id x ∂ν (bestArm ν)}
              1 s +
        ∑ s ∈ Finset.range n,
          Set.indicator
            {s |
              0 <
                  Learning.pullCount A a s
                    ω ∧
                ∫ (x : ℝ), id x ∂ν a <
                  Learning.empMean A R a s
                      ω -
                    ucbWidth A (c * ↑σ2) a
                      s ω}
            1 s

Code

lemma pullCount_ae_le_add_two [Nonempty (Fin K)]
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (stationaryEnv ν) P)
    (hc : 0 ≤ c) (a : Fin K) (h_gap : 0 < gap ν a)
    (n C : ℕ) (hC : C ≠ 0) (hC' : 8 * c * σ2 * log (n + 1) / gap ν a ^ 2 ≤ C) :
    ∀ᵐ ω ∂P,
    pullCount A a n ω ≤ C + 1 +
      ∑ s ∈ range n,
        {s | 0 < pullCount A (bestArm ν) s ω ∧
          empMean A R (bestArm ν) s ω + ucbWidth A (c * σ2) (bestArm ν) s ω <
            (ν (bestArm ν))[id]}.indicator 1 s +
      ∑ s ∈ range n,
        {s | 0 < pullCount A a s ω ∧ (ν a)[id] <
          empMean A R a s ω - ucbWidth A (c * σ2) a s ω}.indicator 1 s

Type uses (8)

Body uses (2)

Used by (1)

expectation_pullCount_le'

Actions: Source · Open Issue

Proof

by
  filter_upwards [some_sum_eq_zero h hc a h_gap n C hC hC',
    pullCount_le_add_three_ae h a n C hC] with ω hω_zero hω_le
  refine (hω_le).trans_eq ?_
  rw [hω_zero]

`constSum`🔗

DefinitionBandits.UCB.constSum

Details

A sum that appears in the UCB regret upper bound.

def

Bandits.UCB.constSum (c : ℝ) (n : ℕ) : ENNReal
Bandits.UCB.constSum (c : ℝ) (n : ℕ) :
  ENNReal

Code

noncomputable
def constSum (c : ℝ) (n : ℕ) : ℝ≥0∞ := ∑ s ∈ range n, 1 / ((s : ℝ≥0∞) + 1) ^ (c - 1)

Used by (4)

Actions: Source · Open Issue

`constSum_lt_top`🔗

LemmaBandits.UCB.constSum_lt_top

Details

No docstring.

theorem

Bandits.UCB.constSum_lt_top (c : ℝ) (n : ℕ) : constSum c n < ⊤
Bandits.UCB.constSum_lt_top (c : ℝ)
  (n : ℕ) : constSum c n < ⊤

Code

lemma constSum_lt_top (c : ℝ) (n : ℕ) : constSum c n < ∞

Type uses (1)

constSum

Used by (1)

expectation_pullCount_le

Actions: Source · Open Issue

Proof

by
  rw [constSum, ENNReal.sum_lt_top]
  intro k hk
  simp only [one_div, ENNReal.inv_lt_top]
  positivity

`expectation_pullCount_le'`🔗

LemmaBandits.UCB.expectation_pullCount_le'

Details

Bound on the expectation of the number of pulls of each arm by the UCB algorithm.

theorem

Bandits.UCB.expectation_pullCount_le'.{u_1} {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} {σ2 : NNReal}
  (h :
    Learning.IsAlgEnvSeq A R (ucbAlgorithm hK (c * ↑σ2))
      (Learning.stationaryEnv ν) P)
  (hν :
    ∀ (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 < c) (a : Fin K) (h_gap : 0 < gap ν a)
  (n : ℕ) :
  ∫⁻ (ω : Ω), ↑(Learning.pullCount A a n ω) ∂P ≤
    ENNReal.ofReal (8 * c * ↑σ2 * Real.log (↑n + 1) / gap ν a ^ 2 + 1) +
        1 +
      2 * constSum c n
Bandits.UCB.expectation_pullCount_le'.{u_1}
  {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  {σ2 : NNReal}
  (h :
    Learning.IsAlgEnvSeq A R
      (ucbAlgorithm hK (c * ↑σ2))
      (Learning.stationaryEnv ν) P)
  (hν :
    ∀ (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂ν a)
        σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 < c) (a : Fin K)
  (h_gap : 0 < gap ν a) (n : ℕ) :
  ∫⁻ (ω : Ω),
      ↑(Learning.pullCount A a n ω) ∂P ≤
    ENNReal.ofReal
          (8 * c * ↑σ2 *
                Real.log (↑n + 1) /
              gap ν a ^ 2 +
            1) +
        1 +
      2 * constSum c n

Code

lemma expectation_pullCount_le'
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (stationaryEnv ν) P)
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) (hc : 0 < c) (a : Fin K) (h_gap : 0 < gap ν a) (n : ℕ) :
    ∫⁻ ω, pullCount A a n ω ∂P ≤
      ENNReal.ofReal (8 * c * σ2 * log (n + 1) / gap ν a ^ 2 + 1) + 1 + 2 * constSum c n

Type uses (6)

Body uses (12)

Used by (1)

expectation_pullCount_le

Actions: Source · Open Issue

Proof

by
  have hA := h.measurable_action
  have hR := h.measurable_feedback
  by_cases hn_zero : n = 0
  · simp [hn_zero]
  let C a : ℕ := ⌈8 * c * σ2 * log (n + 1) / gap ν a ^ 2⌉₊
  have : Nonempty (Fin K) := Fin.pos_iff_nonempty.mp hK
  have h_set_1 b : MeasurableSet {a_1 | 0 < pullCount A a b a_1 ∧
      (ν a)[id] < empMean A R a b a_1 - ucbWidth A (c * σ2) a b a_1} := by
    change MeasurableSet ({a_1 | 0 < pullCount A a b a_1} ∩
      {a_1 | (ν a)[id] < empMean A R a b a_1 - ucbWidth A (c * σ2) a b a_1})
    exact (measurableSet_lt (by fun_prop) (by fun_prop)).inter
      (measurableSet_lt (by fun_prop) (by fun_prop))
  have h_set_2 b : MeasurableSet {a | 0 < pullCount A (bestArm ν) b a ∧
      empMean A R (bestArm ν) b a + ucbWidth A (c * σ2) (bestArm ν) b a < (ν (bestArm ν))[id]} := by
    change MeasurableSet ({a | 0 < pullCount A (bestArm ν) b a} ∩
      {a | empMean A R (bestArm ν) b a + ucbWidth A (c * σ2) (bestArm ν) b a < (ν (bestArm ν))[id]})
    exact (measurableSet_lt (by fun_prop) (by fun_prop)).inter
      (measurableSet_lt (by fun_prop) (by fun_prop))
  have h_meas_1 b : Measurable fun h ↦ {s | 0 < pullCount A a s h ∧ (ν a)[id] <
      empMean A R a s h - ucbWidth A (c * σ2) a s h}.indicator (1 : ℕ → ℕ) b := by
    simp only [id_eq, Set.indicator_apply, Set.mem_setOf_eq, Pi.one_apply]
    exact Measurable.ite (h_set_1 _) (by fun_prop) (by fun_prop)
  have h_meas_2 b : Measurable fun h ↦ {s | 0 < pullCount A (bestArm ν) s h ∧
      empMean A R (bestArm ν) s h + ucbWidth A (c * σ2) (bestArm ν) s h <
          (ν (bestArm ν))[id]}.indicator (1 : ℕ → ℕ) b := by
    simp only [id_eq, Set.indicator_apply, Set.mem_setOf_eq, Pi.one_apply]
    exact Measurable.ite (h_set_2 _) (by fun_prop) (by fun_prop)
  calc ∫⁻ ω, pullCount A a n ω ∂P
  _ ≤ ∫⁻ ω, C a + 1 +
      ∑ s ∈ range n,
        {s | 0 < pullCount A (bestArm ν) s ω ∧
          empMean A R (bestArm ν) s ω + ucbWidth A (c * σ2) (bestArm ν) s ω <
            (ν (bestArm ν))[id]}.indicator (1 : ℕ → ℕ) s +
      ∑ s ∈ range n,
        {s | 0 < pullCount A a s ω ∧ (ν a)[id] <
          empMean A R a s ω - ucbWidth A (c * σ2) a s ω}.indicator (1 : ℕ → ℕ) s ∂P := by
    refine lintegral_mono_ae ?_
    have hCa : C a ≠ 0 := by
      simp only [ne_eq, Nat.ceil_eq_zero, not_le, C]
      have : 0 < log (n + 1) := log_pos (by simp; grind)
      positivity
    filter_upwards [pullCount_ae_le_add_two h hc.le a h_gap n (C a) hCa (Nat.le_ceil _)] with ω hω
    simp only [id_eq, Nat.cast_sum]
    norm_cast
  _ ≤ (C a : ℝ≥0∞) + 1 +
      ∑ s ∈ range n,
        P {ω | 0 < pullCount A (bestArm ν) s ω ∧
          empMean A R (bestArm ν) s ω + ucbWidth A (c * σ2) (bestArm ν) s ω < (ν (bestArm ν))[id]} +
      ∑ s ∈ range n,
        P {ω | 0 < pullCount A a s ω ∧ (ν a)[id] <
          empMean A R a s ω - ucbWidth A (c * σ2) a s ω} := by
    simp only [id_eq, Nat.cast_sum]
    rw [lintegral_add_left (by fun_prop), lintegral_add_left (by fun_prop)]
    simp only [lintegral_const, measure_univ, mul_one]
    rw [lintegral_finsetSum _ (by fun_prop), lintegral_finsetSum _ (by fun_prop)]
    gcongr with k hk k hk
    · rw [← lintegral_indicator_one]
      swap; · exact h_set_2 _
      gcongr with h
      simp [Set.indicator_apply]
    · rw [← lintegral_indicator_one]
      swap; · exact h_set_1 _
      gcongr with h
      simp [Set.indicator_apply]
  _ ≤ (C a : ℝ≥0∞) + 1 +
      ∑ s ∈ range n, 1 / ((s : ℝ≥0∞) + 1) ^ (c - 1) +
      ∑ s ∈ range n, 1 / ((s : ℝ≥0∞) + 1) ^ (c - 1) := by
    gcongr with s hs s hs
    · refine (measure_mono ?_).trans (prob_ucbIndex_le h hν hσ2 (by positivity) (bestArm ν) s)
      grind
    · refine (measure_mono ?_).trans (prob_ucbIndex_ge h hν hσ2 (by positivity) a s)
      grind
  _ ≤ ENNReal.ofReal (8 * c * σ2 * log (n + 1) / gap ν a ^ 2 + 1) + 1 + 2 * constSum c n := by
    rw [two_mul, add_assoc, constSum]
    gcongr
    simp only [C]
    rw [← ENNReal.ofReal_natCast]
    refine ENNReal.ofReal_le_ofReal ?_
    refine (Nat.ceil_lt_add_one ?_).le
    have : 0 ≤ log (n + 1) := log_nonneg (by simp)
    positivity

`expectation_pullCount_le`🔗

LemmaBandits.UCB.expectation_pullCount_le

Details

Bound on the expectation of the number of pulls of each arm by the UCB algorithm.

theorem

Bandits.UCB.expectation_pullCount_le.{u_1} {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} {σ2 : NNReal}
  (h :
    Learning.IsAlgEnvSeq A R (ucbAlgorithm hK (c * ↑σ2))
      (Learning.stationaryEnv ν) P)
  (hν :
    ∀ (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 < c) (a : Fin K) (h_gap : 0 < gap ν a)
  (n : ℕ) :
  ∫ (x : Ω), (fun ω => ↑(Learning.pullCount A a n ω)) x ∂P ≤
    8 * c * ↑σ2 * Real.log (↑n + 1) / gap ν a ^ 2 + 2 +
      2 * ENNReal.toReal (constSum c n)
Bandits.UCB.expectation_pullCount_le.{u_1}
  {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  {σ2 : NNReal}
  (h :
    Learning.IsAlgEnvSeq A R
      (ucbAlgorithm hK (c * ↑σ2))
      (Learning.stationaryEnv ν) P)
  (hν :
    ∀ (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂ν a)
        σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 < c) (a : Fin K)
  (h_gap : 0 < gap ν a) (n : ℕ) :
  ∫ (x : Ω),
      (fun ω =>
          ↑(Learning.pullCount A a n ω))
        x ∂P ≤
    8 * c * ↑σ2 * Real.log (↑n + 1) /
          gap ν a ^ 2 +
        2 +
      2 * ENNReal.toReal (constSum c n)

Code

lemma expectation_pullCount_le
    (h : IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (stationaryEnv ν) P)
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) (hc : 0 < c) (a : Fin K) (h_gap : 0 < gap ν a) (n : ℕ) :
    P[fun ω ↦ (pullCount A a n ω : ℝ)] ≤
      8 * c * σ2 * log (n + 1) / gap ν a ^ 2 + 2 + 2 * (constSum c n).toReal

Type uses (6)

Body uses (5)

Used by (1)

regret_le

Actions: Source · Open Issue

Proof

by
  have hA := h.measurable_action
  have h := expectation_pullCount_le' h hν hσ2 hc a h_gap n (hK := hK)
  simp_rw [← ENNReal.ofReal_natCast] at h
  rw [← ofReal_integral_eq_lintegral_ofReal] at h
  rotate_left
  · exact integrable_pullCount hA _ _
  · exact ae_of_all _ fun _ ↦ by simp
  simp only
  have : 0 ≤ log (n + 1) := log_nonneg (by simp)
  rw [← ENNReal.ofReal_toReal (a := 2 * constSum c n), ← ENNReal.ofReal_one, ← ENNReal.ofReal_add,
    ← ENNReal.ofReal_add, ENNReal.ofReal_le_ofReal_iff] at h
  rotate_left
  · positivity
  · positivity
  · simp
  · have : constSum c n ≠ ∞ := (constSum_lt_top c n).ne
    finiteness
  · simp
  · have : constSum c n ≠ ∞ := (constSum_lt_top c n).ne
    finiteness
  refine h.trans_eq ?_
  simp only [ENNReal.toReal_mul, ENNReal.toReal_ofNat, add_left_inj]
  ring

`regret_le`🔗

TheoremBandits.UCB.regret_le

Details

Regret bound for the UCB algorithm.

theorem

Bandits.UCB.regret_le.{u_1} {K : ℕ} {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] {Ω : Type u_1}
  {mΩ : MeasurableSpace Ω} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} {σ2 : NNReal}
  (h :
    Learning.IsAlgEnvSeq A R (ucbAlgorithm hK (c * ↑σ2))
      (Learning.stationaryEnv ν) P)
  (hν :
    ∀ (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂ν a) σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 < c) (n : ℕ) :
  ∫ (x : Ω), regret ν A n x ∂P ≤
    ∑ a,
      (8 * c * ↑σ2 * Real.log (↑n + 1) / gap ν a +
        gap ν a * (2 + 2 * ENNReal.toReal (constSum c n)))
Bandits.UCB.regret_le.{u_1} {K : ℕ}
  {hK : 0 < K} {c : ℝ}
  {ν : ProbabilityTheory.Kernel (Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  {Ω : Type u_1} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  {σ2 : NNReal}
  (h :
    Learning.IsAlgEnvSeq A R
      (ucbAlgorithm hK (c * ↑σ2))
      (Learning.stationaryEnv ν) P)
  (hν :
    ∀ (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂ν a)
        σ2 (ν a))
  (hσ2 : σ2 ≠ 0) (hc : 0 < c) (n : ℕ) :
  ∫ (x : Ω), regret ν A n x ∂P ≤
    ∑ a,
      (8 * c * ↑σ2 * Real.log (↑n + 1) /
          gap ν a +
        gap ν a *
          (2 +
            2 *
              ENNReal.toReal
                (constSum c n)))

Code

theorem regret_le (h : IsAlgEnvSeq A R (ucbAlgorithm hK (c * σ2)) (stationaryEnv ν) P)
    (hν : ∀ a, HasSubgaussianMGF (fun x ↦ x - (ν a)[id]) σ2 (ν a))
    (hσ2 : σ2 ≠ 0) (hc : 0 < c) (n : ℕ) :
    P[regret ν A n] ≤
      ∑ a, (8 * c * σ2 * log (n + 1) / gap ν a + gap ν a * (2 + 2 * (constSum c n).toReal))

Type uses (6)

Body uses (3)

Actions: Source · Open Issue

Proof

by
  refine (integral_regret_le_of_forall_integral_pullCount_le h
    (fun a h_gap ↦ expectation_pullCount_le h hν hσ2 hc a
      (lt_of_le_of_ne' gap_nonneg h_gap) n)).trans_eq ?_
  congr with a
  by_cases h_gap : gap ν a = 0
  · simp [h_gap]
  · field

LeanMachineLearning exposition

2.8. Online.Bandit.Algorithms.UCB🔗

`ucbWidth'`🔗

`nextArm`🔗

`measurable_nextArm`🔗

`ucbAlgorithm`🔗

`isAlgEnvSeqUntil_roundRobinAlgorithm`🔗

`ucbWidth`🔗

`measurable_ucbWidth`🔗

`ucbWidth_eq_ucbWidth'`🔗

`arm_zero`🔗

`arm_ae_eq_ucbNextArm`🔗

`arm_ae_all_eq`🔗

`ucbIndex_le_ucbIndex_arm`🔗

`forall_arm_eq_mod_of_lt`🔗

`forall_ucbIndex_le_ucbIndex_arm`🔗

`forall_arm_prop`🔗

`time_gt_of_pullCount_gt_one`🔗

`pullCount_pos_of_pullCount_gt_one`🔗

`gap_arm_le_two_mul_ucbWidth`🔗

`pullCount_arm_le`🔗

`prob_ucbIndex_le`🔗

`prob_ucbIndex_ge`🔗

`probReal_ucbIndex_le`🔗

`probReal_ucbIndex_ge`🔗

`pullCount_le_add_three`🔗

`pullCount_le_add_three_ae`🔗

`some_sum_eq_zero`🔗

`pullCount_ae_le_add_two`🔗

`constSum`🔗

`constSum_lt_top`🔗

`expectation_pullCount_le'`🔗

`expectation_pullCount_le`🔗

`regret_le`🔗

2.8. Online.Bandit.Algorithms.UCB🔗

ucbWidth'🔗

nextArm🔗

measurable_nextArm🔗

ucbAlgorithm🔗

isAlgEnvSeqUntil_roundRobinAlgorithm🔗

ucbWidth🔗

measurable_ucbWidth🔗

ucbWidth_eq_ucbWidth'🔗

arm_zero🔗

arm_ae_eq_ucbNextArm🔗

arm_ae_all_eq🔗

ucbIndex_le_ucbIndex_arm🔗

forall_arm_eq_mod_of_lt🔗

forall_ucbIndex_le_ucbIndex_arm🔗

forall_arm_prop🔗

time_gt_of_pullCount_gt_one🔗

pullCount_pos_of_pullCount_gt_one🔗

gap_arm_le_two_mul_ucbWidth🔗

pullCount_arm_le🔗

prob_ucbIndex_le🔗

prob_ucbIndex_ge🔗

probReal_ucbIndex_le🔗

probReal_ucbIndex_ge🔗

pullCount_le_add_three🔗

pullCount_le_add_three_ae🔗

some_sum_eq_zero🔗

pullCount_ae_le_add_two🔗

constSum🔗

constSum_lt_top🔗

expectation_pullCount_le'🔗

expectation_pullCount_le🔗

regret_le🔗

`ucbWidth'`🔗

`nextArm`🔗

`measurable_nextArm`🔗

`ucbAlgorithm`🔗

`isAlgEnvSeqUntil_roundRobinAlgorithm`🔗

`ucbWidth`🔗

`measurable_ucbWidth`🔗

`ucbWidth_eq_ucbWidth'`🔗

`arm_zero`🔗

`arm_ae_eq_ucbNextArm`🔗

`arm_ae_all_eq`🔗

`ucbIndex_le_ucbIndex_arm`🔗

`forall_arm_eq_mod_of_lt`🔗

`forall_ucbIndex_le_ucbIndex_arm`🔗

`forall_arm_prop`🔗

`time_gt_of_pullCount_gt_one`🔗

`pullCount_pos_of_pullCount_gt_one`🔗

`gap_arm_le_two_mul_ucbWidth`🔗

`pullCount_arm_le`🔗

`prob_ucbIndex_le`🔗

`prob_ucbIndex_ge`🔗

`probReal_ucbIndex_le`🔗

`probReal_ucbIndex_ge`🔗

`pullCount_le_add_three`🔗

`pullCount_le_add_three_ae`🔗

`some_sum_eq_zero`🔗

`pullCount_ae_le_add_two`🔗

`constSum`🔗

`constSum_lt_top`🔗

`expectation_pullCount_le'`🔗

`expectation_pullCount_le`🔗

`regret_le`🔗