Online.Bandit.Algorithms.Regret.BayesRegretTS

`ucb`🔗

Definition Bandits.ClippedUCB.ucb

Details

Clipped upper confidence bound used in the regret analysis of Thompson sampling.

def

Bandits.ClippedUCB.ucb.{u_1} {K : ℕ} {Ω : Type u_1} (A : ℕ → Ω → Fin K)
  (R : ℕ → Ω → ℝ) (l u σ2 δ : ℝ) (a : Fin K) (n : ℕ) (ω : Ω) : ℝ
Bandits.ClippedUCB.ucb.{u_1} {K : ℕ}
  {Ω : Type u_1} (A : ℕ → Ω → Fin K)
  (R : ℕ → Ω → ℝ) (l u σ2 δ : ℝ)
  (a : Fin K) (n : ℕ) (ω : Ω) : ℝ

Code

-- Clipped upper confidence bound of arm `a` at time `n` on outcome `ω`.
-- * If the arm's pull count is zero, the value is the upper clip `u`.
-- * Otherwise it is the empirical mean plus the bonus
--   `√(2 * σ2 * log (1 / δ) / pullCount A a n ω)`, passed through `max l (min u ·)`
--   (so it lies in `[l, u]` whenever `l ≤ u`).
noncomputable
def ucb (A : ℕ → Ω → Fin K) (R : ℕ → Ω → ℝ) (l u σ2 δ : ℝ) (a : Fin K) (n : ℕ) (ω : Ω) : ℝ :=
  if pullCount A a n ω = 0 then u
  else max l (min u (empMean A R a n ω + √(2 * σ2 * Real.log (1 / δ) / (pullCount A a n ω))))

Body uses (2)

Used by (12)

Actions: Source · Open Issue

`ucb_zero`🔗

Lemma Bandits.ClippedUCB.ucb_zero

Details

No docstring.

theorem

Bandits.ClippedUCB.ucb_zero.{u_1} {K : ℕ} {l u σ2 δ : ℝ} {Ω : Type u_1}
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ} {a : Fin K} {ω : Ω} :
  ucb A R l u σ2 δ a 0 ω = u
Bandits.ClippedUCB.ucb_zero.{u_1} {K : ℕ}
  {l u σ2 δ : ℝ} {Ω : Type u_1}
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  {a : Fin K} {ω : Ω} :
  ucb A R l u σ2 δ a 0 ω = u

Code

/-- At time `0` the clipped UCB of every arm equals the upper clip value `u`. -/
lemma ucb_zero {a : Fin K} {ω : Ω} : ucb A R l u σ2 δ a 0 ω = u

Type uses (1)

ucb

Body uses (3)

Used by (1)

integral_ucb_action_eq_integral_ucb_bestAction

Actions: Source · Open Issue

Proof

by
  -- Unfold `ucb`; `simp` then closes the goal (presumably because the pull count at
  -- time `0` simplifies to `0`, selecting the `then u` branch — the simp lemma is not shown here).
  simp [ucb]

`ucb_mem_Icc`🔗

Lemma Bandits.ClippedUCB.ucb_mem_Icc

Details

No docstring.

theorem

Bandits.ClippedUCB.ucb_mem_Icc.{u_1} {K : ℕ} {l u σ2 δ : ℝ}
  {Ω : Type u_1} {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ} (h : l ≤ u)
  {a : Fin K} {n : ℕ} {ω : Ω} : ucb A R l u σ2 δ a n ω ∈ Set.Icc l u
Bandits.ClippedUCB.ucb_mem_Icc.{u_1}
  {K : ℕ} {l u σ2 δ : ℝ} {Ω : Type u_1}
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  (h : l ≤ u) {a : Fin K} {n : ℕ}
  {ω : Ω} :
  ucb A R l u σ2 δ a n ω ∈ Set.Icc l u

Code

/-- If `l ≤ u`, the clipped UCB of any arm at any time lies in the interval `Set.Icc l u`. -/
lemma ucb_mem_Icc (h : l ≤ u) {a : Fin K} {n : ℕ} {ω : Ω} :
    ucb A R l u σ2 δ a n ω ∈ Set.Icc l u

Type uses (1)

ucb

Body uses (2)

Used by (1)

sum_ucb_sub_mean_le

Actions: Source · Open Issue

Proof

by
  -- Expose the `if … then u else max l (min u …)` shape of the definition.
  unfold ucb
  -- Both branches are order facts about `max`/`min` given `h : l ≤ u`; `grind` discharges them.
  grind

`measurable_ucb`🔗

Lemma Bandits.ClippedUCB.measurable_ucb

Details

No docstring.

theorem

Bandits.ClippedUCB.measurable_ucb.{u_1} {K : ℕ} {l u σ2 δ : ℝ}
  {Ω : Type u_1} {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ} [MeasurableSpace Ω]
  {a : Fin K} {n : ℕ} (hA : ∀ (t : ℕ), Measurable (A t))
  (hR : ∀ (t : ℕ), Measurable (R t)) : Measurable (ucb A R l u σ2 δ a n)
Bandits.ClippedUCB.measurable_ucb.{u_1}
  {K : ℕ} {l u σ2 δ : ℝ} {Ω : Type u_1}
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  [MeasurableSpace Ω] {a : Fin K} {n : ℕ}
  (hA : ∀ (t : ℕ), Measurable (A t))
  (hR : ∀ (t : ℕ), Measurable (R t)) :
  Measurable (ucb A R l u σ2 δ a n)

Code

/-- For a fixed arm `a` and time `n`, the map `ω ↦ ucb A R l u σ2 δ a n ω` is measurable,
provided every action `A t` and every reward `R t` is measurable. -/
lemma measurable_ucb [MeasurableSpace Ω] {a : Fin K} {n : ℕ} (hA : ∀ t, Measurable (A t))
    (hR : ∀ t, Measurable (R t)) : Measurable (ucb A R l u σ2 δ a n)

Type uses (1)

ucb

Body uses (4)

Used by (1)

measurable_uncurry_ucb_comp

Actions: Source · Open Issue

Proof

-- `ucb` is an `if … then … else …`: the three arguments are, in order, measurability of the
-- condition set (`pullCount … = 0`), of the `then` branch, and of the `else` branch.
Measurable.ite (by measurability) (by fun_prop) (by fun_prop)

`measurable_uncurry_ucb_comp`🔗

Lemma Bandits.ClippedUCB.measurable_uncurry_ucb_comp

Details

No docstring.

theorem

Bandits.ClippedUCB.measurable_uncurry_ucb_comp.{u_1} {K : ℕ}
  {l u σ2 δ : ℝ} {Ω : Type u_1} {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  [MeasurableSpace Ω] (hA : ∀ (t : ℕ), Measurable (A t))
  (hR : ∀ (t : ℕ), Measurable (R t)) {f : Ω → Fin K} (hf : Measurable f)
  {g : Ω → ℕ} (hg : Measurable g) :
  Measurable fun ω => ucb A R l u σ2 δ (f ω) (g ω) ω
Bandits.ClippedUCB.measurable_uncurry_ucb_comp.{u_1}
  {K : ℕ} {l u σ2 δ : ℝ} {Ω : Type u_1}
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  [MeasurableSpace Ω]
  (hA : ∀ (t : ℕ), Measurable (A t))
  (hR : ∀ (t : ℕ), Measurable (R t))
  {f : Ω → Fin K} (hf : Measurable f)
  {g : Ω → ℕ} (hg : Measurable g) :
  Measurable fun ω =>
    ucb A R l u σ2 δ (f ω) (g ω) ω

Code

/-- Measurability of `ω ↦ ucb A R l u σ2 δ (f ω) (g ω) ω` when the arm `f ω` and the time
index `g ω` are themselves measurable functions of `ω`, and all `A t`, `R t` are measurable. -/
lemma measurable_uncurry_ucb_comp [MeasurableSpace Ω] (hA : ∀ t, Measurable (A t))
    (hR : ∀ t, Measurable (R t)) {f : Ω → Fin K} (hf : Measurable f) {g : Ω → ℕ}
    (hg : Measurable g) : Measurable (fun ω ↦ ucb A R l u σ2 δ (f ω) (g ω) ω)

Type uses (1)

ucb

Body uses (1)

measurable_ucb

Used by (1)

integrable_uncurry_ucb_comp

Actions: Source · Open Issue

Proof

by
  -- Step 1: factor the map through `ω ↦ (f ω, ω)` so the arm becomes a product coordinate.
  change Measurable ((fun aω ↦ ucb A R l u σ2 δ aω.1 (g aω.2) aω.2) ∘ fun ω ↦ (f ω, ω))
  apply Measurable.comp _ (by fun_prop)
  -- Measurability on the product reduces to measurability for each fixed arm `a`.
  apply measurable_from_prod_countable_right
  intro a
  -- Step 2: same trick for the time index, factoring through `ω ↦ (g ω, ω)`.
  change Measurable ((fun tω ↦ ucb A R l u σ2 δ a tω.1 tω.2) ∘ fun ω ↦ (g ω, ω))
  apply Measurable.comp _ (by fun_prop)
  -- For each fixed time the claim is exactly `measurable_ucb`.
  exact measurable_from_prod_countable_right (fun _ ↦ measurable_ucb hA hR)

`integrable_uncurry_ucb_comp`🔗

Lemma Bandits.ClippedUCB.integrable_uncurry_ucb_comp

Details

No docstring.

theorem

Bandits.ClippedUCB.integrable_uncurry_ucb_comp.{u_1} {K : ℕ}
  {l u σ2 δ : ℝ} {Ω : Type u_1} {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  [MeasurableSpace Ω] (hA : ∀ (t : ℕ), Measurable (A t))
  (hR : ∀ (t : ℕ), Measurable (R t)) {f : Ω → Fin K} (hf : Measurable f)
  {g : Ω → ℕ} (hg : Measurable g) {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsFiniteMeasure P] :
  MeasureTheory.Integrable (fun ω => ucb A R l u σ2 δ (f ω) (g ω) ω) P
Bandits.ClippedUCB.integrable_uncurry_ucb_comp.{u_1}
  {K : ℕ} {l u σ2 δ : ℝ} {Ω : Type u_1}
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  [MeasurableSpace Ω]
  (hA : ∀ (t : ℕ), Measurable (A t))
  (hR : ∀ (t : ℕ), Measurable (R t))
  {f : Ω → Fin K} (hf : Measurable f)
  {g : Ω → ℕ} (hg : Measurable g)
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsFiniteMeasure P] :
  MeasureTheory.Integrable
    (fun ω =>
      ucb A R l u σ2 δ (f ω) (g ω) ω)
    P

Code

/-- For a finite measure `P`, the map `ω ↦ ucb A R l u σ2 δ (f ω) (g ω) ω` is integrable
whenever `f`, `g`, all `A t` and all `R t` are measurable. No ordering of `l` and `u` is
assumed. -/
lemma integrable_uncurry_ucb_comp [MeasurableSpace Ω] (hA : ∀ t, Measurable (A t))
    (hR : ∀ t, Measurable (R t)) {f : Ω → Fin K} (hf : Measurable f) {g : Ω → ℕ}
    (hg : Measurable g) {P : Measure Ω} [IsFiniteMeasure P] :
    Integrable (fun ω ↦ ucb A R l u σ2 δ (f ω) (g ω) ω) P

Type uses (1)

ucb

Body uses (3)

Used by (3)

Actions: Source · Open Issue

Proof

by
  -- Integrable = a.e.-strongly measurable + finite integral; the first part comes from
  -- `measurable_uncurry_ucb_comp`.
  refine ⟨(measurable_uncurry_ucb_comp hA hR hf hg).aestronglyMeasurable, ?_⟩
  -- On a finite measure a uniformly bounded function has finite integral; the bound used is
  -- `max |l| |u|`.
  apply HasFiniteIntegral.of_bounded (C := max |l| |u|)
  -- The bound is proved pointwise, for every `ω`.
  filter_upwards with ω
  rw [Real.norm_eq_abs]
  -- After unfolding, the goal is an order fact about `|if … then u else max l (min u …)|`.
  unfold ucb
  grind

`ucb'`🔗

Definition Bandits.ClippedUCB.ucb'

Details

Clipped upper confidence bound (history-based version).

def

Bandits.ClippedUCB.ucb' {K : ℕ} (n : ℕ)
  (h : ↥(Finset.Iic n) → Fin K × ℝ) (l u σ2 δ : ℝ) (a : Fin K) : ℝ
Bandits.ClippedUCB.ucb' {K : ℕ} (n : ℕ)
  (h : ↥(Finset.Iic n) → Fin K × ℝ)
  (l u σ2 δ : ℝ) (a : Fin K) : ℝ

Code

-- History-based clipped upper confidence bound of arm `a`, computed from a history
-- `h : Iic n → Fin K × ℝ` (one `(action, reward)` pair per index up to `n`).
-- Mirrors `ucb`: returns `u` when `pullCount' n h a = 0`, otherwise the history-based empirical
-- mean plus `√(2 * σ2 * log (1 / δ) / pullCount' n h a)`, passed through `max l (min u ·)`.
noncomputable
def ucb' (n : ℕ) (h : Iic n → Fin K × ℝ) (l u σ2 δ : ℝ) (a : Fin K) : ℝ :=
  if pullCount' n h a = 0 then u
  else max l (min u (empMean' n h a + √(2 * σ2 * Real.log (1 / δ) / (pullCount' n h a))))

Body uses (2)

Used by (3)

Actions: Source · Open Issue

`measurable_uncurry_ucb'`🔗

Lemma Bandits.ClippedUCB.measurable_uncurry_ucb'

Details

No docstring.

theorem

Bandits.ClippedUCB.measurable_uncurry_ucb' {K : ℕ} {l u σ2 δ : ℝ}
  {n : ℕ} :
  Measurable fun p => ucb' n (Prod.fst p) l u σ2 δ (Prod.snd p)
Bandits.ClippedUCB.measurable_uncurry_ucb'
  {K : ℕ} {l u σ2 δ : ℝ} {n : ℕ} :
  Measurable fun p =>
    ucb' n (Prod.fst p) l u σ2 δ
      (Prod.snd p)

Code

/-- The history-based clipped UCB `ucb'` is jointly measurable as a function of the pair
`(history, arm)`. -/
lemma measurable_uncurry_ucb' {n : ℕ} :
    Measurable (fun p : (Iic n → Fin K × ℝ) × Fin K ↦ ucb' n p.1 l u σ2 δ p.2)

Type uses (1)

ucb'

Body uses (5)

Used by (1)

integral_ucb_action_eq_integral_ucb_bestAction

Actions: Source · Open Issue

Proof

-- `ucb'` is an `if … then … else …`: the three arguments are, in order, measurability of the
-- condition set (`pullCount' … = 0`), of the `then` branch, and of the `else` branch.
Measurable.ite (by measurability) (by fun_prop) (by fun_prop)

`ucb_succ_eq_ucb'`🔗

Lemma Bandits.ClippedUCB.ucb_succ_eq_ucb'

Details

No docstring.

theorem

Bandits.ClippedUCB.ucb_succ_eq_ucb'.{u_1} {K : ℕ} {l u σ2 δ : ℝ}
  {Ω : Type u_1} {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ} {a : Fin K} {n : ℕ}
  {ω : Ω} :
  ucb A R l u σ2 δ a (n + 1) ω =
    ucb' n (Learning.history A R n ω) l u σ2 δ a
Bandits.ClippedUCB.ucb_succ_eq_ucb'.{u_1}
  {K : ℕ} {l u σ2 δ : ℝ} {Ω : Type u_1}
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  {a : Fin K} {n : ℕ} {ω : Ω} :
  ucb A R l u σ2 δ a (n + 1) ω =
    ucb' n (Learning.history A R n ω) l u
      σ2 δ a

Code

/-- The process-based `ucb` at time `n + 1` coincides with the history-based `ucb'` evaluated
on the history `history A R n ω`. -/
lemma ucb_succ_eq_ucb' {a : Fin K} {n : ℕ} {ω : Ω} :
    ucb A R l u σ2 δ a (n + 1) ω = ucb' n (history A R n ω) l u σ2 δ a

Type uses (3)

Body uses (6)

Used by (1)

integral_ucb_action_eq_integral_ucb_bestAction

Actions: Source · Open Issue

Proof

by
  -- Pull counts agree between the process view at time `n + 1` and the history view at `n`.
  have hp : pullCount A a (n + 1) ω = pullCount' n (history A R n ω) a :=
    pullCount_add_one_eq_pullCount'
  -- Likewise for the empirical means.
  have he : empMean A R a (n + 1) ω = empMean' n (history A R n ω) a :=
    empMean_add_one_eq_empMean'
  -- Unfold both definitions and rewrite with the two identities; the sides then match.
  rw [ucb, ucb', hp, he]

`sum_ucb_sub_mean_le`🔗

Lemma Bandits.ClippedUCB.sum_ucb_sub_mean_le

Details

No docstring.

theorem

Bandits.ClippedUCB.sum_ucb_sub_mean_le.{u_1} {K : ℕ} {l u σ2 δ : ℝ}
  {Ω : Type u_1} {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ} {n : ℕ} {ω : Ω}
  (μ : Fin K → ℝ) (hμ : ∀ (a : Fin K), μ a ∈ Set.Icc l u) (hi : l ≤ u)
  (hc :
    ∀ s < n,
      Learning.pullCount A (A s ω) s ω ≠ 0 →
        Learning.empMean A R (A s ω) s ω - μ (A s ω) <
          √(2 * σ2 * Real.log (1 / δ) /
              ↑(Learning.pullCount A (A s ω) s ω))) :
  ∑ s ∈ Finset.range n, (ucb A R l u σ2 δ (A s ω) s ω - μ (A s ω)) ≤
    (u - l) * ↑K + 4 * √(2 * σ2 * Real.log (1 / δ) * ↑K * ↑n)
Bandits.ClippedUCB.sum_ucb_sub_mean_le.{u_1}
  {K : ℕ} {l u σ2 δ : ℝ} {Ω : Type u_1}
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  {n : ℕ} {ω : Ω} (μ : Fin K → ℝ)
  (hμ : ∀ (a : Fin K), μ a ∈ Set.Icc l u)
  (hi : l ≤ u)
  (hc :
    ∀ s < n,
      Learning.pullCount A (A s ω) s ω ≠
          0 →
        Learning.empMean A R (A s ω) s ω -
            μ (A s ω) <
          √(2 * σ2 * Real.log (1 / δ) /
              ↑(Learning.pullCount A
                  (A s ω) s ω))) :
  ∑ s ∈ Finset.range n,
      (ucb A R l u σ2 δ (A s ω) s ω -
        μ (A s ω)) ≤
    (u - l) * ↑K +
      4 *
        √(2 * σ2 * Real.log (1 / δ) * ↑K *
            ↑n)

Code

/-- Pathwise (fixed `ω`) bound on the cumulative overestimation of the played arms.
Assume the values `μ a` all lie in `[l, u]`, `l ≤ u`, and that for every `s < n` at which the
played arm `A s ω` has nonzero pull count, its empirical mean exceeds `μ (A s ω)` by strictly
less than the bonus `√(2 * σ2 * log (1 / δ) / pullCount)`. Then
`∑ s < n, (ucb … (A s ω) s ω - μ (A s ω)) ≤ (u - l) * K + 4 * √(2 * σ2 * log (1 / δ) * K * n)`. -/
lemma sum_ucb_sub_mean_le {n : ℕ} {ω : Ω} (μ : Fin K → ℝ) (hμ : ∀ a, μ a ∈ Set.Icc l u) (hi : l ≤ u)
    (hc : ∀ s < n, pullCount A (A s ω) s ω ≠ 0 → empMean A R (A s ω) s ω - μ (A s ω)
      < √(2 * σ2 * Real.log (1 / δ) / (pullCount A (A s ω) s ω))) :
    ∑ s ∈ range n, (ucb A R l u σ2 δ (A s ω) s ω - μ (A s ω))
      ≤ (u - l) * K + 4 * √(2 * σ2 * Real.log (1 / δ) * K * n)

Type uses (3)

Body uses (3)

Used by (1)

integral_sum_range_ucb_action_sub_actionMean_action_le

Actions: Source · Open Issue

Proof

by
  -- Split the time steps by whether the played arm has been pulled before (`S₁`) or not (`S₀`).
  let S₀ := {s ∈ range n | pullCount A (A s ω) s ω = 0}
  let S₁ := {s ∈ range n | pullCount A (A s ω) s ω ≠ 0}
  have hu : S₀ ∪ S₁ = range n := filter_union_filter_not_eq _ _
  have hd : Disjoint S₀ S₁ := disjoint_filter_filter_not _ _ _
  rw [← hu, sum_union hd]
  -- Bound the two partial sums separately: `S₀` gives `(u - l) * K`, `S₁` gives the `√` term.
  gcongr
  -- Part 1 (steps in `S₀`): each term is at most `u - l` because both `ucb` and `μ` lie in `[l, u]`.
  · calc ∑ s ∈ S₀, (ucb A R l u σ2 δ (A s ω) s ω - μ (A s ω))
        ≤ ∑ s ∈ S₀, (u - l) :=
          have (s : ℕ) : ucb A R l u σ2 δ (A s ω) s ω ∈ Set.Icc l u := ucb_mem_Icc hi
          sum_le_sum (by grind)
      -- Rewrite the filtered sum as a sum over all of `range n` with an indicator.
      _ = ∑ s ∈ range n, if pullCount A (A s ω) s ω = 0 then (u - l) else 0 := by
          rw [sum_filter]
      -- Re-index by arm and by pull index `j` of that arm (`sum_comp_pullCount`).
      _ = ∑ a, ∑ j ∈ range (pullCount A a n ω), if j = 0 then (u - l) else 0 :=
          sum_comp_pullCount (fun j => if j = 0 then (u - l) else 0) n ω
      -- Each arm contributes at most one `(u - l)` term (the one with `j = 0`).
      _ ≤ ∑ a, (u - l) := by
          gcongr
          rw [sum_ite_eq']
          grind
      _ = (u - l) * K := by
          rw [Fin.sum_const, nsmul_eq_mul, mul_comm]
  -- Part 2 (steps in `S₁`): unfolding `ucb` and using `hc`/`hμ`, each term is at most twice the bonus.
  · calc ∑ s ∈ S₁, (ucb A R l u σ2 δ (A s ω) s ω - μ (A s ω))
          ≤ ∑ s ∈ S₁, 2 * √(2 * σ2 * Real.log (1 / δ) / (pullCount A (A s ω) s ω)) := by
            gcongr with s hs
            unfold ucb
            have : 0 ≤ √(2 * σ2 * Real.log (1 / δ) / (pullCount A (A s ω) s ω)) := by positivity
            grind
        -- Enlarge the index set back to `range n`; the added terms are nonnegative.
        _ ≤ ∑ s ∈ range n, 2 * √(2 * σ2 * Real.log (1 / δ) / (pullCount A (A s ω) s ω)) :=
            sum_le_sum_of_subset_of_nonneg (filter_subset _ _) (fun _ _ _ => by positivity)
        -- Pull the constant factor `2 * √(2 * σ2 * log (1 / δ))` out of the sum.
        _ = 2 * √(2 * σ2 * Real.log (1 / δ)) * ∑ s ∈ range n, (1 / √(pullCount A (A s ω) s ω)) := by
            rw [mul_sum]
            congr with s
            rw [Real.sqrt_div' _ (by positivity)]
            ring
        -- Re-index by arm and by pull index `j` of that arm.
        _ = 2 * √(2 * σ2 * Real.log (1 / δ)) *
              ∑ a, ∑ j ∈ range (pullCount A a n ω), (1 / √j) := by
            rw [sum_comp_pullCount (fun j => 1 / √j)]
        -- Bound each inner sum `∑ 1 / √j` by `2 * √(pull count)` via `sum_inv_sqrt_le`; the extra
        -- last term split off by `sum_range_succ` is nonnegative and is simply dropped.
        _ ≤ 2 * √(2 * σ2 * Real.log (1 / δ)) * (2 * ∑ a, √(pullCount A a n ω)) := by -- loose
            rw [mul_sum _ _ 2]
            gcongr with a
            by_cases ha : pullCount A a n ω = 0
            · simp [ha]
            · have hi := sum_inv_sqrt_le (Nat.pos_of_ne_zero ha)
              rw [sum_range_succ] at hi
              have : 0 ≤ 1 / √(pullCount A a n ω) := by positivity
              linarith
        -- Sum of square roots over the `K` arms is at most `√(K * total)` (`sum_sqrt_le`).
        _ ≤ 2 * √(2 * σ2 * Real.log (1 / δ)) * (2 * √(K * ∑ a, (pullCount A a n ω))) := by
            gcongr
            have h := sum_sqrt_le Finset.univ (fun a => Nat.cast_nonneg (pullCount A a n ω))
            rw [Finset.card_fin] at h
            exact_mod_cast h
        -- The pull counts of all arms at time `n` sum to `n` (`sum_pullCount`).
        _ = 2 * √(2 * σ2 * Real.log (1 / δ)) * (2 * √(K * n)) := by
            congr
            exact sum_pullCount (ω := ω)
        -- Merge the two square roots into one and collect the constant `4`.
        _ = 4 * √(2 * σ2 * Real.log (1 / δ) * K * n) := by
            ring_nf
            rw [← Real.sqrt_mul' _ (by positivity)]
            ring_nf

`integral_sum_range_actionMean_bestAction_sub_ucb_bestAction_le`🔗

Lemma Bandits.ClippedUCB.integral_sum_range_actionMean_bestAction_sub_ucb_bestAction_le

Details

No docstring.

theorem

Bandits.ClippedUCB.integral_sum_range_actionMean_bestAction_sub_ucb_bestAction_le.{u_1,
    u_2}
  {K : ℕ} {l u σ2 δ : ℝ} {Ω : Type u_1} {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} [Nonempty (Fin K)] [MeasurableSpace Ω] {𝓔 : Type u_2}
  [MeasurableSpace 𝓔] {E : Ω → 𝓔} {Q : MeasureTheory.Measure 𝓔}
  {κ : ProbabilityTheory.Kernel (𝓔 × Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel κ] {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm (Fin K) ℝ}
  (h : Learning.IsBayesAlgEnvSeq Q κ alg E A R P) (hlu : l ≤ u)
  (hm : ∀ (e : 𝓔) (a : Fin K), ∫ (x : ℝ), id x ∂κ (e, a) ∈ Set.Icc l u)
  (hσ2 : 0 < σ2)
  (hs :
    ∀ (e : 𝓔) (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂κ (e, a)) ⟨σ2, ⋯⟩ (κ (e, a)))
  (hδ : 0 < δ) (n : ℕ) :
  ∫ (x : Ω),
      (fun ω =>
          ∑ t ∈ Finset.range n,
            (Learning.IsBayesAlgEnvSeq.actionMean κ E
                (Learning.IsBayesAlgEnvSeq.bestAction κ E ω) ω -
              ucb A R l u σ2 δ
                (Learning.IsBayesAlgEnvSeq.bestAction κ E ω) t ω))
        x ∂P ≤
    (u - l) * (↑n - 1) * ↑n * δ
Bandits.ClippedUCB.integral_sum_range_actionMean_bestAction_sub_ucb_bestAction_le.{u_1,
    u_2}
  {K : ℕ} {l u σ2 δ : ℝ} {Ω : Type u_1}
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  [Nonempty (Fin K)] [MeasurableSpace Ω]
  {𝓔 : Type u_2} [MeasurableSpace 𝓔]
  {E : Ω → 𝓔}
  {Q : MeasureTheory.Measure 𝓔}
  {κ :
    ProbabilityTheory.Kernel (𝓔 × Fin K)
      ℝ}
  [ProbabilityTheory.IsMarkovKernel κ]
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm (Fin K) ℝ}
  (h :
    Learning.IsBayesAlgEnvSeq Q κ alg E A
      R P)
  (hlu : l ≤ u)
  (hm :
    ∀ (e : 𝓔) (a : Fin K),
      ∫ (x : ℝ), id x ∂κ (e, a) ∈
        Set.Icc l u)
  (hσ2 : 0 < σ2)
  (hs :
    ∀ (e : 𝓔) (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂κ (e, a))
        ⟨σ2, ⋯⟩ (κ (e, a)))
  (hδ : 0 < δ) (n : ℕ) :
  ∫ (x : Ω),
      (fun ω =>
          ∑ t ∈ Finset.range n,
            (Learning.IsBayesAlgEnvSeq.actionMean
                κ E
                (Learning.IsBayesAlgEnvSeq.bestAction
                  κ E ω)
                ω -
              ucb A R l u σ2 δ
                (Learning.IsBayesAlgEnvSeq.bestAction
                  κ E ω)
                t ω))
        x ∂P ≤
    (u - l) * (↑n - 1) * ↑n * δ

Code

/-- Expected cumulative underestimation of the best action by the clipped UCB.
Under a Bayesian algorithm/environment sequence `h`, with `l ≤ u`, all kernel means
`(κ (e, a))[id]` in `[l, u]`, `0 < σ2`, centered rewards sub-Gaussian with parameter `σ2`, and
`0 < δ`, the expectation of
`∑ t < n, (actionMean of the best action - ucb of the best action at time t)`
is at most `(u - l) * (n - 1) * n * δ`. -/
lemma integral_sum_range_actionMean_bestAction_sub_ucb_bestAction_le {alg : Algorithm (Fin K) ℝ}
    (h : IsBayesAlgEnvSeq Q κ alg E A R P)
    (hlu : l ≤ u) (hm : ∀ e a, (κ (e, a))[id] ∈ (Set.Icc l u)) (hσ2 : 0 < σ2)
    (hs : ∀ e a, HasSubgaussianMGF (fun x ↦ x - (κ (e, a))[id]) ⟨σ2, hσ2.le⟩ (κ (e, a)))
    (hδ : 0 < δ) (n : ℕ) :
    P[fun ω ↦ ∑ t ∈ range n,
      (actionMean κ E (bestAction κ E ω) ω - ucb A R l u σ2 δ (bestAction κ E ω) t ω)] ≤
        (u - l) * (n - 1) * n * δ

Type uses (5)

Body uses (9)

Used by (1)

integral_regret_le

Actions: Source · Open Issue

Proof

by
  -- The case `n = 0` is an empty sum and is closed by `simp`.
  by_cases hn : n = 0
  · simp [hn]
  -- `F`: the "bad" event that at some time `t < n` the best action has nonzero pull count and
  -- its empirical mean lies below its action mean by at least the confidence bonus.
  let F := {ω | ∃ t < n, pullCount A (bestAction κ E ω) t ω ≠ 0 ∧
    empMean A R (bestAction κ E ω) t ω - actionMean κ E (bestAction κ E ω) ω ≤
      -√(2 * σ2 * Real.log (1 / δ) / (pullCount A (bestAction κ E ω) t ω))}
  -- Bring the measurability facts into context for `measurability` / `fun_prop`.
  have := h.measurable_action
  have := h.measurable_param
  have := h.measurable_feedback
  have hF : MeasurableSet F := by measurability
  have : Integrable (fun ω ↦ actionMean κ E (bestAction κ E ω) ω) P :=
      IsBayesAlgEnvSeq.integrable_uncurry_actionMean_comp (by fun_prop) (by fun_prop) hm
  calc
    -- Step 1: split the integral over `F` and `Fᶜ`; on `Fᶜ` every summand is nonpositive,
    -- so that part can be dropped.
    _ ≤ ∫ ω in F, ∑ t ∈ range n,
          (actionMean κ E (bestAction κ E ω) ω - ucb A R l u σ2 δ (bestAction κ E ω) t ω) ∂P := by
        rw [← integral_add_compl hF (by fun_prop)]
        apply add_le_of_nonpos_right
        apply setIntegral_nonpos hF.compl
        intro ω hω
        apply sum_nonpos
        intro t ht
        rw [Set.mem_compl_iff, Set.mem_setOf_eq] at hω
        push Not at hω
        grind [hω t (mem_range.mp ht), ucb, actionMean]
    -- Step 2: on `F`, bound each summand by `u - l`.
    _ ≤ ∫ ω in F, ∑ t ∈ range n, (u - l) ∂P := by
        apply setIntegral_mono_on (Integrable.integrableOn (by fun_prop))
          (Integrable.integrableOn (by fun_prop)) hF
        intro ω hω
        apply sum_le_sum
        intro t ht
        grind [actionMean, ucb]
    -- Step 3: the integrand is now constant, so the integral is `P(F)` times that constant.
    _ = P.real F * (n * (u - l)) := by
        simp_rw [sum_const, card_range, nsmul_eq_mul, setIntegral_const, smul_eq_mul]
    -- Step 4: bound `P(F)` by `(n - 1) * δ` via the concentration lemma on `h`.
    _ ≤ ((n - 1) * δ) * (n * (u - l)) := by
        gcongr
        have : (1 : ℝ) ≤ n := by simp [Nat.one_le_iff_ne_zero, hn]
        apply ENNReal.toReal_le_of_le_ofReal (by nlinarith)
        exact h.prob_empMean_bestAction_sub_actionMean_le_le hσ2 hs hδ n
    -- Step 5: rearrange the product into the stated form.
    _ = _ := by
      ring

`integral_sum_range_ucb_action_sub_actionMean_action_le`🔗

Lemma Bandits.ClippedUCB.integral_sum_range_ucb_action_sub_actionMean_action_le

Details

No docstring.

theorem

Bandits.ClippedUCB.integral_sum_range_ucb_action_sub_actionMean_action_le.{u_1,
    u_2}
  {K : ℕ} {l u σ2 δ : ℝ} {Ω : Type u_1} {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} [Nonempty (Fin K)] [MeasurableSpace Ω] {𝓔 : Type u_2}
  [MeasurableSpace 𝓔] {E : Ω → 𝓔} {Q : MeasureTheory.Measure 𝓔}
  {κ : ProbabilityTheory.Kernel (𝓔 × Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel κ] {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm (Fin K) ℝ}
  (h : Learning.IsBayesAlgEnvSeq Q κ alg E A R P) (hlu : l ≤ u)
  (hm : ∀ (e : 𝓔) (a : Fin K), ∫ (x : ℝ), id x ∂κ (e, a) ∈ Set.Icc l u)
  (hσ2 : 0 < σ2)
  (hs :
    ∀ (e : 𝓔) (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂κ (e, a)) ⟨σ2, ⋯⟩ (κ (e, a)))
  (hδ : 0 < δ) (n : ℕ) :
  ∫ (x : Ω),
      (fun ω =>
          ∑ t ∈ Finset.range n,
            (ucb A R l u σ2 δ (A t ω) t ω -
              Learning.IsBayesAlgEnvSeq.actionMean κ E (A t ω) ω))
        x ∂P ≤
    (u - l) * ↑K + 4 * √(2 * σ2 * Real.log (1 / δ) * ↑K * ↑n) +
      (u - l) * ↑K * (↑n - 1) * ↑n * δ
Bandits.ClippedUCB.integral_sum_range_ucb_action_sub_actionMean_action_le.{u_1,
    u_2}
  {K : ℕ} {l u σ2 δ : ℝ} {Ω : Type u_1}
  {A : ℕ → Ω → Fin K} {R : ℕ → Ω → ℝ}
  [Nonempty (Fin K)] [MeasurableSpace Ω]
  {𝓔 : Type u_2} [MeasurableSpace 𝓔]
  {E : Ω → 𝓔}
  {Q : MeasureTheory.Measure 𝓔}
  {κ :
    ProbabilityTheory.Kernel (𝓔 × Fin K)
      ℝ}
  [ProbabilityTheory.IsMarkovKernel κ]
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm (Fin K) ℝ}
  (h :
    Learning.IsBayesAlgEnvSeq Q κ alg E A
      R P)
  (hlu : l ≤ u)
  (hm :
    ∀ (e : 𝓔) (a : Fin K),
      ∫ (x : ℝ), id x ∂κ (e, a) ∈
        Set.Icc l u)
  (hσ2 : 0 < σ2)
  (hs :
    ∀ (e : 𝓔) (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂κ (e, a))
        ⟨σ2, ⋯⟩ (κ (e, a)))
  (hδ : 0 < δ) (n : ℕ) :
  ∫ (x : Ω),
      (fun ω =>
          ∑ t ∈ Finset.range n,
            (ucb A R l u σ2 δ (A t ω) t
                ω -
              Learning.IsBayesAlgEnvSeq.actionMean
                κ E (A t ω) ω))
        x ∂P ≤
    (u - l) * ↑K +
        4 *
          √(2 * σ2 * Real.log (1 / δ) *
                ↑K *
              ↑n) +
      (u - l) * ↑K * (↑n - 1) * ↑n * δ

Code

/-- Expected cumulative overestimation of the played actions by the clipped UCB.
Under a Bayesian algorithm/environment sequence `h`, with `l ≤ u`, all kernel means
`(κ (e, a))[id]` in `[l, u]`, `0 < σ2`, centered rewards sub-Gaussian with parameter `σ2`, and
`0 < δ`, the expectation of `∑ t < n, (ucb of A t at time t - actionMean of A t)` is at most
`(u - l) * K + 4 * √(2 * σ2 * log (1 / δ) * K * n) + (u - l) * K * (n - 1) * n * δ`. -/
lemma integral_sum_range_ucb_action_sub_actionMean_action_le {alg : Algorithm (Fin K) ℝ}
    (h : IsBayesAlgEnvSeq Q κ alg E A R P)
    (hlu : l ≤ u) (hm : ∀ e a, (κ (e, a))[id] ∈ (Set.Icc l u)) (hσ2 : 0 < σ2)
    (hs : ∀ e a, HasSubgaussianMGF (fun x ↦ x - (κ (e, a))[id]) ⟨σ2, hσ2.le⟩ (κ (e, a)))
    (hδ : 0 < δ) (n : ℕ) :
    P[fun ω ↦ ∑ t ∈ range n, (ucb A R l u σ2 δ (A t ω) t ω - actionMean κ E (A t ω) ω)] ≤
      (u - l) * K + 4 * √(2 * σ2 * Real.log (1 / δ) * K * n) + (u - l) * K * (n - 1) * n * δ

Type uses (4)

Body uses (9)

Used by (1)

integral_regret_le

Actions: Source · Open Issue

Proof

by
  -- The case `n = 0`: the left side is an empty sum; the right side is nonnegative by `hlu`.
  by_cases hn : n = 0
  · simp [hn, hlu, mul_nonneg]
  -- `F`: the "bad" event that for some time `t < n` and some arm `a` with nonzero pull count,
  -- the empirical mean exceeds the action mean by at least the confidence bonus.
  let F := {ω | ∃ t < n, ∃ a, pullCount A a t ω ≠ 0 ∧
    √(2 * σ2 * Real.log (1 / δ) / pullCount A a t ω) ≤ empMean A R a t ω - actionMean κ E a ω}
  -- Bring the measurability facts into context for `measurability` / `fun_prop`.
  have := h.measurable_action
  have := h.measurable_param
  have := h.measurable_feedback
  have hF : MeasurableSet F := by measurability
  have : ∀ t, Integrable (fun ω ↦ actionMean κ E (A t ω) ω) P :=
    fun t ↦ IsBayesAlgEnvSeq.integrable_uncurry_actionMean_comp (by fun_prop) (by fun_prop) hm
  calc
    -- Step 1: split the integral over `F` and `Fᶜ` and bound the integrand on each piece.
    _ ≤ (∫ ω in F, ∑ t ∈ range n, (u - l) ∂P) +
          ∫ ω in Fᶜ, (u - l) * K + 4 * √(2 * σ2 * Real.log (1 / δ) * K * n) ∂P := by
        rw [← integral_add_compl hF (by fun_prop)]
        apply add_le_add
        -- On `F`: each summand is at most `u - l`.
        · apply setIntegral_mono_on (Integrable.integrableOn (by fun_prop))
            (Integrable.integrableOn (by fun_prop)) hF
          intro ω hω
          apply sum_le_sum
          intro t ht
          grind [ucb, actionMean]
        -- On `Fᶜ`: the negation of `F` supplies the hypothesis `hc` of the pathwise bound
        -- `sum_ucb_sub_mean_le`, applied with `μ a := (κ (E ω, a))[id]`.
        · apply setIntegral_mono_on (Integrable.integrableOn (by fun_prop))
            (Integrable.integrableOn (by fun_prop)) hF.compl
          intro ω hω
          rw [Set.mem_compl_iff, Set.mem_setOf_eq] at hω
          push Not at hω
          exact sum_ucb_sub_mean_le (fun a ↦ (κ (E ω, a))[id]) (hm (E ω)) hlu
            (fun t ht hpc ↦ hω t ht (A t ω) hpc)
    -- Step 2: both integrands are constants, so each integral is a probability times a constant.
    _ = P.real F * (n * (u - l)) +
          P.real Fᶜ * ((u - l) * K + 4 * √(2 * σ2 * Real.log (1 / δ) * K * n)) := by
        simp_rw [sum_const, card_range, nsmul_eq_mul, setIntegral_const, smul_eq_mul]
    -- Step 3: bound `P(F)` by `K * (n - 1) * δ` (concentration lemma on `h`) and `P(Fᶜ)` by `1`.
    _ ≤ (K * (n - 1) * δ) * (n * (u - l)) +
          1 * ((u - l) * K + 4 * √(2 * σ2 * Real.log (1 / δ) * K * n)) := by
        have : 0 ≤ u - l := sub_nonneg.2 hlu
        gcongr
        · have : (0 : ℝ) ≤ n - 1 := by simp [Nat.one_le_iff_ne_zero, hn]
          apply ENNReal.toReal_le_of_le_ofReal (by positivity)
          exact h.prob_empMean_sub_actionMean_ge_le hσ2 hs hδ n
        · exact measureReal_le_one
    -- Step 4: rearrange into the stated form.
    _ = _ := by
        ring

`integral_ucb_action_eq_integral_ucb_bestAction`🔗

Lemma Bandits.TS.integral_ucb_action_eq_integral_ucb_bestAction

Details

No docstring.

theorem

Bandits.TS.integral_ucb_action_eq_integral_ucb_bestAction.{u_1, u_2}
  {K : ℕ} [Nonempty (Fin K)] {l u σ2 δ : ℝ} {Ω : Type u_1}
  [MeasurableSpace Ω] {𝓔 : Type u_2} [MeasurableSpace 𝓔]
  [StandardBorelSpace 𝓔] [Nonempty 𝓔] {Q : MeasureTheory.Measure 𝓔}
  [MeasureTheory.IsProbabilityMeasure Q]
  {κ : ProbabilityTheory.Kernel (𝓔 × Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel κ] {E : Ω → 𝓔} {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] (hK : 0 < K)
  (h : Learning.IsBayesAlgEnvSeq Q κ (tsAlgorithm hK Q κ) E A R P)
  (n : ℕ) :
  ∫ (x : Ω), (fun ω => ClippedUCB.ucb A R l u σ2 δ (A n ω) n ω) x ∂P =
    ∫ (x : Ω),
      (fun ω =>
          ClippedUCB.ucb A R l u σ2 δ
            (Learning.IsBayesAlgEnvSeq.bestAction κ E ω) n ω)
        x ∂P
Bandits.TS.integral_ucb_action_eq_integral_ucb_bestAction.{u_1,
    u_2}
  {K : ℕ} [Nonempty (Fin K)]
  {l u σ2 δ : ℝ} {Ω : Type u_1}
  [MeasurableSpace Ω] {𝓔 : Type u_2}
  [MeasurableSpace 𝓔]
  [StandardBorelSpace 𝓔] [Nonempty 𝓔]
  {Q : MeasureTheory.Measure 𝓔}
  [MeasureTheory.IsProbabilityMeasure Q]
  {κ :
    ProbabilityTheory.Kernel (𝓔 × Fin K)
      ℝ}
  [ProbabilityTheory.IsMarkovKernel κ]
  {E : Ω → 𝓔} {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  (hK : 0 < K)
  (h :
    Learning.IsBayesAlgEnvSeq Q κ
      (tsAlgorithm hK Q κ) E A R P)
  (n : ℕ) :
  ∫ (x : Ω),
      (fun ω =>
          ClippedUCB.ucb A R l u σ2 δ
            (A n ω) n ω)
        x ∂P =
    ∫ (x : Ω),
      (fun ω =>
          ClippedUCB.ucb A R l u σ2 δ
            (Learning.IsBayesAlgEnvSeq.bestAction
              κ E ω)
            n ω)
        x ∂P

Code

/-- Under the algorithm `tsAlgorithm hK Q κ`, the expected clipped UCB of the action played at
time `n` equals the expected clipped UCB of the best action at time `n`. -/
lemma integral_ucb_action_eq_integral_ucb_bestAction (hK : 0 < K)
    (h : IsBayesAlgEnvSeq Q κ (tsAlgorithm hK Q κ) E A R P) (n : ℕ) :
    P[fun ω ↦ ucb A R l u σ2 δ (A n ω) n ω] =
      P[fun ω ↦ ucb A R l u σ2 δ (bestAction κ E ω) n ω]

Type uses (4)

Body uses (10)

Used by (1)

integral_regret_eq_add

Actions: Source · Open Issue

Proof

by
  -- Bring the measurability facts into context for `fun_prop`.
  have := h.measurable_action
  have := h.measurable_param
  have := h.measurable_feedback
  -- Time `0` is closed by `simp` (presumably via `ucb_zero`, making both sides `∫ u`).
  by_cases hn : n = 0
  · simp [hn]
  -- From here on the time index is a successor; rename it to `n + 1`.
  obtain ⟨n, rfl⟩ := Nat.exists_eq_succ_of_ne_zero hn
  -- `uc`: the history-based UCB as a function of the pair `(history, arm)`.
  let uc (ha : (Iic n → Fin K × ℝ) × Fin K) := ucb' n ha.1 l u σ2 δ ha.2
  calc
    -- Express `ucb` at time `n + 1` through `ucb'` on the history up to `n`.
    _  = P[fun ω ↦ uc (history A R n ω, A (n + 1) ω)] := by
        simp_rw [uc, ucb_succ_eq_ucb']
    -- Change of variables: integrate `uc` against the law of `(history, A (n + 1))`.
    _ = ∫ ha, uc ha ∂P.map (fun ω ↦ (history A R n ω, A (n + 1) ω)) := by
        rw [← integral_map (by fun_prop) (by fun_prop)]
    -- Key step: write both joint laws as compositions with a conditional distribution given
    -- the history, and identify the two conditional distributions via `hasCondDistrib_action`.
    _ = ∫ ha, uc ha ∂P.map (fun ω ↦ (history A R n ω, bestAction κ E ω)) := by
        rw [← compProd_map_condDistrib (by fun_prop), ← compProd_map_condDistrib (by fun_prop),
            Measure.compProd_congr (hasCondDistrib_action hK h n).condDistrib_eq]
    -- Undo the change of variables and translate `ucb'` back to `ucb`.
    _ = P[fun ω ↦ ucb A R l u σ2 δ (bestAction κ E ω) (n + 1) ω] := by
        rw [integral_map (by fun_prop) (by fun_prop)]
        simp_rw [uc, ucb_succ_eq_ucb']

`integral_regret_eq_add`🔗

Lemma Bandits.TS.integral_regret_eq_add

Details

No docstring.

theorem

Bandits.TS.integral_regret_eq_add.{u_1, u_2} {K : ℕ} [Nonempty (Fin K)]
  {l u σ2 δ : ℝ} {Ω : Type u_1} [MeasurableSpace Ω] {𝓔 : Type u_2}
  [MeasurableSpace 𝓔] [StandardBorelSpace 𝓔] [Nonempty 𝓔]
  {Q : MeasureTheory.Measure 𝓔} [MeasureTheory.IsProbabilityMeasure Q]
  {κ : ProbabilityTheory.Kernel (𝓔 × Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel κ] {E : Ω → 𝓔} {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] (hK : 0 < K)
  (h : Learning.IsBayesAlgEnvSeq Q κ (tsAlgorithm hK Q κ) E A R P)
  (hm : ∀ (e : 𝓔) (a : Fin K), ∫ (x : ℝ), id x ∂κ (e, a) ∈ Set.Icc l u)
  (n : ℕ) :
  ∫ (x : Ω), Learning.IsBayesAlgEnvSeq.regret κ E A n x ∂P =
    ∫ (x : Ω),
        (fun ω =>
            ∑ t ∈ Finset.range n,
              (Learning.IsBayesAlgEnvSeq.actionMean κ E
                  (Learning.IsBayesAlgEnvSeq.bestAction κ E ω) ω -
                ClippedUCB.ucb A R l u σ2 δ
                  (Learning.IsBayesAlgEnvSeq.bestAction κ E ω) t ω))
          x ∂P +
      ∫ (x : Ω),
        (fun ω =>
            ∑ t ∈ Finset.range n,
              (ClippedUCB.ucb A R l u σ2 δ (A t ω) t ω -
                Learning.IsBayesAlgEnvSeq.actionMean κ E (A t ω) ω))
          x ∂P
Bandits.TS.integral_regret_eq_add.{u_1,
    u_2}
  {K : ℕ} [Nonempty (Fin K)]
  {l u σ2 δ : ℝ} {Ω : Type u_1}
  [MeasurableSpace Ω] {𝓔 : Type u_2}
  [MeasurableSpace 𝓔]
  [StandardBorelSpace 𝓔] [Nonempty 𝓔]
  {Q : MeasureTheory.Measure 𝓔}
  [MeasureTheory.IsProbabilityMeasure Q]
  {κ :
    ProbabilityTheory.Kernel (𝓔 × Fin K)
      ℝ}
  [ProbabilityTheory.IsMarkovKernel κ]
  {E : Ω → 𝓔} {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  (hK : 0 < K)
  (h :
    Learning.IsBayesAlgEnvSeq Q κ
      (tsAlgorithm hK Q κ) E A R P)
  (hm :
    ∀ (e : 𝓔) (a : Fin K),
      ∫ (x : ℝ), id x ∂κ (e, a) ∈
        Set.Icc l u)
  (n : ℕ) :
  ∫ (x : Ω),
      Learning.IsBayesAlgEnvSeq.regret κ E
        A n x ∂P =
    ∫ (x : Ω),
        (fun ω =>
            ∑ t ∈ Finset.range n,
              (Learning.IsBayesAlgEnvSeq.actionMean
                  κ E
                  (Learning.IsBayesAlgEnvSeq.bestAction
                    κ E ω)
                  ω -
                ClippedUCB.ucb A R l u σ2
                  δ
                  (Learning.IsBayesAlgEnvSeq.bestAction
                    κ E ω)
                  t ω))
          x ∂P +
      ∫ (x : Ω),
        (fun ω =>
            ∑ t ∈ Finset.range n,
              (ClippedUCB.ucb A R l u σ2 δ
                  (A t ω) t ω -
                Learning.IsBayesAlgEnvSeq.actionMean
                  κ E (A t ω) ω))
          x ∂P

Code

/-- Decomposition of the expected regret under `tsAlgorithm hK Q κ`: when all kernel means
`(κ (e, a))[id]` lie in `[l, u]`, the expected regret at time `n` equals the expected sum of
`actionMean(best action) - ucb(best action)` plus the expected sum of
`ucb(played action) - actionMean(played action)`, both sums taken over `t < n`. -/
lemma integral_regret_eq_add (hK : 0 < K) (h : IsBayesAlgEnvSeq Q κ (tsAlgorithm hK Q κ) E A R P)
    (hm : ∀ e a, (κ (e, a))[id] ∈ (Set.Icc l u)) (n : ℕ) :
    P[IsBayesAlgEnvSeq.regret κ E A n] =
      P[fun ω ↦ ∑ t ∈ range n,
        (actionMean κ E (bestAction κ E ω) ω - ucb A R l u σ2 δ (bestAction κ E ω) t ω)] +
      P[fun ω ↦ ∑ t ∈ range n,
        (ucb A R l u σ2 δ (A t ω) t ω - actionMean κ E (A t ω) ω)]

Type uses (6)

Body uses (8)

Used by (1)

integral_regret_le

Actions: Source · Open Issue

Proof

by
  -- Integrability of the four families of integrands used below:
  -- `hua`: UCB of the played action, `hub`: UCB of the best action,
  -- `haa`: action mean of the played action, `hab`: action mean of the best action.
  have hua (t : ℕ) : Integrable (fun ω ↦ ucb A R l u σ2 δ (A t ω) t ω) P :=
    integrable_uncurry_ucb_comp h.measurable_action h.measurable_feedback (h.measurable_action t)
      measurable_const
  have hub (t : ℕ) : Integrable (fun ω ↦ ucb A R l u σ2 δ (bestAction κ E ω) t ω) P :=
    integrable_uncurry_ucb_comp h.measurable_action h.measurable_feedback
      (IsBayesAlgEnvSeq.measurable_bestAction h.measurable_param) measurable_const
  have haa (t : ℕ) : Integrable (fun ω ↦ actionMean κ E (A t ω) ω) P :=
    IsBayesAlgEnvSeq.integrable_uncurry_actionMean_comp h.measurable_param
      (h.measurable_action t) hm
  have hab : Integrable (fun ω ↦ actionMean κ E (bestAction κ E ω) ω) P :=
      IsBayesAlgEnvSeq.integrable_uncurry_actionMean_comp h.measurable_param
        (IsBayesAlgEnvSeq.measurable_bestAction h.measurable_param) hm
  calc
    -- Step 1: write the regret as a sum of gaps and push the integral inside the sum.
    _  = (∑ t ∈ range n, ∫ ω, actionMean κ E (bestAction κ E ω) ω ∂P) -
            ∑ t ∈ range n, ∫ ω, actionMean κ E (A t ω) ω ∂P := by
        simp_rw [IsBayesAlgEnvSeq.regret_eq_sum_gap, IsBayesAlgEnvSeq.gap_eq_sub]
        rw [integral_finsetSum _ (by fun_prop), ← Finset.sum_sub_distrib]
        simp_rw [integral_sub hab (haa _)]
    -- Step 2: subtract and add the expected UCB; the two UCB sums agree by
    -- `integral_ucb_action_eq_integral_ucb_bestAction`, so the value is unchanged.
    _ = ((∑ t ∈ range n, ∫ ω, actionMean κ E (bestAction κ E ω) ω ∂P) -
            ∑ t ∈ range n, ∫ ω, ucb A R l u σ2 δ (bestAction κ E ω) t ω ∂P) +
          ((∑ t ∈ range n, ∫ ω, ucb A R l u σ2 δ (A t ω) t ω ∂P) -
            ∑ t ∈ range n, ∫ ω, actionMean κ E (A t ω) ω ∂P) := by
        simp [integral_ucb_action_eq_integral_ucb_bestAction hK h]
    -- Step 3: merge each difference of integrals into the integral of a difference.
    _ = (∑ t ∈ range n, ∫ ω, actionMean κ E (bestAction κ E ω) ω -
              ucb A R l u σ2 δ (bestAction κ E ω) t ω ∂P) +
          ∑ t ∈ range n, ∫ ω, ucb A R l u σ2 δ (A t ω) t ω -
            actionMean κ E (A t ω) ω ∂P := by
        rw [← Finset.sum_sub_distrib, ← Finset.sum_sub_distrib]
        simp_rw [← integral_sub hab (hub _), ← integral_sub (hua _) (haa _)]
    -- Step 4: move the finite sums back inside the integrals to match the statement.
    _ = _ := by
        rw [← integral_finsetSum _ (by fun_prop), ← integral_finsetSum _ (by fun_prop)]

`integral_regret_le`🔗

Theorem Bandits.TS.integral_regret_le

Details

If Thompson sampling has the correct prior over environments and every environment has K actions, each of which has a reward distribution whose mean lies between l and u (with l ≤ u) and which is sub-Gaussian with variance proxy σ2 > 0 after its mean is subtracted, then the Bayesian regret at time n is at most (2 * K + 1) * (u - l) + 8 * √(σ2 * K * n * Real.log n).

theorem

Bandits.TS.integral_regret_le.{u_1, u_2} {K : ℕ} [Nonempty (Fin K)]
  {l u σ2 : ℝ} {Ω : Type u_1} [MeasurableSpace Ω] {𝓔 : Type u_2}
  [MeasurableSpace 𝓔] [StandardBorelSpace 𝓔] [Nonempty 𝓔]
  {Q : MeasureTheory.Measure 𝓔} [MeasureTheory.IsProbabilityMeasure Q]
  {κ : ProbabilityTheory.Kernel (𝓔 × Fin K) ℝ}
  [ProbabilityTheory.IsMarkovKernel κ] {E : Ω → 𝓔} {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] (hK : 0 < K)
  (h : Learning.IsBayesAlgEnvSeq Q κ (tsAlgorithm hK Q κ) E A R P)
  (hlu : l ≤ u)
  (hm : ∀ (e : 𝓔) (a : Fin K), ∫ (x : ℝ), id x ∂κ (e, a) ∈ Set.Icc l u)
  (hσ2 : 0 < σ2)
  (hs :
    ∀ (e : 𝓔) (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x => x - ∫ (x : ℝ), id x ∂κ (e, a)) ⟨σ2, ⋯⟩ (κ (e, a)))
  (n : ℕ) :
  ∫ (x : Ω), Learning.IsBayesAlgEnvSeq.regret κ E A n x ∂P ≤
    (2 * ↑K + 1) * (u - l) + 8 * √(σ2 * ↑K * ↑n * Real.log ↑n)
Bandits.TS.integral_regret_le.{u_1, u_2}
  {K : ℕ} [Nonempty (Fin K)] {l u σ2 : ℝ}
  {Ω : Type u_1} [MeasurableSpace Ω]
  {𝓔 : Type u_2} [MeasurableSpace 𝓔]
  [StandardBorelSpace 𝓔] [Nonempty 𝓔]
  {Q : MeasureTheory.Measure 𝓔}
  [MeasureTheory.IsProbabilityMeasure Q]
  {κ :
    ProbabilityTheory.Kernel (𝓔 × Fin K)
      ℝ}
  [ProbabilityTheory.IsMarkovKernel κ]
  {E : Ω → 𝓔} {A : ℕ → Ω → Fin K}
  {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  (hK : 0 < K)
  (h :
    Learning.IsBayesAlgEnvSeq Q κ
      (tsAlgorithm hK Q κ) E A R P)
  (hlu : l ≤ u)
  (hm :
    ∀ (e : 𝓔) (a : Fin K),
      ∫ (x : ℝ), id x ∂κ (e, a) ∈
        Set.Icc l u)
  (hσ2 : 0 < σ2)
  (hs :
    ∀ (e : 𝓔) (a : Fin K),
      ProbabilityTheory.HasSubgaussianMGF
        (fun x =>
          x - ∫ (x : ℝ), id x ∂κ (e, a))
        ⟨σ2, ⋯⟩ (κ (e, a)))
  (n : ℕ) :
  ∫ (x : Ω),
      Learning.IsBayesAlgEnvSeq.regret κ E
        A n x ∂P ≤
    (2 * ↑K + 1) * (u - l) +
      8 * √(σ2 * ↑K * ↑n * Real.log ↑n)

Code

theorem integral_regret_le (hK : 0 < K) (h : IsBayesAlgEnvSeq Q κ (tsAlgorithm hK Q κ) E A R P)
    (hlu : l ≤ u) (hm : ∀ e a, (κ (e, a))[id] ∈ (Set.Icc l u)) (hσ2 : 0 < σ2)
    (hs : ∀ e a, HasSubgaussianMGF (fun x ↦ x - (κ (e, a))[id]) ⟨σ2, hσ2.le⟩ (κ (e, a))) (n : ℕ) :
    P[IsBayesAlgEnvSeq.regret κ E A n]
      ≤ (2 * K + 1) * (u - l) + 8 * √(σ2 * K * n * Real.log n)

Type uses (3)

Body uses (6)

Actions: Source · Open Issue

Proof

by
  -- Proof outline: split off the degenerate horizon n = 0, then for n ≠ 0 chain
  -- (1) a decomposition of the expected regret into two terms, (2) a bound on each
  -- term, and (3) elementary algebra that brings the bound into the stated form.
  by_cases hn : n = 0
  -- Case n = 0: after unfolding the regret definitions with `simp`, the remaining
  -- arithmetic goal is closed by `nlinarith`.
  · simp [hn, IsBayesAlgEnvSeq.regret, Bandits.regret]
    nlinarith
  -- From here on `hn : ¬n = 0` is in context. `hδ` records positivity of 1 / n ^ 2;
  -- it is handed to both bounding lemmas below (presumably fixing their confidence
  -- parameter to δ = 1 / n ^ 2 — confirm against the lemma statements).
  have hδ : (0 : ℝ) < 1 / n ^ 2 := by positivity
  calc P[IsBayesAlgEnvSeq.regret κ E A n]
      -- Step 1: rewrite the expected regret as a sum of two integrals.
      = _ :=
        integral_regret_eq_add hK h hm n
    -- Step 2: bound the two summands separately and add the bounds.
    _ ≤ _ :=
        add_le_add
          (integral_sum_range_actionMean_bestAction_sub_ucb_bestAction_le h hlu hm hσ2 hs hδ n)
          (integral_sum_range_ucb_action_sub_actionMean_action_le h hlu hm hσ2 hs hδ n)
    -- Step 3: normalise the combined bound; `Real.log_pow` turns the logarithm of a
    -- power into a multiple of `Real.log n` before `ring_nf` finishes.
    _ = K * (u - l) + (K + 1) * (u - l) * ((n - 1) / n)
          + 4 * √((2 : ℝ) ^ 2 * (σ2 * K * n * Real.log n)) := by
        field_simp
        rw [Real.log_pow]
        ring_nf
    -- Step 4: pull the factor 2 out of the square root (√(2 ^ 2 * x) = 2 * √x),
    -- so that 4 * 2 = 8 becomes the constant in front.
    _ = K * (u - l) + (K + 1) * (u - l) * ((n - 1) / n) + 8 * √(σ2 * K * n * Real.log n) := by
        rw [Real.sqrt_mul (by positivity), Real.sqrt_sq (by norm_num)]
        ring
    -- Step 5: replace (n - 1) / n by the cruder bound 1; `gcongr` needs 0 ≤ u - l,
    -- which follows from `hlu`.
    _ ≤ K * (u - l) + (K + 1) * (u - l) * 1 + 8 * √(σ2 * K * n * Real.log n) := by -- loose
        have : 0 ≤ u - l := sub_nonneg.2 hlu
        gcongr
        rw [div_le_one (by positivity)]
        linarith
    -- Step 6: collect K * (u - l) + (K + 1) * (u - l) into (2 * K + 1) * (u - l).
    _ = _ := by
        ring

LeanMachineLearning exposition

2.6. Online.Bandit.Algorithms.Regret.BayesRegretTS🔗

`ucb`🔗

`ucb_zero`🔗

`ucb_mem_Icc`🔗

`measurable_ucb`🔗

`measurable_uncurry_ucb_comp`🔗

`integrable_uncurry_ucb_comp`🔗

`ucb'`🔗

`measurable_uncurry_ucb'`🔗

`ucb_succ_eq_ucb'`🔗

`sum_ucb_sub_mean_le`🔗

`integral_sum_range_actionMean_bestAction_sub_ucb_bestAction_le`🔗

`integral_sum_range_ucb_action_sub_actionMean_action_le`🔗

`integral_ucb_action_eq_integral_ucb_bestAction`🔗

`integral_regret_eq_add`🔗

`integral_regret_le`🔗

2.6. Online.Bandit.Algorithms.Regret.BayesRegretTS🔗

ucb🔗

ucb_zero🔗

ucb_mem_Icc🔗

measurable_ucb🔗

measurable_uncurry_ucb_comp🔗

integrable_uncurry_ucb_comp🔗

ucb'🔗

measurable_uncurry_ucb'🔗

ucb_succ_eq_ucb'🔗

sum_ucb_sub_mean_le🔗

integral_sum_range_actionMean_bestAction_sub_ucb_bestAction_le🔗

integral_sum_range_ucb_action_sub_actionMean_action_le🔗

integral_ucb_action_eq_integral_ucb_bestAction🔗

integral_regret_eq_add🔗

integral_regret_le🔗

`ucb`🔗

`ucb_zero`🔗

`ucb_mem_Icc`🔗

`measurable_ucb`🔗

`measurable_uncurry_ucb_comp`🔗

`integrable_uncurry_ucb_comp`🔗

`ucb'`🔗

`measurable_uncurry_ucb'`🔗

`ucb_succ_eq_ucb'`🔗

`sum_ucb_sub_mean_le`🔗

`integral_sum_range_actionMean_bestAction_sub_ucb_bestAction_le`🔗

`integral_sum_range_ucb_action_sub_actionMean_action_le`🔗

`integral_ucb_action_eq_integral_ucb_bestAction`🔗

`integral_regret_eq_add`🔗

`integral_regret_le`🔗