Online.Bandit.RewardByCountMeasure

`hasLaw_Z`🔗

Lemma Bandits.hasLaw_Z

Details

No docstring.

theorem

Bandits.hasLaw_Z.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P]
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] (a : 𝓐) (m : ℕ) :
  ProbabilityTheory.HasLaw (fun ω => Prod.snd ω m a) (ν a)
    (MeasureTheory.Measure.prod P (streamMeasure ν))
Bandits.hasLaw_Z.{u_1, u_2} {𝓐 : Type u_1}
  {Ω : Type u_2} {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  (a : 𝓐) (m : ℕ) :
  ProbabilityTheory.HasLaw
    (fun ω => Prod.snd ω m a) (ν a)
    (MeasureTheory.Measure.prod P
      (streamMeasure ν))

Code

-- Under `𝔓` (elaborated on this page as `P.prod (streamMeasure ν)`), the coordinate
-- `ω.2 m a` of the second component has law `ν a`.
-- Only the `map_eq` field of `HasLaw` is supplied explicitly.
-- NOTE(review): the remaining measurability field is presumably filled by a default
-- tactic — confirm.
lemma hasLaw_Z (a : 𝓐) (m : ℕ) :
  HasLaw (fun ω ↦ ω.2 m a) (ν a) 𝔓 where
  map_eq

Type uses (1)

streamMeasure

Body uses (1)

instIsProbabilityMeasureForallNatForallStreamMeasureOfIsMarkovKernel

Used by (1)

condDistrib_rewardByCount_stepsUntil

Actions: Source · Open Issue

Proof

by
    -- Chain of equalities computing the pushforward of `𝔓` by `ω ↦ ω.2 m a`.
    calc (𝔓).map (fun ω ↦ ω.2 m a)
    -- Factor through the second marginal: unfold `Measure.snd` and fuse the two pushforwards.
    _ = ((𝔓).snd).map (fun ω ↦ ω m a) := by
      rw [Measure.snd, Measure.map_map (by fun_prop) (by fun_prop)]
      rfl
    -- `simp` identifies the second marginal of `𝔓` with `streamMeasure ν`.
    _ = (streamMeasure ν).map (fun ω ↦ ω m a) := by simp
    -- Unfold `streamMeasure` and split the evaluation at `(m, a)` into evaluation at `m`
    -- followed by evaluation at `a`.
    _ = ((Measure.infinitePi fun _ ↦ Measure.infinitePi ν).map (fun ω ↦ ω m)).map
        (fun ω ↦ ω a) := by
      rw [streamMeasure, Measure.map_map (by fun_prop) (by fun_prop)]
      rfl
    -- Rewrite with the `map_eq` of `measurePreserving_eval_infinitePi` (as often as it applies)
    -- to collapse both evaluations.
    _ = ν a := by simp_rw [(measurePreserving_eval_infinitePi _ _).map_eq]

`term𝓛[_|_;_]`🔗

Definition Bandits.«term𝓛[_|_;_]»

Details

Law of Y conditioned on the event s.

def

Bandits.«term𝓛[_|_;_]» : Lean.ParserDescr
Bandits.«term𝓛[_|_;_]» : Lean.ParserDescr

Code

/-- Law of `Y` conditioned on the event `s`: the pushforward along `Y` of the measure
`μ[|s]`. -/
notation "𝓛[" Y " | " s "; " μ "]" => Measure.map Y (μ[|s])

Actions: Source · Open Issue

`term𝓛[_|_In_;_]`🔗

Definition Bandits.«term𝓛[_|_In_;_]»

Details

Law of Y conditioned on the event that X is in s.

def

Bandits.«term𝓛[_|_In_;_]» : Lean.ParserDescr
Bandits.«term𝓛[_|_In_;_]» :
  Lean.ParserDescr

Code

/-- Law of `Y` conditioned on the event that `X` is in `s`, i.e. on the preimage
`X ⁻¹' s`. -/
notation "𝓛[" Y " | " X " in " s "; " μ "]" => Measure.map Y (μ[|X ⁻¹' s])

Actions: Source · Open Issue

`term𝓛[_|_←_;_]`🔗

Definition Bandits.«term𝓛[_|_←_;_]»

Details

Law of Y conditioned on the event that X equals x.

def

Bandits.«term𝓛[_|_←_;_]» : Lean.ParserDescr
Bandits.«term𝓛[_|_←_;_]» :
  Lean.ParserDescr

Code

/-- Law of `Y` conditioned on the event that `X` equals `x`, i.e. on the preimage
`X ⁻¹' {x}`. -/
notation "𝓛[" Y " | " X " ← " x "; " μ "]" => Measure.map Y (μ[|X ⁻¹' {x}])

Actions: Source · Open Issue

`condDistrib_reward''`🔗

Lemma Bandits.condDistrib_reward''

Details

No docstring.

theorem

Bandits.condDistrib_reward''.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {A : ℕ → Ω → 𝓐}
  {R : ℕ → Ω → ℝ} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] [Countable 𝓐]
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  (n : ℕ) :
  ⇑𝓛[fun ω => R n (Prod.fst ω) | fun ω => A n (Prod.fst ω);
        MeasureTheory.Measure.prod P
          (streamMeasure
            ν)] =ᵐ[MeasureTheory.Measure.map (fun ω => A n (Prod.fst ω))
      (MeasureTheory.Measure.prod P (streamMeasure ν))]
    ⇑ν
Bandits.condDistrib_reward''.{u_1, u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} {A : ℕ → Ω → 𝓐}
  {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  [Countable 𝓐]
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  (n : ℕ) :
  ⇑𝓛[fun ω => R n (Prod.fst ω) | fun ω =>
        A n (Prod.fst ω);
        MeasureTheory.Measure.prod P
          (streamMeasure
            ν)] =ᵐ[MeasureTheory.Measure.map
      (fun ω => A n (Prod.fst ω))
      (MeasureTheory.Measure.prod P
        (streamMeasure ν))]
    ⇑ν

Code

-- Product-space form of the reward-given-action conditional distribution: the conditional
-- distribution of `R n` given `A n`, both evaluated at the first coordinate `ω.1`, agrees with
-- the kernel `ν` almost everywhere for the law of the action under `𝔓`.
-- `𝔓` is elaborated on this page as `P.prod (streamMeasure ν)`.
-- `h` is the standing hypothesis relating `A`, `R`, `alg` and `stationaryEnv ν` under `P`.
lemma condDistrib_reward'' [Countable 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (n : ℕ) :
    𝓛[fun ω ↦ R n ω.1 | fun ω ↦ A n ω.1; 𝔓] =ᵐ[(𝔓).map (fun ω ↦ A n ω.1)] ν

Type uses (5)

Body uses (2)

Used by (1)

reward_cond_action

Actions: Source · Open Issue

Proof

by
  -- Measurability facts extracted from `h`.
  -- NOTE(review): never named below; presumably consumed by the `fun_prop` calls — confirm.
  have hA := h.measurable_action
  have hR := h.measurable_feedback
  -- The corresponding statement on `Ω` alone (measure `P`), supplied by `h`.
  have h_ra' : 𝓛[R n | A n; P] =ᵐ[P.map (A n)] ν := h.condDistrib_feedback_stationaryEnv n
  -- The action has the same law under `𝔓` and under `P`: view the map as `A n ∘ Prod.fst`,
  -- split the pushforward, recognise `Measure.fst`, and rewrite with `Measure.fst_prod`.
  have h_law : (𝔓).map (fun ω ↦ A n ω.1) = P.map (A n) := by
    change ((𝔓).map (A n ∘ Prod.fst)) = _
    rw [← Measure.map_map (by fun_prop) (by fun_prop), ← Measure.fst, Measure.fst_prod]
  -- Replace the reference measure of the a.e. equality in the goal.
  rw [h_law]
  -- Relate the conditional distribution on the product space to the one on `Ω`.
  have h_prod : 𝓛[fun ω ↦ R n ω.1 | fun ω ↦ A n ω.1; 𝔓]
      =ᵐ[P.map (A n)] 𝓛[R n | A n; P] :=
    condDistrib_fst_prod _ (by fun_prop) _
  -- At every point where both a.e. facts hold, chain the two pointwise equalities.
  filter_upwards [h_ra', h_prod] with ω h_eq h_prod
  rw [h_prod, h_eq]

`reward_cond_action`🔗

Lemma Bandits.reward_cond_action

Details

No docstring.

theorem

Bandits.reward_cond_action.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} {A : ℕ → Ω → 𝓐}
  {R : ℕ → Ω → ℝ} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace 𝓐]
  [Countable 𝓐]
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  (a : 𝓐) (n : ℕ)
  (hμa :
    (MeasureTheory.Measure.map (fun ω => A n (Prod.fst ω))
          (MeasureTheory.Measure.prod P (streamMeasure ν)))
        {a} ≠
      0) :
  𝓛[fun ω => R n (Prod.fst ω) | fun ω => A n (Prod.fst ω) in {a};
      MeasureTheory.Measure.prod P (streamMeasure ν)] =
    ν a
Bandits.reward_cond_action.{u_1, u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} {A : ℕ → Ω → 𝓐}
  {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  [StandardBorelSpace 𝓐] [Countable 𝓐]
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  (a : 𝓐) (n : ℕ)
  (hμa :
    (MeasureTheory.Measure.map
          (fun ω => A n (Prod.fst ω))
          (MeasureTheory.Measure.prod P
            (streamMeasure ν)))
        {a} ≠
      0) :
  𝓛[fun ω => R n (Prod.fst ω) | fun ω =>
      A n (Prod.fst ω) in {a};
      MeasureTheory.Measure.prod P
        (streamMeasure ν)] =
    ν a

Code

-- If the value `a` has nonzero mass under the law of `A n` (hypothesis `hμa`), then the law of
-- `R n` under `𝔓` conditioned on the event `A n = a` is `ν a`.
-- Both `A n` and `R n` are evaluated at the first coordinate `ω.1` of the product space.
lemma reward_cond_action [Countable 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (a : 𝓐) (n : ℕ)
    (hμa : (𝔓).map (fun ω ↦ A n ω.1) {a} ≠ 0) :
    𝓛[fun ω ↦ R n ω.1 | fun ω ↦ A n ω.1 ← a; 𝔓] = ν a

Type uses (4)

Body uses (5)

Used by (1)

reward_cond_stepsUntil

Actions: Source · Open Issue

Proof

by
  -- Measurability facts from `h`.
  -- NOTE(review): presumably used by the `fun_prop` calls below — confirm.
  have hA := h.measurable_action
  have hR := h.measurable_feedback
  -- A.e. identification of the conditional distribution with `ν`.
  have h_ra : 𝓛[fun ω ↦ R n ω.1 | fun ω ↦ A n ω.1; 𝔓] =ᵐ[(𝔓).map (fun ω ↦ A n ω.1)] ν :=
    condDistrib_reward'' h n
  -- NOTE(review): `condDistrib_ae_eq_cond` is defined elsewhere; judging from its use below it
  -- relates the conditional distribution at a point to the law conditioned on that point's
  -- fibre — confirm.
  have h_eq := condDistrib_ae_eq_cond (μ := 𝔓)
    (X := fun ω ↦ A n ω.1) (Y := fun ω ↦ R n ω.1) (by fun_prop) (by fun_prop)
  -- Unfold both a.e. equalities with `ae_iff_of_countable`, then instantiate them at `a`,
  -- supplying the nonzero-mass hypothesis `hμa`.
  rw [Filter.EventuallyEq, ae_iff_of_countable] at h_ra h_eq
  specialize h_ra a hμa
  specialize h_eq a hμa
  -- Substitute `h_ra` into `h_eq`; the goal is the resulting equation read right to left.
  rw [h_ra] at h_eq
  exact h_eq.symm

`condIndepFun_reward_stepsUntil_action'`🔗

Lemma Bandits.condIndepFun_reward_stepsUntil_action'

Details

No docstring.

theorem

Bandits.condIndepFun_reward_stepsUntil_action'.{u_1, u_2} {𝓐 : Type u_1}
  {Ω : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω}
  [DecidableEq 𝓐] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace 𝓐]
  [Nonempty 𝓐] [StandardBorelSpace Ω]
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  (a : 𝓐) (m n : ℕ) :
  ProbabilityTheory.CondIndepFun
    (MeasurableSpace.comap (A n) inferInstance) ⋯ (R n)
    (Set.indicator {ω | Learning.stepsUntil A a m ω = ↑n} fun x => 1) P
Bandits.condIndepFun_reward_stepsUntil_action'.{u_1,
    u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} [DecidableEq 𝓐]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐]
  [StandardBorelSpace Ω]
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  (a : 𝓐) (m n : ℕ) :
  ProbabilityTheory.CondIndepFun
    (MeasurableSpace.comap (A n)
      inferInstance)
    ⋯ (R n)
    (Set.indicator
      {ω |
        Learning.stepsUntil A a m ω = ↑n}
      fun x => 1)
    P

Code

-- Under `P`, conditionally on the action `A n`, the reward `R n` is independent of the
-- indicator (with value `1`) of the event `{ω | stepsUntil A a m ω = ↑n}`.
-- The second argument of the notation, `h.measurable_action n`, is the measurability proof for
-- the conditioning variable `A n`.
lemma condIndepFun_reward_stepsUntil_action' [StandardBorelSpace Ω]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (a : 𝓐) (m n : ℕ) :
    R n ⟂ᵢ[A n, h.measurable_action n; P] {ω | stepsUntil A a m ω = ↑n}.indicator (fun _ ↦ 1)

Type uses (5)

Body uses (6)

Used by (1)

condIndepFun_reward_stepsUntil_action

Actions: Source · Open Issue

Proof

by
  -- the indicator of `stepsUntil ... = n` is a function of `hist (n-1)` and `action n`.
  -- It thus suffices to use the independence of `reward n` and `hist (n-1)` conditionally
  -- on `action n`.
  have hA := h.measurable_action
  have hR := h.measurable_feedback
  -- Split on `n = 0`: the second branch mentions `history A R (n - 1)`, the first does not.
  by_cases hn : n = 0
  -- Case `n = 0`: `R 0` is conditionally independent of `A 0` given `A 0` itself.
  · have h_indep : R 0 ⟂ᵢ[A 0, hA 0; P] A 0 :=
      condIndepFun_self_right (by fun_prop) (by fun_prop)
    -- Rewrite `n` to `0` in the goal so that it matches `h_indep`.
    simp only [hn]
    -- Transfer the independence to the indicator via `of_measurable_right`; the remaining goal
    -- is closed by `measurable_comap_indicator_stepsUntil_eq_zero`.
    refine h_indep.of_measurable_right (hX := hA 0) ?_
    exact measurable_comap_indicator_stepsUntil_eq_zero a m
  -- Case `n ≠ 0`: `R n` is conditionally independent of the pair (history up to `n - 1`, `A n`).
  -- NOTE(review): `by grind` discharges a side condition, presumably derived from `hn` — confirm.
  · have h_indep : R n ⟂ᵢ[A n, hA n; P] fun ω ↦ (history A R (n - 1) ω, A n ω) :=
      IsAlgEnvSeq.condIndepFun_feedback_history_action_action' h n (by grind)
    -- Same transfer as above, now closing the remaining goal with
    -- `measurable_comap_indicator_stepsUntil_eq`.
    refine h_indep.of_measurable_right (hX := hA n) ?_
    exact measurable_comap_indicator_stepsUntil_eq hA hR a m n

`condIndepFun_reward_stepsUntil_action`🔗

Lemma Bandits.condIndepFun_reward_stepsUntil_action

Details

No docstring.

theorem

Bandits.condIndepFun_reward_stepsUntil_action.{u_1, u_2} {𝓐 : Type u_1}
  {Ω : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω}
  [DecidableEq 𝓐] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace 𝓐]
  [Nonempty 𝓐] [StandardBorelSpace Ω] [Countable 𝓐]
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  (a : 𝓐) (m n : ℕ) :
  ProbabilityTheory.CondIndepFun
    (MeasurableSpace.comap (fun ω => A n (Prod.fst ω)) m𝓐) ⋯
    (fun ω => R n (Prod.fst ω))
    (Set.indicator {ω | Learning.stepsUntil A a m (Prod.fst ω) = ↑n}
      fun x => 1)
    (MeasureTheory.Measure.prod P (streamMeasure ν))
Bandits.condIndepFun_reward_stepsUntil_action.{u_1,
    u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} [DecidableEq 𝓐]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐]
  [StandardBorelSpace Ω] [Countable 𝓐]
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  (a : 𝓐) (m n : ℕ) :
  ProbabilityTheory.CondIndepFun
    (MeasurableSpace.comap
      (fun ω => A n (Prod.fst ω)) m𝓐)
    ⋯ (fun ω => R n (Prod.fst ω))
    (Set.indicator
      {ω |
        Learning.stepsUntil A a m
            (Prod.fst ω) =
          ↑n}
      fun x => 1)
    (MeasureTheory.Measure.prod P
      (streamMeasure ν))

Code

-- Conditional independence on the product space with measure `𝔓`: given `A n`, the reward
-- `R n` is independent of the indicator of `{ω | stepsUntil A a m ω.1 = ↑n}`.
-- `A n`, `R n` and `stepsUntil A a m` are all evaluated at the first coordinate `ω.1`.
-- The conditioning σ-algebra is the comap of `m𝓐` by `ω ↦ A n ω.1`; the term ending in
-- `.comap_le` is the accompanying inequality between σ-algebras.
lemma condIndepFun_reward_stepsUntil_action [StandardBorelSpace Ω] [Countable 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P)
    (a : 𝓐) (m n : ℕ) :
    CondIndepFun (m𝓐.comap (fun ω ↦ A n ω.1)) ((h.measurable_action n).comp measurable_fst).comap_le
      (fun ω ↦ R n ω.1) ({ω | stepsUntil A a m ω.1 = ↑n}.indicator (fun _ ↦ 1)) 𝔓

Type uses (7)

Body uses (4)

Used by (1)

reward_cond_stepsUntil

Actions: Source · Open Issue

Proof

by
  -- Measurability facts from `h`; `hA` and `hR` are passed explicitly below.
  have hA := h.measurable_action
  have hR := h.measurable_feedback
  -- Lift the statement on `Ω` (the primed lemma, last argument) to the product with
  -- `streamMeasure ν` through `condIndepFun_fst_prod`; the other arguments are measurability
  -- proofs, the first one for the indicator.
  exact condIndepFun_fst_prod (ν := streamMeasure ν)
    (measurable_indicator_stepsUntil_eq hA hR a m n) (by fun_prop) (by fun_prop)
    (condIndepFun_reward_stepsUntil_action' h a m n)

`reward_cond_stepsUntil`🔗

Lemma Bandits.reward_cond_stepsUntil

Details

No docstring.

theorem

Bandits.reward_cond_stepsUntil.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} [DecidableEq 𝓐]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace 𝓐]
  [Nonempty 𝓐] [StandardBorelSpace Ω] [Countable 𝓐]
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  (a : 𝓐) (m n : ℕ) (hm : m ≠ 0)
  (hμn :
    (MeasureTheory.Measure.prod P (streamMeasure ν))
        ((fun ω => Learning.stepsUntil A a m (Prod.fst ω)) ⁻¹' {↑n}) ≠
      0) :
  𝓛[fun ω => R n (Prod.fst ω) | fun ω =>
      Learning.stepsUntil A a m (Prod.fst ω) in {↑n};
      MeasureTheory.Measure.prod P (streamMeasure ν)] =
    ν a
Bandits.reward_cond_stepsUntil.{u_1, u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} [DecidableEq 𝓐]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐]
  [StandardBorelSpace Ω] [Countable 𝓐]
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  (a : 𝓐) (m n : ℕ) (hm : m ≠ 0)
  (hμn :
    (MeasureTheory.Measure.prod P
          (streamMeasure ν))
        ((fun ω =>
            Learning.stepsUntil A a m
              (Prod.fst ω)) ⁻¹'
          {↑n}) ≠
      0) :
  𝓛[fun ω => R n (Prod.fst ω) | fun ω =>
      Learning.stepsUntil A a m
        (Prod.fst ω) in
      {↑n};
      MeasureTheory.Measure.prod P
        (streamMeasure ν)] =
    ν a

Code

-- For `m ≠ 0`: if the event `stepsUntil A a m ω.1 = ↑n` has nonzero `𝔓`-measure (hypothesis
-- `hμn`), then the law of `R n` conditioned on that event is `ν a`.
-- `𝔓` is elaborated on this page as `P.prod (streamMeasure ν)`.
lemma reward_cond_stepsUntil [StandardBorelSpace Ω] [Countable 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (a : 𝓐) (m n : ℕ)
    (hm : m ≠ 0) (hμn : 𝔓 ((fun ω ↦ stepsUntil A a m ω.1) ⁻¹' {↑n}) ≠ 0) :
    𝓛[fun ω ↦ R n ω.1 | fun ω ↦ stepsUntil A a m ω.1 ← ↑n; 𝔓] = ν a

Type uses (5)

Body uses (8)

Used by (1)

condDistrib_rewardByCount_stepsUntil

Actions: Source · Open Issue

Proof

by
  -- Measurability facts from `h`.
  -- NOTE(review): presumably consumed by the `fun_prop` calls below — confirm.
  have hA := h.measurable_action
  have hR := h.measurable_feedback
  -- The event `{stepsUntil = n} ∩ {A n = a}` has nonzero measure: it coincides with
  -- `{stepsUntil = n}`, because `action_eq_of_stepsUntil_eq_coe hm` yields `A n = a` on it.
  have hμna :
      𝔓 ((fun ω ↦ stepsUntil A a m ω.1) ⁻¹' {↑n} ∩ (fun ω ↦ A n ω.1) ⁻¹' {a}) ≠ 0 := by
    suffices ((fun ω : Ω × (ℕ → 𝓐 → ℝ) ↦
          stepsUntil A a m ω.1) ⁻¹' {↑n} ∩ (fun ω ↦ A n ω.1) ⁻¹' {a})
        = (fun ω ↦ stepsUntil A a m ω.1) ⁻¹' {↑n} by simpa [this] using hμn
    ext ω
    simp only [Set.mem_inter_iff, Set.mem_preimage, Set.mem_singleton_iff, and_iff_left_iff_imp]
    exact action_eq_of_stepsUntil_eq_coe hm
  -- The value `a` has nonzero mass under the law of `A n`: otherwise the event
  -- `{stepsUntil = n}`, shown below to be contained in `{A n = a}`, would be null,
  -- contradicting `hμn`.
  have hμa : (𝔓).map (fun ω ↦ A n ω.1) {a} ≠ 0 := by
    rw [Measure.map_apply (by fun_prop) (measurableSet_singleton _)]
    refine fun h_zero ↦ hμn (measure_mono_null (fun ω ↦ ?_) h_zero)
    simp only [Set.mem_preimage, Set.mem_singleton_iff]
    exact action_eq_of_stepsUntil_eq_coe hm
  calc 𝓛[fun ω ↦ R n ω.1 | fun ω ↦ stepsUntil A a m ω.1 ← (n : ℕ∞); 𝔓]
  -- Step 1: rewrite the conditioning event as its intersection with `{A n = a}` (same set).
  _ = (𝔓[|(fun ω ↦ stepsUntil A a m ω.1) ⁻¹' {↑n} ∩ (fun ω ↦ A n ω.1) ⁻¹' {a}]).map
      (fun ω ↦ R n ω.1) := by
    congr with ω
    simp only [Set.mem_preimage, Set.mem_singleton_iff, Set.mem_inter_iff, iff_self_and]
    exact action_eq_of_stepsUntil_eq_coe hm
  -- Step 2: swap the two sides of the intersection and express the `stepsUntil` event as the
  -- preimage of `{1}` under its indicator function.
  _ = (𝔓[|(fun ω ↦ A n ω.1) ⁻¹' {a}
      ∩ {ω : Ω × (ℕ → 𝓐 → ℝ) | stepsUntil A a m ω.1 = ↑n}.indicator 1 ⁻¹' {1} ]).map
      (fun ω ↦ R n ω.1) := by
    congr 2 with ω
    simp only [Set.mem_inter_iff, Set.mem_preimage, Set.mem_singleton_iff, Set.indicator_apply,
      Set.mem_setOf_eq, Pi.one_apply, ite_eq_left_iff, zero_ne_one, imp_false, Decidable.not_not]
    rw [and_comm]
  -- Step 3: drop the indicator part of the conditioning with `cond_of_condIndepFun`.
  -- The bullets discharge the resulting goals: the conditional independence, measurability of
  -- the indicator, a further measurability goal, and nonzero measure of the conditioning event.
  _ = 𝓛[fun ω ↦ R n ω.1 | fun ω ↦ A n ω.1 ← a; 𝔓] := by
    rw [cond_of_condIndepFun (by fun_prop)]
    · exact condIndepFun_reward_stepsUntil_action h a m n
    · refine measurable_one.indicator ?_
      exact measurableSet_eq_fun (by fun_prop) (by fun_prop)
    · fun_prop
    -- Reduce to `hμna` after commuting the intersection and rewriting the indicator preimage.
    · convert hμna using 2
      rw [Set.inter_comm]
      congr 1 with ω
      simp [Set.indicator_apply]
  -- Step 4: the law conditioned on `A n = a` is `ν a`.
  _ = ν a := reward_cond_action h a n hμa

`condDistrib_rewardByCount_stepsUntil`🔗

Lemma Bandits.condDistrib_rewardByCount_stepsUntil

Details

The conditional distribution of the reward received at the m-th pull of action a, given the time at which the number of pulls is m, is the constant kernel with value ν a.

theorem

Bandits.condDistrib_rewardByCount_stepsUntil.{u_1, u_2} {𝓐 : Type u_1}
  {Ω : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω}
  [DecidableEq 𝓐] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace 𝓐]
  [Nonempty 𝓐] [StandardBorelSpace Ω] [Countable 𝓐]
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  (a : 𝓐) (m : ℕ) (hm : m ≠ 0) :
  ⇑𝓛[Learning.rewardByCount A R a m | fun ω =>
        Learning.stepsUntil A a m (Prod.fst ω);
        MeasureTheory.Measure.prod P
          (streamMeasure
            ν)] =ᵐ[MeasureTheory.Measure.map
      (fun ω => Learning.stepsUntil A a m (Prod.fst ω))
      (MeasureTheory.Measure.prod P (streamMeasure ν))]
    ⇑(ProbabilityTheory.Kernel.const ℕ∞ (ν a))
Bandits.condDistrib_rewardByCount_stepsUntil.{u_1,
    u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} [DecidableEq 𝓐]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐]
  [StandardBorelSpace Ω] [Countable 𝓐]
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  (a : 𝓐) (m : ℕ) (hm : m ≠ 0) :
  ⇑𝓛[Learning.rewardByCount A R a m |
        fun ω =>
        Learning.stepsUntil A a m
          (Prod.fst ω);
        MeasureTheory.Measure.prod P
          (streamMeasure
            ν)] =ᵐ[MeasureTheory.Measure.map
      (fun ω =>
        Learning.stepsUntil A a m
          (Prod.fst ω))
      (MeasureTheory.Measure.prod P
        (streamMeasure ν))]
    ⇑(ProbabilityTheory.Kernel.const ℕ∞
        (ν a))

Code

/-- For `m ≠ 0`, the conditional distribution of `rewardByCount A R a m` given
`stepsUntil A a m` (evaluated at the first coordinate) is, almost everywhere for the law of
`stepsUntil A a m` under `𝔓`, the constant kernel with value `ν a`. -/
lemma condDistrib_rewardByCount_stepsUntil [StandardBorelSpace Ω] [Countable 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (a : 𝓐) (m : ℕ) (hm : m ≠ 0) :
    condDistrib (rewardByCount A R a m) (fun ω ↦ stepsUntil A a m ω.1) 𝔓
      =ᵐ[(𝔓).map (fun ω ↦ stepsUntil A a m ω.1)] Kernel.const _ (ν a)

Type uses (7)

Body uses (10)

Used by (1)

hasLaw_rewardByCount

Actions: Source · Open Issue

Proof

by
  -- Measurability facts from `h`.
  -- NOTE(review): presumably consumed by the `fun_prop` calls below — confirm.
  have hA := h.measurable_action
  have hR := h.measurable_feedback
  -- Replace the conditional distribution, up to a.e. equality, using `condDistrib_ae_eq_cond`;
  -- what remains is the a.e. equality with the constant kernel.
  refine (condDistrib_ae_eq_cond (μ := 𝔓)
    (X := fun ω ↦ stepsUntil A a m ω.1) (by fun_prop) (by fun_prop)).trans ?_
  -- Unfold the a.e. equality with `ae_iff_of_countable`: it is checked at each value `n`,
  -- with `hn` stating that `{n}` has nonzero mass.
  rw [Filter.EventuallyEq, ae_iff_of_countable]
  intro n hn
  simp only [Kernel.const_apply]
  -- `n` ranges over `ℕ∞`: treat the top element and natural numbers separately.
  cases n with
  | top =>
    -- On the conditioning event, `rewardByCount` may be replaced by the coordinate `ω.2 m a`
    -- (`rewardByCount_of_stepsUntil_eq_top`).
    rw [Measure.map_congr (g := fun ω ↦ ω.2 m a)]
    swap
    · refine ae_cond_of_forall_mem ((measurableSet_singleton _).preimage (by fun_prop)) ?_
      simp only [Set.mem_preimage, Set.mem_singleton_iff]
      exact fun ω ↦ rewardByCount_of_stepsUntil_eq_top
    -- Remove the conditioning by independence (`cond_of_indepFun`): `stepsUntil` reads only
    -- the first coordinate and `ω.2 m a` only the second.
    rw [cond_of_indepFun _ (by fun_prop) (by fun_prop) (measurableSet_singleton _)]
    -- Main goal: the unconditioned law is `ν a`, by `hasLaw_Z`.
    · exact (hasLaw_Z a m).map_eq
    -- Side goal: nonzero measure of the conditioning event, from `hn`.
    · rwa [Measure.map_apply (by fun_prop) (measurableSet_singleton _)] at hn
    -- Side goal: the independence itself, from `indepFun_prod`.
    · exact indepFun_prod (X := fun ω : Ω ↦ stepsUntil A a m ω)
        (Y := fun ω : ℕ → 𝓐 → ℝ ↦ ω m a) (by fun_prop) (by fun_prop)
  | coe n =>
    -- On the conditioning event, `rewardByCount` may be replaced by `R n ω.1`
    -- (`rewardByCount_of_stepsUntil_eq_coe`).
    rw [Measure.map_congr (g := fun ω ↦ R n ω.1)]
    swap
    · refine ae_cond_of_forall_mem ((measurableSet_singleton _).preimage (by fun_prop)) ?_
      simp only [Set.mem_preimage, Set.mem_singleton_iff]
      exact fun ω ↦ rewardByCount_of_stepsUntil_eq_coe
    -- Conclude with `reward_cond_stepsUntil`; its nonzero-measure hypothesis comes from `hn`.
    refine reward_cond_stepsUntil h a m n hm ?_
    rwa [Measure.map_apply (by fun_prop) (measurableSet_singleton _)] at hn

`hasLaw_rewardByCount`🔗

Lemma Bandits.hasLaw_rewardByCount

Details

The reward received at the m-th pull of action a has law ν a.

theorem

Bandits.hasLaw_rewardByCount.{u_1, u_2} {𝓐 : Type u_1} {Ω : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω} [DecidableEq 𝓐]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ} {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace 𝓐]
  [Nonempty 𝓐] [StandardBorelSpace Ω] [Countable 𝓐]
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  (a : 𝓐) (m : ℕ) (hm : m ≠ 0) :
  ProbabilityTheory.HasLaw (Learning.rewardByCount A R a m) (ν a)
    (MeasureTheory.Measure.prod P (streamMeasure ν))
Bandits.hasLaw_rewardByCount.{u_1, u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} [DecidableEq 𝓐]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐]
  [StandardBorelSpace Ω] [Countable 𝓐]
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  (a : 𝓐) (m : ℕ) (hm : m ≠ 0) :
  ProbabilityTheory.HasLaw
    (Learning.rewardByCount A R a m) (ν a)
    (MeasureTheory.Measure.prod P
      (streamMeasure ν))

Code

/-- For `m ≠ 0`, `rewardByCount A R a m` has law `ν a` under `𝔓`. -/
lemma hasLaw_rewardByCount [StandardBorelSpace Ω] [Countable 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (a : 𝓐) (m : ℕ) (hm : m ≠ 0) :
    HasLaw (rewardByCount A R a m) (ν a) 𝔓 where
  aemeasurable

Type uses (5)

Body uses (7)

Used by (2)

Actions: Source · Open Issue

Proof

-- Value of the `aemeasurable` field: measurability of `rewardByCount A R a m`, weakened to
-- a.e.-measurability.
(measurable_rewardByCount h.measurable_action h.measurable_feedback a m).aemeasurable
  map_eq := by
    -- Measurability facts from `h`.
    -- NOTE(review): presumably consumed by the `fun_prop` calls below — confirm.
    have hA := h.measurable_action
    have hR := h.measurable_feedback
    -- The conditional distribution given `stepsUntil` is a.e. the constant kernel `ν a`.
    have h_condDistrib :
        condDistrib (rewardByCount A R a m) (fun ω ↦ stepsUntil A a m ω.1) 𝔓
        =ᵐ[(𝔓).map (fun ω ↦ stepsUntil A a m ω.1)]
          Kernel.const _ (ν a) := condDistrib_rewardByCount_stepsUntil h a m hm
    calc (𝔓).map (rewardByCount A R a m)
    -- Write the law as the conditional distribution composed with the law of `stepsUntil`.
    _ = (condDistrib (rewardByCount A R a m) (fun ω ↦ stepsUntil A a m ω.1) 𝔓)
        ∘ₘ ((𝔓).map (fun ω ↦ stepsUntil A a m ω.1)) := by
      rw [condDistrib_comp_map (by fun_prop) (by fun_prop)]
    -- Swap in the constant kernel; the a.e. equality suffices for the composition.
    _ = (Kernel.const _ (ν a)) ∘ₘ ((𝔓).map (fun ω ↦ stepsUntil A a m ω.1)) :=
      Measure.comp_congr h_condDistrib
    -- Register the law of `stepsUntil` as a probability measure, then let `simp` close the goal.
    _ = ν a := by
      have : IsProbabilityMeasure ((𝔓).map (fun ω ↦ stepsUntil A a m ω.1)) :=
        Measure.isProbabilityMeasure_map (by fun_prop)
      simp

`identDistrib_rewardByCount`🔗

Lemma Bandits.identDistrib_rewardByCount

Details

No docstring.

theorem

Bandits.identDistrib_rewardByCount.{u_1, u_2} {𝓐 : Type u_1}
  {Ω : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω}
  [DecidableEq 𝓐] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace 𝓐]
  [Nonempty 𝓐] [StandardBorelSpace Ω] [Countable 𝓐]
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  (a : 𝓐) (n m : ℕ) (hn : n ≠ 0) (hm : m ≠ 0) :
  ProbabilityTheory.IdentDistrib (Learning.rewardByCount A R a n)
    (Learning.rewardByCount A R a m)
    (MeasureTheory.Measure.prod P (streamMeasure ν))
    (MeasureTheory.Measure.prod P (streamMeasure ν))
Bandits.identDistrib_rewardByCount.{u_1,
    u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} [DecidableEq 𝓐]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐]
  [StandardBorelSpace Ω] [Countable 𝓐]
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  (a : 𝓐) (n m : ℕ) (hn : n ≠ 0)
  (hm : m ≠ 0) :
  ProbabilityTheory.IdentDistrib
    (Learning.rewardByCount A R a n)
    (Learning.rewardByCount A R a m)
    (MeasureTheory.Measure.prod P
      (streamMeasure ν))
    (MeasureTheory.Measure.prod P
      (streamMeasure ν))

Code

-- For nonzero `n` and `m`, `rewardByCount A R a n` and `rewardByCount A R a m` are identically
-- distributed, both taken under the measure `𝔓`.
-- The structure fields are given one by one, starting with `aemeasurable_fst`.
lemma identDistrib_rewardByCount [StandardBorelSpace Ω] [Countable 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (a : 𝓐) (n m : ℕ)
    (hn : n ≠ 0) (hm : m ≠ 0) :
    IdentDistrib (rewardByCount A R a n) (rewardByCount A R a m) 𝔓 𝔓 where
  aemeasurable_fst

Type uses (5)

Body uses (4)

Actions: Source · Open Issue

Proof

-- Value of `aemeasurable_fst`: measurability of `rewardByCount A R a n`, weakened to
-- a.e.-measurability.
(measurable_rewardByCount h.measurable_action h.measurable_feedback a n).aemeasurable
  -- Same argument for index `m`.
  aemeasurable_snd :=
    (measurable_rewardByCount h.measurable_action h.measurable_feedback a m).aemeasurable
  -- Rewrite both pushforwards with `hasLaw_rewardByCount`; each becomes `ν a`.
  map_eq := by rw [(hasLaw_rewardByCount h a n hn).map_eq, (hasLaw_rewardByCount h a m hm).map_eq]

`identDistrib_rewardByCount_id`🔗

Lemma Bandits.identDistrib_rewardByCount_id

Details

No docstring.

theorem

Bandits.identDistrib_rewardByCount_id.{u_1, u_2} {𝓐 : Type u_1}
  {Ω : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω}
  [DecidableEq 𝓐] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace 𝓐]
  [Nonempty 𝓐] [StandardBorelSpace Ω] [Countable 𝓐]
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  (a : 𝓐) (n : ℕ) (hn : n ≠ 0) :
  ProbabilityTheory.IdentDistrib (Learning.rewardByCount A R a n) id
    (MeasureTheory.Measure.prod P (streamMeasure ν)) (ν a)
Bandits.identDistrib_rewardByCount_id.{u_1,
    u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} [DecidableEq 𝓐]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐]
  [StandardBorelSpace Ω] [Countable 𝓐]
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  (a : 𝓐) (n : ℕ) (hn : n ≠ 0) :
  ProbabilityTheory.IdentDistrib
    (Learning.rewardByCount A R a n) id
    (MeasureTheory.Measure.prod P
      (streamMeasure ν))
    (ν a)

Code

-- For `n ≠ 0`, `rewardByCount A R a n` under `𝔓` is identically distributed with the identity
-- map under the measure `ν a`.
-- The structure fields are given one by one, starting with `aemeasurable_fst`.
lemma identDistrib_rewardByCount_id [StandardBorelSpace Ω] [Countable 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (a : 𝓐) (n : ℕ) (hn : n ≠ 0) :
    IdentDistrib (rewardByCount A R a n) id 𝔓 (ν a) where
  aemeasurable_fst

Type uses (5)

Body uses (4)

Used by (1)

identDistrib_rewardByCount_eval

Actions: Source · Open Issue

Proof

-- Value of `aemeasurable_fst`: measurability of `rewardByCount A R a n`, weakened to
-- a.e.-measurability.
(measurable_rewardByCount h.measurable_action h.measurable_feedback a n).aemeasurable
  -- The identity map: measurability is found by `fun_prop`, then weakened.
  aemeasurable_snd := Measurable.aemeasurable <| by fun_prop
  -- Left side becomes `ν a` by `hasLaw_rewardByCount`; right side by `Measure.map_id`.
  map_eq := by rw [(hasLaw_rewardByCount h a n hn).map_eq, Measure.map_id]

`identDistrib_rewardByCount_eval`🔗

Lemma Bandits.identDistrib_rewardByCount_eval

Details

No docstring.

theorem

Bandits.identDistrib_rewardByCount_eval.{u_1, u_2} {𝓐 : Type u_1}
  {Ω : Type u_2} {m𝓐 : MeasurableSpace 𝓐} {mΩ : MeasurableSpace Ω}
  [DecidableEq 𝓐] {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω} [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ} {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν] [StandardBorelSpace 𝓐]
  [Nonempty 𝓐] [StandardBorelSpace Ω] [Countable 𝓐]
  (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv ν) P)
  (a : 𝓐) (n m : ℕ) (hn : n ≠ 0) :
  ProbabilityTheory.IdentDistrib (Learning.rewardByCount A R a n)
    (fun ω => ω m a) (MeasureTheory.Measure.prod P (streamMeasure ν))
    (streamMeasure ν)
Bandits.identDistrib_rewardByCount_eval.{u_1,
    u_2}
  {𝓐 : Type u_1} {Ω : Type u_2}
  {m𝓐 : MeasurableSpace 𝓐}
  {mΩ : MeasurableSpace Ω} [DecidableEq 𝓐]
  {A : ℕ → Ω → 𝓐} {R : ℕ → Ω → ℝ}
  {P : MeasureTheory.Measure Ω}
  [MeasureTheory.IsProbabilityMeasure P]
  {alg : Learning.Algorithm 𝓐 ℝ}
  {ν : ProbabilityTheory.Kernel 𝓐 ℝ}
  [ProbabilityTheory.IsMarkovKernel ν]
  [StandardBorelSpace 𝓐] [Nonempty 𝓐]
  [StandardBorelSpace Ω] [Countable 𝓐]
  (h :
    Learning.IsAlgEnvSeq A R alg
      (Learning.stationaryEnv ν) P)
  (a : 𝓐) (n m : ℕ) (hn : n ≠ 0) :
  ProbabilityTheory.IdentDistrib
    (Learning.rewardByCount A R a n)
    (fun ω => ω m a)
    (MeasureTheory.Measure.prod P
      (streamMeasure ν))
    (streamMeasure ν)

Code

-- For `n ≠ 0` and any `m`, `rewardByCount A R a n` under `𝔓` is identically distributed with
-- the coordinate map `ω ↦ ω m a` under `streamMeasure ν`.
lemma identDistrib_rewardByCount_eval [StandardBorelSpace Ω] [Countable 𝓐]
    (h : IsAlgEnvSeq A R alg (stationaryEnv ν) P) (a : 𝓐) (n m : ℕ) (hn : n ≠ 0) :
    IdentDistrib (rewardByCount A R a n) (fun ω ↦ ω m a) 𝔓 (streamMeasure ν)

Type uses (5)

Body uses (2)

Actions: Source · Open Issue

Proof

-- Chain two facts by transitivity: `rewardByCount A R a n` is distributed like `id` under
-- `ν a`, and (the symmetric form of) `identDistrib_eval_eval_id_streamMeasure ν m a`.
-- NOTE(review): the latter is defined elsewhere; presumably it relates `ω ↦ ω m a` under
-- `streamMeasure ν` to `id` under `ν a` — confirm.
(identDistrib_rewardByCount_id h a n hn).trans
    (identDistrib_eval_eval_id_streamMeasure ν m a).symm

LeanMachineLearning exposition

2.9. Online.Bandit.RewardByCountMeasure🔗

`hasLaw_Z`🔗

`term𝓛[_|_;_]`🔗

`term𝓛[_|_In_;_]`🔗

`term𝓛[_|_←_;_]`🔗

`condDistrib_reward''`🔗

`reward_cond_action`🔗

`condIndepFun_reward_stepsUntil_action'`🔗

`condIndepFun_reward_stepsUntil_action`🔗

`reward_cond_stepsUntil`🔗

`condDistrib_rewardByCount_stepsUntil`🔗

`hasLaw_rewardByCount`🔗

`identDistrib_rewardByCount`🔗

`identDistrib_rewardByCount_id`🔗

`identDistrib_rewardByCount_eval`🔗

2.9. Online.Bandit.RewardByCountMeasure🔗

hasLaw_Z🔗

term𝓛[_|_;_]🔗

term𝓛[_|_In_;_]🔗

term𝓛[_|_←_;_]🔗

condDistrib_reward''🔗

reward_cond_action🔗

condIndepFun_reward_stepsUntil_action'🔗

condIndepFun_reward_stepsUntil_action🔗

reward_cond_stepsUntil🔗

condDistrib_rewardByCount_stepsUntil🔗

hasLaw_rewardByCount🔗

identDistrib_rewardByCount🔗

identDistrib_rewardByCount_id🔗

identDistrib_rewardByCount_eval🔗

`hasLaw_Z`🔗

`term𝓛[_|_;_]`🔗

`term𝓛[_|_In_;_]`🔗

`term𝓛[_|_←_;_]`🔗

`condDistrib_reward''`🔗

`reward_cond_action`🔗

`condIndepFun_reward_stepsUntil_action'`🔗

`condIndepFun_reward_stepsUntil_action`🔗

`reward_cond_stepsUntil`🔗

`condDistrib_rewardByCount_stepsUntil`🔗

`hasLaw_rewardByCount`🔗

`identDistrib_rewardByCount`🔗

`identDistrib_rewardByCount_id`🔗

`identDistrib_rewardByCount_eval`🔗