2.9.Β Online.Bandit.RewardByCountMeasure
Laws of stepsUntil and rewardByCount
Module LeanMachineLearning.Online.Bandit.RewardByCountMeasure contains 14 exposed declarations.
hasLaw_Zπ
Bandits.hasLaw_ZNo docstring.
Bandits.hasLaw_Z.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] (a : π) (m : β) : ProbabilityTheory.HasLaw (fun Ο => Prod.snd Ο m a) (Ξ½ a) (MeasureTheory.Measure.prod P (streamMeasure Ξ½))Bandits.hasLaw_Z.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] (a : π) (m : β) : ProbabilityTheory.HasLaw (fun Ο => Prod.snd Ο m a) (Ξ½ a) (MeasureTheory.Measure.prod P (streamMeasure Ξ½))
Code
lemma hasLaw_Z (a : π) (m : β) : HasLaw (fun Ο β¦ Ο.2 m a) (Ξ½ a) π where map_eq
Type uses (1)
Used by (1)
Actions: Source Β· Open Issue
Proof
by
calc (π).map (fun Ο β¦ Ο.2 m a)
_ = ((π).snd).map (fun Ο β¦ Ο m a) := by
rw [Measure.snd, Measure.map_map (by fun_prop) (by fun_prop)]
rfl
_ = (streamMeasure Ξ½).map (fun Ο β¦ Ο m a) := by simp
_ = ((Measure.infinitePi fun _ β¦ Measure.infinitePi Ξ½).map (fun Ο β¦ Ο m)).map
(fun Ο β¦ Ο a) := by
rw [streamMeasure, Measure.map_map (by fun_prop) (by fun_prop)]
rfl
_ = Ξ½ a := by simp_rw [(measurePreserving_eval_infinitePi _ _).map_eq]
termπ[_|_;_]π
Bandits.Β«termπ[_|_;_]Β»
Law of Y conditioned on the event s.
Bandits.Β«termπ[_|_;_]Β» : Lean.ParserDescrBandits.Β«termπ[_|_;_]Β» : Lean.ParserDescr
Code
notation "π[" Y " | " s "; " ΞΌ "]" => Measure.map Y (ΞΌ[|s])
Actions: Source Β· Open Issue
termπ[_|_In_;_]π
Bandits.Β«termπ[_|_In_;_]Β»
Law of Y conditioned on the event that X is in s.
Bandits.Β«termπ[_|_In_;_]Β» : Lean.ParserDescrBandits.Β«termπ[_|_In_;_]Β» : Lean.ParserDescr
Code
notation "π[" Y " | " X " in " s "; " ΞΌ "]" => Measure.map Y (ΞΌ[|X β»ΒΉ' s])
Actions: Source Β· Open Issue
termπ[_|_β_;_]π
Bandits.Β«termπ[_|_β_;_]Β»
Law of Y conditioned on the event that X equals x.
Bandits.Β«termπ[_|_β_;_]Β» : Lean.ParserDescrBandits.Β«termπ[_|_β_;_]Β» : Lean.ParserDescr
Code
notation "π[" Y " | " X " β " x "; " ΞΌ "]" => Measure.map Y (ΞΌ[|X β»ΒΉ' {x}])Actions: Source Β· Open Issue
condDistrib_reward''π
Bandits.condDistrib_reward''No docstring.
Bandits.condDistrib_reward''.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (n : β) : βπ[fun Ο => R n (Prod.fst Ο) | fun Ο => A n (Prod.fst Ο); MeasureTheory.Measure.prod P (streamMeasure Ξ½)] =α΅[MeasureTheory.Measure.map (fun Ο => A n (Prod.fst Ο)) (MeasureTheory.Measure.prod P (streamMeasure Ξ½))] βΞ½Bandits.condDistrib_reward''.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (n : β) : βπ[fun Ο => R n (Prod.fst Ο) | fun Ο => A n (Prod.fst Ο); MeasureTheory.Measure.prod P (streamMeasure Ξ½)] =α΅[MeasureTheory.Measure.map (fun Ο => A n (Prod.fst Ο)) (MeasureTheory.Measure.prod P (streamMeasure Ξ½))] βΞ½
Code
lemma condDistrib_reward'' [Countable π]
(h : IsAlgEnvSeq A R alg (stationaryEnv Ξ½) P) (n : β) :
π[fun Ο β¦ R n Ο.1 | fun Ο β¦ A n Ο.1; π] =α΅[(π).map (fun Ο β¦ A n Ο.1)] Ξ½Type uses (5)
Body uses (2)
Used by (1)
Actions: Source Β· Open Issue
Proof
by
have hA := h.measurable_action
have hR := h.measurable_feedback
have h_ra' : π[R n | A n; P] =α΅[P.map (A n)] Ξ½ := h.condDistrib_feedback_stationaryEnv n
have h_law : (π).map (fun Ο β¦ A n Ο.1) = P.map (A n) := by
change ((π).map (A n β Prod.fst)) = _
rw [β Measure.map_map (by fun_prop) (by fun_prop), β Measure.fst, Measure.fst_prod]
rw [h_law]
have h_prod : π[fun Ο β¦ R n Ο.1 | fun Ο β¦ A n Ο.1; π]
=α΅[P.map (A n)] π[R n | A n; P] :=
condDistrib_fst_prod _ (by fun_prop) _
filter_upwards [h_ra', h_prod] with Ο h_eq h_prod
rw [h_prod, h_eq]
reward_cond_actionπ
Bandits.reward_cond_actionNo docstring.
Bandits.reward_cond_action.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (n : β) (hΞΌa : (MeasureTheory.Measure.map (fun Ο => A n (Prod.fst Ο)) (MeasureTheory.Measure.prod P (streamMeasure Ξ½))) {a} β 0) : π[fun Ο => R n (Prod.fst Ο) | fun Ο => A n (Prod.fst Ο) in {a}; MeasureTheory.Measure.prod P (streamMeasure Ξ½)] = Ξ½ aBandits.reward_cond_action.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (n : β) (hΞΌa : (MeasureTheory.Measure.map (fun Ο => A n (Prod.fst Ο)) (MeasureTheory.Measure.prod P (streamMeasure Ξ½))) {a} β 0) : π[fun Ο => R n (Prod.fst Ο) | fun Ο => A n (Prod.fst Ο) in {a}; MeasureTheory.Measure.prod P (streamMeasure Ξ½)] = Ξ½ a
Code
lemma reward_cond_action [Countable π]
(h : IsAlgEnvSeq A R alg (stationaryEnv Ξ½) P) (a : π) (n : β)
(hΞΌa : (π).map (fun Ο β¦ A n Ο.1) {a} β 0) :
π[fun Ο β¦ R n Ο.1 | fun Ο β¦ A n Ο.1 β a; π] = Ξ½ aType uses (4)
Body uses (5)
Used by (1)
Actions: Source Β· Open Issue
Proof
by
have hA := h.measurable_action
have hR := h.measurable_feedback
have h_ra : π[fun Ο β¦ R n Ο.1 | fun Ο β¦ A n Ο.1; π] =α΅[(π).map (fun Ο β¦ A n Ο.1)] Ξ½ :=
condDistrib_reward'' h n
have h_eq := condDistrib_ae_eq_cond (ΞΌ := π)
(X := fun Ο β¦ A n Ο.1) (Y := fun Ο β¦ R n Ο.1) (by fun_prop) (by fun_prop)
rw [Filter.EventuallyEq, ae_iff_of_countable] at h_ra h_eq
specialize h_ra a hΞΌa
specialize h_eq a hΞΌa
rw [h_ra] at h_eq
exact h_eq.symm
condIndepFun_reward_stepsUntil_action'π
Bandits.condIndepFun_reward_stepsUntil_action'No docstring.
Bandits.condIndepFun_reward_stepsUntil_action'.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} [DecidableEq π] {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Nonempty π] [StandardBorelSpace Ξ©] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (m n : β) : ProbabilityTheory.CondIndepFun (MeasurableSpace.comap (A n) inferInstance) β― (R n) (Set.indicator {Ο | Learning.stepsUntil A a m Ο = βn} fun x => 1) PBandits.condIndepFun_reward_stepsUntil_action'.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} [DecidableEq π] {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Nonempty π] [StandardBorelSpace Ξ©] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (m n : β) : ProbabilityTheory.CondIndepFun (MeasurableSpace.comap (A n) inferInstance) β― (R n) (Set.indicator {Ο | Learning.stepsUntil A a m Ο = βn} fun x => 1) P
Code
lemma condIndepFun_reward_stepsUntil_action' [StandardBorelSpace Ξ©]
(h : IsAlgEnvSeq A R alg (stationaryEnv Ξ½) P) (a : π) (m n : β) :
R n βα΅’[A n, h.measurable_action n; P] {Ο | stepsUntil A a m Ο = βn}.indicator (fun _ β¦ 1)Type uses (5)
Body uses (6)
Used by (1)
Actions: Source Β· Open Issue
Proof
by
-- the indicator of `stepsUntil ... = n` is a function of `hist (n-1)` and `action n`.
-- It thus suffices to use the independence of `reward n` and `hist (n-1)` conditionally
-- on `action n`.
have hA := h.measurable_action
have hR := h.measurable_feedback
by_cases hn : n = 0
Β· have h_indep : R 0 βα΅’[A 0, hA 0; P] A 0 :=
condIndepFun_self_right (by fun_prop) (by fun_prop)
simp only [hn]
refine h_indep.of_measurable_right (hX := hA 0) ?_
exact measurable_comap_indicator_stepsUntil_eq_zero a m
Β· have h_indep : R n βα΅’[A n, hA n; P] fun Ο β¦ (history A R (n - 1) Ο, A n Ο) :=
IsAlgEnvSeq.condIndepFun_feedback_history_action_action' h n (by grind)
refine h_indep.of_measurable_right (hX := hA n) ?_
exact measurable_comap_indicator_stepsUntil_eq hA hR a m n
condIndepFun_reward_stepsUntil_actionπ
Bandits.condIndepFun_reward_stepsUntil_actionNo docstring.
Bandits.condIndepFun_reward_stepsUntil_action.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} [DecidableEq π] {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Nonempty π] [StandardBorelSpace Ξ©] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (m n : β) : ProbabilityTheory.CondIndepFun (MeasurableSpace.comap (fun Ο => A n (Prod.fst Ο)) mπ) β― (fun Ο => R n (Prod.fst Ο)) (Set.indicator {Ο | Learning.stepsUntil A a m (Prod.fst Ο) = βn} fun x => 1) (MeasureTheory.Measure.prod P (streamMeasure Ξ½))Bandits.condIndepFun_reward_stepsUntil_action.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} [DecidableEq π] {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Nonempty π] [StandardBorelSpace Ξ©] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (m n : β) : ProbabilityTheory.CondIndepFun (MeasurableSpace.comap (fun Ο => A n (Prod.fst Ο)) mπ) β― (fun Ο => R n (Prod.fst Ο)) (Set.indicator {Ο | Learning.stepsUntil A a m (Prod.fst Ο) = βn} fun x => 1) (MeasureTheory.Measure.prod P (streamMeasure Ξ½))
Code
lemma condIndepFun_reward_stepsUntil_action [StandardBorelSpace Ξ©] [Countable π]
(h : IsAlgEnvSeq A R alg (stationaryEnv Ξ½) P)
(a : π) (m n : β) :
CondIndepFun (mπ.comap (fun Ο β¦ A n Ο.1)) ((h.measurable_action n).comp measurable_fst).comap_le
(fun Ο β¦ R n Ο.1) ({Ο | stepsUntil A a m Ο.1 = βn}.indicator (fun _ β¦ 1)) πType uses (7)
Body uses (4)
Used by (1)
Actions: Source Β· Open Issue
Proof
by
have hA := h.measurable_action
have hR := h.measurable_feedback
exact condIndepFun_fst_prod (Ξ½ := streamMeasure Ξ½)
(measurable_indicator_stepsUntil_eq hA hR a m n) (by fun_prop) (by fun_prop)
(condIndepFun_reward_stepsUntil_action' h a m n)
reward_cond_stepsUntilπ
Bandits.reward_cond_stepsUntilNo docstring.
Bandits.reward_cond_stepsUntil.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} [DecidableEq π] {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Nonempty π] [StandardBorelSpace Ξ©] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (m n : β) (hm : m β 0) (hΞΌn : (MeasureTheory.Measure.prod P (streamMeasure Ξ½)) ((fun Ο => Learning.stepsUntil A a m (Prod.fst Ο)) β»ΒΉ' {βn}) β 0) : π[fun Ο => R n (Prod.fst Ο) | fun Ο => Learning.stepsUntil A a m (Prod.fst Ο) in {βn}; MeasureTheory.Measure.prod P (streamMeasure Ξ½)] = Ξ½ aBandits.reward_cond_stepsUntil.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} [DecidableEq π] {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Nonempty π] [StandardBorelSpace Ξ©] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (m n : β) (hm : m β 0) (hΞΌn : (MeasureTheory.Measure.prod P (streamMeasure Ξ½)) ((fun Ο => Learning.stepsUntil A a m (Prod.fst Ο)) β»ΒΉ' {βn}) β 0) : π[fun Ο => R n (Prod.fst Ο) | fun Ο => Learning.stepsUntil A a m (Prod.fst Ο) in {βn}; MeasureTheory.Measure.prod P (streamMeasure Ξ½)] = Ξ½ a
Code
lemma reward_cond_stepsUntil [StandardBorelSpace Ξ©] [Countable π]
(h : IsAlgEnvSeq A R alg (stationaryEnv Ξ½) P) (a : π) (m n : β)
(hm : m β 0) (hΞΌn : π ((fun Ο β¦ stepsUntil A a m Ο.1) β»ΒΉ' {βn}) β 0) :
π[fun Ο β¦ R n Ο.1 | fun Ο β¦ stepsUntil A a m Ο.1 β βn; π] = Ξ½ aType uses (5)
Body uses (8)
Used by (1)
Actions: Source Β· Open Issue
Proof
by
have hA := h.measurable_action
have hR := h.measurable_feedback
have hΞΌna :
π ((fun Ο β¦ stepsUntil A a m Ο.1) β»ΒΉ' {βn} β© (fun Ο β¦ A n Ο.1) β»ΒΉ' {a}) β 0 := by
suffices ((fun Ο : Ξ© Γ (β β π β β) β¦
stepsUntil A a m Ο.1) β»ΒΉ' {βn} β© (fun Ο β¦ A n Ο.1) β»ΒΉ' {a})
= (fun Ο β¦ stepsUntil A a m Ο.1) β»ΒΉ' {βn} by simpa [this] using hΞΌn
ext Ο
simp only [Set.mem_inter_iff, Set.mem_preimage, Set.mem_singleton_iff, and_iff_left_iff_imp]
exact action_eq_of_stepsUntil_eq_coe hm
have hΞΌa : (π).map (fun Ο β¦ A n Ο.1) {a} β 0 := by
rw [Measure.map_apply (by fun_prop) (measurableSet_singleton _)]
refine fun h_zero β¦ hΞΌn (measure_mono_null (fun Ο β¦ ?_) h_zero)
simp only [Set.mem_preimage, Set.mem_singleton_iff]
exact action_eq_of_stepsUntil_eq_coe hm
calc π[fun Ο β¦ R n Ο.1 | fun Ο β¦ stepsUntil A a m Ο.1 β (n : ββ); π]
_ = (π[|(fun Ο β¦ stepsUntil A a m Ο.1) β»ΒΉ' {βn} β© (fun Ο β¦ A n Ο.1) β»ΒΉ' {a}]).map
(fun Ο β¦ R n Ο.1) := by
congr with Ο
simp only [Set.mem_preimage, Set.mem_singleton_iff, Set.mem_inter_iff, iff_self_and]
exact action_eq_of_stepsUntil_eq_coe hm
_ = (π[|(fun Ο β¦ A n Ο.1) β»ΒΉ' {a}
β© {Ο : Ξ© Γ (β β π β β) | stepsUntil A a m Ο.1 = βn}.indicator 1 β»ΒΉ' {1} ]).map
(fun Ο β¦ R n Ο.1) := by
congr 2 with Ο
simp only [Set.mem_inter_iff, Set.mem_preimage, Set.mem_singleton_iff, Set.indicator_apply,
Set.mem_setOf_eq, Pi.one_apply, ite_eq_left_iff, zero_ne_one, imp_false, Decidable.not_not]
rw [and_comm]
_ = π[fun Ο β¦ R n Ο.1 | fun Ο β¦ A n Ο.1 β a; π] := by
rw [cond_of_condIndepFun (by fun_prop)]
Β· exact condIndepFun_reward_stepsUntil_action h a m n
Β· refine measurable_one.indicator ?_
exact measurableSet_eq_fun (by fun_prop) (by fun_prop)
Β· fun_prop
Β· convert hΞΌna using 2
rw [Set.inter_comm]
congr 1 with Ο
simp [Set.indicator_apply]
_ = Ξ½ a := reward_cond_action h a n hΞΌa
condDistrib_rewardByCount_stepsUntilπ
Bandits.condDistrib_rewardByCount_stepsUntil
The conditional distribution of the reward received at the m-th pull of action a
given the time at which number of pulls is m is the constant kernel with value Ξ½ a.
Bandits.condDistrib_rewardByCount_stepsUntil.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} [DecidableEq π] {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Nonempty π] [StandardBorelSpace Ξ©] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (m : β) (hm : m β 0) : βπ[Learning.rewardByCount A R a m | fun Ο => Learning.stepsUntil A a m (Prod.fst Ο); MeasureTheory.Measure.prod P (streamMeasure Ξ½)] =α΅[MeasureTheory.Measure.map (fun Ο => Learning.stepsUntil A a m (Prod.fst Ο)) (MeasureTheory.Measure.prod P (streamMeasure Ξ½))] β(ProbabilityTheory.Kernel.const ββ (Ξ½ a))Bandits.condDistrib_rewardByCount_stepsUntil.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} [DecidableEq π] {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Nonempty π] [StandardBorelSpace Ξ©] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (m : β) (hm : m β 0) : βπ[Learning.rewardByCount A R a m | fun Ο => Learning.stepsUntil A a m (Prod.fst Ο); MeasureTheory.Measure.prod P (streamMeasure Ξ½)] =α΅[MeasureTheory.Measure.map (fun Ο => Learning.stepsUntil A a m (Prod.fst Ο)) (MeasureTheory.Measure.prod P (streamMeasure Ξ½))] β(ProbabilityTheory.Kernel.const ββ (Ξ½ a))
Code
lemma condDistrib_rewardByCount_stepsUntil [StandardBorelSpace Ξ©] [Countable π]
(h : IsAlgEnvSeq A R alg (stationaryEnv Ξ½) P) (a : π) (m : β) (hm : m β 0) :
condDistrib (rewardByCount A R a m) (fun Ο β¦ stepsUntil A a m Ο.1) π
=α΅[(π).map (fun Ο β¦ stepsUntil A a m Ο.1)] Kernel.const _ (Ξ½ a)Type uses (7)
Body uses (10)
Used by (1)
Actions: Source Β· Open Issue
Proof
by
have hA := h.measurable_action
have hR := h.measurable_feedback
refine (condDistrib_ae_eq_cond (ΞΌ := π)
(X := fun Ο β¦ stepsUntil A a m Ο.1) (by fun_prop) (by fun_prop)).trans ?_
rw [Filter.EventuallyEq, ae_iff_of_countable]
intro n hn
simp only [Kernel.const_apply]
cases n with
| top =>
rw [Measure.map_congr (g := fun Ο β¦ Ο.2 m a)]
swap
Β· refine ae_cond_of_forall_mem ((measurableSet_singleton _).preimage (by fun_prop)) ?_
simp only [Set.mem_preimage, Set.mem_singleton_iff]
exact fun Ο β¦ rewardByCount_of_stepsUntil_eq_top
rw [cond_of_indepFun _ (by fun_prop) (by fun_prop) (measurableSet_singleton _)]
Β· exact (hasLaw_Z a m).map_eq
Β· rwa [Measure.map_apply (by fun_prop) (measurableSet_singleton _)] at hn
Β· exact indepFun_prod (X := fun Ο : Ξ© β¦ stepsUntil A a m Ο)
(Y := fun Ο : β β π β β β¦ Ο m a) (by fun_prop) (by fun_prop)
| coe n =>
rw [Measure.map_congr (g := fun Ο β¦ R n Ο.1)]
swap
Β· refine ae_cond_of_forall_mem ((measurableSet_singleton _).preimage (by fun_prop)) ?_
simp only [Set.mem_preimage, Set.mem_singleton_iff]
exact fun Ο β¦ rewardByCount_of_stepsUntil_eq_coe
refine reward_cond_stepsUntil h a m n hm ?_
rwa [Measure.map_apply (by fun_prop) (measurableSet_singleton _)] at hn
hasLaw_rewardByCountπ
Bandits.hasLaw_rewardByCount
The reward received at the m-th pull of action a has law Ξ½ a.
Bandits.hasLaw_rewardByCount.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} [DecidableEq π] {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Nonempty π] [StandardBorelSpace Ξ©] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (m : β) (hm : m β 0) : ProbabilityTheory.HasLaw (Learning.rewardByCount A R a m) (Ξ½ a) (MeasureTheory.Measure.prod P (streamMeasure Ξ½))Bandits.hasLaw_rewardByCount.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} [DecidableEq π] {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Nonempty π] [StandardBorelSpace Ξ©] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (m : β) (hm : m β 0) : ProbabilityTheory.HasLaw (Learning.rewardByCount A R a m) (Ξ½ a) (MeasureTheory.Measure.prod P (streamMeasure Ξ½))
Code
lemma hasLaw_rewardByCount [StandardBorelSpace Ξ©] [Countable π]
(h : IsAlgEnvSeq A R alg (stationaryEnv Ξ½) P) (a : π) (m : β) (hm : m β 0) :
HasLaw (rewardByCount A R a m) (Ξ½ a) π where
aemeasurableType uses (5)
Body uses (7)
Actions: Source Β· Open Issue
Proof
(measurable_rewardByCount h.measurable_action h.measurable_feedback a m).aemeasurable
map_eq := by
have hA := h.measurable_action
have hR := h.measurable_feedback
have h_condDistrib :
condDistrib (rewardByCount A R a m) (fun Ο β¦ stepsUntil A a m Ο.1) π
=α΅[(π).map (fun Ο β¦ stepsUntil A a m Ο.1)]
Kernel.const _ (Ξ½ a) := condDistrib_rewardByCount_stepsUntil h a m hm
calc (π).map (rewardByCount A R a m)
_ = (condDistrib (rewardByCount A R a m) (fun Ο β¦ stepsUntil A a m Ο.1) π)
ββ ((π).map (fun Ο β¦ stepsUntil A a m Ο.1)) := by
rw [condDistrib_comp_map (by fun_prop) (by fun_prop)]
_ = (Kernel.const _ (Ξ½ a)) ββ ((π).map (fun Ο β¦ stepsUntil A a m Ο.1)) :=
Measure.comp_congr h_condDistrib
_ = Ξ½ a := by
have : IsProbabilityMeasure ((π).map (fun Ο β¦ stepsUntil A a m Ο.1)) :=
Measure.isProbabilityMeasure_map (by fun_prop)
simp
identDistrib_rewardByCountπ
Bandits.identDistrib_rewardByCountNo docstring.
Bandits.identDistrib_rewardByCount.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} [DecidableEq π] {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Nonempty π] [StandardBorelSpace Ξ©] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (n m : β) (hn : n β 0) (hm : m β 0) : ProbabilityTheory.IdentDistrib (Learning.rewardByCount A R a n) (Learning.rewardByCount A R a m) (MeasureTheory.Measure.prod P (streamMeasure Ξ½)) (MeasureTheory.Measure.prod P (streamMeasure Ξ½))Bandits.identDistrib_rewardByCount.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} [DecidableEq π] {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Nonempty π] [StandardBorelSpace Ξ©] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (n m : β) (hn : n β 0) (hm : m β 0) : ProbabilityTheory.IdentDistrib (Learning.rewardByCount A R a n) (Learning.rewardByCount A R a m) (MeasureTheory.Measure.prod P (streamMeasure Ξ½)) (MeasureTheory.Measure.prod P (streamMeasure Ξ½))
Code
lemma identDistrib_rewardByCount [StandardBorelSpace Ξ©] [Countable π]
(h : IsAlgEnvSeq A R alg (stationaryEnv Ξ½) P) (a : π) (n m : β)
(hn : n β 0) (hm : m β 0) :
IdentDistrib (rewardByCount A R a n) (rewardByCount A R a m) π π where
aemeasurable_fstType uses (5)
Body uses (4)
Actions: Source Β· Open Issue
Proof
(measurable_rewardByCount h.measurable_action h.measurable_feedback a n).aemeasurable
aemeasurable_snd :=
(measurable_rewardByCount h.measurable_action h.measurable_feedback a m).aemeasurable
map_eq := by rw [(hasLaw_rewardByCount h a n hn).map_eq, (hasLaw_rewardByCount h a m hm).map_eq]
identDistrib_rewardByCount_idπ
Bandits.identDistrib_rewardByCount_idNo docstring.
Bandits.identDistrib_rewardByCount_id.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} [DecidableEq π] {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Nonempty π] [StandardBorelSpace Ξ©] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (n : β) (hn : n β 0) : ProbabilityTheory.IdentDistrib (Learning.rewardByCount A R a n) id (MeasureTheory.Measure.prod P (streamMeasure Ξ½)) (Ξ½ a)Bandits.identDistrib_rewardByCount_id.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} [DecidableEq π] {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Nonempty π] [StandardBorelSpace Ξ©] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (n : β) (hn : n β 0) : ProbabilityTheory.IdentDistrib (Learning.rewardByCount A R a n) id (MeasureTheory.Measure.prod P (streamMeasure Ξ½)) (Ξ½ a)
Code
lemma identDistrib_rewardByCount_id [StandardBorelSpace Ξ©] [Countable π]
(h : IsAlgEnvSeq A R alg (stationaryEnv Ξ½) P) (a : π) (n : β) (hn : n β 0) :
IdentDistrib (rewardByCount A R a n) id π (Ξ½ a) where
aemeasurable_fstType uses (5)
Body uses (4)
Used by (1)
Actions: Source Β· Open Issue
Proof
(measurable_rewardByCount h.measurable_action h.measurable_feedback a n).aemeasurable aemeasurable_snd := Measurable.aemeasurable <| by fun_prop map_eq := by rw [(hasLaw_rewardByCount h a n hn).map_eq, Measure.map_id]
identDistrib_rewardByCount_evalπ
Bandits.identDistrib_rewardByCount_evalNo docstring.
Bandits.identDistrib_rewardByCount_eval.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} [DecidableEq π] {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Nonempty π] [StandardBorelSpace Ξ©] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (n m : β) (hn : n β 0) : ProbabilityTheory.IdentDistrib (Learning.rewardByCount A R a n) (fun Ο => Ο m a) (MeasureTheory.Measure.prod P (streamMeasure Ξ½)) (streamMeasure Ξ½)Bandits.identDistrib_rewardByCount_eval.{u_1, u_2} {π : Type u_1} {Ξ© : Type u_2} {mπ : MeasurableSpace π} {mΞ© : MeasurableSpace Ξ©} [DecidableEq π] {A : β β Ξ© β π} {R : β β Ξ© β β} {P : MeasureTheory.Measure Ξ©} [MeasureTheory.IsProbabilityMeasure P] {alg : Learning.Algorithm π β} {Ξ½ : ProbabilityTheory.Kernel π β} [ProbabilityTheory.IsMarkovKernel Ξ½] [StandardBorelSpace π] [Nonempty π] [StandardBorelSpace Ξ©] [Countable π] (h : Learning.IsAlgEnvSeq A R alg (Learning.stationaryEnv Ξ½) P) (a : π) (n m : β) (hn : n β 0) : ProbabilityTheory.IdentDistrib (Learning.rewardByCount A R a n) (fun Ο => Ο m a) (MeasureTheory.Measure.prod P (streamMeasure Ξ½)) (streamMeasure Ξ½)
Code
lemma identDistrib_rewardByCount_eval [StandardBorelSpace Ξ©] [Countable π]
(h : IsAlgEnvSeq A R alg (stationaryEnv Ξ½) P) (a : π) (n m : β) (hn : n β 0) :
IdentDistrib (rewardByCount A R a n) (fun Ο β¦ Ο m a) π (streamMeasure Ξ½)Type uses (5)
Actions: Source Β· Open Issue
Proof
(identDistrib_rewardByCount_id h a n hn).trans
(identDistrib_eval_eval_id_streamMeasure Ξ½ m a).symm-
Bandits.hasLaw_Z -
Bandits.Β«termπ[_|_;_]Β» -
Bandits.Β«termπ[_|_In_;_]Β» -
Bandits.Β«termπ[_|_β_;_]Β» -
Bandits.condDistrib_reward'' -
Bandits.reward_cond_action -
Bandits.condIndepFun_reward_stepsUntil_action' -
Bandits.condIndepFun_reward_stepsUntil_action -
Bandits.reward_cond_stepsUntil -
Bandits.condDistrib_rewardByCount_stepsUntil -
Bandits.hasLaw_rewardByCount -
Bandits.identDistrib_rewardByCount -
Bandits.identDistrib_rewardByCount_id -
Bandits.identDistrib_rewardByCount_eval