Size
4 MB
Subscribers
None

# View Options

This file is larger than 256 KB, so syntax highlighting was skipped.
 diff --git a/thys/MDP-Algorithms/Algorithms.thy b/thys/MDP-Algorithms/Algorithms.thy new file mode 100644 --- /dev/null +++ b/thys/MDP-Algorithms/Algorithms.thy @@ -0,0 +1,10 @@ +(* Author: Maximilian Schäffeler *) + +theory Algorithms + imports + Value_Iteration + Policy_Iteration + Modified_Policy_Iteration + Splitting_Methods +begin +end \ No newline at end of file diff --git a/thys/MDP-Algorithms/Blinfun_Matrix.thy b/thys/MDP-Algorithms/Blinfun_Matrix.thy new file mode 100644 --- /dev/null +++ b/thys/MDP-Algorithms/Blinfun_Matrix.thy @@ -0,0 +1,239 @@ +theory Blinfun_Matrix + imports + "MDP-Rewards.Blinfun_Util" + Matrix_Util +begin + +section \Bounded Linear Functions and Matrices\ + +definition "blinfun_to_matrix (f :: ('b::finite \\<^sub>b real) \\<^sub>L ('c::finite \\<^sub>b _)) = + matrix (\v. (\ j. f (Bfun (($) v)) j))" + +definition "matrix_to_blinfun X = Blinfun (\v. Bfun (\i. (X *v (\ i. (apply_bfun v i)))$ i))" + +lemma plus_vec_eq: "(\ i. f i + g i) = (\ i. f i) + (\ i. g i)" + by (simp add: Finite_Cartesian_Product.plus_vec_def) + +lemma matrix_to_blinfun_mult: "matrix_to_blinfun m (v :: 'c::finite \\<^sub>b real) i = (m *v (\ i. v i)) $i" +proof - + have [simp]: "(\ i. c * x i) = c *\<^sub>R (\ i. x i)" for c x + by (simp add: vector_scalar_mult_def scalar_mult_eq_scaleR[symmetric]) + + have "bounded_linear (\v. bfun.Bfun (($) (m *v vec_lambda (apply_bfun v))))" + proof (rule bounded_linear_compose[of "\x. bfun.Bfun (\y. x $y)"], goal_cases) + case 1 + then show ?case + using bounded_linear_bfun_nth[of id, simplified] bounded_linear_ident eq_id_iff + by metis + next + case 2 + then show ?case + using norm_vec_le_norm_bfun + by (auto simp: matrix_vector_right_distrib plus_vec_eq + intro!: bounded_linear_intro bounded_linear_compose[OF matrix_vector_mul_bounded_linear]) + qed + thus ?thesis + by (auto simp: Blinfun_inverse matrix_to_blinfun_def Bfun_inverse) +qed + +lemma blinfun_to_matrix_mult: "(blinfun_to_matrix f *v (\ i. apply_bfun v i))$ i = f v i" +proof - + have "(blinfun_to_matrix f *v (\ i. v i)) $i = (\j\UNIV. (f ((v j *\<^sub>R bfun.Bfun (\i. if i = j then 1 else 0)))) i)" + unfolding blinfun_to_matrix_def matrix_def + by (auto simp: matrix_vector_mult_def mult.commute axis_def blinfun.scaleR_right vec_lambda_inverse) + also have "\ = (\j\UNIV. (f ((v j *\<^sub>R bfun.Bfun (\i. if i = j then 1 else 0))))) i" + by (auto intro: finite_induct) + also have "\ = f (\j\UNIV. (v j *\<^sub>R bfun.Bfun (\i. if i = j then 1 else 0))) i" + by (auto simp: blinfun.sum_right) + also have "\ = f v i" + proof - + have "(\j\UNIV. (v j *\<^sub>R bfun.Bfun (\i. if i = j then 1 else 0))) x = v x" for x + proof - + have "(\j\UNIV. (v j *\<^sub>R bfun.Bfun (\i. if i = j then 1 else 0))) x = + (\j\UNIV. (v j *\<^sub>R bfun.Bfun (\i. if i = j then 1 else 0) x))" + by (auto intro: finite_induct) + also have "\ = (\j\UNIV. (v j *\<^sub>R (\i. if i = j then 1 else 0) x))" + by (subst Bfun_inverse) (metis vec_bfun vec_lambda_inverse[OF UNIV_I, symmetric])+ + also have "\ = (\j\UNIV. ((if x = j then v j * 1 else v j * 0)))" + by (auto simp: if_distrib intro!: sum.cong) + also have "\ = (\j\UNIV. ((if x = j then v j else 0)))" + by (meson more_arith_simps(6) mult_zero_right) + also have "\ = v x" + by auto + finally show ?thesis. + qed + thus ?thesis + using bfun_eqI + by fastforce + qed + finally show ?thesis. +qed + +lemma blinfun_to_matrix_mult': "(blinfun_to_matrix f *v v)$ i = f (Bfun (\i. v $i)) i" + by (metis bfun.Bfun_inverse blinfun_to_matrix_mult vec_bfun vec_nth_inverse) + +lemma blinfun_to_matrix_mult'': "(blinfun_to_matrix f *v v) = (\ i. f (Bfun (\i. v$ i)) i)" + by (metis blinfun_to_matrix_mult' vec_lambda_unique) + +lemma matrix_to_blinfun_inv: "matrix_to_blinfun (blinfun_to_matrix f) = f" + by (auto simp: matrix_to_blinfun_mult blinfun_to_matrix_mult intro!: blinfun_eqI) + +lemma blinfun_to_matrix_add: "blinfun_to_matrix (f + g) = blinfun_to_matrix f + blinfun_to_matrix g" + by (simp add: matrix_eq blinfun_to_matrix_mult'' matrix_vector_mult_add_rdistrib blinfun.add_left plus_vec_eq) + +lemma blinfun_to_matrix_diff: "blinfun_to_matrix (f - g) = blinfun_to_matrix f - blinfun_to_matrix g" + using blinfun_to_matrix_add + by (metis add_right_imp_eq diff_add_cancel) + +lemma blinfun_to_matrix_scaleR: "blinfun_to_matrix (c *\<^sub>R f) = c *\<^sub>R blinfun_to_matrix f" + by (auto simp: matrix_eq blinfun_to_matrix_mult'' scaleR_matrix_vector_assoc[symmetric] + blinfun.scaleR_left vector_scalar_mult_def[of c, unfolded scalar_mult_eq_scaleR]) + +lemma matrix_to_blinfun_add: + "matrix_to_blinfun ((f :: real^_^_) + g) = matrix_to_blinfun f + matrix_to_blinfun g" + by (auto intro!: blinfun_eqI simp: matrix_to_blinfun_mult blinfun.add_left matrix_vector_mult_add_rdistrib) + +lemma matrix_to_blinfun_diff: + "matrix_to_blinfun ((f :: real^_^_) - g) = matrix_to_blinfun f - matrix_to_blinfun g" + using matrix_to_blinfun_add diff_eq_eq + by metis + +lemma matrix_to_blinfun_scaleR: + "matrix_to_blinfun (c *\<^sub>R (f :: real^_^_)) = c *\<^sub>R matrix_to_blinfun f" + by (auto intro!: blinfun_eqI simp: matrix_to_blinfun_mult blinfun.scaleR_left + matrix_vector_mult_add_rdistrib scaleR_matrix_vector_assoc[symmetric]) + +lemma matrix_to_blinfun_comp: "matrix_to_blinfun ((m:: real^_^_) ** n) = (matrix_to_blinfun m) o\<^sub>L (matrix_to_blinfun n)" + by (auto intro!: blinfun_eqI simp: matrix_vector_mul_assoc[symmetric] matrix_to_blinfun_mult) + +lemma blinfun_to_matrix_comp: "blinfun_to_matrix (f o\<^sub>L g) = (blinfun_to_matrix f) ** (blinfun_to_matrix g)" + by (simp add: matrix_eq apply_bfun_inverse blinfun_to_matrix_mult'' matrix_vector_mul_assoc[symmetric]) + +lemma blinfun_to_matrix_id: "blinfun_to_matrix id_blinfun = mat 1" + by (simp add: Bfun_inverse blinfun_to_matrix_mult'' matrix_eq) + +lemma matrix_to_blinfun_id: "matrix_to_blinfun (mat 1 :: (real ^_^_)) = id_blinfun" + by (auto intro!: blinfun_eqI simp: matrix_to_blinfun_mult) + +lemma matrix_to_blinfun_inv\<^sub>L: + assumes "invertible m" + shows "matrix_to_blinfun (matrix_inv (m :: real^_^_)) = inv\<^sub>L (matrix_to_blinfun m)" + "invertible\<^sub>L (matrix_to_blinfun m)" +proof - + have "m ** matrix_inv m = mat 1" "matrix_inv m ** m = mat 1" + using assms + by (auto simp: matrix_inv_right matrix_inv_left) + hence "matrix_to_blinfun (matrix_inv m) o\<^sub>L matrix_to_blinfun m = id_blinfun" + "matrix_to_blinfun m o\<^sub>L matrix_to_blinfun (matrix_inv m) = id_blinfun" + by (auto simp: matrix_to_blinfun_id matrix_to_blinfun_comp[symmetric]) + thus "matrix_to_blinfun (matrix_inv m) = inv\<^sub>L (matrix_to_blinfun m)" "invertible\<^sub>L (matrix_to_blinfun m)" + by (auto intro: inv\<^sub>L_I) +qed + + +lemma blinfun_to_matrix_inverse: + assumes "invertible\<^sub>L X" + shows "invertible (blinfun_to_matrix (X :: ('b::finite \\<^sub>b real) \\<^sub>L 'c::finite \\<^sub>b real))" + "blinfun_to_matrix (inv\<^sub>L X) = matrix_inv (blinfun_to_matrix X)" +proof - + have "X o\<^sub>L inv\<^sub>L X = id_blinfun" + by (simp add: assms) + hence 1: "blinfun_to_matrix X ** blinfun_to_matrix (inv\<^sub>L X) = mat 1" + by (metis blinfun_to_matrix_comp blinfun_to_matrix_id) + have "inv\<^sub>L X o\<^sub>L X = id_blinfun" + by (simp add: assms) + hence 2: "blinfun_to_matrix (inv\<^sub>L X) ** blinfun_to_matrix (X) = mat 1" + by (metis blinfun_to_matrix_comp blinfun_to_matrix_id) + thus "invertible (blinfun_to_matrix X)" + using "1" invertible_def by blast + thus "blinfun_to_matrix (inv\<^sub>L X) = matrix_inv (blinfun_to_matrix X)" + using 1 2 matrix_inv_right matrix_mul_assoc matrix_mul_lid + by metis +qed + +lemma blinfun_to_matrix_inv[simp]: "blinfun_to_matrix (matrix_to_blinfun f) = f" + by (auto simp: matrix_eq blinfun_to_matrix_mult'' matrix_to_blinfun_mult bfun.Bfun_inverse) + +lemma invertible_invertible\<^sub>L_I: "invertible (blinfun_to_matrix f) \ invertible\<^sub>L f" + "invertible\<^sub>L (matrix_to_blinfun X) \ invertible (X :: real^_^_)" + using matrix_to_blinfun_inv\<^sub>L(2) blinfun_to_matrix_inverse(1) matrix_to_blinfun_inv blinfun_to_matrix_inv + by metis+ + +lemma bounded_linear_blinfun_to_matrix: "bounded_linear (blinfun_to_matrix :: ('a \\<^sub>b real) \\<^sub>L ('b \\<^sub>b real) \ real^'a^'b)" +proof (intro bounded_linear_intro[of _ "real CARD('a::finite) * real CARD('b::finite)"]) + show "\x y. blinfun_to_matrix (x + y) = blinfun_to_matrix x + blinfun_to_matrix y" + by (auto simp: blinfun_to_matrix_add blinfun_to_matrix_scaleR) +next + show "\r x. blinfun_to_matrix (r *\<^sub>R x) = r *\<^sub>R blinfun_to_matrix x" + by (auto simp: blinfun_to_matrix_def matrix_def blinfun.scaleR_left vec_eq_iff) +next + have *: "\j. (\i. if i = j then 1::real else 0) \ bfun" + by auto + hence **: "\j. norm (Bfun (\i. if i = j then 1::real else 0)) = 1" + by (auto simp: Bfun_inverse[OF *] norm_bfun_def' intro!: cSup_eq_maximum ) + show "norm (blinfun_to_matrix x) \ norm x * (real CARD('a) * real CARD('b))" for x :: "('a \\<^sub>b real) \\<^sub>L 'b \\<^sub>b real" + proof - + have "norm (blinfun_to_matrix x) \ (\i\UNIV. \ia\UNIV. $$x (bfun.Bfun (\i. if i = ia then 1 else 0))) i$$" + unfolding norm_vec_def blinfun_to_matrix_def matrix_def axis_def + by(auto simp: vec_lambda_inverse intro!: order.trans[OF L2_set_le_sum_abs] order.trans[OF sum_mono[OF L2_set_le_sum_abs]]) + also have "\ \ (\i$$UNIV::'b set). \ia\(UNIV :: 'a set). norm x)" + using norm_blinfun abs_le_norm_bfun + by (fastforce simp: ** intro!: sum_mono intro: order.trans) + also have "\ = norm x * (real CARD('a) * real CARD('b))" + by auto + finally show ?thesis. + qed +qed + +lemma summable_blinfun_to_matrix: + assumes "summable (f :: nat \ ('c::finite \\<^sub>b _) \\<^sub>L ('c \\<^sub>b _))" + shows "summable (\i. blinfun_to_matrix (f i))" + by (simp add: assms bounded_linear.summable bounded_linear_blinfun_to_matrix) + +abbreviation "nonneg_blinfun Q \ 0 \ (blinfun_to_matrix Q)" + +lemma nonneg_blinfun_mono: "nonneg_blinfun Q \ u \ v \ Q u \ Q v" + using nonneg_mat_mono[of "blinfun_to_matrix Q" "vec_lambda u" "vec_lambda v"] + by (fastforce simp: blinfun_to_matrix_mult'' apply_bfun_inverse Finite_Cartesian_Product.less_eq_vec_def) + +lemma nonneg_blinfun_nonneg: "nonneg_blinfun Q \ 0 \ v \ 0 \ Q v" + using nonneg_blinfun_mono blinfun.zero_right + by metis + +lemma nonneg_id_blinfun: "nonneg_blinfun id_blinfun" + by (auto simp: blinfun_to_matrix_id) + +lemma norm_nonneg_blinfun_one: + assumes "0 \ blinfun_to_matrix X" + shows "norm X = norm (blinfun_apply X 1)" + by (simp add: norm_blinfun_mono_eq_one assms nonneg_blinfun_nonneg) + +lemma matrix_le_norm_mono: + assumes "0 \ (blinfun_to_matrix C)" + and "(blinfun_to_matrix C) \ (blinfun_to_matrix D)" + shows "norm C \ norm D" +proof - + have "0 \ C 1" "0 \ D 1" + using assms zero_le_one + by (fastforce intro!: nonneg_blinfun_nonneg)+ + have "\v. v \ 0 \ blinfun_to_matrix C *v v \ blinfun_to_matrix D *v v" + using assms nonneg_mat_mono[of "blinfun_to_matrix D - blinfun_to_matrix C"] + by (fastforce simp: matrix_vector_mult_diff_rdistrib) + hence *: "\v. v \ 0 \ C v \ D v" + by (auto simp: less_eq_vec_def less_eq_bfun_def blinfun_to_matrix_mult[symmetric]) + show ?thesis + using assms(1) assms(2) \0 \ C 1\ \0 \ D 1\ less_eq_bfunD[OF *, of 1] + by (fastforce intro!: cSUP_mono simp: norm_nonneg_blinfun_one norm_bfun_def' less_eq_bfun_def) +qed + +lemma blinfun_to_matrix_matpow: "blinfun_to_matrix (X ^^ i) = matpow (blinfun_to_matrix X) i" + by (induction i) (auto simp: blinfun_to_matrix_id blinfun_to_matrix_comp blinfunpow_assoc simp del: blinfunpow.simps(2)) + +lemma nonneg_blinfun_iff: "nonneg_blinfun X \ (\v\0. X v \ 0)" + using nonneg_mat_iff[of "blinfun_to_matrix X"] nonneg_blinfun_nonneg + by (auto simp: blinfun_to_matrix_mult'' bfun.Bfun_inverse less_eq_vec_def less_eq_bfun_def) + +lemma blinfun_apply_mono: "(0::real^_^_) \ blinfun_to_matrix X \ 0 \ v \ blinfun_to_matrix X \ blinfun_to_matrix Y \ X v \ Y v" + by (metis blinfun.diff_left blinfun_to_matrix_diff diff_ge_0_iff_ge nonneg_blinfun_nonneg) + +end \ No newline at end of file diff --git a/thys/MDP-Algorithms/Matrix_Util.thy b/thys/MDP-Algorithms/Matrix_Util.thy new file mode 100644 --- /dev/null +++ b/thys/MDP-Algorithms/Matrix_Util.thy @@ -0,0 +1,194 @@ +theory Matrix_Util + imports "HOL-Analysis.Analysis" +begin + +section \Matrices\ + +proposition scalar_matrix_assoc': + fixes C :: "('b::real_algebra_1)^'m^'n" + shows "k *\<^sub>R (C ** D) = C ** (k *\<^sub>R D)" + by (simp add: matrix_matrix_mult_def sum_distrib_left mult_ac vec_eq_iff scaleR_sum_right) + +subsection \Nonnegative Matrices\ + +lemma nonneg_matrix_nonneg [dest]: "0 \ m \ 0 \ m  i  j" + by (simp add: Finite_Cartesian_Product.less_eq_vec_def) + +lemma matrix_mult_mono: + assumes "0 \ E" "0 \ C" "(E :: real^'c^'c) \ B" "C \ D" + shows "E ** C \ B ** D" + using order.trans[OF assms(1) assms(3)] assms + unfolding Finite_Cartesian_Product.less_eq_vec_def + by (auto intro!: sum_mono mult_mono simp: matrix_matrix_mult_def) + +lemma nonneg_matrix_mult: "0 \ (C :: ('b::{field, ordered_ring})^_^_) \ 0 \ D \ 0 \ C ** D" + unfolding Finite_Cartesian_Product.less_eq_vec_def + by (auto simp: matrix_matrix_mult_def intro!: sum_nonneg) + +lemma zero_le_mat_iff [simp]: "0 \ mat (x :: 'c :: {zero, order}) \ 0 \ x" + by (auto simp: Finite_Cartesian_Product.less_eq_vec_def mat_def) + +lemma nonneg_mat_ge_zero: "0 \ Q \ 0 \ v \ 0 \ Q *v (v :: real^'c)" + unfolding Finite_Cartesian_Product.less_eq_vec_def + by (auto intro!: sum_nonneg simp: matrix_vector_mult_def) + +lemma nonneg_mat_mono: "0 \ Q \ u \ v \ Q *v u \ Q *v (v :: real^'c)" + using nonneg_mat_ge_zero[of Q "v - u"] + by (simp add: vec.diff) + +lemma nonneg_mult_imp_nonneg_mat: + assumes "\v. v \ 0 \ X *v v \ 0" + shows "X \ (0 :: real ^ _ ^_)" +proof - + { assume "\ (0 \ X)" + then obtain i j where neg: "X  i  j < 0" + by (metis less_eq_vec_def not_le zero_index) + let ?v = "\ k. if j = k then 1::real else 0" + have "(X *v ?v)  i < 0" + using neg + by (auto simp: matrix_vector_mult_def if_distrib cong: if_cong) + hence "?v \ 0 \ \ ((X *v ?v) \ 0)" + by (auto simp: less_eq_vec_def not_le) + hence "\v. v \ 0 \ \ X *v v \ 0" + by blast + } + thus ?thesis + using assms by auto +qed + +lemma nonneg_mat_iff: + "(X \ (0 :: real ^ _ ^_)) \ (\v. v \ 0 \ X *v v \ 0)" + using nonneg_mat_ge_zero nonneg_mult_imp_nonneg_mat by auto + +lemma mat_le_iff: "(X \ Y) \ (\x\0. (X::real^_^_) *v x \ Y *v x)" + by (metis diff_ge_0_iff_ge matrix_vector_mult_diff_rdistrib nonneg_mat_iff) + +subsection \Matrix Powers\ + +(* copied from Perron-Frobenius *) +primrec matpow :: "'a::semiring_1^'n^'n \ nat \ 'a^'n^'n" where + matpow_0: "matpow A 0 = mat 1" | + matpow_Suc: "matpow A (Suc n) = (matpow A n) ** A" + +lemma nonneg_matpow: "0 \ X \ 0 \ matpow (X :: real ^ _ ^ _) i" + by (induction i) (auto simp: nonneg_matrix_mult) + +lemma matpow_mono: "0 \ C \ C \ D \ matpow (C :: real^_^_) n \ matpow D n" + by (induction n) (auto intro!: matrix_mult_mono nonneg_matpow) + +lemma matpow_scaleR: "matpow (c *\<^sub>R (X :: 'b :: real_algebra_1^_^_)) n = (c^n) *\<^sub>R (matpow X) n" +proof (induction n arbitrary: X c) + case (Suc n) + have "matpow (c *\<^sub>R X) (Suc n) = (c^n)*\<^sub>R (matpow X) n ** c *\<^sub>R X" + using Suc by auto + also have "\ = c *\<^sub>R ((c^n) *\<^sub>R (matpow X) n ** X)" + using scalar_matrix_assoc' + by (auto simp: scalar_matrix_assoc') + finally show ?case + by (simp add: scalar_matrix_assoc) +qed auto + +lemma matrix_vector_mult_code': "(X *v x)  i = (\j\UNIV. X  i  j * x  j)" + by (simp add: matrix_vector_mult_def) + +lemma matrix_vector_mult_mono: "(0::real^_^_) \ X \ 0 \ v \ X \ Y \ X *v v \ Y *v v" + by (metis diff_ge_0_iff_ge matrix_vector_mult_diff_rdistrib nonneg_mat_iff) + +subsection \Triangular Matrices\ + +definition "lower_triangular_mat X \ (\i j. (i :: 'b::{finite, linorder}) < j \ X  i  j = 0)" + +definition "strict_lower_triangular_mat X \ (\i j. (i :: 'b::{finite, linorder}) \ j \ X  i  j = 0)" + +definition "upper_triangular_mat X \ (\i j. j < i \ X  i  j = 0)" + +lemma stlI: "strict_lower_triangular_mat X \ lower_triangular_mat X" + unfolding strict_lower_triangular_mat_def lower_triangular_mat_def + by auto + +lemma lower_triangular_mat_mat: "lower_triangular_mat (mat x)" + unfolding lower_triangular_mat_def mat_def + by auto + +lemma lower_triangular_mult: + assumes "lower_triangular_mat X" "lower_triangular_mat Y" + shows "lower_triangular_mat (X ** Y)" + using assms + unfolding matrix_matrix_mult_def lower_triangular_mat_def + by (auto intro!: sum.neutral) (metis mult_not_zero neqE less_trans) + +lemma lower_triangular_pow: + assumes "lower_triangular_mat X" + shows "lower_triangular_mat (matpow X i)" + using assms lower_triangular_mult lower_triangular_mat_mat + by (induction i) auto + +lemma lower_triangular_suminf: + assumes "\i. lower_triangular_mat (f i)" "summable (f :: nat \ 'b::real_normed_vector^_^_)" + shows "lower_triangular_mat (\i. f i)" + using assms + unfolding lower_triangular_mat_def + by (subst bounded_linear.suminf) (auto intro: bounded_linear_compose) + +lemma lower_triangular_pow_eq: + assumes "lower_triangular_mat X" "lower_triangular_mat Y" "\s'. s' \ s \ row s' X = row s' Y" "s' \ s" + shows "row s' (matpow X i) = row s' (matpow Y i)" + using assms +proof (induction i) + case (Suc i) + thus ?case + proof - + have ltX: "lower_triangular_mat (matpow X i)" + by (simp add: Suc(2) lower_triangular_pow) + have ltY: "lower_triangular_mat (matpow Y i)" + by (simp add: Suc(3) lower_triangular_pow) + have " (\k\UNIV. matpow X i  s'  k * X  k  j) = (\k\UNIV. matpow Y i  s'  k * Y  k  j)" for j + proof - + have "(\k\UNIV. matpow X i  s'  k * X  k  j) = (\k\UNIV. if s' < k then 0 else matpow Y i  s'  k * X  k  j)" + using Suc ltY + by (auto simp: row_def lower_triangular_mat_def intro!: sum.cong) + also have "\ = (\k \ UNIV . matpow Y i  s'  k * Y  k  j)" + using Suc ltY + by (auto simp: row_def lower_triangular_mat_def cong: if_cong intro!: sum.cong) + finally show ?thesis. + qed + thus ?thesis + by (auto simp: row_def matrix_matrix_mult_def) + qed +qed simp + +lemma lower_triangular_mat_mult: + assumes "lower_triangular_mat M" "\i. i \ j \ v  i = v'  i" + shows "(M *v v)  j = (M *v v')  j" +proof - + have "(M *v v)  j = (\i\UNIV. (if j < i then 0 else M  j  i * v  i))" + using assms unfolding lower_triangular_mat_def + by (auto simp: matrix_vector_mult_def intro!: sum.cong) + also have "\ = (\i\UNIV. (if j < i then 0 else M  j  i * v'  i))" + using assms + by (auto intro!: sum.cong) + also have "\ = (M *v v')  j" + using assms unfolding lower_triangular_mat_def + by (auto simp: matrix_vector_mult_def intro!: sum.cong) + finally show ?thesis. +qed + +subsection \Inverses\ + +(* from AFP/Rank_Nullity_Theorem *) +lemma matrix_inv: + assumes "invertible M" + shows matrix_inv_left: "matrix_inv M ** M = mat 1" + and matrix_inv_right: "M ** matrix_inv M = mat 1" + using \invertible M\ and someI_ex [of "\ N. M ** N = mat 1 \ N ** M = mat 1"] + unfolding invertible_def and matrix_inv_def + by simp_all + +(* from AFP/Rank_Nullity_Theorem *) +lemma matrix_inv_unique: + fixes A::"'a::{semiring_1}^'n^'n" + assumes AB: "A ** B = mat 1" and BA: "B ** A = mat 1" + shows "matrix_inv A = B" + by (metis AB BA invertible_def matrix_inv_right matrix_mul_assoc matrix_mul_lid) + +end \ No newline at end of file diff --git a/thys/MDP-Algorithms/Modified_Policy_Iteration.thy b/thys/MDP-Algorithms/Modified_Policy_Iteration.thy new file mode 100644 --- /dev/null +++ b/thys/MDP-Algorithms/Modified_Policy_Iteration.thy @@ -0,0 +1,742 @@ +(* Author: Maximilian Schäffeler *) + +theory Modified_Policy_Iteration + imports + Policy_Iteration + Value_Iteration +begin + +section \Modified Policy Iteration\ + +locale MDP_MPI = MDP_finite_type A K r l + MDP_act A K r l arb_act + for A and K :: "'s :: finite \ 'a :: finite \ 's pmf" and r l arb_act +begin + +subsection \The Advantage Function @{term B}\ + +definition "B v s = (\d \ D\<^sub>R. (r_dec d s + (l *\<^sub>R \ \<^sub>1 d - id_blinfun) v s))" + +text "The function @{const B} denotes the advantage of choosing the optimal action vs. + the current value estimate" + +lemma B_eq_\: "B v s = \ v s - v s" +proof - + have *: "B v s = (\d \ D\<^sub>R. L d v s - v s)" + unfolding B_def L_def + by (auto simp add: blinfun.bilinear_simps add_diff_eq) + show ?thesis + unfolding * + proof (rule antisym) + show "(\d\D\<^sub>R. L d v s - v s) \ \ v s - v s" + unfolding \_def + using ex_dec + by (fastforce intro!: cSUP_upper cSUP_least) + next + have "bdd_above ((\d. L d v s - v s)  D\<^sub>R)" + by (auto intro!: bounded_const bounded_minus_comp bounded_imp_bdd_above) + thus "\ v s - v s \ (\d \ D\<^sub>R. L d v s - v s)" + unfolding \_def diff_le_eq + by (intro cSUP_least) (auto intro: cSUP_upper2 simp: diff_le_eq[symmetric]) + qed +qed + +text \@{const B} is a bounded function.\ + +lift_definition B\<^sub>b :: "('s \\<^sub>b real) \ 's \\<^sub>b real" is "B" + using \\<^sub>b.rep_eq[symmetric] B_eq_\ + by (auto intro!: bfun_normI order.trans[OF abs_triangle_ineq4] add_mono abs_le_norm_bfun) + +lemma B\<^sub>b_eq_\\<^sub>b: "B\<^sub>b v = \\<^sub>b v - v" + by (auto simp: \\<^sub>b.rep_eq B\<^sub>b.rep_eq B_eq_$$ + +lemma \\<^sub>b_eq_SUP_L\<^sub>a: "\\<^sub>b v s = (\a \ A s. L\<^sub>a a v s)" + using L_eq_L\<^sub>a_det \\<^sub>b_eq_SUP_det SUP_step_det_eq + by auto + +subsection \Optimization of the Value Function over Multiple Steps\ + +definition "U m v s = (\d \ D\<^sub>R. (\\<^sub>b_fin (mk_stationary d) m + ((l *\<^sub>R \

\<^sub>1 d)^^m) v) s)" + +text \@{const U} expresses the value estimate obtained by optimizing the first @{term m} steps and + afterwards using the current estimate.\ + +lemma U_zero [simp]: "U 0 v = v" + unfolding U_def \_def + by (auto simp: \\<^sub>b_fin.rep_eq) + +lemma U_one_eq_\: "U 1 v s = \ v s" + unfolding U_def \_def + by (auto simp: \\<^sub>b_fin_eq_\

\<^sub>X L_def blinfun.bilinear_simps) + +lift_definition U\<^sub>b :: "nat \ ('s \\<^sub>b real) \ ('s \\<^sub>b real)" is U +proof - + fix n v + have "norm (\\<^sub>b_fin (mk_stationary d) m) \ (\iM)" for d m + using abs_\_fin_le \\<^sub>b_fin.rep_eq + by (auto intro!: norm_bound) + moreover have "norm (((l *\<^sub>R \

\<^sub>1 d)^^m) v) \ l ^ m * norm v" for d m + by (auto simp: \

\<^sub>X_const[symmetric] blinfun.bilinear_simps blincomp_scaleR_right simp del: \

\<^sub>X_sconst + intro!: boundedI order.trans[OF abs_le_norm_bfun] mult_left_mono) + ultimately have *: "norm (\\<^sub>b_fin (mk_stationary d) m + ((l *\<^sub>R \

\<^sub>1 d)^^m) v) \ (\iM) + l ^ m * norm v" for d m + using norm_triangle_mono by blast + show "U n v \ bfun" + using ex_dec order.trans[OF abs_le_norm_bfun *] + by (fastforce simp: U_def intro!: bfun_normI cSup_abs_le) +qed + +lemma U\<^sub>b_contraction: "dist (U\<^sub>b m v) (U\<^sub>b m u) \ l ^ m * dist v u" +proof - + have aux: "dist (U\<^sub>b m v s) (U\<^sub>b m u s) \ l ^ m * dist v u" if le: "U\<^sub>b m u s \ U\<^sub>b m v s" for s v u + proof - + let ?U = "\m v d. (\\<^sub>b_fin (mk_stationary d) m + ((l *\<^sub>R \

\<^sub>1 d) ^^ m) v) s" + have "U\<^sub>b m v s - U\<^sub>b m u s \ (\d \ D\<^sub>R. ?U m v d - ?U m u d)" + using bounded_stationary_\\<^sub>b_fin bounded_disc_\

\<^sub>1 le + unfolding U\<^sub>b.rep_eq U_def + by (intro le_SUP_diff') (auto intro: bounded_plus_comp) + also have "\ = (\d \ D\<^sub>R. ((l *\<^sub>R \

\<^sub>1 d) ^^ m) (v - u) s)" + by (simp add: L_def scale_right_diff_distrib blinfun.bilinear_simps) + also have "\ = (\d \ D\<^sub>R. l^m * ((\

\<^sub>1 d ^^ m) (v - u) s))" + by (simp add: blincomp_scaleR_right blinfun.scaleR_left) + also have "\ = l^m * (\d \ D\<^sub>R. ((\

\<^sub>1 d ^^ m) (v - u) s))" + using D\<^sub>R_ne bounded_P bounded_disc_\

\<^sub>1' + by (auto intro: bounded_SUP_mul) + also have "\ \ l^m * norm (\d \ D\<^sub>R. ((\

\<^sub>1 d ^^ m) (v - u) s))" + by (simp add: mult_left_mono) + also have "\ \ l^m * (\d \ D\<^sub>R. norm (((\

\<^sub>1 d ^^ m) (v - u) s)))" + using D\<^sub>R_ne ex_dec bounded_norm_comp bounded_disc_\

\<^sub>1' + by (fastforce intro!: mult_left_mono) + also have "\ \ l^m * (\d \ D\<^sub>R. norm ((\

\<^sub>1 d ^^ m) ((v - u))))" + using ex_dec + by (fastforce intro!: order.trans[OF norm_blinfun] abs_le_norm_bfun mult_left_mono cSUP_mono) + also have "\ \ l^m * (\d \ D\<^sub>R. norm ((v - u)))" + using norm_\

\<^sub>X_apply + by (auto simp: \

\<^sub>X_const[symmetric] cSUP_least mult_left_mono) + also have "\ = l ^m * dist v u" + by (auto simp: dist_norm) + finally have "U\<^sub>b m v s - U\<^sub>b m u s \ l^m * dist v u" . + thus ?thesis + by (simp add: dist_real_def le) + qed + moreover have "U\<^sub>b m v s \ U\<^sub>b m u s \ dist (U\<^sub>b m v s) (U\<^sub>b m u s) \ l^m * dist v u" for u v s + by (simp add: aux dist_commute) + ultimately have "dist (U\<^sub>b m v s) (U\<^sub>b m u s) \ l^m * dist v u" for u v s + using linear + by blast + thus "dist (U\<^sub>b m v) (U\<^sub>b m u) \ l^m * dist v u" + by (simp add: dist_bound) +qed + +lemma U\<^sub>b_conv: + "\!v. U\<^sub>b (Suc m) v = v" + "(\n. (U\<^sub>b (Suc m) ^^ n) v) \ (THE v. U\<^sub>b (Suc m) v = v)" +proof - + have *: "is_contraction (U\<^sub>b (Suc m))" + unfolding is_contraction_def + using U\<^sub>b_contraction[of "Suc m"] le_neq_trans[OF zero_le_disc] + by (cases "l = 0") + (auto intro!: power_Suc_less_one intro: exI[of _ "l^(Suc m)"]) + show "\!v. U\<^sub>b (Suc m) v = v" "(\n. (U\<^sub>b (Suc m) ^^ n) v) \ (THE v. U\<^sub>b (Suc m) v = v)" + using banach'[OF *] + by auto +qed + +lemma U\<^sub>b_convergent: "convergent (\n. (U\<^sub>b (Suc m) ^^ n) v)" + by (intro convergentI[OF U\<^sub>b_conv(2)]) + +lemma U\<^sub>b_mono: + assumes "v \ u" + shows "U\<^sub>b m v \ U\<^sub>b m u" +proof - + have "U\<^sub>b m v s \ U\<^sub>b m u s" for s + unfolding U\<^sub>b.rep_eq U_def + proof (intro cSUP_mono, goal_cases) + case 2 + thus ?case + by (simp add: bounded_imp_bdd_above bounded_disc_\

\<^sub>1 bounded_plus_comp bounded_stationary_\\<^sub>b_fin) + next + case (3 n) + thus ?case + using less_eq_bfunD[OF \

\<^sub>X_mono[OF assms]] + by (auto simp: \

\<^sub>X_const[symmetric] blincomp_scaleR_right blinfun.scaleR_left intro!: mult_left_mono exI) + qed auto + thus ?thesis + using assms + by auto +qed + +lemma U\<^sub>b_le_\\<^sub>b: "U\<^sub>b m v \ (\\<^sub>b ^^ m) v" +proof - + have "U\<^sub>b m v s = (\d \ D\<^sub>R. (L d^^ m) v s)" for m v s + by (auto simp: L_iter U\<^sub>b.rep_eq \\<^sub>b.rep_eq U_def \_def) + thus ?thesis + using L_iter_le_\\<^sub>b ex_dec + by (fastforce intro!: cSUP_least) +qed + + +lemma L_iter_le_U\<^sub>b: + assumes "d \ D\<^sub>R" + shows "(L d^^m) v \ U\<^sub>b m v" + using assms + by (fastforce intro!: cSUP_upper bounded_imp_bdd_above + simp: L_iter U\<^sub>b.rep_eq U_def bounded_disc_\

\<^sub>1 bounded_plus_comp bounded_stationary_\\<^sub>b_fin) + + +lemma lim_U\<^sub>b: "lim (\n. (U\<^sub>b (Suc m) ^^ n) v) = \\<^sub>b_opt" +proof - + have le_U: "\\<^sub>b_opt \ U\<^sub>b m \\<^sub>b_opt" for m + proof - + obtain d where d: "\_improving \\<^sub>b_opt (mk_dec_det d)" "d \ D\<^sub>D" + using ex_improving_det by auto + have "\\<^sub>b_opt = (L (mk_dec_det d) ^^ m) \\<^sub>b_opt" + by (induction m) (metis L_\_fix_iff \\<^sub>b_opt \_improving_imp_\\<^sub>b d(1) funpow_swap1)+ + thus ?thesis + using \d \ D\<^sub>D\ + by (auto intro!: order.trans[OF _ L_iter_le_U\<^sub>b]) + qed + have "U\<^sub>b m \\<^sub>b_opt \ \\<^sub>b_opt" for m + using \_inc_le_opt + by (auto intro!: order.trans[OF U\<^sub>b_le_\\<^sub>b] simp: funpow_swap1) + hence "U\<^sub>b (Suc m) \\<^sub>b_opt = \\<^sub>b_opt" + using le_U + by (simp add: antisym) + moreover have "(lim (\n. (U\<^sub>b (Suc m) ^^n) v)) = U\<^sub>b (Suc m) (lim (\n. (U\<^sub>b (Suc m) ^^n) v))" + using limI[OF U\<^sub>b_conv(2)] theI'[OF U\<^sub>b_conv(1)] + by auto + ultimately show ?thesis + using U\<^sub>b_conv(1) + by metis +qed + +lemma U\<^sub>b_tendsto: "(\n. (U\<^sub>b (Suc m) ^^ n) v) \ \\<^sub>b_opt" + using lim_U\<^sub>b U\<^sub>b_convergent convergent_LIMSEQ_iff + by metis + +lemma U\<^sub>b_fix_unique: "U\<^sub>b (Suc m) v = v \ v = \\<^sub>b_opt" + using theI'[OF U\<^sub>b_conv(1)] U\<^sub>b_conv(1) + by (auto simp: LIMSEQ_unique[OF U\<^sub>b_tendsto U\<^sub>b_conv(2)[of m]]) + +lemma dist_U\<^sub>b_opt: "dist (U\<^sub>b m v) \\<^sub>b_opt \ l^m * dist v \\<^sub>b_opt" +proof - + have "dist (U\<^sub>b m v) \\<^sub>b_opt = dist (U\<^sub>b m v) (U\<^sub>b m \\<^sub>b_opt)" + by (metis U\<^sub>b.abs_eq U\<^sub>b_fix_unique U_zero apply_bfun_inverse not0_implies_Suc) + also have "\ \ l^m * dist v \\<^sub>b_opt" + by (meson U\<^sub>b_contraction) + finally show ?thesis . +qed + +subsection \Expressing a Single Step of Modified Policy Iteration\ +text \The function @{term W} equals the value computed by the Modified Policy Iteration Algorithm + in a single iteration. + The right hand addend in the definition describes the advantage of using the optimal action for + the first m steps. + \ +definition "W d m v = v + (\i < m. (l *\<^sub>R \

\<^sub>1 d)^^i) (B\<^sub>b v)" + + +lemma W_eq_L_iter: + assumes "\_improving v d" + shows "W d m v = (L d^^m) v" +proof - + have "(\iR \

\<^sub>1 d)^^i) (\\<^sub>b v) = (\iR \

\<^sub>1 d)^^i) (L d v)" + using \_improving_imp_\\<^sub>b assms by auto + hence "W d m v = v + ((\iR \

\<^sub>1 d)^^i) (L d v)) - (\iR \

\<^sub>1 d)^^i) v" + by (auto simp: W_def B\<^sub>b_eq_\\<^sub>b blinfun.bilinear_simps algebra_simps ) + also have "\ = v + \\<^sub>b_fin (mk_stationary d) m + (\iR \

\<^sub>1 d)^^i) ((l *\<^sub>R \

\<^sub>1 d) v)) - (\iR \

\<^sub>1 d)^^i) v" + unfolding L_def + by (auto simp: \\<^sub>b_fin_eq blinfun.bilinear_simps blinfun.sum_left scaleR_right.sum) + also have "\ = v + \\<^sub>b_fin (mk_stationary d) m + (\iR \

\<^sub>1 d)^^Suc i) v) - (\iR \

\<^sub>1 d)^^i) v" + by (auto simp del: blinfunpow.simps simp: blinfunpow_assoc) + also have "\ = \\<^sub>b_fin (mk_stationary d) m + (\iR \

\<^sub>1 d)^^ i) v) - (\iR \

\<^sub>1 d)^^ i) v" + by (subst sum.lessThan_Suc_shift) auto + also have "\ = \\<^sub>b_fin (mk_stationary d) m + ((l *\<^sub>R \

\<^sub>1 d)^^m) v" + by (simp add: blinfun.sum_left) + also have "\ = (L d ^^ m) v" + using L_iter by auto + finally show ?thesis . +qed + +lemma W_le_U\<^sub>b: + assumes "v \ u" "\_improving v d" + shows "W d m v \ U\<^sub>b m u" +proof - + have "U\<^sub>b m u - W d m v \ \\<^sub>b_fin (mk_stationary d) m + ((l *\<^sub>R \

\<^sub>1 d) ^^ m) u - (\\<^sub>b_fin (mk_stationary d) m + ((l *\<^sub>R \

\<^sub>1 d)^^m) v)" + using \_improving_D_MR assms(2) bounded_stationary_\\<^sub>b_fin bounded_disc_\

\<^sub>1 + by (fastforce intro!: diff_mono bounded_imp_bdd_above cSUP_upper bounded_plus_comp simp: U\<^sub>b.rep_eq U_def L_iter W_eq_L_iter) + hence *: "U\<^sub>b m u - W d m v \ ((l *\<^sub>R \

\<^sub>1 d) ^^ m) (u - v)" + by (auto simp: blinfun.diff_right) + show "W d m v \ U\<^sub>b m u" + using order.trans[OF \

\<^sub>1_n_disc_pos[unfolded blincomp_scaleR_right[symmetric]] *] assms + by auto +qed + +lemma W_ge_\\<^sub>b: + assumes "v \ u" "0 \ B\<^sub>b u" "\_improving u d'" + shows "\\<^sub>b v \ W d' (Suc m) u" +proof - + have "\\<^sub>b v \ u + B\<^sub>b u" + using assms(1) \\<^sub>b_mono B\<^sub>b_eq_\\<^sub>b + by auto + also have "\ \ W d' (Suc m) u" + using L_mono \_improving_imp_\\<^sub>b assms(3) assms + by (induction m) (auto simp: W_eq_L_iter B\<^sub>b_eq_\\<^sub>b) + finally show ?thesis . +qed + +lemma B\<^sub>b_le: + assumes "\_improving v d" + shows "B\<^sub>b v + (l *\<^sub>R \

\<^sub>1 d - id_blinfun) (u - v) \ B\<^sub>b u" +proof - + have "r_dec\<^sub>b d + (l *\<^sub>R \

\<^sub>1 d - id_blinfun) u \ B\<^sub>b u" + using L_def L_le_\\<^sub>b assms + by (auto simp: B\<^sub>b_eq_\\<^sub>b \\<^sub>b.rep_eq \_def blinfun.bilinear_simps) + moreover have "B\<^sub>b v = r_dec\<^sub>b d + (l *\<^sub>R \

\<^sub>1 d - id_blinfun) v" + using assms + by (auto simp: B\<^sub>b_eq_\\<^sub>b \_improving_imp_\\<^sub>b[of _ d] L_def blinfun.bilinear_simps) + ultimately show ?thesis + by (simp add: blinfun.diff_right) +qed + +lemma \\<^sub>b_W_ge: + assumes "u \ \\<^sub>b u" "\_improving u d" + shows "W d m u \ \\<^sub>b (W d m u)" +proof - + have "0 \ ((l *\<^sub>R \

\<^sub>1 d) ^^ m) (B\<^sub>b u)" + by (metis B\<^sub>b_eq_\\<^sub>b \

\<^sub>1_n_disc_pos assms(1) blincomp_scaleR_right diff_ge_0_iff_ge) + also have "\ = ((l *\<^sub>R \

\<^sub>1 d)^^0 + (\i < m. (l *\<^sub>R \

\<^sub>1 d)^^(Suc i))) (B\<^sub>b u) - (\i < m. (l *\<^sub>R \

\<^sub>1 d)^^ i) (B\<^sub>b u)" + by (subst sum.lessThan_Suc_shift[symmetric]) (auto simp: blinfun.diff_left[symmetric]) + also have "\ = B\<^sub>b u + ((l *\<^sub>R \

\<^sub>1 d - id_blinfun) o\<^sub>L (\i < m. (l *\<^sub>R \

\<^sub>1 d)^^i)) (B\<^sub>b u)" + by (auto simp: blinfun.bilinear_simps sum_subtractf) + also have "\ = B\<^sub>b u + (l *\<^sub>R \

\<^sub>1 d - id_blinfun) (W d m u - u)" + by (auto simp: W_def sum.lessThan_Suc[unfolded lessThan_Suc_atMost]) + also have "\ \ B\<^sub>b (W d m u)" + using B\<^sub>b_le assms(2) by blast + finally have "0 \ B\<^sub>b (W d m u)" . + thus ?thesis using B\<^sub>b_eq_\\<^sub>b + by auto +qed + +subsection \Computing the Bellman Operator over Multiple Steps\ +definition L_pow :: "('s \\<^sub>b real) \ ('s \ 'a) \ nat \ ('s \\<^sub>b real)" where + "L_pow v d m = (L (mk_dec_det d) ^^ Suc m) v" + +lemma sum_telescope': "(\i\k. f (Suc i) - f i ) = f (Suc k) - (f 0 :: 'c :: ab_group_add)" + using sum_telescope[of "-f" k] + by auto + +(* eq 6.5.7 *) +lemma L_pow_eq: + assumes "\_improving v (mk_dec_det d)" + shows "L_pow v d m = v + (\i \ m. ((l *\<^sub>R \

\<^sub>1 (mk_dec_det d))^^i)) (B\<^sub>b v)" +proof - + let ?d = "(mk_dec_det d)" + have "(\i \ m. ((l *\<^sub>R \

\<^sub>1 ?d)^^i)) (B\<^sub>b v) = (\i \ m. ((l *\<^sub>R \

\<^sub>1 ?d)^^i)) (L ?d v) - (\i \ m. ((l *\<^sub>R \

\<^sub>1 ?d)^^i)) v" + using assms + by (auto simp: B\<^sub>b_eq_\\<^sub>b blinfun.bilinear_simps \_improving_imp_\\<^sub>b) + also have "\ = (\i \ m. ((l *\<^sub>R \

\<^sub>1 ?d)^^i)) (r_dec\<^sub>b ?d) + (\i \ m. ((l *\<^sub>R \

\<^sub>1 ?d)^^i)) ((l *\<^sub>R \

\<^sub>1 ?d) v) - (\i \ m. ((l *\<^sub>R \

\<^sub>1 ?d)^^i)) v" + by (simp add: L_def blinfun.bilinear_simps) + also have "\ = (\i \ m. ((l *\<^sub>R \

\<^sub>1 ?d)^^i)) (r_dec\<^sub>b ?d) + (\i \ m. ((l *\<^sub>R \

\<^sub>1 ?d)^^Suc i)) v - (\i \ m. ((l *\<^sub>R \

\<^sub>1 ?d)^^i)) v" + by (auto simp: blinfun.sum_left blinfunpow_assoc simp del: blinfunpow.simps) + also have "\ = (\i \ m. ((l *\<^sub>R \

\<^sub>1 ?d)^^i)) (r_dec\<^sub>b ?d) + (\i \ m. ((l *\<^sub>R \

\<^sub>1 ?d)^^Suc i) - (l *\<^sub>R \

\<^sub>1 ?d)^^i) v" + by (simp add: blinfun.diff_left sum_subtractf) + also have "\ = (\i \ m. ((l *\<^sub>R \

\<^sub>1 ?d)^^i)) (r_dec\<^sub>b ?d) + ((l *\<^sub>R \

\<^sub>1 ?d)^^Suc m) v - v" + by (subst sum_telescope') (auto simp: blinfun.bilinear_simps) + finally have "(\i \ m. ((l *\<^sub>R \

\<^sub>1 ?d)^^i)) (B\<^sub>b v) = (\i \ m. ((l *\<^sub>R \

\<^sub>1 ?d)^^i)) (r_dec\<^sub>b ?d) + ((l *\<^sub>R \

\<^sub>1 ?d)^^Suc m) v - v" . + moreover have "L_pow v d m = \\<^sub>b_fin (mk_stationary_det d) (Suc m) + ((l *\<^sub>R \

\<^sub>1 ?d)^^Suc m) v" + by (simp only: L_pow_def L_iter lessThan_Suc_atMost[symmetric]) + ultimately show ?thesis + by (auto simp: \\<^sub>b_fin_eq lessThan_Suc_atMost) +qed + +lemma L_pow_eq_W: + assumes "d \ D\<^sub>D" + shows "L_pow v (policy_improvement d v) m = W (mk_dec_det (policy_improvement d v)) (Suc m) v" + using assms policy_improvement_improving + by (auto simp: W_eq_L_iter L_pow_def) + +lemma L_pow_\\<^sub>b_mono_inv: + assumes "d \ D\<^sub>D" "v \ \\<^sub>b v" + shows "L_pow v (policy_improvement d v) m \ \\<^sub>b (L_pow v (policy_improvement d v) m)" + using assms L_pow_eq_W \\<^sub>b_W_ge policy_improvement_improving + by auto + +subsection \The Modified Policy Iteration Algorithm\ +context + fixes d0 :: "'s \ 'a" + fixes v0 :: "'s \\<^sub>b real" + fixes m :: "nat \ ('s \\<^sub>b real) \ nat" + assumes d0: "d0 \ D\<^sub>D" +begin + +text \We first define a function that executes the algorithm for n steps.\ +fun mpi :: "nat \ (('s \ 'a) \ ('s \\<^sub>b real))" where + "mpi 0 = (policy_improvement d0 v0, v0)" | + "mpi (Suc n) = + (let (d, v) = mpi n; v' = L_pow v d (m n v) in + (policy_improvement d v', v'))" + +definition "mpi_val n = snd (mpi n)" +definition "mpi_pol n = fst (mpi n)" + +lemma mpi_pol_zero[simp]: "mpi_pol 0 = policy_improvement d0 v0" + unfolding mpi_pol_def + by auto + +lemma mpi_pol_Suc: "mpi_pol (Suc n) = policy_improvement (mpi_pol n) (mpi_val (Suc n))" + by (auto simp: case_prod_beta' Let_def mpi_pol_def mpi_val_def) + +lemma mpi_pol_is_dec_det: "mpi_pol n \ D\<^sub>D" + unfolding mpi_pol_def + using policy_improvement_is_dec_det d0 + by (induction n) (auto simp: Let_def split: prod.splits) + +lemma \_improving_mpi_pol: "\_improving (mpi_val n) (mk_dec_det (mpi_pol n))" + using d0 policy_improvement_improving mpi_pol_is_dec_det mpi_pol_Suc + by (cases n) (auto simp: mpi_pol_def mpi_val_def) + +lemma mpi_val_zero[simp]: "mpi_val 0 = v0" + unfolding mpi_val_def by auto + +lemma mpi_val_Suc: "mpi_val (Suc n) = L_pow (mpi_val n) (mpi_pol n) (m n (mpi_val n))" + unfolding mpi_val_def mpi_pol_def + by (auto simp: case_prod_beta' Let_def) + +lemma mpi_val_eq: "mpi_val (Suc n) = + mpi_val n + (\i \ m n (mpi_val n). (l *\<^sub>R \

\<^sub>1 (mk_dec_det (mpi_pol n))) ^^ i) (B\<^sub>b (mpi_val n))" + using L_pow_eq[OF \_improving_mpi_pol] mpi_val_Suc + by auto + + +text \Value Iteration is a special case of MPI where @{term "\n v. m n v = 0"}.\ +lemma mpi_includes_value_it: + assumes "\n v. m n v = 0" + shows "mpi_val (Suc n) = \\<^sub>b (mpi_val n)" + using assms B\<^sub>b_eq_\\<^sub>b mpi_val_eq + by auto + +subsection \Convergence Proof\ +text \We define the sequence @{term w} as an upper bound for the values of MPI.\ +fun w where + "w 0 = v0" | + "w (Suc n) = U\<^sub>b (Suc (m n (mpi_val n))) (w n)" + +lemma dist_\\<^sub>b_opt: "dist (w (Suc n)) \\<^sub>b_opt \ l * dist (w n) \\<^sub>b_opt" + by (fastforce simp: algebra_simps intro: order.trans[OF dist_U\<^sub>b_opt] mult_left_mono power_le_one + mult_left_le_one_le order.strict_implies_order) + +lemma dist_\\<^sub>b_opt_n: "dist (w n) \\<^sub>b_opt \ l^n * dist v0 \\<^sub>b_opt" + by (induction n) (fastforce simp: algebra_simps intro: order.trans[OF dist_\\<^sub>b_opt] mult_left_mono)+ + +lemma w_conv: "w \ \\<^sub>b_opt" +proof - + have "(\n. l^n * dist v0 \\<^sub>b_opt) \ 0" + using LIMSEQ_realpow_zero + by (cases "v0 = \\<^sub>b_opt") auto + then show ?thesis + by (fastforce intro: metric_LIMSEQ_I order.strict_trans1[OF dist_\\<^sub>b_opt_n] simp: LIMSEQ_def) +qed + +text \MPI converges monotonically to the optimal value from below. + The iterates are sandwiched between @{const \\<^sub>b} from below and @{const U\<^sub>b} from above.\ +theorem mpi_conv: + assumes "v0 \ \\<^sub>b v0" + shows "mpi_val \ \\<^sub>b_opt" and "\n. mpi_val n \ mpi_val (Suc n)" +proof - + define y where "y n = (\\<^sub>b^^n) v0" for n + have aux: "mpi_val n \ \\<^sub>b (mpi_val n) \ mpi_val n \ mpi_val (Suc n) \ y n \ mpi_val n \ mpi_val n \ w n" for n + proof (induction n) + case 0 + show ?case + using assms B\<^sub>b_eq_\\<^sub>b + unfolding y_def + by (auto simp: mpi_val_eq blinfun.sum_left \

\<^sub>1_n_disc_pos blincomp_scaleR_right sum_nonneg) + next + case (Suc n) + have val_eq_W: "mpi_val (Suc n) = W (mk_dec_det (mpi_pol n)) (Suc (m n (mpi_val n))) (mpi_val n)" + using \_improving_mpi_pol mpi_val_Suc W_eq_L_iter L_pow_def + by auto + hence *: "mpi_val (Suc n) \ \\<^sub>b (mpi_val (Suc n))" + using Suc.IH \\<^sub>b_W_ge \_improving_mpi_pol by presburger + moreover have "mpi_val (Suc n) \ mpi_val (Suc (Suc n))" + using * + by (simp add: B\<^sub>b_eq_\\<^sub>b mpi_val_eq \

\<^sub>1_n_disc_pos blincomp_scaleR_right blinfun.sum_left sum_nonneg) + moreover have "mpi_val (Suc n) \ w (Suc n)" + using Suc.IH \_improving_mpi_pol + by (auto simp: val_eq_W intro: order.trans[OF _ W_le_U\<^sub>b]) + moreover have "y (Suc n) \ mpi_val (Suc n)" + using Suc.IH \_improving_mpi_pol W_ge_\\<^sub>b + by (auto simp: y_def B\<^sub>b_eq_\\<^sub>b val_eq_W) + ultimately show ?case + by auto + qed + thus "mpi_val n \ mpi_val (Suc n)" for n + by auto + have "y \ \\<^sub>b_opt" + using \\<^sub>b_lim y_def by presburger + thus "mpi_val \ \\<^sub>b_opt" + using aux + by (auto intro: tendsto_bfun_sandwich[OF _ w_conv]) +qed + +subsection \$\epsilon$-Optimality\ +text \This gives an upper bound on the error of MPI.\ +lemma mpi_pol_eps_opt: + assumes "2 * l * dist (mpi_val n) (\\<^sub>b (mpi_val n)) < eps * (1 - l)" "eps > 0" + shows "dist (\\<^sub>b (mk_stationary_det (mpi_pol n))) (\\<^sub>b (mpi_val n)) \ eps / 2" +proof - + let ?p = "mk_stationary_det (mpi_pol n)" + let ?d = "mk_dec_det (mpi_pol n)" + let ?v = "mpi_val n" + have "dist (\\<^sub>b ?p) (\\<^sub>b ?v) = dist (L ?d (\\<^sub>b ?p)) (\\<^sub>b ?v)" + using L_\_fix + by force + also have "\ = dist (L ?d (\\<^sub>b ?p)) (L ?d ?v)" + by (metis \_improving_imp_\\<^sub>b \_improving_mpi_pol) + also have "\ \ dist (L ?d (\\<^sub>b ?p)) (L ?d (\\<^sub>b ?v)) + dist (L ?d (\\<^sub>b ?v)) (L ?d ?v)" + using dist_triangle + by blast + also have "\ \ l * dist (\\<^sub>b ?p) (\\<^sub>b ?v) + dist (L ?d (\\<^sub>b ?v)) (L ?d ?v)" + using contraction_L by auto + also have "\ \ l * dist (\\<^sub>b ?p) (\\<^sub>b ?v) + l * dist (\\<^sub>b ?v) ?v" + using contraction_L by auto + finally have "dist (\\<^sub>b ?p) (\\<^sub>b ?v) \ l * dist (\\<^sub>b ?p) (\\<^sub>b ?v) + l * dist (\\<^sub>b ?v) ?v". + hence *:"(1-l) * dist (\\<^sub>b ?p) (\\<^sub>b ?v) \ l * dist (\\<^sub>b ?v) ?v" + by (auto simp: left_diff_distrib) + thus ?thesis + proof (cases "l = 0") + case True + thus ?thesis + using assms * + by auto + next + case False + have **: "dist (\\<^sub>b ?v) (mpi_val n) < eps * (1 - l) / (2 * l)" + using False le_neq_trans[OF zero_le_disc False[symmetric]] assms + by (auto simp: dist_commute pos_less_divide_eq Groups.mult_ac(2)) + have "dist (\\<^sub>b ?p) (\\<^sub>b ?v) \ (l/ (1-l)) * dist (\\<^sub>b ?v) ?v" + using * + by (auto simp: mult.commute pos_le_divide_eq) + also have "\ \ (l/ (1-l)) * (eps * (1 - l) / (2 * l))" + using ** + by (fastforce intro!: mult_left_mono simp: divide_nonneg_pos) + also have "\ = eps / 2" + using False disc_lt_one + by (auto simp: order.strict_iff_order) + finally show "dist (\\<^sub>b ?p) (\\<^sub>b ?v) \ eps / 2". + qed +qed + +lemma mpi_pol_opt: + assumes "2 * l * dist (mpi_val n) (\\<^sub>b (mpi_val n)) < eps * (1 - l)" "eps > 0" + shows "dist (\\<^sub>b (mk_stationary_det (mpi_pol n))) (\\<^sub>b_opt) < eps" +proof - + have "dist (\\<^sub>b (mk_stationary_det (mpi_pol n))) (\\<^sub>b_opt) \ eps/2 + dist (\\<^sub>b (mpi_val n)) \\<^sub>b_opt" + by (metis mpi_pol_eps_opt[OF assms] dist_commute dist_triangle_le add_right_mono) + thus ?thesis + using dist_\\<^sub>b_opt_eps assms + by fastforce +qed + +lemma mpi_val_term_ex: + assumes "v0 \ \\<^sub>b v0" "eps > 0" + shows "\n. 2 * l * dist (mpi_val n) (\\<^sub>b (mpi_val n)) < eps * (1 - l)" +proof - + note dist_\\<^sub>b_lt_dist_opt + have "(\n. dist (mpi_val n) \\<^sub>b_opt) \ 0" + using mpi_conv(1)[OF assms(1)] tendsto_dist_iff + by blast + hence "(\n. dist (mpi_val n) (\\<^sub>b (mpi_val n))) \ 0" + using dist_\\<^sub>b_lt_dist_opt + by (auto simp: metric_LIMSEQ_I intro: tendsto_sandwich[of "\_. 0" _ _ "\n. 2 * dist (mpi_val n) \\<^sub>b_opt"]) + hence "\e >0. \n. dist (mpi_val n) (\\<^sub>b (mpi_val n)) < e" + by (fastforce dest!: metric_LIMSEQ_D) + hence "l \ 0 \ \n. dist (mpi_val n) (\\<^sub>b (mpi_val n)) < eps * (1 - l) / (2 * l)" + by (simp add: assms order.not_eq_order_implies_strict) + thus "\n. (2 * l) * dist (mpi_val n) (\\<^sub>b (mpi_val n)) < eps * (1 - l)" + using assms le_neq_trans[OF zero_le_disc] + by (cases "l = 0") (auto simp: mult.commute pos_less_divide_eq) +qed +end + +subsection \Unbounded MPI\ +context + fixes eps \ :: real and M :: nat +begin + +function (domintros) mpi_algo where "mpi_algo d v m = ( + if 2 * l * dist v (\\<^sub>b v) < eps * (1 - l) + then (policy_improvement d v, v) + else mpi_algo (policy_improvement d v) (L_pow v (policy_improvement d v) (m 0 v)) (\n. m (Suc n)))" + by auto + +text \We define a tailrecursive version of @{const mpi} which more closely resembles @{const mpi_algo}.\ +fun mpi' where + "mpi' d v 0 m = (policy_improvement d v, v)" | + "mpi' d v (Suc n) m = ( + let d' = policy_improvement d v; v' = L_pow v d' (m 0 v) in mpi' d' v' n (\n. m (Suc n)))" + +lemma mpi_Suc': + assumes "d \ D\<^sub>D" + shows "mpi d v m (Suc n) = mpi (policy_improvement d v) (L_pow v (policy_improvement d v) (m 0 v)) (\a. m (Suc a)) n" + using assms policy_improvement_is_dec_det + by (induction n rule: nat.induct) (auto simp: Let_def) + +lemma + assumes "d \ D\<^sub>D" + shows "mpi d v m n = mpi' d v n m" + using assms +proof (induction n arbitrary: d v m rule: nat.induct) + case (Suc nat) + thus ?case + using policy_improvement_is_dec_det + by (auto simp: Let_def mpi_Suc'[OF Suc(2)] Suc.IH[symmetric]) +qed auto + +lemma termination_mpi_algo: + assumes "eps > 0" "d \ D\<^sub>D" "v \ \\<^sub>b v" + shows "mpi_algo_dom (d, v, m)" +proof - + define n where "n = (LEAST n. 2 * l * dist (mpi_val d v m n) (\\<^sub>b (mpi_val d v m n)) < eps * (1 - l))" (is "n = (LEAST n. ?P d v m n)") + have least0: "\n. P n \ (LEAST n. P n) = (0 :: nat) \ P 0" for P + by (metis LeastI_ex) + from n_def assms show ?thesis + proof (induction n arbitrary: v d m) + case 0 + have "2 * l * dist (mpi_val d v m 0) (\\<^sub>b (mpi_val d v m 0)) < eps * (1 - l)" + using least0 mpi_val_term_ex 0 + by (metis (no_types, lifting)) + thus ?case + using 0 mpi_algo.domintros mpi_val_zero + by (metis (no_types, opaque_lifting)) + next + case (Suc n v d m) + let ?d = "policy_improvement d v" + have "Suc n = Suc (LEAST n. 2 * l * dist (mpi_val d v m (Suc n)) (\\<^sub>b (mpi_val d v m (Suc n))) < eps * (1 - l))" + using mpi_val_term_ex[OF Suc.prems(3) \v \ \\<^sub>b v\ \0 < eps\, of m] Suc.prems + by (subst Nat.Least_Suc[symmetric]) (auto intro: LeastI_ex) + hence "n = (LEAST n. 2 * l * dist (mpi_val d v m (Suc n)) (\\<^sub>b (mpi_val d v m (Suc n))) < eps * (1 - l))" + by auto + hence n_eq: "n = + (LEAST n. 2 * l * dist (mpi_val ?d (L_pow v ?d (m 0 v)) (\a. m (Suc a)) n) (\\<^sub>b (mpi_val ?d (L_pow v ?d (m 0 v)) (\a. m (Suc a)) n)) + < eps * (1 - l))" + using Suc.prems mpi_Suc' + by (auto simp: is_dec_det_pi mpi_val_def) + have "\ 2 * l * dist v (\\<^sub>b v) < eps * (1 - l)" + using Suc mpi_val_zero by force + moreover have "mpi_algo_dom (?d, L_pow v ?d (m 0 v), \a. m (Suc a))" + using Suc.IH[OF n_eq \0 < eps\] Suc.prems is_dec_det_pi L_pow_\\<^sub>b_mono_inv by auto + ultimately show ?case + using mpi_algo.domintros + by blast + qed +qed + +abbreviation "mpi_alg_rec d v m \ + (if 2 * l * dist v (\\<^sub>b v) < eps * (1 - l) then (policy_improvement d v, v) + else mpi_algo (policy_improvement d v) (L_pow v (policy_improvement d v) (m 0 v)) + (\n. m (Suc n)))" + +lemma mpi_algo_def': + assumes "d \ D\<^sub>D" "v \ \\<^sub>b v" "eps > 0" + shows "mpi_algo d v m = mpi_alg_rec d v m" + using mpi_algo.psimps termination_mpi_algo assms + by auto + +lemma mpi_algo_eq_mpi: + assumes "d \ D\<^sub>D" "v \ \\<^sub>b v" "eps > 0" + shows "mpi_algo d v m = mpi d v m (LEAST n. 2 * l * dist (mpi_val d v m n) (\\<^sub>b (mpi_val d v m n)) < eps * (1 - l))" +proof - + define n where "n = (LEAST n. 2 * l * dist (mpi_val d v m n) (\\<^sub>b (mpi_val d v m n)) < eps * (1 - l))" (is "n = (LEAST n. ?P d v m n)") + from n_def assms show ?thesis + proof (induction n arbitrary: d v m) + case 0 + have "?P d v m 0" + by (metis (no_types, lifting) assms(3) LeastI_ex 0 mpi_val_term_ex) + thus ?case + using assms 0 + by (auto simp: mpi_val_def mpi_algo_def') + next + case (Suc n) + hence not0: "\ (2 * l * dist v (\\<^sub>b v) < eps * (1 - l))" + using Suc(3) mpi_val_zero + by auto + obtain n' where "2 * l * dist (mpi_val d v m n') (\\<^sub>b (mpi_val d v m n')) < eps * (1 - l)" + using mpi_val_term_ex[OF Suc(3) Suc(4), of _ m] assms by blast + hence "n = (LEAST n. ?P d v m (Suc n))" + using Suc(2) Suc + by (subst (asm) Least_Suc) auto + hence "n = (LEAST n. ?P (policy_improvement d v) (L_pow v (policy_improvement d v) (m 0 v)) (\n. m (Suc n)) n)" + using Suc(3) policy_improvement_is_dec_det mpi_Suc' + by (auto simp: mpi_val_def) + hence "mpi_algo d v m = mpi d v m (Suc n)" + unfolding mpi_algo_def'[OF Suc.prems(2-4)] + using Suc(1) Suc.prems(2-4) is_dec_det_pi mpi_Suc' not0 L_pow_\\<^sub>b_mono_inv by force + thus ?case + using Suc.prems(1) by presburger + qed +qed + +lemma mpi_algo_opt: + assumes "v0 \ \\<^sub>b v0" "eps > 0" "d \ D\<^sub>D" + shows "dist (\\<^sub>b (mk_stationary_det (fst (mpi_algo d v0 m)))) \\<^sub>b_opt < eps" +proof - + let ?P = "\n. 2 * l * dist (mpi_val d v0 m n) (\\<^sub>b (mpi_val d v0 m n)) < eps * (1 - l)" + let ?n = "Least ?P" + have "mpi_algo d v0 m = mpi d v0 m ?n" and "?P ?n" + using mpi_algo_eq_mpi LeastI_ex[OF mpi_val_term_ex] assms by auto + thus ?thesis + using assms + by (auto simp: mpi_pol_opt mpi_pol_def[symmetric]) +qed + +end + + +subsection \Initial Value Estimate @{term v0_mpi}\ +text \We define an initial estimate of the value function for which Modified Policy Iteration + always terminates.\ + +abbreviation "r_min \ (\s' a. r (s', a))" +definition "v0_mpi s = r_min / (1 - l)" + +lift_definition v0_mpi\<^sub>b :: "'s \\<^sub>b real" is "v0_mpi" + by fastforce + +lemma v0_mpi\<^sub>b_le_\\<^sub>b: "v0_mpi\<^sub>b \ \\<^sub>b v0_mpi\<^sub>b" +proof (rule less_eq_bfunI) + fix x + have "r_min \ r (s, a)" for s a + by (fastforce intro: cInf_lower2) + hence "r_min \ (1-l) * r (s, a) + l * r_min" for s a + using disc_lt_one zero_le_disc + by (meson order_less_imp_le order_refl segment_bound_lemma) + hence "r_min / (1 - l) \ ((1-l) * r (s, a) + l * r_min) / (1 - l)" for s a + using order_less_imp_le[OF disc_lt_one] + by (auto intro!: divide_right_mono) + hence "r_min / (1 - l) \ r (s, a) + (l * r_min) / (1 - l)" for s a + using disc_lt_one + by (auto simp: add_divide_distrib) + thus "v0_mpi\<^sub>b x \ \\<^sub>b v0_mpi\<^sub>b x" + unfolding \\<^sub>b_eq_SUP_L\<^sub>a v0_mpi\<^sub>b.rep_eq v0_mpi_def + by (auto simp: A_ne intro: cSUP_upper2[where x = "arb_act (A x)"]) +qed + +subsection \An Instance of Modified Policy Iteration with a Valid Conservative Initial Value Estimate\ +definition "mpi_user eps m = ( + if eps \ 0 then undefined else mpi_algo eps (\x. arb_act (A x)) v0_mpi\<^sub>b m)" + +lemma mpi_user_eq: + assumes "eps > 0" + shows "mpi_user eps = mpi_alg_rec eps (\x. arb_act (A x)) v0_mpi\<^sub>b" + using v0_mpi\<^sub>b_le_\\<^sub>b assms + by (auto simp: mpi_user_def mpi_algo_def' A_ne is_dec_det_def) + +lemma mpi_user_opt: + assumes "eps > 0" + shows "dist (\\<^sub>b (mk_stationary_det (fst (mpi_user eps n)))) \\<^sub>b_opt < eps" + unfolding mpi_user_def using assms + by (auto intro: mpi_algo_opt simp: is_dec_det_def A_ne v0_mpi\<^sub>b_le_\\<^sub>b) + +end + +end \ No newline at end of file diff --git a/thys/MDP-Algorithms/Policy_Iteration.thy b/thys/MDP-Algorithms/Policy_Iteration.thy new file mode 100644 --- /dev/null +++ b/thys/MDP-Algorithms/Policy_Iteration.thy @@ -0,0 +1,319 @@ +(* Author: Maximilian Schäffeler *) + +theory Policy_Iteration + imports "MDP-Rewards.MDP_reward" + +begin + +section \Policy Iteration\ +text \ +The Policy Iteration algorithms provides another way to find optimal policies under the expected +total reward criterion. +It differs from Value Iteration in that it continuously improves an initial guess for an optimal +decision rule. Its execution can be subdivided into two alternating steps: policy evaluation and +policy improvement. + +Policy evaluation means the calculation of the value of the current decision rule. + +During the improvement phase, we choose the decision rule with the maximum value for L, +while we prefer to keep the old action selection in case of ties. +\ + + +context MDP_att_\ begin +definition "policy_eval d = \\<^sub>b (mk_stationary_det d)" +end + +context MDP_act +begin + +definition "policy_improvement d v s = ( + if is_arg_max (\a. L\<^sub>a a (apply_bfun v) s) (\a. a \ A s) (d s) + then d s + else arb_act (opt_acts v s))" + +definition "policy_step d = policy_improvement d (policy_eval d)" + +(* todo: move check is_dec_det outside the recursion *) +function policy_iteration :: "('s \ 'a) \ ('s \ 'a)" where + "policy_iteration d = ( + let d' = policy_step d in + if d = d' \ \is_dec_det d then d else policy_iteration d')" + by auto + +text \ +The policy iteration algorithm as stated above does require that the supremum in @{const \\<^sub>b} is +always attained. +\ + +text \ +Each policy improvement returns a valid decision rule. +\ +lemma is_dec_det_pi: "is_dec_det (policy_improvement d v)" + unfolding policy_improvement_def is_dec_det_def is_arg_max_def + by (auto simp: some_opt_acts_in_A) + +lemma policy_improvement_is_dec_det: "d \ D\<^sub>D \ policy_improvement d v \ D\<^sub>D" + unfolding policy_improvement_def is_dec_det_def + using some_opt_acts_in_A + by auto + +lemma policy_improvement_improving: + assumes "d \ D\<^sub>D" + shows "\_improving v (mk_dec_det (policy_improvement d v))" +proof - + have "\\<^sub>b v x = L (mk_dec_det (policy_improvement d v)) v x" for x + using is_opt_act_some + by (fastforce simp: thm_6_2_10_a_aux' L_eq_L\<^sub>a_det is_opt_act_def policy_improvement_def + arg_max_SUP[symmetric, of _ _ "(policy_improvement d v x)"] ) + thus ?thesis + using policy_improvement_is_dec_det assms + by (auto simp: \_improving_alt) +qed + +lemma eval_policy_step_L: + assumes "is_dec_det d" + shows "L (mk_dec_det (policy_step d)) (policy_eval d) = \\<^sub>b (policy_eval d)" + unfolding policy_step_def + using assms + by (auto simp: \_improving_imp_\\<^sub>b[OF policy_improvement_improving]) + +text \ The sequence of policies generated by policy iteration has monotonically increasing +discounted reward.\ +lemma policy_eval_mon: + assumes "is_dec_det d" + shows "policy_eval d \ policy_eval (policy_step d)" +proof - + let ?d' = "mk_dec_det (policy_step d)" + let ?dp = "mk_stationary_det d" + let ?P = "\t. l ^ t *\<^sub>R \

\<^sub>1 ?d' ^^ t" + + have "L (mk_dec_det d) (policy_eval d) \ L ?d' (policy_eval d)" + using assms + by (auto simp: L_le_\\<^sub>b eval_policy_step_L) + hence "policy_eval d \ L ?d' (policy_eval d)" + using L_\_fix policy_eval_def + by auto + hence "\\<^sub>b ?dp \ r_dec\<^sub>b ?d' + l *\<^sub>R \

\<^sub>1 ?d' (\\<^sub>b ?dp)" + unfolding policy_eval_def L_def + by auto + hence "(id_blinfun - l *\<^sub>R \

\<^sub>1 ?d') (\\<^sub>b ?dp) \ r_dec\<^sub>b ?d'" + by (simp add: blinfun.diff_left diff_le_eq scaleR_blinfun.rep_eq) + hence "?P ((id_blinfun - l *\<^sub>R \

\<^sub>1 ?d') (\\<^sub>b ?dp)) \ ?P (r_dec\<^sub>b ?d')" + using lemma_6_1_2_b + by auto + hence "\\<^sub>b ?dp \ ?P (r_dec\<^sub>b ?d')" + using inv_norm_le'(2)[OF norm_\

\<^sub>1_l_less] blincomp_scaleR_right suminf_cong + by (metis (mono_tags, lifting)) + thus ?thesis + unfolding policy_eval_def + by (auto simp: \_stationary) +qed + +text \ +If policy iteration terminates, i.e. @{term "d = policy_step d"}, then it does so with optimal value. +\ +lemma policy_step_eq_imp_opt: + assumes "is_dec_det d" "d = policy_step d" + shows "\\<^sub>b (mk_stationary (mk_dec_det d)) = \\<^sub>b_opt" +proof - + have "policy_eval d = \\<^sub>b (policy_eval d)" + unfolding policy_eval_def + using L_\_fix assms eval_policy_step_L[unfolded policy_eval_def] + by fastforce + thus ?thesis + unfolding policy_eval_def + using \_fix_imp_opt + by blast +qed + +end + +text \We prove termination of policy iteration only if both the state and action sets are finite.\ +locale MDP_PI_finite = MDP_act A K r l arb_act + for + A and + K :: "'s ::countable \ 'a ::countable \ 's pmf" and r l arb_act + + assumes fin_states: "finite (UNIV :: 's set)" and fin_actions: "\s. finite (A s)" +begin + +text \If the state and action sets are both finite, + then so is the set of deterministic decision rules @{const "D\<^sub>D"}\ +lemma finite_D\<^sub>D[simp]: "finite D\<^sub>D" +proof - + let ?set = "{d. \x :: 's. (x \ UNIV \ d x \ (\s. A s)) \ (x \ UNIV \ d x = undefined)}" + have "finite (\s. A s)" + using fin_actions fin_states by blast + hence "finite ?set" + using fin_states + by (fastforce intro: finite_set_of_finite_funs) + moreover have "D\<^sub>D \ ?set" + unfolding is_dec_det_def + by auto + ultimately show ?thesis + using finite_subset + by auto +qed + +lemma finite_rel: "finite {(u, v). is_dec_det u \ is_dec_det v \ \\<^sub>b (mk_stationary_det u) > + \\<^sub>b (mk_stationary_det v)}" +proof- + have aux: "finite {(u, v). is_dec_det u \ is_dec_det v}" + by auto + show ?thesis + by (auto intro: finite_subset[OF _ aux]) +qed + +text \ +This auxiliary lemma shows that policy iteration terminates if no improvement to the value of +the policy could be made, as then the policy remains unchanged. +\ +lemma eval_eq_imp_policy_eq: + assumes "policy_eval d = policy_eval (policy_step d)" "is_dec_det d" + shows "d = policy_step d" +proof - + have "policy_eval d s = policy_eval (policy_step d) s" for s + using assms + by auto + have "policy_eval d = L (mk_dec_det d) (policy_eval (policy_step d))" + unfolding policy_eval_def + using L_\_fix + by (auto simp: assms(1)[symmetric, unfolded policy_eval_def]) + hence "policy_eval d = \\<^sub>b (policy_eval d)" + by (metis L_\_fix policy_eval_def assms eval_policy_step_L) + hence "L (mk_dec_det d) (policy_eval d) s = \\<^sub>b (policy_eval d) s" for s + using \policy_eval d = L (mk_dec_det d) (policy_eval (policy_step d))\ assms(1) by auto + hence "is_arg_max (\a. L\<^sub>a a (\\<^sub>b (mk_stationary (mk_dec_det d))) s) (\a. a \ A s) (d s)" for s + unfolding L_eq_L\<^sub>a_det + unfolding policy_eval_def \\<^sub>b.rep_eq \_eq_SUP_det SUP_step_det_eq + using assms(2) is_dec_det_def L\<^sub>a_le + by (auto simp del: \\<^sub>b.rep_eq simp: \\<^sub>b.rep_eq[symmetric] + intro!: SUP_is_arg_max boundedI[of _ "r\<^sub>M + l * norm _"] bounded_imp_bdd_above) + thus ?thesis + unfolding policy_eval_def policy_step_def policy_improvement_def + by auto +qed + +text \ +We are now ready to prove termination in the context of finite state-action spaces. +Intuitively, the algorithm terminates as there are only finitely many decision rules, +and in each recursive call the value of the decision rule increases. +\ +termination policy_iteration +proof (relation "{(u, v). u \ D\<^sub>D \ v \ D\<^sub>D \ \\<^sub>b (mk_stationary_det u) > \\<^sub>b (mk_stationary_det v)}") + show "wf {(u, v). u \ D\<^sub>D \ v \ D\<^sub>D \ \\<^sub>b (mk_stationary_det v) < \\<^sub>b (mk_stationary_det u)}" + using finite_rel + by (auto intro!: finite_acyclic_wf acyclicI_order) +next + fix d x + assume h: "x = policy_step d" "\ (d = x \ \ is_dec_det d)" + have "is_dec_det d \ \\<^sub>b (mk_stationary_det d) \ \\<^sub>b (mk_stationary_det (policy_step d))" + using policy_eval_mon + by (simp add: policy_eval_def) + hence "is_dec_det d \ d \ policy_step d \ + \\<^sub>b (mk_stationary_det d) < \\<^sub>b (mk_stationary_det (policy_step d))" + using eval_eq_imp_policy_eq policy_eval_def + by (force intro!: order.not_eq_order_implies_strict) + thus "(x, d) \ {(u, v). u \ D\<^sub>D \ v \ D\<^sub>D \ \\<^sub>b (mk_stationary_det v) < \\<^sub>b (mk_stationary_det u)}" + using is_dec_det_pi policy_step_def h + by auto +qed + +text \ +The termination proof gives us access to the induction rule/simplification lemmas associated +with the @{const policy_iteration} definition. +Thus we can prove that the algorithm finds an optimal policy. +\ + +lemma is_dec_det_pi': "d \ D\<^sub>D \ is_dec_det (policy_iteration d)" + using is_dec_det_pi + by (induction d rule: policy_iteration.induct) (auto simp: Let_def policy_step_def) + +lemma pi_pi[simp]: "d \ D\<^sub>D \ policy_step (policy_iteration d) = policy_iteration d" + using is_dec_det_pi + by (induction d rule: policy_iteration.induct) (auto simp: policy_step_def Let_def) + +lemma policy_iteration_correct: + "d \ D\<^sub>D \ \\<^sub>b (mk_stationary_det (policy_iteration d)) = \\<^sub>b_opt" + by (induction d rule: policy_iteration.induct) + (fastforce intro!: policy_step_eq_imp_opt is_dec_det_pi' simp del: policy_iteration.simps) +end + +context MDP_finite_type begin +text \ +The following proofs concern code generation, i.e. how to represent @{const \

\<^sub>1} as a matrix. +\ + +sublocale MDP_att_\ + by (auto simp: A_ne finite_is_arg_max MDP_att_\_def MDP_att_\_axioms_def max_L_ex_def + has_arg_max_def MDP_reward_axioms) + +definition "fun_to_matrix f = matrix (\v. (\ j. f (vec_nth v) j))" +definition "Ek_mat d = fun_to_matrix (\v. ((\

\<^sub>1 d) (Bfun v)))" +definition "nu_inv_mat d = fun_to_matrix ((\v. ((id_blinfun - l *\<^sub>R \

\<^sub>1 d) (Bfun v))))" +definition "nu_mat d = fun_to_matrix (\v. ((\i. (l *\<^sub>R \

\<^sub>1 d) ^^ i) (Bfun v)))" + +lemma apply_nu_inv_mat: + "(id_blinfun - l *\<^sub>R \

\<^sub>1 d) v = Bfun (\i. ((nu_inv_mat d) *v (vec_lambda v)) $i)" +proof - + have eq_onpI: "P x \ eq_onp P x x" for P x + by(simp add: eq_onp_def) + + have "Real_Vector_Spaces.linear (\v. vec_lambda (((id_blinfun - l *\<^sub>R \ \<^sub>1 d) (bfun.Bfun (($) v)))))" + by (auto simp del: real_scaleR_def intro: linearI + simp: scaleR_vec_def eq_onpI plus_vec_def vec_lambda_inverse plus_bfun.abs_eq[symmetric] + scaleR_bfun.abs_eq[symmetric] blinfun.scaleR_right blinfun.add_right) + thus ?thesis + unfolding Ek_mat_def fun_to_matrix_def nu_inv_mat_def + by (auto simp: apply_bfun_inverse vec_lambda_inverse) +qed + +lemma bounded_linear_vec_lambda: "bounded_linear (\x. vec_lambda (x :: 's \\<^sub>b real))" +proof (intro bounded_linear_intro) + fix x :: "'s \\<^sub>b real" + have "sqrt (\ i \ UNIV . (apply_bfun x i)\<^sup>2) \ (\ i \ UNIV . $$apply_bfun x i)$$" + using L2_set_le_sum_abs + unfolding L2_set_def + by auto + also have "(\ i \ UNIV . $$apply_bfun x i)$$ \ (card (UNIV :: 's set) * (\xa. \apply_bfun x xa\))" + by (auto intro!: cSup_upper sum_bounded_above) + finally show "norm (vec_lambda (apply_bfun x)) \ norm x * CARD('s)" + unfolding norm_vec_def norm_bfun_def dist_bfun_def L2_set_def + by (auto simp add: mult.commute) +qed (auto simp: plus_vec_def scaleR_vec_def) + + +lemma bounded_linear_vec_lambda_blinfun: + fixes f :: "('s \\<^sub>b real) \\<^sub>L ('s \\<^sub>b real)" + shows "bounded_linear (\v. vec_lambda (apply_bfun (blinfun_apply f (bfun.Bfun (($) v)))))" + using blinfun.bounded_linear_right + by (fastforce intro: bounded_linear_compose[OF bounded_linear_vec_lambda] + bounded_linear_bfun_nth bounded_linear_compose[of f]) + +lemma invertible_nu_inv_max: "invertible (nu_inv_mat d)" + unfolding nu_inv_mat_def fun_to_matrix_def + by (auto simp: matrix_invertible inv_norm_le' vec_lambda_inverse apply_bfun_inverse + bounded_linear.linear[OF bounded_linear_vec_lambda_blinfun] + intro!: exI[of _ "\v. (\ j. (\v. (\i. (l *\<^sub>R \ \<^sub>1 d) ^^ i) (Bfun v)) (vec_nth v) j)"]) + +end + +definition "least_arg_max f P = (LEAST x. is_arg_max f P x)" + +locale MDP_ord = MDP_finite_type A K r l + for A and + K :: "'s :: {finite, wellorder} \ 'a :: {finite, wellorder} \ 's pmf" + and r l +begin + +lemma \_fin_eq_det: "\ v s = (\a \ A s. L\<^sub>a a v s)" + by (simp add: SUP_step_det_eq \_eq_SUP_det) + +lemma \\<^sub>b_fin_eq_det: "\\<^sub>b v s = (\a \ A s. L\<^sub>a a v s)" + by (simp add: SUP_step_det_eq \\<^sub>b.rep_eq \_eq_SUP_det) + +sublocale MDP_PI_finite A K r l "\X. Least (\x. x \ X)" + by unfold_locales (auto intro: LeastI) + +end +end \ No newline at end of file diff --git a/thys/MDP-Algorithms/ROOT b/thys/MDP-Algorithms/ROOT new file mode 100644 --- /dev/null +++ b/thys/MDP-Algorithms/ROOT @@ -0,0 +1,15 @@ +chapter AFP +session "MDP-Algorithms" (AFP) = "MDP-Rewards" + + options [document = pdf, document_output = "output", timeout = 600] + sessions + Gauss_Jordan + directories + code + examples + theories + Algorithms + Code_Mod + Examples + document_files + "root.bib" + "root.tex" diff --git a/thys/MDP-Algorithms/Splitting_Methods.thy b/thys/MDP-Algorithms/Splitting_Methods.thy new file mode 100644 --- /dev/null +++ b/thys/MDP-Algorithms/Splitting_Methods.thy @@ -0,0 +1,2046 @@ +(* Author: Maximilian Schäffeler *) + +theory Splitting_Methods + imports + Blinfun_Matrix + Value_Iteration + Policy_Iteration +begin + +section \Value Iteration using Splitting Methods\ + +subsection \Regular Splittings for Matrices and Bounded Linear Functions\ + +definition "is_splitting_mat X Q R \ + X = Q - R \ invertible Q \ 0 \ matrix_inv Q \ 0 \ R" + +definition "is_splitting_blin X Q R \ is_splitting_mat (blinfun_to_matrix X) (blinfun_to_matrix Q) (blinfun_to_matrix R)" + +lemma is_splitting_blin_def': "is_splitting_blin X Q R \ + X = Q - R \ invertible\<^sub>L Q \ nonneg_blinfun (inv\<^sub>L Q) \ nonneg_blinfun R" +proof - + have "blinfun_to_matrix X = blinfun_to_matrix Q - blinfun_to_matrix R \ X = Q - R" + using blinfun_to_matrix_diff matrix_to_blinfun_inv + by metis + thus ?thesis + unfolding is_splitting_blin_def is_splitting_mat_def + using blinfun_to_matrix_inverse[of Q] matrix_to_blinfun_inv + by (fastforce simp: invertible_invertible\<^sub>L_I(1)) +qed + +lemma is_splitting_blinD[dest]: + assumes "is_splitting_blin X Q R" + shows "X = Q - R" "invertible\<^sub>L Q" "nonneg_blinfun (inv\<^sub>L Q)" "nonneg_blinfun R" + using is_splitting_blin_def' assms by auto + +subsection \Splitting Methods for MDPs\ + +locale MDP_QR = MDP_finite_type A K r l + for A :: "'s :: finite \ ('a :: finite) set" + and K :: "('s \ 'a) \ 's pmf" + and r l + + fixes Q :: "('s \ 'a) \ ('s \\<^sub>b real) \\<^sub>L ('s \\<^sub>b real)" + fixes R :: "('s \ 'a) \ ('s \\<^sub>b real) \\<^sub>L ('s \\<^sub>b real)" + assumes is_splitting: "\d. d \ D\<^sub>D \ is_splitting_blin (id_blinfun - l *\<^sub>R \ \<^sub>1 (mk_dec_det d)) (Q d) (R d)" + assumes QR_contraction: "(\d\D\<^sub>D. norm (inv\<^sub>L (Q d) o\<^sub>L R d)) < 1" + assumes arg_max_ex_split: "\d. \s. is_arg_max (\d. inv\<^sub>L (Q d) (r_dec\<^sub>b (mk_dec_det d) + R d v) s) (\d. d \ D\<^sub>D) d" +begin + +lemma inv_Q_mono: "d \ D\<^sub>D \ u \ v \ (inv\<^sub>L (Q d)) u \ (inv\<^sub>L (Q d)) v" + using is_splitting + by (auto intro!: nonneg_blinfun_mono) + +lemma splitting_eq: "d \ D\<^sub>D \ Q d - R d = (id_blinfun - l *\<^sub>R \ \<^sub>1 (mk_dec_det d))" + using is_splitting + by fastforce + +lemma Q_nonneg: "d \ D\<^sub>D \ 0 \ v \ 0 \ inv\<^sub>L (Q d) v" + using is_splitting nonneg_blinfun_nonneg + by auto + +lemma Q_invertible: "d \ D\<^sub>D \ invertible\<^sub>L (Q d)" + using is_splitting + by auto + +lemma R_nonneg: "d \ D\<^sub>D \ 0 \ v \ 0 \ R d v" + using is_splitting_blinD[OF is_splitting] + by (fastforce simp: nonneg_blinfun_nonneg intro: nonneg_blinfun_mono) + +lemma R_mono: "d \ D\<^sub>D \ u \ v \ (R d) u \ (R d) v" + using R_nonneg[of d "v - u"] + by (auto simp: blinfun.bilinear_simps) + +lemma QR_nonneg: "d \ D\<^sub>D \ 0 \ v \ 0 \ (inv\<^sub>L (Q d) o\<^sub>L R d) v" + by (simp add: Q_nonneg R_nonneg) + +lemma QR_mono: "d \ D\<^sub>D \ u \ v \ (inv\<^sub>L (Q d) o\<^sub>L R d) u \ (inv\<^sub>L (Q d) o\<^sub>L R d) v" + using QR_nonneg[of d "v - u"] + by (auto simp: blinfun.bilinear_simps) + +lemma norm_QR_less_one: "d \ D\<^sub>D \ norm (inv\<^sub>L (Q d) o\<^sub>L R d) < 1" + using QR_contraction + by (auto intro!: cSUP_lessD[of "\d. norm (inv\<^sub>L (Q d) o\<^sub>L R d)"]) + +lemma splitting: "d \ D\<^sub>D \ id_blinfun - l *\<^sub>R \ \<^sub>1 (mk_dec_det d) = Q d - R d" + using is_splitting + by auto + +subsection \Discount Factor @{term "QR_disc"}\ +abbreviation "QR_disc \ (\d \ D\<^sub>D. norm (inv\<^sub>L (Q d) o\<^sub>L R d))" + +lemma QR_le_QR_disc: "d \ D\<^sub>D \ norm (inv\<^sub>L (Q d) o\<^sub>L (R d)) \ QR_disc" + by (auto intro: cSUP_upper) + +lemma a_nonneg: "0 \ QR_disc" + using QR_contraction norm_ge_zero ex_dec_det + by (fastforce intro!: cSUP_upper2) + +subsection \Bellman-Operator\ +abbreviation "L_split d v \ inv\<^sub>L (Q d) (r_dec\<^sub>b (mk_dec_det d) + R d v)" + +definition "\_split v s = (\d \ D\<^sub>D. L_split d v s)" + +lemma \_split_bfun_aux: + assumes "d \ D\<^sub>D" + shows "norm (L_split d v) \ (\d \ D\<^sub>D. norm (inv\<^sub>L (Q d))) * r\<^sub>M + norm v" +proof - + have "norm (L_split d v) \ norm (inv\<^sub>L (Q d) (r_dec\<^sub>b (mk_dec_det d))) + norm (inv\<^sub>L (Q d) (R d v))" + by (simp add: blinfun.add_right norm_triangle_ineq) + also have "\ \ norm (inv\<^sub>L (Q d) (r_dec\<^sub>b (mk_dec_det d))) + norm (inv\<^sub>L (Q d) o\<^sub>L R d) * norm v" + by (auto simp: blinfun_apply_blinfun_compose[symmetric] norm_blinfun simp del: blinfun_apply_blinfun_compose) + also have "\ \ norm (inv\<^sub>L (Q d) (r_dec\<^sub>b (mk_dec_det d))) + norm v" + using norm_QR_less_one assms + by (fastforce intro!: mult_left_le_one_le) + also have "\ \ norm (inv\<^sub>L (Q d)) * r\<^sub>M + norm v" + by (auto intro!: order.trans[OF norm_blinfun] mult_left_mono simp: norm_r_dec_le) + also have "\ \ (\d \ D\<^sub>D. norm (inv\<^sub>L (Q d))) * r\<^sub>M + norm v" + by (auto intro!: mult_right_mono cSUP_upper assms simp: r\<^sub>M_nonneg) + finally show ?thesis. +qed + +lift_definition \\<^sub>b_split :: "('s \\<^sub>b real) \ ('s \\<^sub>b real)" is \_split + by fastforce + +lemma \\<^sub>b_split_def': "\\<^sub>b_split v s = (\d\D\<^sub>D. L_split d v s)" + unfolding \\<^sub>b_split.rep_eq \_split_def + by auto + +lemma \\<^sub>b_split_contraction: "dist (\\<^sub>b_split v) (\\<^sub>b_split u) \ QR_disc * dist v u" +proof - + have aux: + "\\<^sub>b_split v s - \\<^sub>b_split u s \ QR_disc * norm (v - u)" if h: "\\<^sub>b_split u s \ \\<^sub>b_split v s" for u v s + proof - + obtain d where d: "is_arg_max (\d. inv\<^sub>L (Q d) (r_dec\<^sub>b (mk_dec_det d) + R d v) s) (\d. d \ D\<^sub>D) d" + using finite_is_arg_max[of "D\<^sub>D"] + by auto + have *: "inv\<^sub>L (Q d) (r_dec\<^sub>b (mk_dec_det d) + R d u) s \ \\<^sub>b_split u s" + using d + by (auto simp: \\<^sub>b_split_def' is_arg_max_linorder intro!: cSUP_upper2) + have "inv\<^sub>L (Q d) (r_dec\<^sub>b (mk_dec_det d) + R d v) s = \\<^sub>b_split v s" + by (auto simp: \\<^sub>b_split_def' arg_max_SUP[OF d]) + hence "\\<^sub>b_split v s - \\<^sub>b_split u s = inv\<^sub>L (Q d) (r_dec\<^sub>b (mk_dec_det d) + R d v) s - \\<^sub>b_split u s" + by auto + also have "\ \ (inv\<^sub>L (Q d) o\<^sub>L R d) (v - u) s" + using * + by (auto simp: blinfun.bilinear_simps) + also have "\ \ norm ((inv\<^sub>L (Q d) o\<^sub>L R d)) * norm (v - u)" + by (fastforce intro: order.trans[OF le_norm_bfun norm_blinfun]) + also have "\ \ QR_disc * norm (v - u)" + using QR_contraction d + by (auto simp: is_arg_max_linorder intro!: mult_right_mono cSUP_upper2) + finally show ?thesis. + qed + have "$$\\<^sub>b_split v - \\<^sub>b_split u) s\ \ QR_disc * dist v u" for s + using aux + by (cases "\\<^sub>b_split v s \ \\<^sub>b_split u s") (fastforce simp: dist_norm norm_minus_commute)+ + thus ?thesis + by (auto intro!: cSUP_least simp: dist_bfun.rep_eq dist_real_def) +qed + +lemma \\<^sub>b_lim: + "\!v. \\<^sub>b_split v = v" + "(\n. (\\<^sub>b_split ^^ n) v) \ (THE v. \\<^sub>b_split v = v)" + using banach'[of \\<^sub>b_split] a_nonneg QR_contraction \\<^sub>b_split_contraction + unfolding is_contraction_def + by auto + +lemma \\<^sub>b_split_tendsto_opt: "(\n. (\\<^sub>b_split ^^ n) v) \ \\<^sub>b_opt" +proof - + obtain L where l_fix: "\\<^sub>b_split L = L" + using \\<^sub>b_lim(1) + by auto + have "\\<^sub>b (mk_stationary_det d) \ L" if d: "d \ D\<^sub>D" for d + proof - + let ?QR = "inv\<^sub>L (Q d) o\<^sub>L R d" + have "inv\<^sub>L (Q d) (r_dec\<^sub>b (mk_dec_det d) + R d L) \ \\<^sub>b_split L" + using d l_fix + by (fastforce simp: \\<^sub>b_split_def' intro!: cSUP_upper2) + hence "inv\<^sub>L (Q d) (r_dec\<^sub>b (mk_dec_det d) + R d L) \ L" + using l_fix by auto + hence aux: "inv\<^sub>L (Q d) (r_dec\<^sub>b (mk_dec_det d)) \ (id_blinfun - ?QR) L" + using that + by (auto simp: blinfun.bilinear_simps le_diff_eq) + have inv_eq: "inv\<^sub>L (id_blinfun - ?QR) = (\i. ?QR ^^ i)" + using QR_contraction d norm_QR_less_one + by (auto intro!: inv\<^sub>L_inf_sum) + have summable_QR:"summable (\i. norm (?QR ^^ i))" + using QR_contraction d + by (fastforce simp: a_nonneg + intro: summable_comparison_test'[where g = "\i. QR_disc^i"] + intro!: cSUP_upper2 power_mono order.trans[OF norm_blinfunpow_le]) + have "summable (\i. (?QR ^^ i) v s)" for v s + by (rule summable_comparison_test'[where g = "\i. norm (?QR ^^ i) * norm v"]) + (auto intro!: summable_QR summable_norm_cancel order.trans[OF abs_le_norm_bfun] order.trans[OF norm_blinfun] summable_mult2) + moreover have "0 \ v \ 0 \ (\i v \ 0 \ (\i. ((?QR ^^ i) v s)) " for v s + by (auto intro!: summable_LIMSEQ LIMSEQ_le) + hence "0 \ v \ 0 \ (\i. ((?QR ^^ i))) v s" for v s + using bounded_linear_apply_bfun summable_QR summable_comparison_test' + by (subst bounded_linear.suminf[where f = "(\i. apply_bfun (blinfun_apply i v) s)"]) + (fastforce intro: bounded_linear_compose[of "\s. apply_bfun s _"])+ + hence "0 \ v \ 0 \ inv\<^sub>L (id_blinfun - ?QR) v" for v + by (simp add: inv_eq less_eq_bfun_def) + hence "(inv\<^sub>L (id_blinfun - ?QR)) ((inv\<^sub>L (Q d)) (r_dec\<^sub>b (mk_dec_det d))) + \ (inv\<^sub>L (id_blinfun - ?QR)) ((id_blinfun - ?QR) L)" + by (metis aux blinfun.diff_right diff_ge_0_iff_ge) + hence "(inv\<^sub>L (id_blinfun - ?QR) o\<^sub>L inv\<^sub>L (Q d)) (r_dec\<^sub>b (mk_dec_det d)) \ L" + using invertible\<^sub>L_inf_sum[OF norm_QR_less_one[OF that]] + by auto + hence "(inv\<^sub>L (Q d o\<^sub>L (id_blinfun - ?QR))) (r_dec\<^sub>b (mk_dec_det d)) \ L" + using d norm_QR_less_one + by (auto simp: inv\<^sub>L_compose[OF Q_invertible invertible\<^sub>L_inf_sum]) + hence "(inv\<^sub>L (Q d - R d)) (r_dec\<^sub>b (mk_dec_det d)) \ L" + using Q_invertible d + by (auto simp: blinfun_compose_diff_right blinfun_compose_assoc[symmetric]) + thus "\\<^sub>b (mk_stationary_det d) \ L" + by (auto simp: \_stationary splitting[OF that, symmetric] inv\<^sub>L_inf_sum blincomp_scaleR_right) + qed + hence opt_le: "\\<^sub>b_opt \ L" + using thm_6_2_10 finite by auto + + obtain d where d: "is_arg_max (\d. inv\<^sub>L (Q d) (r_dec\<^sub>b (mk_dec_det d) + R d L) s) (\d. d \ D\<^sub>D) d" for s + using arg_max_ex_split by blast + hence "d \ D\<^sub>D" + unfolding is_arg_max_linorder + by auto + have "L = inv\<^sub>L (Q d) (r_dec\<^sub>b (mk_dec_det d) + R d L)" + by (subst l_fix[symmetric]) (fastforce simp: \\<^sub>b_split_def' arg_max_SUP[OF d]) + hence "Q d L = r_dec\<^sub>b (mk_dec_det d) + R d L" + by (metis Q_invertible \d \ D\<^sub>D\ inv_app2') + hence "(id_blinfun - l *\<^sub>R \ \<^sub>1 (mk_dec_det d)) L = r_dec\<^sub>b (mk_dec_det d)" + using splitting[OF \d \ D\<^sub>D\] + by (simp add: blinfun.diff_left) + hence "L = inv\<^sub>L ((id_blinfun - l *\<^sub>R \ \<^sub>1 (mk_dec_det d))) (r_dec\<^sub>b (mk_dec_det d))" + using invertible\<^sub>L_inf_sum[OF norm_\ \<^sub>1_l_less] inv_app1' + by metis + hence "L = \\<^sub>b (mk_stationary_det d)" + by (auto simp: inv\<^sub>L_inf_sum \_stationary blincomp_scaleR_right) + hence "\\<^sub>b_opt = L" + using opt_le \d \ D\<^sub>D\ is_markovian_def + by (auto intro: order.antisym[OF _ \\<^sub>b_le_opt]) + thus ?thesis + using \\<^sub>b_lim l_fix the1_equality[OF \\<^sub>b_lim(1)] + by auto +qed + +lemma \\<^sub>b_split_fix[simp]: "\\<^sub>b_split \\<^sub>b_opt = \\<^sub>b_opt" + using \\<^sub>b_lim \\<^sub>b_split_tendsto_opt the_equality limI + by (metis (mono_tags, lifting)) + +lemma dist_\\<^sub>b_split_opt_eps: + assumes "eps > 0" "2 * QR_disc * dist v (\\<^sub>b_split v) < eps * (1-QR_disc)" + shows "dist (\\<^sub>b_split v) \\<^sub>b_opt < eps / 2" +proof - + have "(1 - QR_disc) * dist v \\<^sub>b_opt \ dist v (\\<^sub>b_split v)" + using dist_triangle \\<^sub>b_split_contraction[of v "\\<^sub>b_opt"] + by (fastforce simp: algebra_simps intro: order.trans[OF _ add_left_mono[of "dist (\\<^sub>b_split v) \\<^sub>b_opt"]]) + hence "dist v \\<^sub>b_opt \ dist v (\\<^sub>b_split v) / (1 - QR_disc)" + using QR_contraction + by (simp add: mult.commute pos_le_divide_eq) + hence "2 * QR_disc * dist v \\<^sub>b_opt \ 2 * QR_disc * (dist v (\\<^sub>b_split v) / (1 - QR_disc))" + using \\<^sub>b_split_contraction assms mult_le_cancel_left_pos[of "2 * QR_disc"] a_nonneg + by (fastforce intro!: mult_left_mono[of _ _ "2 * QR_disc"]) + hence "2 * QR_disc * dist v \\<^sub>b_opt < eps" + using a_nonneg QR_contraction + by (auto simp: assms(2) pos_divide_less_eq intro: order.strict_trans1) + hence "dist v \\<^sub>b_opt * QR_disc < eps / 2" + by argo + thus "dist (\\<^sub>b_split v) \\<^sub>b_opt < eps / 2" + using \\<^sub>b_split_contraction[of v \\<^sub>b_opt] + by (auto simp: algebra_simps) +qed + +lemma L_split_fix: + assumes "d \ D\<^sub>D" + shows "L_split d (\\<^sub>b (mk_stationary_det d)) = \\<^sub>b (mk_stationary_det d)" +proof - + let ?d = "mk_dec_det d" + let ?p = "mk_stationary_det d" + have "(Q d - R d) (\\<^sub>b ?p) = r_dec\<^sub>b ?d" + using L_\_fix[of "mk_dec_det d"] + by (simp add: L_def splitting[OF assms, symmetric] blinfun.bilinear_simps diff_eq_eq) + thus ?thesis + using assms + by (auto simp: blinfun.bilinear_simps diff_eq_eq inv\<^sub>L_cancel_iff[OF Q_invertible]) +qed + +lemma L_split_contraction: + assumes "d \ D\<^sub>D" + shows "dist (L_split d v) (L_split d u) \ QR_disc * dist v u" +proof - + have aux: "L_split d v s - L_split d u s \ QR_disc * dist v u" if lea: "(L_split d u s) \ (L_split d v s)" for v s u + proof - + have "L_split d v s - L_split d u s = (inv\<^sub>L (Q d) o\<^sub>L (R d)) (v - u) s" + by (auto simp: blinfun.bilinear_simps) + also have "\ \ norm ((inv\<^sub>L (Q d) o\<^sub>L (R d)) (v - u))" + by (simp add: le_norm_bfun) + also have "\ \ norm ((inv\<^sub>L (Q d) o\<^sub>L (R d))) * dist v u" + by (auto simp only: dist_norm norm_blinfun) + also have "\ \ QR_disc * dist v u" + using assms QR_le_QR_disc + by (auto intro!: mult_right_mono) + finally show ?thesis + by auto + qed + have "dist (L_split d v s) (L_split d u s) \ QR_disc * dist v u" for v s u + using aux aux[of v _ u] + by (cases "L_split d v s \ L_split d u s") (auto simp: dist_real_def dist_commute) + thus "dist (L_split d v) (L_split d u) \ QR_disc * dist v u" + by (simp add: dist_bound) +qed + +lemma find_policy_QR_error_bound: + assumes "eps > 0" "2 * QR_disc * dist v (\\<^sub>b_split v) < eps * (1-QR_disc)" + assumes am: "\s. is_arg_max (\d. L_split d (\\<^sub>b_split v) s) (\d. d \ D\<^sub>D) d" + shows "dist (\\<^sub>b (mk_stationary_det d)) \\<^sub>b_opt < eps" +proof - + let ?p = "mk_stationary_det d" + have L_eq_\\<^sub>b: "L_split d (\\<^sub>b_split v) = \\<^sub>b_split (\\<^sub>b_split v)" + by (auto simp: \\<^sub>b_split_def' arg_max_SUP[OF am]) + have "dist (\\<^sub>b ?p) (\\<^sub>b_split v) = dist (L_split d (\\<^sub>b ?p)) (\\<^sub>b_split v)" + using am + by (auto simp: is_arg_max_linorder L_split_fix) + also have "\ \ dist (L_split d (\\<^sub>b ?p)) (\\<^sub>b_split (\\<^sub>b_split v)) + dist (\\<^sub>b_split (\\<^sub>b_split v)) (\\<^sub>b_split v)" + by (auto intro: dist_triangle) + also have "\ = dist (L_split d (\\<^sub>b ?p)) (L_split d (\\<^sub>b_split v)) + dist (\\<^sub>b_split (\\<^sub>b_split v)) (\\<^sub>b_split v)" + by (auto simp: L_eq_\\<^sub>b) + also have "\ \ QR_disc * dist (\\<^sub>b ?p) (\\<^sub>b_split v) + QR_disc * dist (\\<^sub>b_split v) v" + using \\<^sub>b_split_contraction L_split_contraction am unfolding is_arg_max_def + by (auto intro!: add_mono) + finally have aux: "dist (\\<^sub>b ?p) (\\<^sub>b_split v) \ QR_disc * dist (\\<^sub>b ?p) (\\<^sub>b_split v) + QR_disc * dist (\\<^sub>b_split v) v" . + hence "dist (\\<^sub>b ?p) (\\<^sub>b_split v) - QR_disc * dist (\\<^sub>b ?p) (\\<^sub>b_split v) \ QR_disc * dist (\\<^sub>b_split v) v" + by auto + hence "dist (\\<^sub>b ?p) (\\<^sub>b_split v) * (1 - QR_disc) \ QR_disc * dist (\\<^sub>b_split v) v" + by argo + hence "2 * dist (\\<^sub>b ?p) (\\<^sub>b_split v) * (1-QR_disc) \ 2 * (QR_disc * dist (\\<^sub>b_split v) v)" + using mult_left_mono + by auto + hence "2 * dist (\\<^sub>b ?p) (\\<^sub>b_split v) * (1 - QR_disc) \ eps * (1 - QR_disc)" + using assms + by (auto intro!: mult_left_mono simp: dist_commute pos_divide_le_eq) + hence "2 * dist (\\<^sub>b ?p) (\\<^sub>b_split v) \ eps" + using QR_contraction mult_right_le_imp_le + by auto + moreover have "2 * dist (\\<^sub>b_split v) \\<^sub>b_opt < eps" + using dist_\\<^sub>b_split_opt_eps assms + by fastforce + ultimately show ?thesis + using dist_triangle[of "\\<^sub>b ?p" \\<^sub>b_opt "\\<^sub>b_split v"] + by auto +qed +end + +context MDP_ord begin +lemma inv_one_sub_Q': + fixes Q :: "'c :: banach \\<^sub>L 'c" + assumes onorm_le: "norm (id_blinfun - Q) < 1" + shows "inv\<^sub>L Q = (\i. (id_blinfun - Q)^^i)" + by (metis inv\<^sub>L_I inv_one_sub_Q assms) + +text \An important theorem: allows to compare the rate of convergence for different splittings\ +lemma norm_splitting_le: + assumes "is_splitting_blin (id_blinfun - l *\<^sub>R \ \<^sub>1 d) Q1 R1" + and "is_splitting_blin (id_blinfun - l *\<^sub>R \ \<^sub>1 d) Q2 R2" + and "(blinfun_to_matrix R2) \ (blinfun_to_matrix R1)" + and "(blinfun_to_matrix R1) \ (blinfun_to_matrix (l *\<^sub>R \ \<^sub>1 d))" + shows "norm (inv\<^sub>L Q2 o\<^sub>L R2) \ norm (inv\<^sub>L Q1 o\<^sub>L R1)" +proof - + let ?R1 = "blinfun_to_matrix R1" + let ?R2 = "blinfun_to_matrix R2" + let ?Q1 = "blinfun_to_matrix Q1" + let ?Q2 = "blinfun_to_matrix Q2" + have + inv_Q: "inv\<^sub>L Q = (\i. (id_blinfun - Q)^^i)" "norm (id_blinfun - Q) < 1" and + splitting_eq: "id_blinfun - Q = l *\<^sub>R \ \<^sub>1 d - R" and + nonneg_Q: "0 \ blinfun_to_matrix (id_blinfun - Q)" + if "(blinfun_to_matrix R) \ (blinfun_to_matrix (l *\<^sub>R \ \<^sub>1 d))" + and "is_splitting_blin (id_blinfun - l *\<^sub>R \ \<^sub>1 d) Q R" for Q R + proof - + let ?R = "blinfun_to_matrix R" + show splitting_eq: "id_blinfun - Q = l *\<^sub>R \ \<^sub>1 d - R" + using that + by (auto simp: eq_diff_eq is_splitting_blin_def') + have R_nonneg: "0 \ ?R" + using that + by blast + show nonneg_Q: "0 \ blinfun_to_matrix (id_blinfun - Q)" + using that + by (auto simp: splitting_eq blinfun_to_matrix_diff) + moreover have "(blinfun_to_matrix (id_blinfun - Q)) \ (blinfun_to_matrix (l *\<^sub>R \ \<^sub>1 d))" + using R_nonneg + by (auto simp: splitting_eq blinfun_to_matrix_diff) + ultimately have "norm (id_blinfun - Q) \ norm (l *\<^sub>R \ \<^sub>1 d)" + using matrix_le_norm_mono by blast + thus "norm (id_blinfun - Q) < 1" + using norm_\ \<^sub>1_l_less + by (simp add: order.strict_trans1) + thus "inv\<^sub>L Q = (\i. (id_blinfun - Q) ^^ i)" + using inv_one_sub_Q' + by auto + qed + + have i1: "inv\<^sub>L Q1 = (\i. (id_blinfun - Q1) ^^ i)" "norm (id_blinfun - Q1) < 1" + and i2: "inv\<^sub>L Q2 = (\i. (id_blinfun - Q2) ^^ i)" "norm (id_blinfun - Q2) < 1" + using assms + by (auto intro: inv_Q[of R2 Q2] inv_Q[of R1 Q1]) + + have Q1_le_Q2: "blinfun_to_matrix (id_blinfun - Q1) \ blinfun_to_matrix (id_blinfun - Q2)" + using assms unfolding is_splitting_blin_def' + by (auto simp: blinfun_to_matrix_diff eq_diff_eq blinfun_to_matrix_add) + + have "blinfun_to_matrix (inv\<^sub>L Q1) = blinfun_to_matrix ((\i. (id_blinfun - Q1) ^^ i))" + using i1 by auto + also have "\ = ((\i. blinfun_to_matrix ((id_blinfun - Q1) ^^ i)))" + using bounded_linear.suminf[OF bounded_linear_blinfun_to_matrix] summable_inv_Q i1(2) + by auto + also have "\ \ (\i. blinfun_to_matrix ((id_blinfun - Q2) ^^ i))" + proof - + have le_n: "\n. 0 \ n \ (\i (\in. 0 \ n \ (\i (\in. (\i (\i. blinfun_to_matrix ((id_blinfun - Q1) ^^ i))" + by (auto intro!: bounded_linear.summable[of blinfun_to_matrix] summable_LIMSEQ simp add: bounded_linear_blinfun_to_matrix i1(2) summable_inv_Q) + hence le1: "(\n. (\i (\i. blinfun_to_matrix ((id_blinfun - Q1) ^^ i)) j k" for j k + using tendsto_vec_nth + by metis + have "(\n. (\i (\i. blinfun_to_matrix ((id_blinfun - Q2) ^^ i))" + by (auto intro!: bounded_linear.summable[of blinfun_to_matrix] summable_LIMSEQ simp add: bounded_linear_blinfun_to_matrix i2(2) summable_inv_Q) + hence le2: "(\n. (\i (\i. blinfun_to_matrix ((id_blinfun - Q2) ^^ i)) j k" for j k + using tendsto_vec_nth + by metis + have "((\i. blinfun_to_matrix ((id_blinfun - Q1) ^^ i)) j k) \ ((\i. blinfun_to_matrix ((id_blinfun - Q2) ^^ i)) j k)" for j k + by (fastforce intro: Topological_Spaces.lim_mono[OF le_n_elem le1 le2]) + thus ?thesis + by (simp add: less_eq_vec_def) + qed + also have "\ = blinfun_to_matrix (inv\<^sub>L Q2)" + using summable_inv_Q i2(2) i2 + by (auto intro!: bounded_linear.suminf[OF bounded_linear_blinfun_to_matrix, symmetric]) + finally have Q1_le_Q2: "blinfun_to_matrix (inv\<^sub>L Q1) \ blinfun_to_matrix (inv\<^sub>L Q2)" . + + have *: "0 \ blinfun_to_matrix ((inv\<^sub>L Q1) o\<^sub>L R1)" "0 \ blinfun_to_matrix ((inv\<^sub>L Q2) o\<^sub>L R2)" + using assms is_splitting_blin_def' + by (auto simp: blinfun_to_matrix_comp intro: nonneg_matrix_mult) + + have "0 \ (id_blinfun - l *\<^sub>R \ \<^sub>1 d) 1" + using less_imp_le[OF disc_lt_one] + by (auto simp: blinfun.diff_left less_eq_bfun_def blinfun.scaleR_left) + hence "(inv\<^sub>L Q1) ((id_blinfun - l *\<^sub>R \ \<^sub>1 d) 1) \ (inv\<^sub>L Q2) ((id_blinfun - l *\<^sub>R \ \<^sub>1 d) 1)" + by (metis Q1_le_Q2 blinfun.diff_left blinfun_to_matrix_diff diff_ge_0_iff_ge nonneg_blinfun_nonneg) + hence "(inv\<^sub>L Q1) ((Q1 - R1) 1) \ (inv\<^sub>L Q2) ((Q2 - R2) 1)" + by (metis (no_types, opaque_lifting) assms(1) assms(2) is_splitting_blin_def') + hence "(inv\<^sub>L Q1 o\<^sub>L Q1) 1 - (inv\<^sub>L Q1 o\<^sub>L R1) 1 \ (inv\<^sub>L Q2 o\<^sub>L Q2) 1 - (inv\<^sub>L Q2 o\<^sub>L R2) 1" + by (auto simp: blinfun.add_left blinfun.diff_right blinfun.diff_left) + hence "(inv\<^sub>L Q2 o\<^sub>L R2) 1 \ (inv\<^sub>L Q1 o\<^sub>L R1) 1" + using assms + unfolding is_splitting_blin_def' + by auto + moreover have "0 \ (inv\<^sub>L Q2 o\<^sub>L R2) 1" + using * + by (fastforce simp: less_eq_bfunI intro!: nonneg_blinfun_nonneg) + ultimately have "norm ((inv\<^sub>L Q2 o\<^sub>L R2) 1) \ norm ((inv\<^sub>L Q1 o\<^sub>L R1) 1)" + by (auto simp: less_eq_bfun_def norm_bfun_def' intro!: abs_ge_self cSUP_mono intro: order.trans) + thus "norm ((inv\<^sub>L Q2 o\<^sub>L R2)) \ norm ((inv\<^sub>L Q1 o\<^sub>L R1))" + by (auto simp: norm_nonneg_blinfun_one *) +qed + +subsection \Gauss Seidel Splitting\ +subsubsection \Definition of Upper and Lower Triangular Matrices\ +definition "P_dec d \ blinfun_to_matrix (\ \<^sub>1 (mk_dec_det d))" +definition "P_upper d \ (\ i j. if i \ j then P_dec d i j else 0)" +definition "P_lower d \ (\ i j. if j < i then P_dec d i j else 0)" + +definition "\ \<^sub>U d = matrix_to_blinfun (P_upper d)" +definition "\ \<^sub>L d = matrix_to_blinfun (P_lower d)" + +lemma P_dec_elem: "P_dec d i j = pmf (K (i, d i)) j" + unfolding blinfun_to_matrix_def matrix_def \ \<^sub>1.rep_eq K_st_def P_dec_def push_exp.rep_eq vec_lambda_beta + by (subst pmf_expectation_bind[of "{d i}"]) + (auto split: if_splits simp: mk_dec_det_def axis_def vec_lambda_inverse integral_measure_pmf[of "{j}"]) + +lemma nonneg_\ \<^sub>U: "nonneg_blinfun (\ \<^sub>U d)" + unfolding \ \<^sub>U_def Finite_Cartesian_Product.less_eq_vec_def blinfun_to_matrix_inv P_upper_def P_dec_elem + by auto + +lemma nonneg_P_dec: "0 \ P_dec d" + by (simp add: Finite_Cartesian_Product.less_eq_vec_def P_dec_elem) + +lemma nonneg_P_upper: "0 \ P_upper d" + using nonneg_P_dec + by (simp add: Finite_Cartesian_Product.less_eq_vec_def P_upper_def) + +lemma nonneg_P_lower: "0 \ P_lower d" + using nonneg_P_dec + by (simp add: Finite_Cartesian_Product.less_eq_vec_def P_lower_def) + +lemma nonneg_\ \<^sub>L: "nonneg_blinfun (\ \<^sub>L d)" + unfolding \ \<^sub>L_def Finite_Cartesian_Product.less_eq_vec_def blinfun_to_matrix_inv P_lower_def P_dec_elem + by auto + +lemma nonneg_\ \<^sub>1: "nonneg_blinfun (\ \<^sub>1 d)" + unfolding blinfun_to_matrix_def matrix_def + by (auto simp: Finite_Cartesian_Product.less_eq_vec_def axis_def intro!: \ \<^sub>1_pos less_eq_bfunD[of 0, simplified]) + +lemma norm_\ \<^sub>L_le: "norm (\ \<^sub>L d) \ norm (\ \<^sub>1 (mk_dec_det d))" + using nonneg_\ \<^sub>1 + by (fastforce intro!: matrix_le_norm_mono simp: Finite_Cartesian_Product.less_eq_vec_def P_dec_def P_lower_def \ \<^sub>L_def) + +lemma norm_\ \<^sub>L_le_one: "norm (\ \<^sub>L d) \ 1" + using norm_\ \<^sub>L_le norm_\ \<^sub>1 by auto + +lemma norm_\ \<^sub>L_less_one: "norm (l *\<^sub>R \ \<^sub>L d) < 1" + using order.strict_trans1[OF mult_left_le disc_lt_one] zero_le_disc norm_\ \<^sub>L_le_one + by auto + + +lemma \ \<^sub>L_le_\ \<^sub>1: "0 \ v \ \ \<^sub>L d v \ \ \<^sub>1 (mk_dec_det d) v" +proof - + assume "0 \ v" + moreover have "P_lower d \ P_dec d" + using nonneg_P_dec + by (auto simp: P_lower_def less_eq_vec_def) + ultimately show ?thesis + by (metis P_dec_def \ \<^sub>L_def blinfun_apply_mono blinfun_to_matrix_inv nonneg_\ \<^sub>L) +qed + +lemma \ \<^sub>U_le_\ \<^sub>1: "0 \ v \ \ \<^sub>U d v \ \ \<^sub>1 (mk_dec_det d) v" +proof - + assume "0 \ v" + moreover have "P_upper d \ P_dec d" + using nonneg_P_dec + by (auto simp: P_upper_def less_eq_vec_def) + ultimately show ?thesis + by (metis P_dec_def \ \<^sub>U_def blinfun_apply_mono blinfun_to_matrix_inv nonneg_\ \<^sub>U) +qed + +lemma row_P_upper_indep: "d s = d' s \ row s (P_upper d) = row s (P_upper d')" + unfolding row_def P_dec_elem P_upper_def + by auto + +lemma row_P_lower_indep: "d s = d' s \ row s (P_lower d) = row s (P_lower d')" + unfolding row_def P_dec_elem P_lower_def + by auto + +lemma triangular_mat_P_upper: "upper_triangular_mat (P_upper d)" + unfolding upper_triangular_mat_def P_upper_def + by auto + +lemma slt_P_lower: "strict_lower_triangular_mat (P_lower d)" + unfolding strict_lower_triangular_mat_def P_lower_def + by auto + +lemma lt_P_lower: "lower_triangular_mat (P_lower d)" + unfolding lower_triangular_mat_def P_lower_def + by auto + + +subsubsection \Gauss Seidel is a Regular Splitting\ +definition "Q_GS d = id_blinfun - l *\<^sub>R \ \<^sub>L d" +definition "R_GS d = l *\<^sub>R \ \<^sub>U d" + +lemma splitting_gauss: "is_splitting_blin (id_blinfun - l *\<^sub>R \ \<^sub>1 (mk_dec_det d)) (Q_GS d) (R_GS d)" + unfolding is_splitting_blin_def' +proof safe + show "nonneg_blinfun (R_GS d)" + unfolding R_GS_def \ \<^sub>U_def blinfun_to_matrix_scaleR Finite_Cartesian_Product.less_eq_vec_def blinfun_to_matrix_inv + using nonneg_P_upper + by (auto intro!: mult_nonneg_nonneg) +next + have "\ \<^sub>L d + \ \<^sub>U d = \ \<^sub>1 (mk_dec_det d)" for d + proof - + have "\ \<^sub>L d + \ \<^sub>U d = matrix_to_blinfun (\ i j. ((blinfun_to_matrix (\ \<^sub>1 (mk_dec_det d)))) i j)" + unfolding \ \<^sub>L_def \ \<^sub>U_def P_lower_def P_upper_def P_dec_def matrix_to_blinfun_add[symmetric] + by (auto simp: vec_eq_iff intro!: arg_cong[of _ _ matrix_to_blinfun]) + also have "\ = (\ \<^sub>1 (mk_dec_det d))" + by (simp add: matrix_to_blinfun_inv) + finally show "\ \<^sub>L d + \ \<^sub>U d = \ \<^sub>1 (mk_dec_det d)". + qed + thus "id_blinfun - l *\<^sub>R \ \<^sub>1 (mk_dec_det d) = Q_GS d - R_GS d" + unfolding Q_GS_def R_GS_def + by (auto simp: algebra_simps scaleR_add_right[symmetric] simp del: scaleR_add_right) +next + have n_le: "norm (l *\<^sub>R \ \<^sub>L d) < 1" + using mult_left_le[OF norm_\ \<^sub>L_le_one[of d] zero_le_disc] order.strict_trans1 + by (auto intro: disc_lt_one) + thus "invertible\<^sub>L (Q_GS d)" + by (simp add: Q_GS_def invertible\<^sub>L_inf_sum) + have "inv\<^sub>L (Q_GS d) = (\i. (l *\<^sub>R \ \<^sub>L d) ^^ i)" + using inv\<^sub>L_inf_sum n_le unfolding Q_GS_def + by blast + hence *: "blinfun_to_matrix (inv\<^sub>L (Q_GS d)) i j = (\k. blinfun_to_matrix ((l *\<^sub>R \ \<^sub>L d) ^^ k) i j)" for i j + using summable_inv_Q[of "Q_GS d"] norm_\ \<^sub>L_less_one + unfolding Q_GS_def + by (subst bounded_linear.suminf[symmetric]) + (auto intro!: bounded_linear_compose[OF bounded_linear_vec_nth] bounded_linear_compose[OF bounded_linear_blinfun_to_matrix]) + have "0 \ l^i *\<^sub>R matpow (P_lower d) i" for i + using nonneg_matpow[OF nonneg_P_lower] + by (meson scaleR_nonneg_nonneg zero_le_disc zero_le_power) + have "0 \ (\k. blinfun_to_matrix ((l *\<^sub>R \ \<^sub>L d) ^^ k) i j)" for i j + proof (intro suminf_nonneg) + show "summable (\k. blinfun_to_matrix ((l *\<^sub>R \ \<^sub>L d) ^^ k) i j)" + using summable_inv_Q[of "Q_GS d"] norm_\ \<^sub>L_less_one + unfolding Q_GS_def + by (fastforce + simp: + blinfun_to_matrix_matpow nonneg_matrix_nonneg blincomp_scaleR_right blinfun_to_matrix_scaleR + intro: + bounded_linear.summable[of _ "\i. (l *\<^sub>R \ \<^sub>L d) ^^ i"] + bounded_linear_compose[OF bounded_linear_vec_nth] + bounded_linear_compose[OF bounded_linear_blinfun_to_matrix]) + show "\n. 0 \ blinfun_to_matrix ((l *\<^sub>R \ \<^sub>L d) ^^ n) i j" + using nonneg_matpow[OF nonneg_P_lower] + by (auto simp: \ \<^sub>L_def nonneg_matrix_nonneg blinfun_to_matrix_scaleR matpow_scaleR blinfun_to_matrix_matpow) + qed + thus "nonneg_blinfun (inv\<^sub>L (Q_GS d))" + by (simp add: * Finite_Cartesian_Product.less_eq_vec_def) +qed + +abbreviation "r_det\<^sub>b d \ r_dec\<^sub>b (mk_dec_det d) " +abbreviation "r_vec d \ \ i. r_dec\<^sub>b (mk_dec_det d) i" + +abbreviation "Q_mat d \ blinfun_to_matrix (Q_GS d)" +abbreviation "R_mat d \ blinfun_to_matrix (R_GS d)" + +lemma Q_mat_def: "Q_mat d = mat 1 - l *\<^sub>R P_lower d" + unfolding Q_GS_def + by (simp add: \ \<^sub>L_def blinfun_to_matrix_diff blinfun_to_matrix_id blinfun_to_matrix_scaleR) + +lemma R_mat_def: "R_mat d = l *\<^sub>R P_upper d" + unfolding R_GS_def + by (simp add: \ \<^sub>U_def blinfun_to_matrix_scaleR) + +lemma triangular_mat_R: "upper_triangular_mat (R_mat d)" + using triangular_mat_P_upper + unfolding upper_triangular_mat_def R_mat_def + by auto + +definition "GS_inv d v \ matrix_inv (Q_mat d) *v (r_vec d + R_mat d *v v)" + +text \@{term Q_mat} can be expressed as an infinite sum of @{const P_lower}. + It is therefore lower triangular.\ +lemma inv_Q_mat_suminf: "matrix_inv (Q_mat d) = (\k. (matpow (l *\<^sub>R (P_lower d)) k))" +proof - + have "matrix_inv (Q_mat d) = blinfun_to_matrix (inv\<^sub>L (Q_GS d))" + using blinfun_to_matrix_inverse(2) is_splitting_blin_def' splitting_gauss + by metis + also have "\ = blinfun_to_matrix (\i. (l *\<^sub>R \ \<^sub>L d)^^i)" + using norm_\ \<^sub>L_less_one + by (auto simp: Q_GS_def inv\<^sub>L_inf_sum) + also have "\ = (\k. (blinfun_to_matrix ((l *\<^sub>R \ \<^sub>L d)^^k)))" + using summable_inv_Q[of "Q_GS d"] norm_\ \<^sub>L_less_one bounded_linear_blinfun_to_matrix + unfolding Q_GS_def row_def + by (subst bounded_linear.suminf) auto + also have "\ = (\k. (matpow (l *\<^sub>R (P_lower d)) k))" + by (simp add: blinfun_to_matrix_scaleR blinfun_to_matrix_matpow \ \<^sub>L_def blinfun_to_matrix_inv) + finally show ?thesis. +qed + +lemma lt_Q_inv: "lower_triangular_mat (matrix_inv (Q_mat d))" + unfolding inv_Q_mat_suminf + using summable_inv_Q[of "Q_GS d"] norm_\ \<^sub>L_less_one summable_blinfun_to_matrix[of "\i. (l *\<^sub>R \ \<^sub>L d)^^i"] + by (intro lower_triangular_suminf lower_triangular_pow) + (auto simp: lower_triangular_mat_def P_lower_def Q_GS_def blinfun_to_matrix_scaleR blinfun_to_matrix_matpow \ \<^sub>L_def) + +text \Each row of the matrix @{term "Q_mat d"} only depends on @{term d}'s actions in lower states.\ + +lemma inv_Q_mat_indep: + assumes "\i. i \ s \ d i = d' i" "i \ s" + shows "row i (matrix_inv (Q_mat d)) = row i (matrix_inv (Q_mat d'))" +proof - + have "row i (matrix_inv (Q_mat d)) = row i (blinfun_to_matrix (inv\<^sub>L (Q_GS d)))" + using blinfun_to_matrix_inverse(2) is_splitting_blin_def' splitting_gauss + by metis + also have "\ = row i (blinfun_to_matrix (\i. (l *\<^sub>R \ \<^sub>L d)^^i))" + using norm_\ \<^sub>L_less_one + by (auto simp: Q_GS_def inv\<^sub>L_inf_sum) + also have "\ = (\k. row i (blinfun_to_matrix ((l *\<^sub>R \ \<^sub>L d)^^k)))" + using summable_inv_Q[of "Q_GS d"] norm_\ \<^sub>L_less_one + unfolding Q_GS_def row_def + by (subst bounded_linear.suminf[OF bounded_linear_compose[OF _ bounded_linear_blinfun_to_matrix]]) auto + also have "\ = (\k. row i (matpow (l *\<^sub>R (P_lower d)) k))" + by (simp add: blinfun_to_matrix_matpow blinfun_to_matrix_scaleR \ \<^sub>L_def blinfun_to_matrix_inv) + also have "\ = (\k. l^k *\<^sub>R row i (matpow ((P_lower d)) k))" + by (subst matpow_scaleR) (auto simp: row_def scaleR_vec_def) + also have "\ = (\k. l^k *\<^sub>R row i (matpow ((P_lower d')) k))" + using assms + by (subst lower_triangular_pow_eq[of "P_lower d'"]) (auto simp: P_dec_elem lt_P_lower row_P_lower_indep[of d' _ d]) + also have "\ = (\k. row i (matpow (l *\<^sub>R (P_lower d')) k))" + by (subst matpow_scaleR) (auto simp: row_def scaleR_vec_def) + also have "\ = (\k. row i (blinfun_to_matrix ((l *\<^sub>R \ \<^sub>L d')^^k)))" + by (simp add: \ \<^sub>L_def blinfun_to_matrix_inv blinfun_to_matrix_matpow blinfun_to_matrix_scaleR) + also have "\ = row i (blinfun_to_matrix (\i. (l *\<^sub>R \ \<^sub>L d')^^i))" + using summable_inv_Q[of "Q_GS d'"] norm_\ \<^sub>L_less_one + unfolding Q_GS_def row_def + by (auto intro!: bounded_linear.suminf[symmetric] + bounded_linear_compose[OF _ bounded_linear_blinfun_to_matrix]) + also have "\ = row i (blinfun_to_matrix (inv\<^sub>L (Q_GS d')))" + by (metis Q_GS_def inv\<^sub>L_inf_sum norm_\ \<^sub>L_less_one) + also have "\ = row i (matrix_inv (Q_mat d'))" + by (metis blinfun_to_matrix_inverse(2) is_splitting_blin_def' splitting_gauss) + finally show ?thesis. +qed + +text \As a result, also @{term GS_inv} is independent of lower actions.\ +lemma GS_indep_high_states: + assumes "\s'. s' \ s \ d s' = d' s'" + shows "GS_inv d v s = GS_inv d' v s" +proof - + have "row i (P_upper d) = row i (P_upper d')" if "i \ s" for i + using assms that row_P_upper_indep by blast + hence R_eq_upto_s: "row i (R_mat d) = row i (R_mat d')" if "i \ s" for i + using that + by (simp add: row_def R_mat_def) + + have Qr_eq: "(matrix_inv (Q_mat d) *v r_vec d) s = (matrix_inv (Q_mat d') *v r_vec d') s" + proof - + have "(matrix_inv (Q_mat d) *v r_vec d) s = (\j\UNIV. matrix_inv (Q_mat d) s j * r_vec d j)" + unfolding matrix_vector_mult_def + by simp + also have "\ = (\j\UNIV. if s < j then 0 else matrix_inv (Q_mat d) s j * r_vec d j)" + using lt_Q_inv + by (auto intro!: sum.cong simp: lower_triangular_mat_def) + also have "\ = (\j\UNIV. if s < j then 0 else matrix_inv (Q_mat d') s j * r_vec d j)" + using inv_Q_mat_indep assms + by (fastforce intro!: sum.cong simp: row_def) + also have "\ = (matrix_inv (Q_mat d') *v r_vec d') s" + using lt_Q_inv + by (auto simp: matrix_vector_mult_def assms lower_triangular_mat_def intro!: sum.cong) + finally show ?thesis. + qed + + have QR_eq: "row s (matrix_inv (Q_mat d) ** R_mat d) = row s (matrix_inv (Q_mat d') ** R_mat d')" + proof - + have "matrix_inv (Q_mat d) s k * R_mat d k j = matrix_inv (Q_mat d') s k * R_mat d' k j" for k j + proof - + have "matrix_inv (Q_mat d) s k * R_mat d k j = + (if s < k then 0 else matrix_inv (Q_mat d) s k * R_mat d k j)" + using lower_triangular_mat_def lt_Q_inv by auto + also have "\ = (if s < k then 0 else matrix_inv (Q_mat d') s k * R_mat d k j)" + by (metis (no_types, lifting) Finite_Cartesian_Product.row_def assms inv_Q_mat_indep order_refl vec_lambda_eta) + also have "\ = (if s < k \ j < k then 0 else (matrix_inv (Q_mat d') s k * R_mat d k j))" + using triangular_mat_R + unfolding upper_triangular_mat_def + by (auto split: if_splits) + also have "\ = (if s < k \ j < k then 0 else (matrix_inv (Q_mat d') s k * R_mat d' k j))" + using R_eq_upto_s + by (auto simp: row_def) + also have "\ = matrix_inv (Q_mat d') s k * R_mat d' k j" + by (metis lower_triangular_mat_def lt_Q_inv mult_not_zero triangular_mat_R upper_triangular_mat_def) + finally show ?thesis. + qed + thus ?thesis + unfolding row_def matrix_matrix_mult_def + by auto + qed + show ?thesis + using QR_eq Qr_eq + by (auto simp add: GS_inv_def vec.add row_def matrix_vector_mul_assoc matrix_vector_mult_code') +qed + +text \This recursive definition mimics the computation of the GS iteration.\ +lemma GS_inv_rec: "GS_inv d v = r_vec d + l *\<^sub>R (P_upper d *v v + P_lower d *v (GS_inv d v))" +proof - + have "Q_mat d *v (GS_inv d v) = r_vec d + R_mat d *v v" + using splitting_gauss[of d] blinfun_to_matrix_inverse(1) + unfolding GS_inv_def matrix_vector_mul_assoc is_splitting_blin_def' + by (subst matrix_inv(2)) auto + thus ?thesis + unfolding Q_mat_def R_mat_def + by (auto simp: algebra_simps scaleR_matrix_vector_assoc) +qed + +lemma is_am_GS_inv_extend: + assumes "\s. s < k \ is_arg_max (\d. GS_inv d v s) (\d. d \ D\<^sub>D) d" + and "is_arg_max (\a. GS_inv (d (k := a)) v k) (\a. a \ A k) a" + and "s \ k" + and "d \ D\<^sub>D" + shows "is_arg_max (\d. GS_inv d v s) (\d. d \ D\<^sub>D) (d (k := a))" +proof - + have am_k: "is_arg_max (\d. GS_inv d v k) (\d. d \ D\<^sub>D) (d (k := a))" + proof (rule is_arg_max_linorderI) + fix y + assume "y \ D\<^sub>D" + have "GS_inv y v k = (r_vec y + l *\<^sub>R (P_upper y *v v + P_lower y *v (GS_inv y v))) k" + using GS_inv_rec by auto + also have "\ = r (k, y k) + l * ((P_upper y *v v) k + (P_lower y *v GS_inv y v) k)" + by auto + also have "\ \ r (k, (d(k := y k)) k) + l * ((P_upper (d(k := y k)) *v v) k + (P_lower (d(k := y k)) *v GS_inv (d(k := y k)) v) k)" + proof (rule add_mono, goal_cases) + case 2 + thus ?case + proof (intro mult_left_mono add_mono, goal_cases) + case 1 + thus ?case + by (auto simp: matrix_vector_mult_def P_dec_elem fun_upd_same P_upper_def cong: if_cong) + next + case 2 + thus ?case + proof - + have "(P_lower y *v GS_inv y v) k = (P_lower (d(k := y k)) *v GS_inv y v) k" + unfolding matrix_vector_mult_def + by (auto simp: P_dec_elem fun_upd_same P_lower_def cong: if_cong) + also have "\ = (\j\UNIV. (if j < k then pmf (K (k, y k)) j * GS_inv y v j else 0))" + by (auto simp: matrix_vector_mult_def P_dec_elem P_lower_def intro!: sum.cong) + also have "\ \ (\j\UNIV. (if j < k then pmf (K (k, y k)) j * GS_inv d v j else 0))" + using assms \y\D\<^sub>D\ + by (fastforce intro!: sum_mono mult_left_mono dest: is_arg_max_linorderD) + also have "\ = (\j\UNIV. (if j < k then pmf (K (k, y k)) j * GS_inv (d(k := y k)) v j else 0))" + using GS_indep_high_states[of _ "d(k := y k)" d, symmetric] + by (fastforce intro!: sum.cong dest: leD) + also have "\ = (P_lower (d(k := y k)) *v GS_inv (d(k := y k)) v) k" + unfolding matrix_vector_mult_def P_lower_def P_dec_elem + by (fastforce intro!: sum.cong) + finally show ?thesis. + qed + qed auto + qed auto + also have "\ = (r_vec (d(k := y k)) + l *\<^sub>R ((P_upper (d(k := y k)) *v v) + (P_lower (d(k := y k)) *v GS_inv (d(k := y k)) v))) k" + by auto + also have "\ = GS_inv (d(k := y k)) v k" + using GS_inv_rec by presburger + also have "\ \ GS_inv (d(k := a)) v k" + using is_arg_max_linorderD(2)[OF assms(2)] \y \ D\<^sub>D\ is_dec_det_def + by blast + finally show "GS_inv y v k \ GS_inv (d(k := a)) v k". + next + show "d(k := a) \ D\<^sub>D" + using assms + by (auto simp: is_dec_det_def is_arg_max_linorder) + qed + show ?thesis + proof (cases "s < k") + case True + thus ?thesis + using am_k assms(1)[OF True] GS_indep_high_states[of s "d (k := a)" d] + by (fastforce dest: is_arg_max_linorderD intro!: is_arg_max_linorderI) + next + case False + thus ?thesis + using assms am_k + by auto + qed +qed + + +lemma is_arg_max_GS_le: + "\d. \s\k. is_arg_max (\d. GS_inv d v s) (\d. d \ D\<^sub>D) d" +proof (induction k rule: less_induct) + case (less x) + show ?case + proof (cases "\y. y < x") + case True + define y where "y = Max {y. y < x}" + have "y < x" + using Max_in + by (simp add: True y_def) + obtain d_opt where d_opt: "is_arg_max (\d. GS_inv d v s) (\d. d \ D\<^sub>D) d_opt" if "s \ y" for s + using \y < x\ less by blast + + define d_act where d_act: "d_act a = d_opt(x := a)" for a + have le_y: "a < x \ a \ y" for a + by (simp add: y_def) + have 1: "GS_inv d v = r_vec d + l *\<^sub>R (P_upper d *v v + P_lower d *v (GS_inv d v))" for d + proof - + have "Q_mat d *v (GS_inv d v) = (R_mat d *v v + r_vec d)" + unfolding GS_inv_def + using splitting_gauss[unfolded is_splitting_blin_def'] + by (auto simp: matrix_vector_mul_assoc matrix_inv_right[OF blinfun_to_matrix_inverse(1)]) + thus ?thesis + unfolding Q_mat_def R_mat_def + by (auto simp: scaleR_matrix_vector_assoc algebra_simps) + qed + have "(\d \ D\<^sub>D. GS_inv d v x) = (\d \ D\<^sub>D. (r_vec d + l *\<^sub>R (P_upper d *v v + P_lower d *v (GS_inv d v))) x)" + using 1 by auto + also have "\ = (\a \ A x. (r_vec (d_act a) + l *\<^sub>R (P_upper (d_act a) *v v + P_lower (d_act a) *v (GS_inv (d_act a) v))) x)" + proof (rule antisym, rule cSUP_mono, goal_cases) + case (3 n) + moreover have "(P_upper n *v v) x \ (P_upper (d_opt(x := n x)) *v v) x" + unfolding P_upper_def matrix_vector_mult_def + by (auto simp: P_dec_elem cong: if_cong) + moreover + { + have "\j. j < x \ GS_inv n v j \ GS_inv (d_opt(x := n x)) v j" + using d_opt[OF le_y] 3 + by (subst GS_indep_high_states[of _ "d_opt(x := n x)" d_opt]) (auto simp: is_arg_max_linorder) + hence "(P_lower n *v GS_inv n v) x \ (P_lower (d_opt(x := n x)) *v GS_inv (d_opt(x := n x)) v) x" + unfolding matrix_vector_mult_def P_lower_def P_dec_elem + by (fastforce intro!: mult_left_mono sum_mono) + } + ultimately show ?case + unfolding d_act + by (auto intro!: bexI[of _ "n x"] mult_left_mono add_mono simp: is_dec_det_def) + next + case 4 + then show ?case + proof (rule cSUP_mono, goal_cases) + case (3 n) + then show ?case + using d_opt + by (fastforce simp: d_act is_dec_det_def is_arg_max_linorder intro!: bexI[of _ "d_act n"]) + qed (auto simp: A_ne) + qed auto + also have "\ = (\a \ A x. GS_inv (d_act a) v x)" + using 1 by auto + finally have *: "(\d \ D\<^sub>D. GS_inv d v x) = (\a \ A x. GS_inv (d_act a) v x)". + then obtain a_opt where a_opt: "is_arg_max (\a. GS_inv (d_act a) v x) (\a. a \ A x) a_opt" + by (metis A_ne finite finite_is_arg_max) + hence "(\d \ D\<^sub>D. GS_inv d v x) = GS_inv (d_act a_opt) v x" + by (metis * arg_max_SUP) + hence am_a_opt: "is_arg_max (\d. GS_inv d v x) (\d. d \ D\<^sub>D) (d_act a_opt)" + using a_opt d_opt d_act unfolding is_dec_det_def + by (fastforce dest: is_arg_max_linorderD(1) intro!: SUP_is_arg_max) + hence "is_arg_max (\d. GS_inv d v x') (\d. d \ D\<^sub>D) (d_act a_opt)" if "x' < x" for x' + proof - + have "s' \ x' \ d_act a_opt s' = d_opt s'" for s' + using d_act that is_arg_max_linorderD[OF d_opt[OF le_y[OF that]]] + by auto + thus ?thesis + using am_a_opt is_arg_max_linorderD[OF d_opt[OF le_y[OF that]]] + by (auto simp: GS_indep_high_states[of _ "d_act a_opt" d_opt]) + qed + thus ?thesis + by (metis am_a_opt antisym_conv1) + next + case False + thus ?thesis + using finite_is_arg_max[OF finite_D\<^sub>D] + by (fastforce simp: arg_max_def someI_ex dest!: le_neq_trans) + qed +qed + +lemma ex_is_arg_max_GS: + "\d. \s. is_arg_max (\d. GS_inv d v s) (\d. d \ D\<^sub>D) d" + using is_arg_max_GS_le[of "Max UNIV"] + by auto + +function GS_rec_fun where + "GS_rec_fun v s = (\a \ A s. r (s, a) + l * ( + (\s' < s. pmf (K (s,a)) s' * (GS_rec_fun v s')) + + (\s' \ {s'. s \ s'}. pmf (K (s,a)) s' * v s')))" + by auto +termination +proof (relation "{(x,y). snd x < snd y}", rule wfI_min, goal_cases) + case (1 x Q) + assume " x \ Q" + hence *: "{u. \a. (a, u) \ Q} \ {}" + by (metis (mono_tags, lifting) \x\Q\ prod.collapse Collect_empty_eq) + hence "\a x. (a,x)\Q \ x = Min (snd  Q)" + by (auto simp: image_iff) (metis (mono_tags, lifting) equals0D Min_in[OF finite] prod.collapse image_iff) + then obtain x where "x \ Q" "snd x = Min {snd x| x. x \ Q}" + by (metis Setcompr_eq_image snd_conv) + thus ?case + using * + by (intro bexI[of _ x]) auto +qed auto + +declare GS_rec_fun.simps[simp del] + +definition "GS_rec_elem v s a = r (s, a) + l * ( + (\s' < s. pmf (K (s,a)) s' * (GS_rec_fun v s')) + + (\s' \ {s'. s \ s'}. pmf (K (s,a)) s' * v s'))" + +lemma GS_rec_fun_elem: "GS_rec_fun v s = (\a \ A s. GS_rec_elem v s a)" + unfolding GS_rec_elem_def + using GS_rec_fun.simps + by blast + +definition "GS_rec v = (\ s. GS_rec_fun (vec_nth v) s)" + +lemma GS_rec_def': "GS_rec v s = (\a \ A s. r (s, a) + l * ( + (\s' < s. pmf (K (s,a)) s' * (GS_rec v s')) + + (\s' \ {s'. s \ s'}. pmf (K (s,a)) s' * v s')))" + unfolding GS_rec_def + by (auto simp: GS_rec_fun.simps[of _ s]) + +lemma GS_rec_eq: "GS_rec v s = (\a \ A s. r (s, a) + l * ( + (P_lower (d(s := a)) *v (GS_rec v)) s + (P_upper (d(s := a)) *v v) s))" + unfolding GS_rec_def'[of v s] P_lower_def P_upper_def P_dec_elem matrix_vector_mult_def + by (auto simp: if_distrib[where f = "\x. x * _ _"] sum.If_cases lessThan_def) +definition "GS_rec_step d v \ r_vec d + l *\<^sub>R (P_lower d *v GS_rec v + P_upper d *v v)" + +lemma GS_rec_eq': "GS_rec v s = (\a \ A s. GS_rec_step (d(s:= a)) v s)" + using GS_rec_eq GS_rec_step_def by auto + +lemma GS_rec_eq_vec: + "GS_rec v s = (\d\D\<^sub>D. GS_rec_step d v s)" +proof - + obtain d where d: "is_arg_max (\d. GS_rec_step d v s) (\d. d \D\<^sub>D) d" + using finite_is_arg_max[OF finite, of "D\<^sub>D" ] ex_dec_det by blast + have "GS_rec v s = GS_rec_step d v s" + unfolding GS_rec_eq'[of _ _ d] + proof (intro antisym cSUP_least) + show "\x. x \ A s \ GS_rec_step (d(s := x)) v s \ GS_rec_step d v s" + using A_ne d + by (intro is_arg_max_linorderD[OF d]) (auto simp: is_dec_det_def is_arg_max_linorder) + show "GS_rec_step d v s \ (\a\A s. GS_rec_step (d(s := a)) v s)" + using d unfolding is_arg_max_linorder is_dec_det_def fun_upd_triv + by (auto intro!: cSUP_upper2[of _ _ "d s"]) + qed (auto simp: A_ne) + thus ?thesis + using d + by (subst arg_max_SUP[symmetric]) auto +qed + + +lift_definition GS_rec_fun\<^sub>b :: "('s \\<^sub>b real) \ ('s \\<^sub>b real)" is GS_rec_fun + by auto + +definition "GS_rec_fun_inner (v :: 's \\<^sub>b real) s a \ r (s, a) + l * ( + (\s' < s. pmf (K (s,a)) s' * (GS_rec_fun\<^sub>b v s')) + + (\s' \ {s'. s \ s'}. pmf (K (s,a)) s' * v s'))" + +definition GS_rec_iter where + "GS_rec_iter v s = (\a \ A s. r (s, a) + l * + (\s' \ UNIV. pmf (K (s,a)) s' * v s'))" + +lemma GS_rec_fun_eq_GS_iter: + assumes "\s' < s. v_next s' = GS_rec_fun v s'" "\s' \ {s'. s \ s'}. v_next s' = v s'" + shows "GS_rec_fun v s = GS_rec_iter v_next s" +proof - + have "{s'. s' < s} \ {s'. s \ s'} = UNIV" + by auto + hence *: "(\s's'\Collect (($$ s). f s') = (\s' \ UNIV. f s')" for f + by (subst sum.union_disjoint[symmetric]) (auto simp add: lessThan_def) + have "GS_rec_fun v s = (\a\A s. r (s, a) + l * ((\s's'\Collect ((\) s). pmf (K (s, a)) s' * v s')))" + using assms + by (subst GS_rec_fun.simps) auto + also have "\ = (\a\A s. r (s, a) + l * ((\s's'\Collect ((\) s). pmf (K (s, a)) s' * v_next s')))" + using assms + by auto + also have "\ = GS_rec_iter v_next s" + by (auto simp: * GS_rec_iter_def) + finally show ?thesis . +qed + +lemma foldl_upd_notin: "x \ set X \ foldl (\f y. f(y := g f y)) c X x = c x" + by (induction X arbitrary: c) auto + +lemma foldl_upd_notin': "x \ set Y \ foldl (\f y. f(y := g f y)) c (X@Y) x = foldl (\f y. f(y := g f y)) c X x" + by (induction X arbitrary: x c Y) (auto simp add: foldl_upd_notin) + +lemma sorted_list_of_set_split: + assumes "finite X" + shows "sorted_list_of_set X = sorted_list_of_set {x \ X. x < y} @ sorted_list_of_set {x \ X. y \ x}" + using assms +proof (induction "card X" arbitrary: X) + case (Suc n X) + have "sorted_list_of_set X = Min X # sorted_list_of_set (X - {Min X})" + using Suc by (auto intro: sorted_list_of_set_nonempty) + also have "\ = Min X # sorted_list_of_set {x \ (X - {Min X}). x < y} @ sorted_list_of_set {x \ (X - {Min X}). y \ x}" + using Suc card.remove[OF Suc(3) Min_in] card.empty + by (fastforce simp: Suc(1))+ + also have "\ = sorted_list_of_set ({x \ X. x < y}) @ sorted_list_of_set {x \ X. y \ x}" + proof (cases "Min X < y") + case True + hence Min_eq: "Min X = Min {x \ X. x < y}" + using True Suc Min_in + by (subst eq_Min_iff) fastforce+ + have "{x \ (X - {Min X}). x < y} = {x \ X. x < y} - {Min {x \ X. x < y}}" + using Min_eq by auto + hence "Min X # sorted_list_of_set {x \ (X - {Min X}). x < y} = + Min {x \ X. x < y} # sorted_list_of_set ({x \ X. x < y} - {Min {x \ X. x < y}})" + using Min_eq by auto + also have "\ = sorted_list_of_set ({x \ X. x < y})" + using Suc True Min_in Min_eq + by (subst sorted_list_of_set_nonempty[symmetric]) fastforce+ + finally have "Min X # sorted_list_of_set {x \ (X - {Min X}). x < y} = sorted_list_of_set ({x \ X. x < y})". + hence "Min X # sorted_list_of_set {x \ (X - {Min X}). x < y} @ sorted_list_of_set {x \ (X - {Min X}). y \ x} = + sorted_list_of_set ({x \ X. x < y}) @ sorted_list_of_set {x \ (X - {Min X}). y \ x}" + by auto + then show ?thesis + using True + by (auto simp: append_Cons[symmetric] simp del: append_Cons dest!: leD intro: arg_cong) + next + case False + have Min_eq: "Min X = Min {x \ X. y \ x}" + using False Suc Min_in + by (subst eq_Min_iff) (fastforce simp: linorder_class.not_less)+ + have 2: "{x \ (X - {Min X}). y \ x} = {x \ X. y \ x} - {Min {x \ X. y \ x}}" + using Min_eq by auto + have "x \ X \ \ x < y" for x + using False Min_less_iff Suc(3) by blast + hence "{x \ X. x < y} = {}" + by auto + hence "Min X # sorted_list_of_set {x \ X - {Min X}. x < y} @ sorted_list_of_set {x \ X - {Min X}. y \ x} = + Min X # sorted_list_of_set {x \ X - {Min X}. y \ x}" + using Suc by auto + also have "\ = Min {x \ X. y \ x} # sorted_list_of_set ({x \ X. y \ x} - {Min {x \ X. y \ x}})" + using Min_eq 2 + by auto + also have "\ = sorted_list_of_set ({x \ X. y \ x})" + using Suc False Min_in Min_eq + by (subst sorted_list_of_set_nonempty[symmetric]) fastforce+ + also have "\ = sorted_list_of_set ({x \ X. x < y})@ sorted_list_of_set ({x \ X. y \ x})" + by (simp add: \{x \ X. x < y} = {}\) + finally show ?thesis. + qed + finally show ?case. +qed auto + +lemma sorted_list_of_set_split': + assumes "finite X" + shows "sorted_list_of_set X = sorted_list_of_set {x \ X. x \ y} @ sorted_list_of_set {x \ X. y < x}" + using sorted_list_of_set_split[of X] +proof (cases "\x \ X. y < x") + case True + hence "{x \ X. x \ y} = {x \ X. x < Min {x \ X. y < x}}" + using assms True by (subst Min_gr_iff) auto + moreover have "{x \ X. y < x} = {x \ X. Min {x \ X. y < x} \ x}" + using assms True + by (subst Min_le_iff) auto + ultimately show ?thesis + using sorted_list_of_set_split[OF assms, of "Min {x \ X. y < x}"] + by auto +next + case False + hence *: "{x \ X. y < x} = {}" "{x \ X. x \ y} = X" + by (auto simp add:linorder_class.not_less) + thus ?thesis + using False + by (auto simp: *) +qed + +lemma GS_rec_fun_code: "GS_rec_fun v s = foldl (\v s. v(s := GS_rec_iter v s)) v (sorted_list_of_set {..s}) s" +proof (induction s rule: less_induct) + case (less s) + have "foldl (\v s. v(s := GS_rec_iter v s)) v (sorted_list_of_set {..s}) s + = foldl (\v s. v(s := GS_rec_iter v s)) v (sorted_list_of_set {x \ {..s}. x < s} @ sorted_list_of_set {x \ {..s}. s \ x}) s" + by (subst sorted_list_of_set_split[of _ s]) auto + also have "\ = foldl (\v s. v(s := GS_rec_iter v s)) v (sorted_list_of_set {.. {..s}. x {..s}. s \ x} = {s}" + by auto + thus ?thesis by auto + qed + also have "\ = GS_rec_iter (foldl (\v s. v(s := GS_rec_iter v s)) v (sorted_list_of_set {.. = GS_rec_fun v s" + proof (intro GS_rec_fun_eq_GS_iter[symmetric], safe, goal_cases) + case (1 s') + assume "s' < s" + hence *: "(Collect ((<) s')) \ {}" + by auto + hence "{x \ {.. {.. x} = {s'<..v s. v(s := GS_rec_iter v s)) v (sorted_list_of_set {..v s. v(s := GS_rec_iter v s)) v (sorted_list_of_set {..s'} @ sorted_list_of_set {s'<.. = GS_rec_fun v s'" + using "1" less.IH by (subst foldl_upd_notin') fastforce+ + finally show ?case. + qed (auto intro: foldl_upd_notin) + finally show ?case + by metis +qed + +lemma GS_rec_fun_code': "GS_rec_fun v s = foldl (\v s. v(s := GS_rec_iter v s)) v (sorted_list_of_set UNIV) s" +proof (cases "s = Max UNIV") + case True + then show ?thesis + by (auto simp: GS_rec_fun_code atMost_def) +next + case False + hence *: "(Collect ((<) s)) \ {}" + by (auto simp: not_le eq_Max_iff[OF finite]) + hence "{x. x < Min (Collect ((<) s))} = {..s}" + by (auto simp: Min_less_iff[OF finite *] intro: leI) + then show ?thesis + unfolding sorted_list_of_set_split[of UNIV "Min{s'. s < s'}", OF finite] GS_rec_fun_code + by (subst foldl_upd_notin'[of s]) auto +qed + +lemma GS_rec_fun_code'': "GS_rec_fun v = foldl (\v s. v(s := GS_rec_iter v s)) v (sorted_list_of_set UNIV)" + using GS_rec_fun_code' by auto + +lemma GS_rec_eq_elem: "GS_rec v$ s = GS_rec_fun (vec_nth v) s" + unfolding GS_rec_def + by auto + + + +lemma GS_rec_step_elem: "GS_rec_step d v $s = r (s, d s) + l * ((\s' < s. pmf (K (s, d s)) s' * GS_rec v$ s') + (\s' \ {s'. s \ s'}. pmf (K (s, d s)) s' * v $s'))" + unfolding GS_rec_step_def P_upper_def P_lower_def lessThan_def P_dec_elem matrix_vector_mult_def + by (auto simp: sum.If_cases algebra_simps if_distrib[of "\x. _$ _ * x"]) + +lemma is_arg_max_GS_rec_step_act: + assumes "d \D\<^sub>D" "is_arg_max (\a. GS_rec_step (d'(s := a)) v $s) (\a. a \A s) a" + shows "is_arg_max (\d. GS_rec_step d v$ s) (\d. d \D\<^sub>D) (d(s := a))" + using assms + unfolding GS_rec_step_elem is_arg_max_linorder is_dec_det_def + by auto + +lemma is_arg_max_GS_rec_step_act': + assumes "d \D\<^sub>D" "is_arg_max (\a. GS_rec_step (d'(s := a)) v $s) (\a. a \A s) (d s)" + shows "is_arg_max (\d. GS_rec_step d v$ s) (\d. d \D\<^sub>D) d" + using is_arg_max_GS_rec_step_act[OF assms] + by fastforce + +lemma + is_arg_max_GS_rec: + assumes "\s. is_arg_max (\d. GS_rec_step d v $s) (\d. d \ D\<^sub>D) d" + shows "GS_rec v = GS_rec_step d v" + using arg_max_SUP[OF assms] + by (auto simp: vec_eq_iff GS_rec_eq_vec ) + +lemma + is_arg_max_GS_rec': + assumes "is_arg_max (\d. GS_rec_step d v$ s) (\d. d \ D\<^sub>D) d" + shows "GS_rec v $s = GS_rec_step d v$ s" + using assms + by (auto simp: GS_rec_eq_vec arg_max_SUP[symmetric]) + +lemma + GS_rec_eq_GS_inv: + assumes "\s. is_arg_max (\d. GS_rec_step d v $s) (\d. d \ D\<^sub>D) d" + shows "GS_rec v = GS_inv d v" +proof - + have "GS_rec v = GS_rec_step d v" + using is_arg_max_GS_rec[OF assms] + by auto + hence "GS_rec v = r_vec d + R_mat d *v v + (l *\<^sub>R P_lower d) *v GS_rec v" + unfolding R_mat_def GS_rec_step_def + by (auto simp: scaleR_matrix_vector_assoc algebra_simps) + hence "Q_mat d *v GS_rec v = r_vec d + R_mat d *v v" + unfolding Q_mat_def + by (metis (no_types, lifting) add_diff_cancel matrix_vector_mult_diff_rdistrib matrix_vector_mul_lid) + hence "(matrix_inv (Q_mat d) ** Q_mat d) *v GS_rec v = matrix_inv (Q_mat d) *v (r_vec d + R_mat d *v v)" + by (metis matrix_vector_mul_assoc) + thus "GS_rec v = GS_inv d v" + using splitting_gauss + unfolding GS_inv_def is_splitting_blin_def' + by (subst (asm) matrix_inv_left) (fastforce intro: blinfun_to_matrix_inverse(1))+ +qed + + +lemma + GS_rec_step_eq_GS_inv: + assumes "\s. is_arg_max (\d. GS_rec_step d v$ s) (\d. d \ D\<^sub>D) d" + shows "GS_rec_step d v = GS_inv d v" + using GS_rec_eq_GS_inv[OF assms] is_arg_max_GS_rec[OF assms] + by auto + +lemma strict_lower_triangular_mat_mult: + assumes "strict_lower_triangular_mat M" "\i. i < j \ v $i = v'$ i" + shows "(M *v v) $j = (M *v v')$ j" +proof - + have "(M *v v) $j = (\i\UNIV. (if j \ i then 0 else M$ j $i * v$ i))" + using assms unfolding strict_lower_triangular_mat_def + by (auto simp: matrix_vector_mult_def intro!: sum.cong) + also have "\ = (\i\UNIV. (if j \ i then 0 else M $j$ i * v' $i))" + using assms + by (auto intro!: sum.cong) + also have "\ = (M *v v')$ j" + using assms unfolding strict_lower_triangular_mat_def + by (auto simp: matrix_vector_mult_def intro!: sum.cong) + finally show ?thesis. +qed + +lemma Q_mat_invertible: "invertible (Q_mat d)" + by (meson blinfun_to_matrix_inverse(1) is_splitting_blin_def' splitting_gauss) + +lemma GS_eq_GS_inv: + assumes "\s. s \ k \ is_arg_max (\d. GS_rec_step d v $s) (\d. d \ D\<^sub>D) d" + assumes "s \ k" + shows "GS_rec_step d v$ s = GS_inv d v $s" +proof - + have *: "GS_rec v$ s = GS_rec_step d v $s" if "s \ k" for s + using assms is_arg_max_GS_rec' that by presburger + hence "GS_rec v$ s = (r_vec d + R_mat d *v v + (l *\<^sub>R P_lower d) *v GS_rec v) $s" if "s \ k" for s + unfolding R_mat_def GS_rec_step_def using that + by (simp add: scaleR_matrix_vector_assoc pth_6) + hence "(Q_mat d *v GS_rec v)$ s = (r_vec d + R_mat d *v v) $s" if "s \ k" for s + unfolding Q_mat_def using that + by (simp add: matrix_vector_mult_diff_rdistrib) + hence "(matrix_inv (Q_mat d) *v (Q_mat d *v GS_rec v))$ s = (matrix_inv (Q_mat d) *v ((r_vec d + R_mat d *v v))) $s" + using assms lt_Q_inv by (auto intro: lower_triangular_mat_mult) + thus "GS_rec_step d v$ s = GS_inv d v $s" + unfolding GS_inv_def + using matrix_inv_left[OF Q_mat_invertible] assms * + by (auto simp: matrix_vector_mul_assoc) +qed + +lemma is_arg_max_GS_imp_splitting': + assumes "\s. s \ k \ is_arg_max (\d. GS_rec_step d v$ s) (\d. d \ D\<^sub>D) d" + assumes "s \ k" + shows "is_arg_max (\d. GS_inv d v $s) (\d. d \ D\<^sub>D) d" + using assms +proof (induction k arbitrary: s rule: less_induct) + case (less x) + have d: "d \ D\<^sub>D" + using assms(1) is_arg_max_linorderD by fast + have "is_arg_max (\a. GS_inv (d(s := a)) v$ s) (\a. a \ A s) (d s)" if "s \ x" for s + proof - + have "is_arg_max (\a. GS_rec_step (d(s := a)) v $s) (\a. a \ A s) (d s)" + using less(2)[OF that] + unfolding is_dec_det_def is_arg_max_linorder + by simp + hence *: "is_arg_max (\a. r (s, a) + l * ((P_lower (d(s := a)) *v GS_rec v)$ s + (P_upper (d(s := a)) *v v) $s)) (\a. a \ A s) (d s)" + unfolding GS_rec_step_def + by auto + have "is_arg_max (\a. r (s, a) + l * ((P_lower (d(s := a)) *v GS_inv (d(s := a)) v)$ s + (P_upper (d(s := a)) *v v) $s)) (\a. a \ A s) (d s)" + proof - + have "((P_lower (d(s := a)) *v GS_rec v)$ s = ((P_lower (d(s := a)) *v GS_rec_step d v) $s))" for a + using is_arg_max_GS_rec' less(2) that + by (auto intro!: lower_triangular_mat_mult[OF lt_P_lower]) + moreover have "((P_lower (d(s := a)) *v GS_rec_step d v)$ s) = (P_lower (d(s := a)) *v GS_inv d v) $s" for a + using less(2) that GS_eq_GS_inv + by (fastforce intro!: lower_triangular_mat_mult[OF lt_P_lower]) + moreover have "(P_lower (d(s := a)) *v GS_inv d v)$ s = (P_lower (d(s := a)) *v GS_inv (d(s := a)) v) $s" for a + using GS_indep_high_states[of _ d "d(s := a)"] + by (fastforce intro!: strict_lower_triangular_mat_mult[OF slt_P_lower] dest!: leD) + ultimately show ?thesis + using * + by auto + qed + hence "is_arg_max (\a. ((r_vec (d(s := a)) + l *\<^sub>R ((P_lower (d(s := a)) *v GS_inv (d(s := a)) v) + (P_upper (d(s := a)) *v v)))$ s)) (\a. a \ A s) (d s)" + by auto + hence **: "is_arg_max (\a. ((r_vec (d(s := a)) + R_mat (d(s := a)) *v v) + ((l *\<^sub>R P_lower (d(s := a))) *v GS_inv (d(s := a)) v) ) $s) (\a. a \ A s) (d s)" + unfolding R_mat_def + by (auto simp: algebra_simps scaleR_matrix_vector_assoc) + show ?thesis + proof- + have "(r_vec d + R_mat d *v v) = Q_mat d *v (GS_inv d v)" for d v + unfolding GS_inv_def matrix_vector_mul_assoc + by (metis (no_types, lifting) blinfun_to_matrix_inverse(1) is_splitting_blin_def' matrix_inv(2) matrix_vector_mul_lid splitting_gauss) + hence "((r_vec d + R_mat d *v v) + ((l *\<^sub>R P_lower d)) *v GS_inv d v) = GS_inv d v" for d + unfolding Q_mat_def + by (auto simp: matrix_vector_mult_diff_rdistrib) + thus ?thesis + using ** + by presburger + qed + qed + thus ?case + using less d + by (fastforce intro!: is_am_GS_inv_extend[of x v d "d x" s, unfolded fun_upd_triv]) +qed + +lemma is_am_GS_rec_step_indep: + assumes "d s = d' s" + assumes "is_arg_max (\d. GS_rec_step d v$ s) (\d. d \ D\<^sub>D) d" + shows "GS_rec v $s = GS_rec_step d' v$ s" +proof - + have "GS_rec v $s = GS_rec_step d v$ s" + using is_arg_max_GS_rec' assms(2) by blast + moreover have "GS_rec_step d v $s = GS_rec_step d' v$ s" + using GS_rec_step_elem assms(1) by fastforce + ultimately show ?thesis by auto +qed + +lemma is_am_GS_rec_step_indep': + assumes "d s = d' s" + assumes "is_arg_max (\d. GS_rec_step d v $s) (\d. d \ D\<^sub>D) d" + shows "GS_rec v$ s = GS_rec_step d' v $s" +proof - + have "GS_rec v$ s = GS_rec_step d v $s" + using is_arg_max_GS_rec' assms(2) by blast + moreover have "GS_rec_step d v$ s = GS_rec_step d' v $s" + using GS_rec_step_elem assms(1) by fastforce + ultimately show ?thesis by auto +qed + +lemma is_arg_max_GS_imp_splitting'': + assumes "\s. s \ k \ is_arg_max (\d. GS_inv d v$ s) (\d. d \ D\<^sub>D) d" + assumes "s \ k" + shows "is_arg_max (\d. GS_rec_step d v $s) (\d. d \ D\<^sub>D) d \ GS_inv d v$ s = GS_rec v $s" + using assms +proof (induction k arbitrary: s rule: less_induct) + case (less x) + have d[simp]: "d \ D\<^sub>D" using assms unfolding is_arg_max_linorder by blast + + have "is_arg_max (\a. GS_rec_step (d(s := a)) v$ s) (\a. a \ A s) (d s)" if "s \ x" for s + proof - + have "is_arg_max (\a. GS_inv (d(s := a)) v $s) (\a. a \ A s) (d s)" + using less(2)[OF that] + unfolding is_dec_det_def is_arg_max_linorder + by auto + hence *: "is_arg_max (\a. (r_vec (d(s := a)) + l *\<^sub>R (P_lower (d(s := a)) *v (GS_inv (d(s := a)) v) + P_upper (d(s := a)) *v v))$ s) (\a. a \ A s) (d s)" + by (subst (asm) GS_inv_rec) (auto simp: add.commute) + + hence *: "is_arg_max (\a. (r_vec (d(s := a)) + l *\<^sub>R (P_lower (d(s := a)) *v (GS_inv d v) + P_upper (d(s := a)) *v v)) $s) (\a. a \ A s) (d s)" + proof - + have "(P_lower (d(s := a)) *v (GS_inv (d(s := a)) v))$ s = (P_lower (d(s := a)) *v (GS_inv d v)) $s" for a + using GS_indep_high_states[of _ "d(s := a)" d v] + by (rule strict_lower_triangular_mat_mult[OF slt_P_lower]) (metis array_rules(4) leD) + thus ?thesis using * by auto + qed + thus "is_arg_max (\a. GS_rec_step (d(s := a)) v$ s) (\a. a \ A s) (d s)" + proof - + have "(P_lower (d(s := a)) *v (GS_inv d v)) $s = (P_lower (d(s := a)) *v (GS_rec v))$ s" for a + using less(1) less(2)that + by (intro strict_lower_triangular_mat_mult[OF slt_P_lower]) force + thus ?thesis + using * + unfolding GS_rec_step_def + by auto + qed + qed + hence *: "\s. s \ x \ is_arg_max (\d. GS_rec_step d v $s) (\d. d \ D\<^sub>D) d" + using d + by (intro is_arg_max_GS_rec_step_act'[of d d]) auto + moreover have "GS_inv d v$ s = GS_rec v $s" if "s \ x" for s + proof - + have "GS_rec v$ s = GS_rec_step d v $s" + using *[OF that] + by (auto simp: is_arg_max_GS_rec') + thus ?thesis + using * GS_eq_GS_inv that by presburger + qed + ultimately show ?case using less by blast +qed + +lemma is_arg_max_GS_imp_splitting''': + assumes "\s. s \ k \ is_arg_max (\d. GS_inv d v$ s) (\d. d \ D\<^sub>D) d" + assumes "s \ k" + shows "is_arg_max (\d. GS_rec_step d v $s) (\d. d \ D\<^sub>D) d" + using assms is_arg_max_GS_imp_splitting'' by blast + +lemma is_arg_max_GS_imp_splitting: + assumes "\s. is_arg_max (\d. GS_rec_step d v$ s) (\d. d \ D\<^sub>D) d" + shows "is_arg_max (\d. GS_inv d v $k) (\d. d \ D\<^sub>D) d" + using assms is_arg_max_GS_imp_splitting'[of "Max UNIV"] + by (simp add: is_arg_max_linorder) + +lemma is_arg_max_gs_iff: + assumes "d \ D\<^sub>D" + shows " (\s \ k. is_arg_max (\d. GS_inv d v$ s) (\d. d \ D\<^sub>D) d) \ + (\s \ k. is_arg_max (\d. GS_rec_step d v $s) (\d. d \ D\<^sub>D) d)" + using is_arg_max_GS_imp_splitting' is_arg_max_GS_imp_splitting'' + by meson + +lemma GS_opt_indep_high: + assumes "(\s'. s' < s \ is_arg_max (\d. GS_rec_step d v$ s') is_dec_det d)" "s' < s" "a \ A s" + shows "is_arg_max (\d. GS_rec_step d v $s') is_dec_det (d(s := a))" +proof (rule is_arg_max_linorderI) + fix y + assume "is_dec_det y" + hence "GS_rec_step y v$ s' \ r (s', d s') + l * (P_lower d *v GS_rec v) $s' + l * (P_upper d *v v)$ s'" + using is_arg_max_linorderD[OF assms(1)] + by (auto simp: GS_rec_step_def algebra_simps assms(2)) + also have "\ = r (s', (d(s := a)) s') + l * (P_lower (d(s := a)) *v GS_rec v) $s' + l * (P_upper (d(s := a)) *v v)$ s'" + proof - + have "(P_lower d *v GS_rec v) $s' = (P_lower (d(s := a)) *v GS_rec v)$ s'" + using assms + by (fastforce simp: matrix_vector_mult_def P_lower_def P_dec_elem intro!: sum.cong) + moreover have "(P_upper d *v v) $s' = (P_upper (d(s := a)) *v v)$ s'" + using assms + by (fastforce simp: matrix_vector_mult_def P_upper_def P_dec_elem intro!: sum.cong) + ultimately show ?thesis + using assms(2) by force + qed + also have "\ = GS_rec_step (d(s := a)) v $s'" + by (auto simp: GS_rec_step_def algebra_simps) + finally show "GS_rec_step y v$ s' \ GS_rec_step (d(s := a)) v $s'". +next + show "is_dec_det (d(s := a))" + using is_arg_max_linorderD[OF assms(1)[OF assms(2)]] assms(3) is_dec_det_def + by fastforce +qed + +lemma mult_mat_vec_nth: "(X *v x)$ i = scalar_product (row i X) x" + by (simp add: matrix_vector_mult_def row_def scalar_product_def) + +(* +(* duplicate *) +lemma ext_GS_opt_eq: + assumes "(\s'. s' < s \ is_arg_max (\d. GS_rec_step d v $s') (\d. d \ D\<^sub>D) d)" + and "is_arg_max (\a. GS_rec_step (d(s := a)) v$ s) (\a. a \ A s) a" + and "d \ D\<^sub>D" +shows "is_arg_max (\d. GS_rec_step d v $s) (\d. d \ D\<^sub>D) (d(s := a))" +proof (rule is_arg_max_linorderI) + fix y + assume "y \ D\<^sub>D" + have "GS_rec_step y v$ s = GS_rec_step (d (s := y s)) v $s" + unfolding P_lower_def P_upper_def P_dec_elem + using GS_rec_step_elem by force + also have "\ \ GS_rec_step (d (s := a)) v$ s" + using is_arg_max_linorderD[OF assms(2)] \y \ D\<^sub>D\ is_dec_det_def + by auto + finally show "GS_rec_step y v $s \ GS_rec_step (d(s := a)) v$ s". +next + show "d(s := a) \ D\<^sub>D" + using assms(3) is_arg_max_linorderD[OF assms(2)] is_dec_det_def + by simp +qed +*) + +lemma ext_GS_opt_le: + assumes "(\s'. s' < s \ is_arg_max (\d. GS_rec_step d v $s') (\d. d \ D\<^sub>D) d)" + and "is_arg_max (\a. GS_rec_step (d(s := a)) v$ s) (\a. a \ A s) a" "s' \ s" + and "d \ D\<^sub>D" + shows "is_arg_max (\d. GS_rec_step d v $s') (\d. d \ D\<^sub>D) (d(s := a))" + using assms is_arg_max_GS_rec_step_act is_arg_max_linorderD(1) + by (cases "s = s'") (auto intro!: GS_opt_indep_high) + +lemma ex_GS_opt_le: + shows "\d. (\s' \ s. is_arg_max (\d. GS_rec_step d v$ s') (\d. d \ D\<^sub>D) d)" +proof (induction s rule: less_induct) + case (less x) + show ?case + proof (cases "\y. y < x") + case True + hence "{y. y < x} \ {}" + by auto + have 1: "\y. y \ Max {y. y < x} \ y < x" + using True + by (auto simp: Max_ge_iff[OF finite]) + obtain d where d: "is_arg_max (\d. GS_rec_step d v $s') (\d. d \ D\<^sub>D) d" if "s'< x" for s' + using less[of "Max {y. y < x}"] 1 + by auto + obtain a where a: "is_arg_max (\a. GS_rec_step (d(x := a)) v$ x) (\a. a \ A x) a" + using finite_is_arg_max[OF finite A_ne] + by blast + hence d': "is_arg_max (\d. GS_rec_step d v $s') (\d. d \ D\<^sub>D) (d(x := a))" if "s' < x" for s' + using d GS_opt_indep_high that is_arg_max_linorderD(1)[OF a] + by simp + have d': "is_arg_max (\d. GS_rec_step d v$ s') (\d. d \ D\<^sub>D) (d(x := a))" if "s' \ x" for s' + using that a is_arg_max_linorderD[OF d] True + by (fastforce intro!: ext_GS_opt_le[OF d]) + thus ?thesis + by blast + next + case False + define d where "d y = (SOME a. a \ A y)" for y + obtain a where a: "is_arg_max (\a. GS_rec_step (d(x := a)) v $x) (\a. a \ A x) a" + using finite_is_arg_max[OF finite A_ne] + by blast + have 1: "y \ x \ y = x" for y + using False + by (meson le_neq_trans) + have "is_arg_max (\d. GS_rec_step d v$ x) (\d. d \ D\<^sub>D) (d(x := a))" + using False a SOME_is_dec_det unfolding d_def + by (fastforce intro!: is_arg_max_GS_rec_step_act) + then show ?thesis + using 1 + by blast + qed +qed + +lemma ex_GS_opt: + shows "\d. \s. is_arg_max (\d. GS_rec_step d v $s) (\d. d \ D\<^sub>D) d" + using ex_GS_opt_le[of "Max UNIV"] + by auto + +lemma GS_rec_eq_GS_inv': "GS_rec v$ s = (\d\D\<^sub>D. GS_inv d v $s)" +proof - + obtain d where d: "(\s. is_arg_max (\d. GS_rec_step d v$ s) (\d. d \ D\<^sub>D) d)" + using ex_GS_opt by blast + have "(\d\D\<^sub>D. GS_rec_step d v $s) = GS_rec_step d v$ s" + using d is_arg_max_GS_rec GS_rec_eq_vec + by metis + have "(\d\D\<^sub>D. GS_inv d v $s) = GS_inv d v$ s" + using is_arg_max_GS_imp_splitting[OF d] + by (subst arg_max_SUP[symmetric]) auto + thus ?thesis + using GS_rec_eq_GS_inv d + by presburger +qed + +lemma GS_rec_fun_eq_GS_inv: "GS_rec_fun v s = (\d\D\<^sub>D. GS_inv d (vec_lambda v) $s)" + using GS_rec_eq_GS_inv'[of "vec_lambda v"] + unfolding GS_rec_def + by (auto simp: vec_lambda_inverse) + + +lemma invertible_Q_GS: "invertible\<^sub>L (Q_GS d)" for d + by (simp add: Q_mat_invertible invertible_invertible\<^sub>L_I(1)) + +lemma ex_opt_blinfun: "\d. \s. is_arg_max (\d. ((inv\<^sub>L (Q_GS d)) (r_det\<^sub>b d + (R_GS d) v)) s) is_dec_det d" +proof - + have "GS_inv d (vec_lambda v)$ s = inv\<^sub>L (Q_GS d) (r_det\<^sub>b d + R_GS d v) s" for d s + unfolding GS_inv_def plus_bfun_def + by (simp add: invertible_Q_GS blinfun_to_matrix_mult' blinfun_to_matrix_inverse(2)[symmetric] apply_bfun_inverse) + moreover obtain d where "is_arg_max (\d. GS_inv d (vec_lambda v) $s) is_dec_det d" for s + using ex_GS_opt[of "vec_lambda v"] is_arg_max_GS_imp_splitting + by auto + ultimately show ?thesis + by auto +qed + +lemma GS_inv_blinfun_to_matrix: "((inv\<^sub>L (Q_GS d)) (r_det\<^sub>b d + R_GS d v)) = Bfun (vec_nth (GS_inv d (vec_lambda v)))" + unfolding GS_inv_def plus_bfun_def + by (auto simp: invertible_Q_GS blinfun_to_matrix_inverse(2)[symmetric] blinfun_to_matrix_mult'' apply_bfun_inverse ) + +lemma norm_GS_QR_le_disc: "norm (inv\<^sub>L (Q_GS d) o\<^sub>L R_GS d) \ l" +proof - + have "norm (inv\<^sub>L (Q_GS d) o\<^sub>L R_GS d) \ norm (inv\<^sub>L ((\_. id_blinfun) d) o\<^sub>L (l *\<^sub>R \ \<^sub>1 (mk_dec_det d))) " + proof (rule norm_splitting_le[of "mk_dec_det d"], goal_cases) + case 1 + then show ?case + unfolding is_splitting_blin_def' + by (auto simp: nonneg_id_blinfun blinfun_to_matrix_scaleR nonneg_\ \<^sub>1 scaleR_nonneg_nonneg) + next + case 3 + then show ?case + unfolding R_mat_def P_upper_def Finite_Cartesian_Product.less_eq_vec_def + using nonneg_P_dec + by (auto simp: P_dec_def nonneg_matrix_nonneg blinfun_to_matrix_scaleR) + qed (auto simp: splitting_gauss) + also have "\ = norm ((l *\<^sub>R \ \<^sub>1 (mk_dec_det d)))" + by auto + also have "\ \ l" + by auto + finally show ?thesis. +qed + +sublocale GS: MDP_QR A K r l Q_GS R_GS + rewrites "GS.\\<^sub>b_split = GS_rec_fun\<^sub>b" +proof - + have "(\d\D\<^sub>D. norm (inv\<^sub>L (Q_GS d) o\<^sub>L R_GS d)) < 1" + using norm_GS_QR_le_disc ex_dec_det + by (fastforce intro: le_less_trans[of _ l 1] intro!: cSUP_least) + thus "MDP_QR A K r l Q_GS R_GS" + by unfold_locales (auto simp: splitting_gauss ex_opt_blinfun) + thus "MDP_QR.\\<^sub>b_split A r Q_GS R_GS = GS_rec_fun\<^sub>b" + by (fastforce simp: MDP_QR.\\<^sub>b_split.rep_eq MDP_QR.\_split_def GS_rec_fun\<^sub>b.rep_eq GS_rec_fun_eq_GS_inv GS_inv_blinfun_to_matrix) +qed + +abbreviation "gs_measure \ ($$eps, v). + if v = \\<^sub>b_opt \ l = 0 + then 0 + else nat (ceiling (log (1/l) (dist v \\<^sub>b_opt) - log (1/l) (eps * (1-l) / (8 * l)))))" + +lemma dist_\\<^sub>b_split_lt_dist_opt: "dist v (GS_rec_fun\<^sub>b v) \ 2 * dist v \\<^sub>b_opt" +proof - + have le1: "dist v (GS_rec_fun\<^sub>b v) \ dist v \\<^sub>b_opt + dist (GS_rec_fun\<^sub>b v) \\<^sub>b_opt" + by (simp add: dist_triangle dist_commute) + have le2: "dist (GS_rec_fun\<^sub>b v) \\<^sub>b_opt \ GS.QR_disc * dist v \\<^sub>b_opt" + using GS.\\<^sub>b_split_contraction GS.\\<^sub>b_split_fix + by (metis (no_types, lifting)) + show ?thesis + using mult_right_mono[of GS.QR_disc 1] GS.QR_contraction + by (fastforce intro!: order.trans[OF le2] order.trans[OF le1]) +qed + +lemma GS_QR_disc_le_disc: "GS.QR_disc \ l" + using norm_GS_QR_le_disc ex_dec_det + by (fastforce intro!: cSUP_least) + +lemma gs_rel_dec: + assumes "l \ 0" "GS_rec_fun\<^sub>b v \ \\<^sub>b_opt" + shows "\log (1 / l) (dist (GS_rec_fun\<^sub>b v) \\<^sub>b_opt) - c\ < \log (1 / l) (dist v \\<^sub>b_opt) - c\" +proof - + have "log (1 / l) (dist (GS_rec_fun\<^sub>b v) \\<^sub>b_opt) - c \ log (1 / l) (l * dist v \\<^sub>b_opt) - c" + using GS.\\<^sub>b_split_contraction[of _ "\\<^sub>b_opt"] GS.QR_contraction norm_GS_QR_le_disc disc_lt_one GS_QR_disc_le_disc + by (fastforce simp: assms less_le intro!: log_le order.trans[OF GS.\\<^sub>b_split_contraction[of v "\\<^sub>b_opt", simplified]] mult_right_mono) + also have "\ = log (1 / l) l + log (1/l) (dist v \\<^sub>b_opt) - c" + using assms disc_lt_one + by (auto simp: less_le intro!: log_mult) + also have "\ = -(log (1 / l) (1/l)) + (log (1/l) (dist v \\<^sub>b_opt)) - c" + using assms disc_lt_one + by (subst log_inverse[symmetric]) (auto simp: less_le right_inverse_eq) + also have "\ = (log (1/l) (dist v \\<^sub>b_opt)) - 1 - c" + using assms order.strict_implies_not_eq[OF disc_lt_one] + by (auto intro!: log_eq_one neq_le_trans) + finally have "log (1 / l) (dist (GS_rec_fun\<^sub>b v) \\<^sub>b_opt) - c \ log (1 / l) (dist v \\<^sub>b_opt) - 1 - c" . + thus ?thesis + by linarith +qed + +function gs_iteration :: "real \ ('s \\<^sub>b real) \ ('s \\<^sub>b real)" where + "gs_iteration eps v = + (if 2 * l * dist v (GS_rec_fun\<^sub>b v) < eps * (1-l) \ eps \ 0 then GS_rec_fun\<^sub>b v else gs_iteration eps (GS_rec_fun\<^sub>b v))" + by auto +termination +proof (relation "Wellfounded.measure gs_measure", (simp; fail), cases "l = 0") + case False + fix eps v + assume h: "\ (2 * l * dist v (GS_rec_fun\<^sub>b v) < eps * (1 - l) \ eps \ 0)" + show "((eps, GS_rec_fun\<^sub>b v), eps, v) \ Wellfounded.measure gs_measure" + proof - + have gt_zero[simp]: "l \ 0" "eps > 0" and dist_ge: "eps * (1 - l) \ dist v (GS_rec_fun\<^sub>b v) * (2 * l)" + using h + by (auto simp: algebra_simps) + have v_not_opt: "v \ \\<^sub>b_opt" + using h + by auto + have "log (1 / l) (eps * (1 - l) / (8 * l)) < log (1 / l) (dist v \\<^sub>b_opt)" + proof (intro log_less) + show "1 < 1 / l" + by (auto intro!: mult_imp_less_div_pos intro: neq_le_trans) + show "0 < eps * (1 - l) / (8 * l)" + by (auto intro!: mult_imp_less_div_pos intro: neq_le_trans) + show "eps * (1 - l) / (8 * l) < dist v \\<^sub>b_opt" + using dist_pos_lt[OF v_not_opt] dist_\\<^sub>b_split_lt_dist_opt[of v] gt_zero zero_le_disc + mult_strict_left_mono[of "dist v (GS_rec_fun\<^sub>b v)" "(4 * dist v \\<^sub>b_opt)" l] + by (intro mult_imp_div_pos_less le_less_trans[OF dist_ge], argo+) + qed + thus ?thesis + using gs_rel_dec h + by auto + qed +qed auto + + +lemma THE_fix_GS_rec_fun\<^sub>b: "(THE v. GS_rec_fun\<^sub>b v = v) = \\<^sub>b_opt" + using GS.\\<^sub>b_lim(1) GS.\\<^sub>b_split_fix + by blast+ + +text \ +The distance between an estimate for the value and the optimal value can be bounded with respect to +the distance between the estimate and the result of applying it to @{const \\<^sub>b} +\ +lemma contraction_\_split_dist: "(1 - l) * dist v \\<^sub>b_opt \ dist v (GS_rec_fun\<^sub>b v)" + using GS_QR_disc_le_disc + by (fastforce + simp: THE_fix_GS_rec_fun\<^sub>b + intro: order.trans[OF _ contraction_dist, of _ l] order.trans[OF GS.\\<^sub>b_split_contraction] mult_right_mono)+ + +lemma dist_\\<^sub>b_split_opt_eps: + assumes "eps > 0" "2 * l * dist v (GS_rec_fun\<^sub>b v) < eps * (1-l)" + shows "dist (GS_rec_fun\<^sub>b v) \\<^sub>b_opt < eps / 2" +proof - + have "dist v \\<^sub>b_opt \ dist v (GS_rec_fun\<^sub>b v) / (1 - l)" + using contraction_\_split_dist + by (simp add: mult.commute pos_le_divide_eq) + hence "2 * l * dist v \\<^sub>b_opt \ 2 * l * (dist v (GS_rec_fun\<^sub>b v) / (1 - l))" + using contraction_\_dist assms mult_le_cancel_left_pos[of "2 * l"] + by (fastforce intro!: mult_left_mono[of _ _ "2 * l"]) + hence "2 * l * dist v \\<^sub>b_opt < eps" + by (auto simp: assms(2) pos_divide_less_eq intro: order.strict_trans1) + hence "dist v \\<^sub>b_opt * l < eps / 2" + by argo + hence *: "l * dist v \\<^sub>b_opt < eps / 2" + by (auto simp: algebra_simps) + show "dist (GS_rec_fun\<^sub>b v) \\<^sub>b_opt < eps / 2" + using GS.\\<^sub>b_split_contraction[of v \\<^sub>b_opt] order.trans mult_right_mono[OF GS_QR_disc_le_disc zero_le_dist] + by (fastforce intro!: le_less_trans[OF _ *]) +qed +end + +context MDP_ord +begin + +lemma is_am_GS_inv_extend': + assumes "(\s. s < x \ is_arg_max (\d. GS_inv d v s) (\d. d \ D\<^sub>D) d)" + assumes "is_arg_max (\d. GS_rec_step d v x) (\d. d \ D\<^sub>D) (d(x := a))" + assumes "s \ x" "d \ D\<^sub>D" + shows "is_arg_max (\d. GS_inv d v s) (\d. d \ D\<^sub>D) (d(x := a))" +proof - + have a: "a \ A x" using assms(2) unfolding is_arg_max_linorder is_dec_det_def by (auto split: if_splits) + have *: "\y. y < x \ s\Max {y. y < x} \ s < x" for x s :: 's + by (auto simp: linorder_class.Max_ge_iff[OF finite]) + have "(\s. s < x \ is_arg_max (\d. GS_rec_step d v s) (\d. d \ D\<^sub>D) d)" + using is_arg_max_gs_iff[OF assms(4), of "Max {y. y < x}"] assms(1) + by (cases "\y. y < x") (auto simp: *) + hence "(\s. s < x \ is_arg_max (\d. GS_rec_step d v s) (\d. d \ D\<^sub>D) (d(x := a)))" + using GS_opt_indep_high a by auto + hence "(\s. s \ x \ is_arg_max (\d. GS_rec_step d v s) (\d. d \ D\<^sub>D) (d(x := a)))" + using assms(2) antisym_conv1 by blast + thus ?thesis + using is_arg_max_gs_iff[of "d(x := a)" s] assms(4) assms a + by (intro is_arg_max_GS_imp_splitting') auto +qed + +definition "opt_policy_gs' d v s = (LEAST a. is_arg_max (\a. GS_rec_step (d(s := a)) v s) (\a. a \ A s) a)" + +definition "GS_iter a v s = r (s, a) + l * (\s' \ UNIV. pmf (K(s,a)) s' * v s')" + +definition "GS_iter_max v s = (\a \ A s. GS_iter a v s)" + +lemma GS_rec_eq_iter: + assumes "\s. s < k \ v' s = GS_rec v s" "\s. k \ s \ v' s = v s" + shows "GS_rec_step (d(k := a)) v k = GS_iter a v' k" +proof - + have "(P_lower d *v GS_rec v) k = (P_lower d *v v') k" for d + using slt_P_lower assms + by (auto intro!: strict_lower_triangular_mat_mult) + moreover have "(P_upper d *v v) k = (P_upper d *v v') k" for d + unfolding P_upper_def using assms + by (auto simp: matrix_vector_mult_def if_distrib[of "\x. x * _ _"] cong: if_cong) + moreover have "P_lower d + P_upper d = P_dec d" for d + by (auto simp: P_lower_def P_upper_def Finite_Cartesian_Product.vec_eq_iff) + ultimately show ?thesis + unfolding vector_add_component[symmetric] matrix_vector_mult_diff_rdistrib[symmetric] GS_rec_step_def + matrix_vector_mult_def P_dec_elem P_lower_def P_upper_def GS_iter_def + by (fastforce simp: sum.distrib[symmetric] intro!: sum.cong) +qed + +lemma GS_rec_eq_iter_max: + assumes "\s. s < k \ v' s = GS_rec v s" "\s. k \ s \ v' s = v s" + shows "GS_rec v k = GS_iter_max v' k" + using GS_rec_eq_iter[OF assms] GS_rec_eq'[of _ _ undefined] GS_iter_max_def + by auto + +definition "GS_iter_arg_max v s = (LEAST a. is_arg_max (\a. GS_iter a v s) (\a. a \ A s) a)" + +definition "GS_rec_am_code v d s = foldl (\vd s. vd(s := (GS_iter_max (\ s. fst (vd s)) s, GS_iter_arg_max (\ s. fst (vd s)) s))) (\s. (v s, d s)) (sorted_list_of_set {..s}) s" +definition "GS_rec_am_code' v d s = foldl (\vd s. vd(s := (GS_iter_max (\ s. fst (vd s)) s, GS_iter_arg_max (\ s. fst (vd s)) s))) (\s. (v s, d s)) (sorted_list_of_set UNIV) s" + +lemma GS_rec_am_code': "GS_rec_am_code = GS_rec_am_code'" +proof - + have *: "sorted_list_of_set UNIV = sorted_list_of_set{..s} @ sorted_list_of_set{s<..}" for s :: 's + using sorted_list_of_set_split'[OF finite, of UNIV s] + by (auto simp: atMost_def greaterThan_def) + have "GS_rec_am_code v d s = GS_rec_am_code' v d s" for v d s + unfolding GS_rec_am_code_def GS_rec_am_code'_def *[of s] + by (fastforce intro!: foldl_upd_notin'[symmetric]) + thus ?thesis + by blast +qed + +lemma opt_policy_gs'_eq_GS_iter: + assumes "\s. s < k \ v' s = GS_rec v s" "\s. k \ s \ v' s = v s" + shows "opt_policy_gs' d v k = GS_iter_arg_max v' k" + unfolding opt_policy_gs'_def GS_iter_arg_max_def + by (subst GS_rec_eq_iter[OF assms, of k d]) auto + +lemma opt_policy_gs'_eq_GS_iter': + "opt_policy_gs' d v k = GS_iter_arg_max (\ s. if s < k then GS_rec v s else v s) k" + using opt_policy_gs'_eq_GS_iter + by (simp add: leD) + +lemma opt_policy_gs'_is_dec_det: "opt_policy_gs' d v \ D\<^sub>D" + unfolding opt_policy_gs'_def is_dec_det_def + using finite_is_arg_max[OF finite A_ne] + by (auto intro: LeastI2_ex) + +lemma opt_policy_gs'_is_arg_max: "is_arg_max (\d. GS_inv d v s) (\d. d \ D\<^sub>D) (opt_policy_gs' d v)" +proof (induction arbitrary: d rule: less_induct) + case (less x) + have "s < x \ is_arg_max (\d. GS_inv d v s) (\d. d \ D\<^sub>D) (opt_policy_gs' d v)" for d s + using less + by auto + hence *:"s < x \ is_arg_max (\d. GS_rec_step d v s) (\d. d \ D\<^sub>D) (opt_policy_gs' d v)" for d s + by (intro is_arg_max_GS_imp_splitting''') auto + have "is_arg_max (\a. GS_rec_step (d(x := a)) v x) (\a. a \ A x) (opt_policy_gs' d v x)" for d + unfolding opt_policy_gs'_def + using finite_is_arg_max[OF _ A_ne] + by (auto intro: LeastI_ex) + hence "is_arg_max (\d. GS_rec_step d v x) (\d. d \ D\<^sub>D) (opt_policy_gs' d v)" for d + using opt_policy_gs'_is_dec_det + by (intro is_arg_max_GS_rec_step_act') auto + hence "s \ x \ is_arg_max (\d. GS_rec_step d v s) (\d. d \ D\<^sub>D) (opt_policy_gs' d v)" for d s + using * + by (auto simp: order.order_iff_strict) + hence "s \ x \ is_arg_max (\d. GS_inv d v s) (\d. d \ D\<^sub>D) (opt_policy_gs' d v)" for d s + using is_arg_max_GS_imp_splitting' + by blast + thus ?case + by blast +qed + +lemma "GS_rec_am_code v d s = (GS_rec v s, opt_policy_gs' d v s)" +proof (induction s arbitrary: d rule: less_induct) + case (less x) + show ?case + proof (cases "\x'. x' < x") + case True + let ?f = "(\vd s. vd(s := (GS_iter_max (\ s. fst (vd s)) s, GS_iter_arg_max (\ s. fst (vd s)) s)))" + define x' where "x' = Max {x'. x' < x}" + let ?old = "(foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..x'}))" + have 1: "s < x \ (s \ set (sorted_list_of_set {s' \ {..x'}. s < s'}))" for s :: 's + by auto + have "s < x \ foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..x'}) s = foldl ?f (\s. (v s, d s)) (sorted_list_of_set {s' \ {..x'}. s' \ s} @ sorted_list_of_set {s' \ {..x'}. s < s'}) s" for s + by (subst sorted_list_of_set_split'[symmetric, OF finite]) blast + hence "s < x \ foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..x'}) s = foldl ?f (\s. (v s, d s)) (sorted_list_of_set {s' \ {..x'}. s' \ s}) s" for s + using foldl_upd_notin'[OF 1] + by fastforce + hence 1: "s < x \ foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..x'}) s = foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..s}) s" for s + unfolding x'_def + using True + by (auto simp: atMost_def Max_ge_iff[OF finite]) meson + have fst_IH: "fst (?old s) = GS_rec v s" if "s < x" for s + using 1[OF that] less[unfolded GS_rec_am_code_def] that + by auto + have fst_IH': "fst (?old s) = v s" if "x \ s" for s + using True that + by (subst foldl_upd_notin) (auto simp: x'_def Max_ge_iff) + have fst_IH'': "fst (?old s) = (if s < x then GS_rec v s else v s)" for s + using fst_IH fst_IH' by auto + have "foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..x}) = foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..x'} @ sorted_list_of_set {x})" + proof - + have *: "{x'. x' < x} \ {}" using True by auto + hence **: "{..x'} = {y \ {..x}. y < x}" "{x} = {y \ {..x}. x \ y}" + by (auto simp: x'_def Max_ge_iff[OF finite *]) + show ?thesis + unfolding ** sorted_list_of_set_split[symmetric, OF finite] by auto + qed + also have "\ = ?f (foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..x'})) x" + by auto + also have "\ = (?old (x := (GS_rec v x, GS_iter_arg_max (\ s. fst (?old s)) x)))" + proof (subst GS_rec_eq_iter_max[of _ "(\ s. fst (?old s))"], goal_cases) + case (1 s) + then show ?case + using fst_IH by auto + next + case (2 s) + then show ?case + unfolding vec_lambda_inverse[OF UNIV_I] + using True + by (subst foldl_upd_notin) (auto simp: x'_def Max_ge_iff[OF finite]) + qed auto + also have "\ = (?old (x := (GS_rec v x, opt_policy_gs' d v x)))" + by (auto simp: fst_IH'' opt_policy_gs'_eq_GS_iter') + finally have "foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..x}) = (?old (x := (GS_rec v x, opt_policy_gs' d v x)))". + thus ?thesis + unfolding GS_rec_am_code_def + by auto + next + case False + hence "{..x} = {x}" + by (auto simp: not_less antisym) + thus ?thesis + unfolding GS_rec_am_code_def + using opt_policy_gs'_eq_GS_iter[of x v] GS_rec_eq_iter_max[of x v] False + by fastforce + qed +qed + +lemma GS_rec_am_code_eq: "GS_rec_am_code v d s = (GS_rec v s, opt_policy_gs' d v s)" +proof (induction s arbitrary: d rule: less_induct) + case (less x) + show ?case + proof (cases "\x'. x' < x") + case True + let ?f = "(\vd s. vd(s := (GS_iter_max (\ s. fst (vd s)) s, GS_iter_arg_max (\ s. fst (vd s)) s)))" + define x' where "x' = Max {x'. x' < x}" + let ?old = "(foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..x'}))" + have 1: "s < x \ (s \ set (sorted_list_of_set {s' \ {..x'}. s < s'}))" for s :: 's + by auto + have "s < x \ foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..x'}) s = foldl ?f (\s. (v s, d s)) (sorted_list_of_set {s' \ {..x'}. s' \ s} @ sorted_list_of_set {s' \ {..x'}. s < s'}) s" for s + by (subst sorted_list_of_set_split'[symmetric, OF finite]) blast + hence "s < x \ foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..x'}) s = foldl ?f (\s. (v s, d s)) (sorted_list_of_set {s' \ {..x'}. s' \ s}) s" for s + using foldl_upd_notin'[OF 1] + by fastforce + hence 1: "s < x \ foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..x'}) s = foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..s}) s" for s + unfolding x'_def + using True + by (auto simp: atMost_def Max_ge_iff[OF finite]) meson + have fst_IH: "fst (?old s) = GS_rec v s" if "s < x" for s + unfolding 1[OF that] less[unfolded GS_rec_am_code_def, OF that] + by auto + have fst_IH': "fst (?old s) = v s" if "x \ s" for s + using True that + by (subst foldl_upd_notin) (auto simp: x'_def atMost_def Max_ge_iff[OF finite]) + have fst_IH'': "fst (?old s) = (if s < x then GS_rec v s else v s)" for s + using fst_IH fst_IH' by auto + have "foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..x}) = foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..x'} @ sorted_list_of_set {x})" + proof - + have *: "{x'. x' < x} \ {}" using True by auto + hence 1: "{..x'} = {y \ {..x}. y < x}" + by (auto simp: x'_def Max_ge_iff[OF finite *]) + have 2: "{x} = {y \ {..x}. x \ y}" + by auto + thus ?thesis + unfolding 1 2 sorted_list_of_set_split[symmetric, OF finite] by auto + qed + also have "\ = ?f (foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..x'})) x" + by auto + also have "\ = (?old (x := (GS_rec v x, GS_iter_arg_max (\ s. fst (?old s)) x)))" + proof (subst GS_rec_eq_iter_max[of _ "(\ s. fst (?old s))"], goal_cases) + case (2 s) + then show ?case + unfolding vec_lambda_inverse[OF UNIV_I] + using True + by (subst foldl_upd_notin) (auto simp: x'_def Max_ge_iff[OF finite]) + qed (auto simp: fst_IH) + also have "\ = (?old (x := (GS_rec v x, opt_policy_gs' d v x)))" + by (auto simp: fst_IH'' opt_policy_gs'_eq_GS_iter') + finally have "foldl ?f (\s. (v s, d s)) (sorted_list_of_set {..x}) = (?old (x := (GS_rec v x, opt_policy_gs' d v x)))". + thus ?thesis + unfolding GS_rec_am_code_def + by auto + next + case (False) + hence "{..x} = {x}" + by (auto simp: not_less antisym) + hence *: "(sorted_list_of_set {..x}) = [x]" + by auto + show ?thesis + unfolding GS_rec_am_code_def + using opt_policy_gs'_eq_GS_iter[of x v] GS_rec_eq_iter_max[of x v] False + by (fastforce simp: *) + qed +qed + +definition GS_rec_iter_arg_max where + "GS_rec_iter_arg_max v s = (LEAST a. is_arg_max (\a. r (s, a) + l * (\s' \ UNIV. pmf (K (s,a)) s' * v s')) (\a. a \ A s) a)" +definition "opt_policy_gs v s = (LEAST a. is_arg_max (\a. GS_rec_fun_inner v s a) (\a. a \ A s) a)" + +lemma opt_policy_gs_eq': "opt_policy_gs v = opt_policy_gs' d (vec_lambda v)" + unfolding opt_policy_gs_def opt_policy_gs'_def GS_rec_fun_inner_def GS_rec_step_elem + by (auto simp: GS_rec_fun\<^sub>b.rep_eq GS_rec_def vec_lambda_inverse) + +declare gs_iteration.simps[simp del] + +lemma gs_iteration_error: + assumes "eps > 0" + shows "dist (gs_iteration eps v) \\<^sub>b_opt < eps / 2" + using assms dist_\\<^sub>b_split_opt_eps gs_iteration.simps + by (induction eps v rule: gs_iteration.induct) auto + + +lemma GS_rec_fun_inner_vec: "GS_rec_fun_inner v s a = GS_rec_step (d(s := a)) (vec_lambda v) s" + unfolding GS_rec_step_elem + by (auto simp: GS_rec_fun_inner_def GS_rec_def GS_rec_fun\<^sub>b.rep_eq vec_lambda_inverse) + +lemma find_policy_error_bound_gs: + assumes "eps > 0" "2 * l * dist v (GS_rec_fun\<^sub>b v) < eps * (1-l)" + shows "dist (\\<^sub>b (mk_stationary_det (opt_policy_gs (GS_rec_fun\<^sub>b v)))) \\<^sub>b_opt < eps" +proof (rule GS.find_policy_QR_error_bound[OF assms(1)]) + have "2 * GS.QR_disc * dist v (GS_rec_fun\<^sub>b v) \ 2 * l * dist v (GS_rec_fun\<^sub>b v)" + using GS_QR_disc_le_disc + by (auto intro!: mult_right_mono) + also have "\ < eps * (1-l)" using assms by auto + also have "\ \ eps * (1 - GS.QR_disc)" + using assms GS_QR_disc_le_disc + by (auto intro!: mult_left_mono) + finally show "2 * GS.QR_disc * dist v (GS_rec_fun\<^sub>b v) < eps * (1 - GS.QR_disc)". +next + obtain d where d: "is_dec_det d" + using ex_dec_det by blast + show "is_arg_max (\d. apply_bfun (GS.L_split d (GS_rec_fun\<^sub>b v)) s) (\d. d \ D\<^sub>D) (opt_policy_gs (GS_rec_fun\<^sub>b v))" for s + unfolding opt_policy_gs_eq'[of _ d] GS_inv_blinfun_to_matrix + using opt_policy_gs'_is_arg_max + by simp +qed + +definition "vi_gs_policy eps v = opt_policy_gs (gs_iteration eps v)" + +lemma vi_gs_policy_opt: + assumes "0 < eps" + shows "dist (\\<^sub>b (mk_stationary_det (vi_gs_policy eps v))) \\<^sub>b_opt < eps" + unfolding vi_gs_policy_def + using assms +proof (induction eps v rule: gs_iteration.induct) + case (1 v) + then show ?case + using find_policy_error_bound_gs + by (subst gs_iteration.simps) auto +qed + +lemma GS_rec_iter_eq_iter_max: "GS_rec_iter v = GS_iter_max (vec_lambda v)" + unfolding GS_rec_iter_def GS_iter_max_def GS_iter_def + by auto +end + +end \ No newline at end of file diff --git a/thys/MDP-Algorithms/Value_Iteration.thy b/thys/MDP-Algorithms/Value_Iteration.thy new file mode 100644 --- /dev/null +++ b/thys/MDP-Algorithms/Value_Iteration.thy @@ -0,0 +1,351 @@ +(* Author: Maximilian Schäffeler *) + +theory Value_Iteration + imports "MDP-Rewards.MDP_reward" +begin + +context MDP_att_\ +begin + +section \Value Iteration\ +text \ +In the previous sections we derived that repeated application of @{const "\\<^sub>b"} to any bounded +function from states to the reals converges to the optimal value of the MDP @{const "\\<^sub>b_opt"}. + +We can turn this procedure into an algorithm that computes not only an approximation of +@{const "\\<^sub>b_opt"} but also a policy that is arbitrarily close to optimal. + +Most of the proofs rely on the assumption that the supremum in @{const "\\<^sub>b"} can always be attained. +\ + +text \ +The following lemma shows that the relation we use to prove termination of the value iteration +algorithm decreases in each step. +In essence, the distance of the estimate to the optimal value decreases by a factor of at +least @{term l} per iteration.\ + + +lemma vi_rel_dec: + assumes "l \ 0" "\\<^sub>b v \ \\<^sub>b_opt" + shows "\log (1 / l) (dist (\\<^sub>b v) \\<^sub>b_opt) - c\ < \log (1 / l) (dist v \\<^sub>b_opt) - c\" +proof - + have "log (1 / l) (dist (\\<^sub>b v) \\<^sub>b_opt) - c \ log (1 / l) (l * dist v \\<^sub>b_opt) - c" + using contraction_$of _ "\\<^sub>b_opt"] disc_lt_one + by (auto simp: assms less_le intro: log_le) + also have "\ = log (1 / l) l + log (1/l) (dist v \\<^sub>b_opt) - c" + using assms disc_lt_one + by (auto simp: less_le intro!: log_mult) + also have "\ = -(log (1 / l) (1/l)) + (log (1/l) (dist v \\<^sub>b_opt)) - c" + using assms disc_lt_one + by (subst log_inverse[symmetric]) (auto simp: less_le right_inverse_eq) + also have "\ = (log (1/l) (dist v \\<^sub>b_opt)) - 1 - c" + using assms order.strict_implies_not_eq[OF disc_lt_one] + by (auto intro!: log_eq_one neq_le_trans) + finally have "log (1 / l) (dist (\\<^sub>b v) \\<^sub>b_opt) - c \ log (1 / l) (dist v \\<^sub>b_opt) - 1 - c" . + thus ?thesis + by linarith +qed + +lemma dist_\\<^sub>b_lt_dist_opt: "dist v (\\<^sub>b v) \ 2 * dist v \\<^sub>b_opt" +proof - + have le1: "dist v (\\<^sub>b v) \ dist v \\<^sub>b_opt + dist (\\<^sub>b v) \\<^sub>b_opt" + by (simp add: dist_triangle dist_commute) + have le2: "dist (\\<^sub>b v) \\<^sub>b_opt \ l * dist v \\<^sub>b_opt" + using \\<^sub>b_opt contraction_\ + by metis + show ?thesis + using mult_right_mono[of l 1] disc_lt_one + by (fastforce intro!: order.trans[OF le2] order.trans[OF le1]) +qed + +abbreviation "term_measure \ (\(eps, v). + if v = \\<^sub>b_opt \ l = 0 + then 0 + else nat (ceiling (log (1/l) (dist v \\<^sub>b_opt) - log (1/l) (eps * (1-l) / (8 * l)))))" + +function value_iteration :: "real \ ('s \\<^sub>b real) \ ('s \\<^sub>b real)" where + "value_iteration eps v = + (if 2 * l * dist v (\\<^sub>b v) < eps * (1-l) \ eps \ 0 then \\<^sub>b v else value_iteration eps (\\<^sub>b v))" + by auto + +termination +proof (relation "Wellfounded.measure term_measure", (simp; fail), cases "l = 0") + case False + fix eps v + assume h: "\ (2 * l * dist v (\\<^sub>b v) < eps * (1 - l) \ eps \ 0)" + show "((eps, \\<^sub>b v), eps, v) \ Wellfounded.measure term_measure" + proof - + have gt_zero[simp]: "l \ 0" "eps > 0" and dist_ge: "eps * (1 - l) \ dist v (\\<^sub>b v) * (2 * l)" + using h + by (auto simp: algebra_simps) + have v_not_opt: "v \ \\<^sub>b_opt" + using h + by force + have "log (1 / l) (eps * (1 - l) / (8 * l)) < log (1 / l) (dist v \\<^sub>b_opt)" + proof (intro log_less) + show "1 < 1 / l" + by (auto intro!: mult_imp_less_div_pos intro: neq_le_trans) + show "0 < eps * (1 - l) / (8 * l)" + by (auto intro!: mult_imp_less_div_pos intro: neq_le_trans) + show "eps * (1 - l) / (8 * l) < dist v \\<^sub>b_opt" + using dist_pos_lt[OF v_not_opt] dist_\\<^sub>b_lt_dist_opt[of v] gt_zero zero_le_disc + mult_strict_left_mono[of "dist v (\\<^sub>b v)" "(4 * dist v \\<^sub>b_opt)" l] + by (intro mult_imp_div_pos_less le_less_trans[OF dist_ge], argo+) + qed + thus ?thesis + using vi_rel_dec h + by auto + qed +qed auto + +text \ +The distance between an estimate for the value and the optimal value can be bounded with respect to +the distance between the estimate and the result of applying it to @{const \\<^sub>b} +\ +lemma contraction_\_dist: "(1 - l) * dist v \\<^sub>b_opt \ dist v (\\<^sub>b v)" + using contraction_dist contraction_\ disc_lt_one zero_le_disc + by fastforce + +lemma dist_\\<^sub>b_opt_eps: + assumes "eps > 0" "2 * l * dist v (\\<^sub>b v) < eps * (1-l)" + shows "dist (\\<^sub>b v) \\<^sub>b_opt < eps / 2" +proof - + have "dist v \\<^sub>b_opt \ dist v (\\<^sub>b v) / (1 - l)" + using contraction_\_dist + by (simp add: mult.commute pos_le_divide_eq) + hence "2 * l * dist v \\<^sub>b_opt \ 2 * l * (dist v (\\<^sub>b v) / (1 - l))" + using contraction_\_dist assms mult_le_cancel_left_pos[of "2 * l"] + by (fastforce intro!: mult_left_mono[of _ _ "2 * l"]) + hence "2 * l * dist v \\<^sub>b_opt < eps" + by (auto simp: assms(2) pos_divide_less_eq intro: order.strict_trans1) + hence "dist v \\<^sub>b_opt * l < eps / 2" + by argo + hence "l * dist v \\<^sub>b_opt < eps / 2" + by (auto simp: algebra_simps) + thus "dist (\\<^sub>b v) \\<^sub>b_opt < eps / 2" + using contraction_\[of v \\<^sub>b_opt] + by auto +qed + +text \ +The estimates above allow to give a bound on the error of @{const value_iteration}. +\ +declare value_iteration.simps[simp del] + +lemma value_iteration_error: + assumes "eps > 0" + shows "dist (value_iteration eps v) \\<^sub>b_opt < eps / 2" + using assms dist_\\<^sub>b_opt_eps value_iteration.simps + by (induction eps v rule: value_iteration.induct) auto + +text \ +After the value iteration terminates, one can easily obtain a stationary deterministic +epsilon-optimal policy. + +Such a policy does not exist in general, attainment of the supremum in @{const \\<^sub>b} is required. +\ +definition "find_policy (v :: 's \\<^sub>b real) s = arg_max_on (\a. L\<^sub>a a v s) (A s)" + +definition "vi_policy eps v = find_policy (value_iteration eps v)" + +text \ +We formalize the attainment of the supremum using a predicate @{const has_arg_max}. +\ + +abbreviation "vi u n \ (\\<^sub>b ^^n) u" + +lemma \\<^sub>b_iter_mono: + assumes "u \ v" shows "vi u n \ vi v n" + using assms \\<^sub>b_mono + by (induction n) auto + +lemma + assumes "vi v (Suc n) \ vi v n" + shows "vi v (Suc n + m) \ vi v (n + m)" +proof - + have "vi v (Suc n + m) = vi (vi v (Suc n)) m" + by (simp add: Groups.add_ac(2) funpow_add funpow_swap1) + also have "... \ vi (vi v n) m" + using \\<^sub>b_iter_mono[OF assms] + by auto + also have "... = vi v (n + m)" + by (simp add: add.commute funpow_add) + finally show ?thesis . +qed + + +lemma + assumes "vi v n \ vi v (Suc n)" + shows "vi v (n + m) \ vi v (Suc n + m)" +proof - + have "vi v (n + m) \ vi (vi v n) m" + by (simp add: Groups.add_ac(2) funpow_add funpow_swap1) + also have "\ \ vi v (Suc n + m)" + using \\<^sub>b_iter_mono[OF assms] + by (auto simp only: add.commute funpow_add o_apply) + finally show ?thesis . +qed + +(* 6.3.1 *) +(* a) *) +lemma "vi v \ \\<^sub>b_opt" + using \\<^sub>b_lim. + +lemma "(\n. dist (vi v (Suc n)) (vi v n)) \ 0" + using thm_6_3_1_b_aux[of v] + by (auto simp only: dist_commute[of "((\\<^sub>b ^^ Suc _) v)"]) + + + +end + +context MDP_att_\ +begin + +text \ +The error of the resulting policy is bounded by the distance from its value to the value computed +by the value iteration plus the error in the value iteration itself. +We show that both are less than @{term "eps / 2"} when the algorithm terminates. +\ +lemma find_policy_error_bound: + assumes "eps > 0" "2 * l * dist v (\\<^sub>b v) < eps * (1-l)" + shows "dist (\\<^sub>b (mk_stationary_det (find_policy (\\<^sub>b v)))) \\<^sub>b_opt < eps" +proof - + let ?d = "mk_dec_det (find_policy (\\<^sub>b v))" + let ?p = "mk_stationary ?d" + (* shorter proof: by (auto simp: arg_max_SUP[OF find_policy_QR_is_arg_max] \\<^sub>b_split.rep_eq \_split_def )*) + have L_eq_\\<^sub>b: "L (mk_dec_det (find_policy v)) v = \\<^sub>b v" for v + unfolding find_policy_def + proof (intro antisym) + show "L (mk_dec_det (\s. arg_max_on (\a. L\<^sub>a a v s) (A s))) v \ \\<^sub>b v" + using Sup_att has_arg_max_arg_max abs_L_le + unfolding \\<^sub>b.rep_eq \_eq_SUP_det less_eq_bfun_def arg_max_on_def is_dec_det_def max_L_ex_def + by (auto intro!: cSUP_upper bounded_imp_bdd_above boundedI[of _ "r\<^sub>M + l * norm v"]) + next + show "\\<^sub>b v \ L (mk_dec_det (\s. arg_max_on (\a. L\<^sub>a a v s) (A s))) v" + unfolding less_eq_bfun_def \\<^sub>b.rep_eq \_eq_SUP_det + using Sup_att ex_dec_det + by (auto intro!: cSUP_least app_arg_max_ge simp: L_eq_L\<^sub>a_det max_L_ex_def is_dec_det_def) + qed + have "dist (\\<^sub>b ?p) (\\<^sub>b v) = dist (L ?d (\\<^sub>b ?p)) (\\<^sub>b v)" + using L_\_fix + by force + also have "\ \ dist (L ?d (\\<^sub>b ?p)) (\\<^sub>b (\\<^sub>b v)) + dist (\\<^sub>b (\\<^sub>b v)) (\\<^sub>b v)" + using dist_triangle + by blast + also have "\ = dist (L ?d (\\<^sub>b ?p)) (L ?d (\\<^sub>b v)) + dist (\\<^sub>b (\\<^sub>b v)) (\\<^sub>b v)" + by (auto simp: L_eq_\\<^sub>b) + also have "\ \ l * dist (\\<^sub>b ?p) (\\<^sub>b v) + l * dist (\\<^sub>b v) v" + using contraction_\ contraction_L + by (fastforce intro!: add_mono) + finally have aux: "dist (\\<^sub>b ?p) (\\<^sub>b v) \ l * dist (\\<^sub>b ?p) (\\<^sub>b v) + l * dist (\\<^sub>b v) v" . + hence "dist (\\<^sub>b ?p) (\\<^sub>b v) - l * dist (\\<^sub>b ?p) (\\<^sub>b v) \ l * dist (\\<^sub>b v) v" + by auto + hence "dist (\\<^sub>b ?p) (\\<^sub>b v) * (1 - l) \ l * dist (\\<^sub>b v) v" + by argo + hence "2 * dist (\\<^sub>b ?p) (\\<^sub>b v) * (1-l) \ 2 * (l * dist (\\<^sub>b v) v)" + using zero_le_disc mult_left_mono + by auto + also have "\ \ eps * (1-l)" + using assms + by (auto intro!: mult_left_mono simp: dist_commute pos_divide_le_eq) + finally have "2 * dist (\\<^sub>b ?p) (\\<^sub>b v) * (1 - l) \ eps * (1 - l)" . + hence "2 * dist (\\<^sub>b ?p) (\\<^sub>b v) \ eps" + using disc_lt_one mult_right_le_imp_le + by auto + moreover have "2 * dist (\\<^sub>b v) \\<^sub>b_opt < eps" + using dist_\\<^sub>b_opt_eps assms + by fastforce + moreover have "dist (\\<^sub>b ?p) \\<^sub>b_opt \ dist (\\<^sub>b ?p) (\\<^sub>b v) + dist (\\<^sub>b v) \\<^sub>b_opt" + using dist_triangle + by blast + ultimately show ?thesis + by auto +qed + +lemma vi_policy_opt: + assumes "0 < eps" + shows "dist (\\<^sub>b (mk_stationary_det (vi_policy eps v))) \\<^sub>b_opt < eps" + unfolding vi_policy_def + using assms +proof (induction eps v rule: value_iteration.induct) + case (1 v) + then show ?case + using find_policy_error_bound + by (subst value_iteration.simps) auto +qed + +lemma lemma_6_3_1_d: + assumes "eps > 0" + assumes "2 * l * dist (vi v (Suc n)) (vi v n) < eps * (1-l)" + shows "dist (vi v (Suc n)) \\<^sub>b_opt < eps / 2" + using dist_\\<^sub>b_opt_eps assms + by (simp add: dist_commute) + +end + +context MDP_act begin + +definition "find_policy' (v :: 's \\<^sub>b real) s = arb_act (opt_acts v s)" + +definition "vi_policy' eps v = find_policy' (value_iteration eps v)" + +lemma find_policy'_error_bound: + assumes "eps > 0" "2 * l * dist v (\\<^sub>b v) < eps * (1-l)" + shows "dist (\\<^sub>b (mk_stationary_det (find_policy' (\\<^sub>b v)))) \\<^sub>b_opt < eps" +proof - + let ?d = "mk_dec_det (find_policy' (\\<^sub>b v))" + let ?p = "mk_stationary ?d" + have L_eq_\\<^sub>b: "L (mk_dec_det (find_policy' v)) v = \\<^sub>b v" for v + unfolding find_policy'_def + by (metis \_improving_imp_\\<^sub>b \_improving_opt_acts) + have "dist (\\<^sub>b ?p) (\\<^sub>b v) = dist (L ?d (\\<^sub>b ?p)) (\\<^sub>b v)" + using L_\_fix + by force + also have "\ \ dist (L ?d (\\<^sub>b ?p)) (\\<^sub>b (\\<^sub>b v)) + dist (\\<^sub>b (\\<^sub>b v)) (\\<^sub>b v)" + using dist_triangle + by blast + also have "\ = dist (L ?d (\\<^sub>b ?p)) (L ?d (\\<^sub>b v)) + dist (\\<^sub>b (\\<^sub>b v)) (\\<^sub>b v)" + by (auto simp: L_eq_\\<^sub>b) + also have "\ \ l * dist (\\<^sub>b ?p) (\\<^sub>b v) + l * dist (\\<^sub>b v) v" + using contraction_\ contraction_L + by (fastforce intro!: add_mono) + finally have aux: "dist (\\<^sub>b ?p) (\\<^sub>b v) \ l * dist (\\<^sub>b ?p) (\\<^sub>b v) + l * dist (\\<^sub>b v) v" . + hence "dist (\\<^sub>b ?p) (\\<^sub>b v) - l * dist (\\<^sub>b ?p) (\\<^sub>b v) \ l * dist (\\<^sub>b v) v" + by auto + hence "dist (\\<^sub>b ?p) (\\<^sub>b v) * (1 - l) \ l * dist (\\<^sub>b v) v" + by argo + hence "2 * dist (\\<^sub>b ?p) (\\<^sub>b v) * (1-l) \ 2 * (l * dist (\\<^sub>b v) v)" + using zero_le_disc mult_left_mono + by auto + also have "\ \ eps * (1-l)" + using assms + by (auto intro!: mult_left_mono simp: dist_commute pos_divide_le_eq) + finally have "2 * dist (\\<^sub>b ?p) (\\<^sub>b v) * (1 - l) \ eps * (1 - l)". + hence "2 * dist (\\<^sub>b ?p) (\\<^sub>b v) \ eps" + using disc_lt_one mult_right_le_imp_le + by auto + moreover have "2 * dist (\\<^sub>b v) \\<^sub>b_opt < eps" + using dist_\\<^sub>b_opt_eps assms + by fastforce + moreover have "dist (\\<^sub>b ?p) \\<^sub>b_opt \ dist (\\<^sub>b ?p) (\\<^sub>b v) + dist (\\<^sub>b v) \\<^sub>b_opt" + using dist_triangle + by blast + ultimately show ?thesis + by auto +qed + +lemma vi_policy'_opt: + assumes "eps > 0" "l > 0" + shows "dist (\\<^sub>b (mk_stationary_det (vi_policy' eps v))) \\<^sub>b_opt < eps" + unfolding vi_policy'_def + using assms +proof (induction eps v rule: value_iteration.induct) + case (1 v) + then show ?case + using find_policy'_error_bound + by (subst value_iteration.simps) auto +qed + +end +end \ No newline at end of file diff --git a/thys/MDP-Algorithms/code/Code_DP.thy b/thys/MDP-Algorithms/code/Code_DP.thy new file mode 100644 --- /dev/null +++ b/thys/MDP-Algorithms/code/Code_DP.thy @@ -0,0 +1,526 @@ +(* Author: Maximilian Schäffeler *) + +theory Code_DP + imports + Value_Iteration + Policy_Iteration + Modified_Policy_Iteration + Splitting_Methods + +"HOL-Library.Code_Target_Numeral" +"Gauss_Jordan.Code_Generation_IArrays" +begin + +section \Code Generation for MDP Algorithms\ + +subsection \Least Argmax\ + +lemma least_list: + assumes "sorted xs" "\x \ set xs. P x" + shows "(LEAST x \ set xs. P x) = the (find P xs)" + using assms +proof (induction xs) + case (Cons a xs) + thus ?case + proof (cases "P a") + case False + have "(LEAST x \ set (a # xs). P x) = (LEAST x \ set xs. P x)" + using False Cons(2) + by simp metis + thus ?thesis + using False Cons + by simp + qed (auto intro: Least_equality) +qed auto + +definition "least_enum P = the (find P (sorted_list_of_set (UNIV :: ('b:: {finite, linorder}) set)))" + +lemma least_enum_eq: "\x. P x \ least_enum P = (LEAST x. P x)" + by (auto simp: least_list[symmetric] least_enum_def) + +definition "least_max_arg_max_list f init xs = + foldl (\(am, m) x. if f x > m then (x, f x) else (am, m)) init xs" + +lemma snd_least_max_arg_max_list: + "snd (least_max_arg_max_list f (n, f n) xs) = (MAX x \ insert n (set xs). f x)" + unfolding least_max_arg_max_list_def +proof (induction xs arbitrary: n) + case (Cons a xs) + then show ?case + by (cases "xs = []") (fastforce simp: max.assoc[symmetric])+ +qed auto + +lemma least_max_arg_max_list_snd_fst: "snd (least_max_arg_max_list f (x, f x) xs) = f (fst (least_max_arg_max_list f (x, f x) xs))" + by (induction xs arbitrary: x) (auto simp: least_max_arg_max_list_def) + +lemma fst_least_max_arg_max_list: + fixes f :: "_ \ _ :: linorder" + assumes "sorted (n#xs)" + shows "fst (least_max_arg_max_list f (n, f n) xs) + = (LEAST x. is_arg_max f (\x. x \ insert n (set xs)) x)" + unfolding least_max_arg_max_list_def + using assms proof (induction xs arbitrary: n) + case Nil + then show ?case + by (auto simp: is_arg_max_def intro!: Least_equality[symmetric]) +next + case (Cons a xs) + hence "sorted (a#xs)" + by auto + then show ?case + proof (cases "f a > f n") + case True + then show ?thesis + by (fastforce simp: is_arg_max_def Cons.IH[OF \sorted (a#xs)$ intro!: cong[of Least, OF refl]) + next + case False + have "(LEAST b. is_arg_max f (\x. x \ insert n (set (a # xs))) b) + = (LEAST b. is_arg_max f (\x. x \ (set (n # xs))) b)" + proof (cases "is_arg_max f (\x. x \ set (n #a# xs)) a") + case True + hence "(LEAST b. is_arg_max f (\x. x \ (set (n#a # xs))) b) = n" + using Cons False + by (fastforce simp: is_arg_max_linorder intro!: Least_equality) + thus ?thesis + using False True Cons + by (fastforce simp: is_arg_max_linorder intro!: Least_equality[symmetric]) + next + case False + thus ?thesis + by (fastforce simp: not_less is_arg_max_linorder intro!: cong[of Least, OF refl]) + qed + thus ?thesis + using False Cons + by (auto simp add: Cons.IH[OF \sorted (a#xs)\]) + qed +qed + +definition "least_arg_max_enum f X = ( + let xs = sorted_list_of_set (X :: (_ :: {finite, linorder}) set) in + fst (least_max_arg_max_list f (hd xs, f (hd xs)) (tl xs)))" + +definition "least_max_arg_max_enum f X = ( + let xs = sorted_list_of_set (X :: (_ :: {finite, linorder}) set) in + (least_max_arg_max_list f (hd xs, f (hd xs)) (tl xs)))" + +lemma least_arg_max_enum_correct: + assumes "X \ {}" + shows " + (least_arg_max_enum (f :: _ \ (_ :: linorder)) X) = (LEAST x. is_arg_max f (\x. x \ X) x)" +proof - + have *: "xs \ [] \ (x = hd xs \ x \ set (tl xs)) \ x \ set xs" for x xs + using list.set_sel list.exhaust_sel set_ConsD by metis + thus ?thesis + unfolding least_arg_max_enum_def + using assms + by (auto simp: Let_def fst_least_max_arg_max_list *) +qed + +lemma least_max_arg_max_enum_correct1: + assumes "X \ {}" + shows "fst (least_max_arg_max_enum (f :: _ \ (_ :: linorder)) X) = (LEAST x. is_arg_max f (\x. x \ X) x)" +proof - + have *: "xs \ [] \ (x = hd xs \ x \ set (tl xs)) \ x \ set xs" for x xs + using list.set_sel list.exhaust_sel set_ConsD by metis + thus ?thesis + using assms + by (auto simp: least_max_arg_max_enum_def Let_def fst_least_max_arg_max_list *) +qed + +lemma least_max_arg_max_enum_correct2: + assumes "X \ {}" + shows "snd (least_max_arg_max_enum (f :: _ \ (_ :: linorder)) X) = (MAX x \ X. f x)" +proof - + have *: "xs \ [] \ insert (hd xs) (set (tl xs)) = set xs" for xs + using list.exhaust_sel list.simps(15) + by metis + show ?thesis + using assms + by (auto simp: least_max_arg_max_enum_def Let_def snd_least_max_arg_max_list *) +qed + +subsection \Functions as Vectors\ +typedef ('a, 'b) Fun = "UNIV :: ('a \ 'b) set" + by blast + +setup_lifting type_definition_Fun + +lift_definition to_Fun :: "('a \ 'b) \ ('a, 'b) Fun" is id. + +definition "fun_to_vec (v :: ('a::finite, 'b) Fun) = vec_lambda (Rep_Fun v)" + +lift_definition vec_to_fun :: "'b^'a \ ('a, 'b) Fun" is vec_nth. + +lemma Fun_inverse[simp]: "Rep_Fun (Abs_Fun f) = f" + using Abs_Fun_inverse by auto + +lift_definition zero_Fun :: "('a, 'b::zero) Fun" is 0. + +code_datatype vec_to_fun + +lemmas vec_to_fun.rep_eq[code] + +instantiation Fun :: (enum, equal) equal +begin +definition "equal_Fun (f :: ('a::enum, 'b::equal) Fun) g = (Rep_Fun f = Rep_Fun g)" +instance + by standard (auto simp: equal_Fun_def Rep_Fun_inject) +end + +subsection \Bounded Functions as Vectors\ +lemma Bfun_inverse_fin[simp]: "apply_bfun (Bfun (f :: 'c :: finite \ _)) = f" + using finite by (fastforce intro!: Bfun_inverse simp: bfun_def) + +definition "bfun_to_vec (v :: ('a::finite) \\<^sub>b ('b::metric_space)) = vec_lambda v" +definition "vec_to_bfun v = Bfun (vec_nth v)" + +code_datatype vec_to_bfun + +lemma apply_bfun_vec_to_bfun[code]: "apply_bfun (vec_to_bfun f) x = f x" + by (auto simp: vec_to_bfun_def) + +lemma [code]: "0 = vec_to_bfun 0" + by (auto simp: vec_to_bfun_def) + +subsection \IArrays with Lengths in the Type\ + +typedef ('s :: mod_type, 'a) iarray_type = "{arr :: 'a iarray. IArray.length arr = CARD('s)}" + using someI_ex[OF Ex_list_of_length] + by (auto intro!: exI[of _ "IArray (SOME xs. length xs = CARD('s))"]) + +setup_lifting type_definition_iarray_type + +lift_definition fun_to_iarray_t :: "('s::{mod_type} \ 'a) \ ('s, 'a) iarray_type" is "\f. IArray.of_fun (\s. f (from_nat s)) (CARD('s))" + by auto + +lift_definition iarray_t_sub :: "('s::mod_type, 'a) iarray_type \ 's \ 'a" is "\v x. IArray.sub v (to_nat x)". + +lift_definition iarray_to_vec :: "('s, 'a) iarray_type \ 'a^'s::{mod_type, finite}" + is "\v. (\ s. IArray.sub v (to_nat s))". + +lift_definition vec_to_iarray :: "'a^'s::{mod_type, finite} \ ('s, 'a) iarray_type" + is "\v. IArray.of_fun (\s. v ((from_nat s) :: 's)) (CARD('s))" + by auto + +lemma length_iarray_type [simp]: "length (IArray.list_of (Rep_iarray_type (v:: ('s::{mod_type}, 'a) iarray_type))) = CARD('s)" + using Rep_iarray_type by auto + +lemma iarray_t_eq_iff: "(v = w) = (\x. iarray_t_sub v x = iarray_t_sub w x)" + unfolding iarray_t_sub.rep_eq IArray.sub_def + by (metis Rep_iarray_type_inject iarray_exhaust2 length_iarray_type list_eq_iff_nth_eq to_nat_from_nat_id) + +lemma iarray_to_vec_inv: "iarray_to_vec (vec_to_iarray v) = v" + by (auto simp: to_nat_less_card iarray_to_vec.rep_eq vec_to_iarray.rep_eq vec_eq_iff) + +lemma vec_to_iarray_inv: "vec_to_iarray (iarray_to_vec v) = v" + by (auto simp: to_nat_less_card iarray_to_vec.rep_eq vec_to_iarray.rep_eq iarray_t_eq_iff iarray_t_sub.rep_eq) + +code_datatype iarray_to_vec + +lemma vec_nth_iarray_to_vec[code]: "vec_nth (iarray_to_vec v) x = iarray_t_sub v x" + by (auto simp: iarray_to_vec.rep_eq iarray_t_sub.rep_eq) + +lemma vec_lambda_iarray_t[code]: "vec_lambda v = iarray_to_vec (fun_to_iarray_t v)" + by (auto simp: iarray_to_vec.rep_eq fun_to_iarray_t.rep_eq to_nat_less_card) + +lemma zero_iarray[code]: "0 = iarray_to_vec (fun_to_iarray_t 0)" + by (auto simp: iarray_to_vec.rep_eq fun_to_iarray_t.rep_eq to_nat_less_card vec_eq_iff) + +subsection \Value Iteration\ + +locale vi_code = + MDP_ord A K r l for A :: "'s::mod_type \ ('a::{finite, wellorder}) set" + and K :: "('s::{finite, mod_type} \ 'a::{finite, wellorder}) \ 's pmf" and r l +begin +definition "vi_test (v::'s\\<^sub>b real) v' eps = 2 * l * dist v v'" + +partial_function (tailrec) value_iteration_partial where [code]: "value_iteration_partial eps v = + (let v' = \\<^sub>b v in + (if 2 * l * dist v v' < eps * (1 - l) then v' else (value_iteration_partial eps v')))" + +lemma vi_eq_partial: "eps > 0 \ value_iteration_partial eps v = value_iteration eps v" +proof (induction eps v rule: value_iteration.induct) + case (1 eps v) + then show ?case + by (auto simp: Let_def value_iteration.simps value_iteration_partial.simps) +qed + +definition "L_det d = L (mk_dec_det d)" + +lemma code_L_det [code]: "L_det d (vec_to_bfun v) = vec_to_bfun (\ s. L\<^sub>a (d s) (vec_nth v) s)" + by (auto simp: L_det_def vec_to_bfun_def L_eq_L\<^sub>a_det) + +lemma code_\\<^sub>b [code]: "\\<^sub>b (vec_to_bfun v) = vec_to_bfun (\ s. (MAX a \ A s. r (s, a) + l * measure_pmf.expectation (K (s, a)) (vec_nth v)))" + by (auto simp: vec_to_bfun_def \\<^sub>b_fin_eq_det A_ne cSup_eq_Max) + +lemma code_value_iteration[code]: "value_iteration eps (vec_to_bfun v) = + (if eps \ 0 then \\<^sub>b (vec_to_bfun v) else value_iteration_partial eps (vec_to_bfun v))" + by (simp add: value_iteration.simps vi_eq_partial) + +lift_definition find_policy_impl :: "('s \\<^sub>b real) \ ('s, 'a) Fun" is "\v. find_policy' v". +lemma code_find_policy_impl: "find_policy_impl v = vec_to_fun (\ s. (LEAST x. x \ opt_acts v s))" + by (auto simp: vec_to_fun_def find_policy_impl_def find_policy'_def Abs_Fun_inject) + +lemma code_find_policy_impl_opt[code]: "find_policy_impl v = vec_to_fun (\ s. least_arg_max_enum (\a. L\<^sub>a a v s) (A s))" + by (auto simp: is_opt_act_def code_find_policy_impl least_arg_max_enum_correct[OF A_ne]) + +lemma code_vi_policy'[code]: "vi_policy' eps v = Rep_Fun (find_policy_impl (value_iteration eps v))" + unfolding vi_policy'_def find_policy_impl_def by auto + +subsection \Policy Iteration\ + +partial_function (tailrec) policy_iteration_partial where [code]: "policy_iteration_partial d = + (let d' = policy_step d in if d = d' then d else policy_iteration_partial d')" + +lemma pi_eq_partial: "d \ D\<^sub>D \ policy_iteration_partial d = policy_iteration d" +proof (induction d rule: policy_iteration.induct) + case (1 d) + then show ?case + by (auto simp: Let_def is_dec_det_pi policy_step_def policy_iteration_partial.simps) +qed + +definition "P_mat d = (\ i j. pmf (K (i, Rep_Fun d i)) j)" + +definition "r_vec' d = (\ i. r(i, Rep_Fun d i))" + +lift_definition policy_eval' :: "('s::{mod_type, finite}, 'a) Fun \ ('s \\<^sub>b real)" is "policy_eval". + +lemma mat_eq_blinfun: "mat 1 - l *\<^sub>R (P_mat (vec_to_fun d)) = blinfun_to_matrix (id_blinfun - l *\<^sub>R \ \<^sub>1 (mk_dec_det (vec_nth d)))" + unfolding blinfun_to_matrix_diff blinfun_to_matrix_id blinfun_to_matrix_scaleR + unfolding blinfun_to_matrix_def P_mat_def \ \<^sub>1.rep_eq K_st_def push_exp_def matrix_def axis_def vec_to_fun_def + by (auto simp: if_distrib mk_dec_det_def integral_measure_pmf[of UNIV] pmf_expectation_bind[of UNIV] pmf_bind cong: if_cong) + +lemma \\<^sub>b_vec: "policy_eval' (vec_to_fun d) = vec_to_bfun (matrix_inv (mat 1 - l *\<^sub>R (P_mat (vec_to_fun d))) *v (r_vec' (vec_to_fun d)))" +proof - + let ?d = "Rep_Fun (vec_to_fun d)" + have "vec_to_bfun (matrix_inv (mat 1 - l *\<^sub>R (P_mat (vec_to_fun d))) *v (r_vec' (vec_to_fun d))) = matrix_to_blinfun (matrix_inv (mat 1 - l *\<^sub>R (P_mat (vec_to_fun d)))) (vec_to_bfun (r_vec' (vec_to_fun d)))" + by (auto simp: matrix_to_blinfun_mult vec_to_bfun_def r_vec'_def) + also have "\ = matrix_to_blinfun (matrix_inv (blinfun_to_matrix (id_blinfun - l *\<^sub>R \ \<^sub>1 (mk_dec_det ?d)))) (r_dec\<^sub>b (mk_dec_det ?d))" + unfolding mat_eq_blinfun + by (auto simp: r_vec'_def vec_to_bfun_def vec_lambda_inverse r_dec\<^sub>b_def vec_to_fun_def) + also have "\ = inv\<^sub>L (id_blinfun - l *\<^sub>R \ \<^sub>1 (mk_dec_det ?d)) (r_dec\<^sub>b (mk_dec_det ?d))" + by (auto simp: blinfun_to_matrix_inverse(2)[symmetric] invertible\<^sub>L_inf_sum matrix_to_blinfun_inv) + finally have "vec_to_bfun (matrix_inv (mat 1 - l *\<^sub>R (P_mat (vec_to_fun d))) *v (r_vec' (vec_to_fun d))) = inv\<^sub>L (id_blinfun - l *\<^sub>R \ \<^sub>1 (mk_dec_det ?d)) (r_dec\<^sub>b (mk_dec_det ?d))". + thus ?thesis + by (auto simp: \_stationary policy_eval'.rep_eq policy_eval_def inv\<^sub>L_inf_sum blincomp_scaleR_right) +qed + +lemma \\<^sub>b_vec_opt[code]: "policy_eval' (vec_to_fun d) = vec_to_bfun (Matrix_To_IArray.iarray_to_vec (Matrix_To_IArray.vec_to_iarray ((fst (Gauss_Jordan_PA ((mat 1 - l *\<^sub>R (P_mat (vec_to_fun d)))))) *v (r_vec' (vec_to_fun d)))))" + using \\<^sub>b_vec + by (auto simp: mat_eq_blinfun matrix_inv_Gauss_Jordan_PA blinfun_to_matrix_inverse(1) invertible\<^sub>L_inf_sum iarray_to_vec_vec_to_iarray) + +lift_definition policy_improvement' :: "('s, 'a) Fun \ ('s \\<^sub>b real) \ ('s, 'a) Fun" + is policy_improvement. + +lemma [code]: "policy_improvement' (vec_to_fun d) v = vec_to_fun (\ s. (if is_arg_max (\a. L\<^sub>a a v s) (\a. a \ A s) (d s) then d s else LEAST x. is_arg_max (\a. L\<^sub>a a v s) (\a. a \ A s) x))" + by (auto simp: is_opt_act_def policy_improvement'_def vec_to_fun_def vec_lambda_inverse policy_improvement_def Abs_Fun_inject) + +lift_definition policy_step' :: "('s, 'a) Fun \ ('s, 'a) Fun" + is policy_step. + +lemma [code]: "policy_step' d = policy_improvement' d (policy_eval' d)" + by (auto simp: policy_step'_def policy_step_def policy_improvement'_def policy_eval'_def apply_bfun_inverse) + +lift_definition policy_iteration_partial' :: "('s, 'a) Fun \ ('s, 'a) Fun" + is policy_iteration_partial. + +lemma [code]: "policy_iteration_partial' d = (let d' = policy_step' d in if d = d' then d else policy_iteration_partial' d')" + by (auto simp: policy_iteration_partial'.rep_eq policy_step'.rep_eq Let_def policy_iteration_partial.simps Rep_Fun_inject[symmetric]) + +lift_definition policy_iteration' :: "('s, 'a) Fun \ ('s, 'a) Fun" is policy_iteration. + +lemma code_policy_iteration'[code]: "policy_iteration' d = + (if Rep_Fun d \ D\<^sub>D then d else (policy_iteration_partial' d))" + by transfer (auto simp: pi_eq_partial) + +lemma code_policy_iteration[code]: "policy_iteration d = Rep_Fun (policy_iteration' (vec_to_fun (vec_lambda d)))" + by (auto simp add: vec_lambda_inverse policy_iteration'.rep_eq vec_to_fun_def) + +subsection \Gauss-Seidel Iteration\ + +partial_function (tailrec) gs_iteration_partial where + [code]: "gs_iteration_partial eps v = ( + let v' = (GS_rec_fun\<^sub>b v) in + (if 2 * l * dist v v' < eps * (1 - l) then v' else gs_iteration_partial eps v'))" + +lemma gs_iteration_partial_eq: "eps > 0 \ gs_iteration_partial eps v = gs_iteration eps v" + by (induction eps v rule: gs_iteration.induct) (auto simp: gs_iteration_partial.simps Let_def gs_iteration.simps) + +lemma gs_iteration_code_opt[code]: "gs_iteration eps v = (if eps \ 0 then GS_rec_fun\<^sub>b v else gs_iteration_partial eps v)" + by (auto simp: gs_iteration_partial_eq gs_iteration.simps) + +definition "vec_upd v i x = (\ j. if i = j then x else v j)" + +lemma GS_rec_eq_fold: "GS_rec v = foldl (\v s. (vec_upd v s (GS_iter_max v s))) v (sorted_list_of_set UNIV)" +proof - + have "vec_lambda (foldl (\v s. v(s := GS_rec_iter v s)) (() v) xs) = foldl (\v s. vec_upd v s (GS_iter_max v s)) v xs" for xs + proof (induction xs arbitrary: v) + case (Cons a xs) + show ?case + by (auto simp: vec_upd_def[of v] Cons[symmetric] eq_commute GS_rec_iter_eq_iter_max cong: if_cong) + qed auto + thus ?thesis + unfolding GS_rec_def GS_rec_fun_code' + by auto +qed + +lemma GS_rec_fun_code''''[code]: "GS_rec_fun\<^sub>b (vec_to_bfun v) = vec_to_bfun (foldl (\v s. (vec_upd v s (GS_iter_max v s))) v (sorted_list_of_set UNIV))" + by (auto simp add: GS_rec_eq_fold[symmetric] GS_rec_eq_elem GS_rec_fun\<^sub>b.rep_eq vec_to_bfun_def) + +lemma GS_iter_max_code [code]: "GS_iter_max v s = (MAX a \ A s. GS_iter a v s)" + using A_ne + by (auto simp: GS_iter_max_def cSup_eq_Max) + +lift_definition opt_policy_gs'' :: "('s \\<^sub>b real) \ ('s, 'a) Fun" is opt_policy_gs. + +declare opt_policy_gs''.rep_eq[symmetric, code] + +lemma GS_rec_am_code'_prod: "GS_rec_am_code' v d = + (\s'. ( + let (v', d') = foldl (\(v,d) s. (v(s := (GS_iter_max (vec_lambda v) s)), d(s := GS_iter_arg_max (vec_lambda v) s))) (vec_nth v, d) (sorted_list_of_set UNIV) + in (v' s', d' s')))" +proof - + have 1: "(\x. (f x, g x))(y := (z, w)) = (\x. ((f(y := z)) x, (g(y := w)) x))" for f g y z w + by auto + have 2: "(() f)(a := y) = () (vec_lambda ((vec_nth f)(a := y)))" for f a y + by auto + have "foldl (\vd s. vd(s := (GS_iter_max (\ s. fst (vd s)) s, GS_iter_arg_max (\ s. fst (vd s)) s))) (\s. (v s, d s)) xs = + (\s'. let (v', d') = foldl (\(v, d) s. (v(s := GS_iter_max (vec_lambda v) s), d(s := GS_iter_arg_max (vec_lambda v) s))) (() v, d) xs in (v' s', d' s'))" for xs + proof (induction xs arbitrary: v d) + case Nil + then show ?case by auto + next + case (Cons a xs) + show ?case + by (simp add: 1 Cons.IH[of "(vec_lambda ((() v)(a := GS_iter_max v a)))", unfolded 2[symmetric]] del: fun_upd_apply) + qed + thus ?thesis + unfolding GS_rec_am_code'_def by blast +qed + + +lemma code_GS_rec_am_arr_opt[code]: "opt_policy_gs'' (vec_to_bfun v) = vec_to_fun ((snd (foldl (\(v, d) s. + let (am, m) = least_max_arg_max_enum (\a. r (s, a) + l * (\s' \ UNIV. pmf (K (s,a)) s' * v s')) (A s) in + (vec_upd v s m, vec_upd d s am)) + (v, (\ s. (least_enum (\a. a \ A s)))) (sorted_list_of_set UNIV))))" +proof - + have 1: "opt_policy_gs'' v' = Abs_Fun (opt_policy_gs v')" for v' + by (simp add: opt_policy_gs''.abs_eq) + have 2: "opt_policy_gs (vec_to_bfun v) = opt_policy_gs' d v" for v d + by (metis Bfun_inverse_fin opt_policy_gs_eq' vec_lambda_eta vec_to_bfun_def) + have 3: "opt_policy_gs' d v = (\s. snd (GS_rec_am_code v d s))" for d + by (simp add: GS_rec_am_code_eq) + have 4: "GS_rec_am_code v d = (\s'. let (v', d') = foldl (\(v, d) s. (v(s := GS_iter_max (vec_lambda v) s), d(s := GS_iter_arg_max (vec_lambda v) s))) (() v, d) (sorted_list_of_set UNIV) in (v' s', d' s'))" for d s v + using GS_rec_am_code' GS_rec_am_code'_prod by presburger + have 5: "foldl (\(v, d) s. (v(s := GS_iter_max (vec_lambda v) s), d(s := GS_iter_arg_max (vec_lambda v) s))) (() v, () d) xs = + (let (v', d') = foldl (\(v, d) s. (vec_upd v s (GS_iter_max v s), vec_upd d s (GS_iter_arg_max v s))) (v, d) xs in (vec_nth v', vec_nth d'))" for d v xs + proof (induction xs arbitrary: d v) + case Nil + then show ?case by auto + next + case (Cons a xs) + show ?case + unfolding vec_lambda_inverse Let_def + using Cons[symmetric, unfolded Let_def] + by simp (auto simp: vec_lambda_inverse vec_upd_def Let_def eq_commute cong: if_cong) + qed + have 6: "opt_policy_gs'' (vec_to_bfun v) = vec_to_fun (snd (foldl (\(v, d) s. (vec_upd v s (GS_iter_max v s), vec_upd d s (GS_iter_arg_max v s))) (v, d) (sorted_list_of_set UNIV)))" for d + unfolding 1 + unfolding 2[of _ "Rep_Fun (vec_to_fun d)"] + unfolding 3 + unfolding 4 + using 5 + by (auto simp: Let_def case_prod_beta vec_to_fun_def) + show ?thesis + unfolding Let_def case_prod_beta + unfolding least_max_arg_max_enum_correct1[OF A_ne] + using least_max_arg_max_enum_correct2[OF A_ne] + unfolding least_max_arg_max_enum_correct2[OF A_ne] + using 6 fin_actions A_ne + unfolding GS_iter_max_def GS_iter_def GS_iter_arg_max_def + by (auto simp: cSup_eq_Max split_beta') +qed + + +subsection \Modified Policy Iteration\ + +sublocale MDP_MPI A K r l "\X. Least (\x. x \ X)" + using MDP_act_axioms MDP_reward_axioms + by unfold_locales auto + + +definition "d0 s = (LEAST a. a \ A s)" +lift_definition d0' :: "('s, 'a) Fun" is d0. + +lemma d0_dec_det: "is_dec_det d0" + using A_ne unfolding d0_def is_dec_det_def + by simp + +lemma v0_code[code]: "v0_mpi\<^sub>b = vec_to_bfun (\ s. r_min / (1 - l))" + by (auto simp: v0_mpi\<^sub>b_def v0_mpi_def vec_to_bfun_def ) + +lemma d0'_code[code]: "d0' = vec_to_fun (\ s. (LEAST a. a \ A s))" + by (auto simp: d0'.rep_eq d0_def Rep_Fun_inject[symmetric] vec_to_fun_def) + +lemma step_value_code[code]: "L_pow v d m = (L_det d ^^ Suc m) v" + unfolding L_pow_def L_det_def + by auto + +partial_function (tailrec) mpi_partial where [code]: "mpi_partial eps d v m = + (let d' = policy_improvement d v in ( + if 2 * l * dist v (\\<^sub>b v) < eps * (1 - l) + then (d', v) + else mpi_partial eps d' (L_pow v d' (m 0 v)) (\n. m (Suc n))))" + +lemma mpi_partial_eq_algo: + assumes "eps > 0" "d \ D\<^sub>D" "v \ \\<^sub>b v" + shows "mpi_partial eps d v m = mpi_algo eps d v m" +proof - + have "mpi_algo_dom eps (d,v,m)" + using assms termination_mpi_algo by blast + thus ?thesis + by (induction rule: mpi_algo.pinduct) (auto simp: Let_def mpi_algo.psimps mpi_partial.simps) +qed + +lift_definition mpi_partial' :: "real \ ('s, 'a) Fun \ ('s \\<^sub>b real) \ (nat \ ('s \\<^sub>b real) \ nat) + \ ('s, 'a) Fun \ ('s \\<^sub>b real)" is mpi_partial. + +lemma mpi_partial'_code[code]: "mpi_partial' eps d v m = + (let d' = policy_improvement' d v in ( + if 2 * l * dist v (\\<^sub>b v) < eps * (1 - l) + then (d', v) + else mpi_partial' eps d' (L_pow v (Rep_Fun d') (m 0 v)) (\n. m (Suc n))))" + by (auto simp: mpi_partial'_def mpi_partial.simps Let_def policy_improvement'_def) + +lemma r_min_code[code_unfold]: "r_min = (MIN s. MIN a. r(s,a))" + by (auto simp: cInf_eq_Min) + +lemma mpi_user_code[code]: "mpi_user eps m = + (if eps \ 0 then undefined else + let (d, v) = mpi_partial' eps d0' v0_mpi\<^sub>b m in (Rep_Fun d, v))" + unfolding mpi_user_def case_prod_beta mpi_partial'_def + using mpi_partial_eq_algo A_ne v0_mpi\<^sub>b_le_\\<^sub>b d0_dec_det + by (auto simp: d0'.rep_eq d0_def[symmetric]) +end + +subsection \Auxiliary Equations\ +lemma [code_unfold]: "dist (f::'a::finite \\<^sub>b 'b::metric_space) g = (MAX a. dist (apply_bfun f a) (g a))" + by (auto simp: dist_bfun_def cSup_eq_Max) + +lemma member_code[code del]: "x \ List.coset xs \ \ List.member xs x" + by (auto simp: in_set_member) + +lemma [code]: "iarray_to_vec v + iarray_to_vec u = (Matrix_To_IArray.iarray_to_vec (Rep_iarray_type v + Rep_iarray_type u))" + by (metis (no_types, lifting) Matrix_To_IArray.vec_to_iarray_def iarray_to_vec_vec_to_iarray vec_to_iarray.rep_eq vec_to_iarray_inv vec_to_iarray_plus) + +lemma [code]: "iarray_to_vec v - iarray_to_vec u = (Matrix_To_IArray.iarray_to_vec (Rep_iarray_type v - Rep_iarray_type u))" + unfolding minus_iarray_def Matrix_To_IArray.iarray_to_vec_def iarray_to_vec_def + by (auto simp: vec_eq_iff to_nat_less_card) + +lemma matrix_to_iarray_minus[code_unfold]: "matrix_to_iarray (A - B) = matrix_to_iarray A - matrix_to_iarray B" + unfolding matrix_to_iarray_def o_def minus_iarray_def Matrix_To_IArray.vec_to_iarray_def + by simp + +declare matrix_to_iarray_fst_Gauss_Jordan_PA[code_unfold] + +end \ No newline at end of file diff --git a/thys/MDP-Algorithms/code/Code_Mod.thy b/thys/MDP-Algorithms/code/Code_Mod.thy new file mode 100644 --- /dev/null +++ b/thys/MDP-Algorithms/code/Code_Mod.thy @@ -0,0 +1,109 @@ +theory Code_Mod + imports Code_DP +begin +section \Code Generation for Concrete Finite MDPs\ + +locale mod_MDP = + fixes transition :: "'s::{enum, mod_type} \ 'a::{enum, mod_type} \ 's pmf" + and A :: "'s \ 'a set" + and reward :: "'s \ 'a \ real" + and discount :: "real" +begin + +sublocale mdp: vi_code + "\s. (if Set.is_empty (A s) then UNIV else A s)" + transition + reward + "(if 1 \ discount \ discount < 0 then 0 else discount)" + defines \\<^sub>b = mdp.\\<^sub>b + and L_det = mdp.L_det + and value_iteration = mdp.value_iteration + and vi_policy' = mdp.vi_policy' + and find_policy' = mdp.find_policy' + and find_policy_impl = mdp.find_policy_impl + and is_opt_act = mdp.is_opt_act + and value_iteration_partial = mdp.value_iteration_partial + and policy_iteration = mdp.policy_iteration + and is_dec_det = mdp.is_dec_det + and policy_step = mdp.policy_step + and policy_improvement = mdp.policy_improvement + and policy_eval = mdp.policy_eval + and mk_markovian = mdp.mk_markovian + and policy_eval' = mdp.policy_eval' + and policy_iteration_partial' = mdp.policy_iteration_partial' + and policy_iteration' = mdp.policy_iteration' + and policy_iteration_policy_step' = mdp.policy_step' + and policy_iteration_policy_eval' = mdp.policy_eval' + and policy_iteration_policy_improvement' = mdp.policy_improvement' + and gs_iteration = mdp.gs_iteration + and gs_iteration_partial = mdp.gs_iteration_partial + and vi_gs_policy = mdp.vi_gs_policy + and opt_policy_gs = mdp.opt_policy_gs + and opt_policy_gs'' = mdp.opt_policy_gs'' + and P_mat = mdp.P_mat + and r_vec' = mdp.r_vec' + and GS_rec_fun\<^sub>b = mdp.GS_rec_fun\<^sub>b + and GS_iter_max = mdp.GS_iter_max + and GS_iter = mdp.GS_iter + and mpi_user = mdp.mpi_user + and v0_mpi\<^sub>b = mdp.v0_mpi\<^sub>b + and mpi_partial' = mdp.mpi_partial' + and L_pow = mdp.L_pow + and v0_mpi = mdp.v0_mpi + and r_min = mdp.r_min + and d0 = mdp.d0 + and d0' = mdp.d0' + and \\<^sub>b = mdp.\\<^sub>b + and vi_test = mdp.vi_test + by unfold_locales (auto simp add: Set.is_empty_def) +end + +global_interpretation mod_MDP transition A reward discount + for transition A reward discount + defines mod_MDP_\\<^sub>b = mdp.\\<^sub>b + and mod_MDP_\\<^sub>b_L_det = mdp.L_det + and mod_MDP_value_iteration = mdp.value_iteration + and mod_MDP_vi_policy' = mdp.vi_policy' + and mod_MDP_find_policy' = mdp.find_policy' + and mod_MDP_find_policy_impl = mdp.find_policy_impl + and mod_MDP_is_opt_act = mdp.is_opt_act + and mod_MDP_value_iteration_partial = mdp.value_iteration_partial + and mod_MDP_policy_iteration = mdp.policy_iteration + and mod_MDP_is_dec_det = mdp.is_dec_det + and mod_MDP_policy_step = mdp.policy_step + and mod_MDP_policy_improvement = mdp.policy_improvement + and mod_MDP_policy_eval = mdp.policy_eval + and mod_MDP_mk_markovian = mdp.mk_markovian + and mod_MDP_policy_eval' = mdp.policy_eval' + and mod_MDP_policy_iteration_partial' = mdp.policy_iteration_partial' + and mod_MDP_policy_iteration' = mdp.policy_iteration' + and mod_MDP_policy_iteration_policy_step' = mdp.policy_step' + and mod_MDP_policy_iteration_policy_eval' = mdp.policy_eval' + and mod_MDP_policy_iteration_policy_improvement' = mdp.policy_improvement' + and mod_MDP_gs_iteration = mdp.gs_iteration + and mod_MDP_gs_iteration_partial = mdp.gs_iteration_partial + and mod_MDP_vi_gs_policy = mdp.vi_gs_policy + and mod_MDP_opt_policy_gs = mdp.opt_policy_gs + and mod_MDP_opt_policy_gs'' = mdp.opt_policy_gs'' + and mod_MDP_P_mat = mdp.P_mat + and mod_MDP_r_vec' = mdp.r_vec' + and mod_MDP_GS_rec_fun\<^sub>b = mdp.GS_rec_fun\<^sub>b + and mod_MDP_GS_iter_max = mdp.GS_iter_max + and mod_MDP_GS_iter = mdp.GS_iter + and mod_MDP_mpi_user = mdp.mpi_user + and mod_MDP_v0_mpi\<^sub>b = mdp.v0_mpi\<^sub>b + and mod_MDP_mpi_partial' = mdp.mpi_partial' + and mod_MDP_L_pow = mdp.L_pow + and mod_MDP_v0_mpi = mdp.v0_mpi + and mod_MDP_r_min = mdp.r_min + and mod_MDP_d0 = mdp.d0 + and mod_MDP_d0' = mdp.d0' + and mod_MDP_\\<^sub>b = mdp.\\<^sub>b + and mod_MDP_vi_test = mdp.vi_test + . + +(* +value "mod_MDP_vi_gs_policy (\_::(2\2). return_pmf (1::2)) (\_. {}) (\_. 0) 0.5 0.5 (vec_to_bfun (\ i. 1)) 0" +*) + +end diff --git a/thys/MDP-Algorithms/code/Code_Real_Approx_By_Float_Fix.thy b/thys/MDP-Algorithms/code/Code_Real_Approx_By_Float_Fix.thy new file mode 100644 --- /dev/null +++ b/thys/MDP-Algorithms/code/Code_Real_Approx_By_Float_Fix.thy @@ -0,0 +1,59 @@ +theory Code_Real_Approx_By_Float_Fix + imports + "HOL-Library.Code_Real_Approx_By_Float" + "Gauss_Jordan.Code_Real_Approx_By_Float_Haskell" +begin +(*<*) +section \Fix for Floating Point Approximation using Haskell\ + +code_printing + type_constructor real \ (Haskell) "Prelude.Double" (*Double precision*) + | constant "0 :: real" \ (Haskell) "0.0" + | constant "1 :: real" \ (Haskell) "1.0" + | constant "real_of_integer" \ (Haskell) "Prelude.fromIntegral (_)" + | class_instance real :: "HOL.equal" => (Haskell) - (*This is necessary. See the tutorial on code generation, page 29*) + | constant "HOL.equal :: real \ real \ bool" \ + (Haskell) "_ == _" + | constant "(<) :: real => real => bool" \ + (Haskell) "_ < _" + | constant "($$ :: real => real => bool" \ + (Haskell) "_ <= _" + | constant "(+) :: real \ real \ real" \ + (Haskell) "_ + _" + | constant "(-) :: real \ real \ real" \ + (Haskell) "_ - _" + | constant "(*) :: real \ real \ real" \ + (Haskell) "_ * _" + | constant "(/) :: real \ real \ real" \ + (Haskell) "_ '/ _" + | constant "uminus :: real => real" \ + (Haskell) "Prelude.negate" + | constant "sqrt :: real => real" \ + (Haskell) "Prelude.sqrt" + | constant Code_Real_Approx_By_Float.real_exp \ + (Haskell) "Prelude.exp" + | constant ln \ + (Haskell) "Prelude.log" + | constant cos \ + (Haskell) "Prelude.cos" + | constant sin \ + (Haskell) "Prelude.sin" + | constant tan \ + (Haskell) "Prelude.tan" + | constant pi \ + (Haskell) "Prelude.pi" + | constant arctan \ + (Haskell) "Prelude.atan" + | constant arccos \ + (Haskell) "Prelude.acos" + | constant arcsin \ + (Haskell) "Prelude.asin" + +code_printing + constant Realfract \ (Haskell) + "(Prelude.fromIntegral (integer'_of'_int _) '/ Prelude.fromIntegral (integer'_of'_int _))" + +code_printing + constant Realfract \ (SML) "(Real.fromInt (IntInf.toInt (integer'_of'_int _))) '// Real.fromInt (IntInf.toInt (integer'_of'_int _))" +(*>*) +end \ No newline at end of file diff --git a/thys/MDP-Algorithms/document/root.bib b/thys/MDP-Algorithms/document/root.bib new file mode 100644 --- /dev/null +++ b/thys/MDP-Algorithms/document/root.bib @@ -0,0 +1,13 @@ +@book{Puterman, + author = {Martin L. Puterman}, + title = {Markov Decision Processes: Discrete Stochastic Dynamic Programming}, + series = {Wiley Series in Probability and Statistics}, + publisher = {Wiley}, + year = {1994}, + url = {https://doi.org/10.1002/9780470316887}, + doi = {10.1002/9780470316887}, + isbn = {978-0-47161977-2}, + timestamp = {Mon, 22 Jul 2019 15:00:49 +0200}, + biburl = {https://dblp.org/rec/books/wi/Puterman94.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} diff --git a/thys/MDP-Algorithms/document/root.tex b/thys/MDP-Algorithms/document/root.tex new file mode 100644 --- /dev/null +++ b/thys/MDP-Algorithms/document/root.tex @@ -0,0 +1,69 @@ +\documentclass[11pt,a4paper]{article} +\usepackage{isabelle,isabellesym} + +% further packages required for unusual symbols (see also +% isabellesym.sty), use only when needed + +\usepackage{amssymb, amsmath, amsfonts} + %for \, \, \, \, \, \, + %\, \, \, \, \, + %\, \, \ + +%\usepackage{eurosym} + %for \ + +\usepackage[only,bigsqcap]{stmaryrd} + %for \ + +%\usepackage{eufrak} + %for \ ... \, \ ... \ (also included in amssymb) + +%\usepackage{textcomp} + %for \, \, \, \, \, + %\ + +% this should be the last package used +\usepackage{pdfsetup} + +% urls in roman style, theory text in math-similar italics +\urlstyle{rm} +\isabellestyle{it} + +% for uniform font size +%\renewcommand{\isastyle}{\isastyleminor} + + +\begin{document} + +\title{Verified Algorithms for Solving Markov Decision Processes} +\author{Maximilian Schäffeler and Mohammad Abdulaziz} +\maketitle + +\abstract{ +We present a formalization of algorithms for solving Markov Decision Processes (MDPs) with formal guarantees on the optimality of their +solutions. +In particular we build on our analysis of the Bellman operator for discounted infinite horizon MDPs. +From the iterator rule on the Bellman operator we directly derive executable value iteration and policy iteration algorithms to iteratively solve finite MDPs. +We also prove correct optimized versions of value iteration that use matrix splittings to improve the convergence rate. In particular, we formally verify Gauss-Seidel value iteration and modified policy iteration. +The algorithms are evaluated on two standard examples from the literature, namely, inventory management and gridworld. +Our formalization covers most of chapter 6 in Puterman's book \cite{Puterman}. +} + +\tableofcontents + +% sane default for proof documents +\parindent 0pt\parskip 0.5ex + +% generated text of all theories +\input{session} + +% optional bibliography +\bibliographystyle{abbrv} +\bibliography{root} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff --git a/thys/MDP-Algorithms/examples/Code_Gridworld.thy b/thys/MDP-Algorithms/examples/Code_Gridworld.thy new file mode 100644 --- /dev/null +++ b/thys/MDP-Algorithms/examples/Code_Gridworld.thy @@ -0,0 +1,142 @@ +(* Author: Maximilian Schäffeler *) + +theory Code_Gridworld + imports + Code_Mod +begin +section \Gridworld Example\ + +lemma [code abstype]: "embed_pmf (pmf P) = P" + by (metis (no_types, lifting) td_pmf_embed_pmf type_definition_def) + +lemmas [code_abbrev del] = pmf_integral_code_unfold + +lemma [code_unfold]: + "measure_pmf.expectation P (f :: 'a :: enum \ real) = (\x \ UNIV. pmf P x * f x)" + by (metis (no_types, lifting) UNIV_I finite_class.finite_UNIV integral_measure_pmf + real_scaleR_def sum.cong) + +lemma [code]: "pmf (return_pmf x) = (\y. indicat_real {y} x) " + by auto + +lemma [code]: + "pmf (bind_pmf N f) = (\i :: 'a. measure_pmf.expectation N (\(x :: 'b ::enum). pmf (f x) i))" + using Probability_Mass_Function.pmf_bind + by fast + +(* 3 x 4 + 1 * Trap *) +type_synonym state_robot = "13" + +definition "from_state x = (Rep x div 4, Rep x mod 4)" +definition "to_state x = (Abs (fst x * 4 + snd x) :: state_robot)" + +(* up, right, down, left *) +type_synonym action_robot = 4 + +fun A_robot :: "state_robot \ action_robot set" where + "A_robot pos = UNIV" + +abbreviation "noise \ (0.2 :: real)" + +lift_definition add_noise :: "action_robot \ action_robot pmf" is "\det rnd. ( + if det = rnd then 1 - noise else if det = rnd - 1 \ det = rnd + 1 then noise / 2 else 0)" + subgoal for n + unfolding nn_integral_count_space_finite[OF finite] UNIV_4 + using exhaust_4[of n] + by fastforce + done + +fun r_robot :: "(state_robot \ action_robot) \ real" where + "r_robot (s,a) = ( + if from_state s = (2,3) then 1 else + if from_state s = (1,3) then -1 else + if from_state s = (3,0) then 0 else + 0)" + +fun K_robot :: "(state_robot \ action_robot) \ state_robot pmf" where + "K_robot (loc, a) = + do { + a \ add_noise a; + let (y, x) = from_state loc; + let (y', x') = + (if a = 0 then (y + 1 , x) + else if a = 1 then (y, x+1) + else if a = 2 then (y-1, x) + else if a = 3 then (y, x-1) + else undefined); + return_pmf ( + if (y,x) = (2,3) \ (y,x) = (1,3) \ (y,x) = (3,0) + then to_state (3,0) + else if y' < 0 \ y' > 2 \ x' < 0 \ x' > 3 \ (y',x') = (1,1) + then to_state (y, x) + else to_state (y', x')) + }" + +definition "l_robot = 0.9" + +lemma "vi_code A_robot r_robot l_robot" + by standard (auto simp: l_robot_def) + + +abbreviation "to_gridworld f \ f K_robot r_robot l_robot" +abbreviation "to_gridworld' f \ f K_robot A_robot r_robot l_robot" + +abbreviation "gridworld_policy_eval' \ to_gridworld mod_MDP_policy_eval'" +abbreviation "gridworld_policy_step' \ to_gridworld' mod_MDP_policy_iteration_policy_step'" +abbreviation "gridworld_mpi_user \ to_gridworld' mod_MDP_mpi_user" +abbreviation "gridworld_opt_policy_gs \ to_gridworld' mod_MDP_opt_policy_gs" +abbreviation "gridworld_\\<^sub>b \ to_gridworld' mod_MDP_\\<^sub>b" +abbreviation "gridworld_find_policy' \ to_gridworld' mod_MDP_find_policy'" +abbreviation "gridworld_GS_rec_fun\<^sub>b \ to_gridworld' mod_MDP_GS_rec_fun\<^sub>b" +abbreviation "gridworld_vi_policy' \ to_gridworld' mod_MDP_vi_policy'" +abbreviation "gridworld_vi_gs_policy \ to_gridworld' mod_MDP_vi_gs_policy" +abbreviation "gridworld_policy_iteration \ to_gridworld' mod_MDP_policy_iteration" + + +fun pi_robot_n where + "pi_robot_n 0 d = (d, gridworld_policy_eval' d)" | + "pi_robot_n (Suc n) d = pi_robot_n n (gridworld_policy_step' d)" + +definition "mpi_robot eps = gridworld_mpi_user eps (\_. 3)" + +fun gs_robot_n where + "gs_robot_n (0 :: nat) v = (gridworld_opt_policy_gs v, v)" | + "gs_robot_n (Suc n :: nat) v = gs_robot_n n (gridworld_GS_rec_fun\<^sub>b v)" + +fun vi_robot_n where + "vi_robot_n (0 :: nat) v = (gridworld_find_policy' v, v)" | + "vi_robot_n (Suc n :: nat) v = vi_robot_n n (gridworld_\\<^sub>b v)" + +definition "mpi_result eps = + (let (d, v) = mpi_robot eps in (d,v))" + +definition "gs_result n = + (let (d,v) = gs_robot_n n 0 in (d,v))" + +definition "vi_result_n n = + (let (d, v) = vi_robot_n n 0 in (d,v))" + +definition "pi_result_n n = + (let (d, v) = pi_robot_n n (vec_to_fun 0) in (d,v))" + +definition "fun_to_list f = map f (sorted_list_of_set UNIV)" + +definition "benchmark_gs = fun_to_list (gridworld_vi_policy' 0.1 0)" +definition "benchmark_vi = fun_to_list (gridworld_vi_gs_policy 0.1 0)" +definition "benchmark_mpi = fun_to_list (fst (gridworld_mpi_user 0.1 (\_ _. 3)))" +definition "benchmark_pi = fun_to_list (gridworld_policy_iteration 0)" + +(* +value [code] "gs_result 20" +value [code] "mpi_result 0.1" +value [code] "vi_result_n 20" +value [code] "pi_result_n 3" +value "benchmark_gs" +value "benchmark_vi" +value "benchmark_mpi" +value "benchmark_pi" +*) + +export_code benchmark_gs benchmark_vi benchmark_mpi benchmark_pi in Haskell module_name DP +export_code benchmark_gs benchmark_vi benchmark_mpi benchmark_pi in SML module_name DP +end \ No newline at end of file diff --git a/thys/MDP-Algorithms/examples/Code_Inventory.thy b/thys/MDP-Algorithms/examples/Code_Inventory.thy new file mode 100644 --- /dev/null +++ b/thys/MDP-Algorithms/examples/Code_Inventory.thy @@ -0,0 +1,197 @@ +(* Author: Maximilian Schäffeler *) + +theory Code_Inventory + imports + Code_Mod + (* Remove for precise results, approximates real numbers by floats, UNSOUND! *) + Code_Real_Approx_By_Float_Fix +begin + +section \Inventory Management Example\ + +lemma [code abstype]: "embed_pmf (pmf P) = P" + by (metis (no_types, lifting) td_pmf_embed_pmf type_definition_def) + +lemmas [code_abbrev del] = pmf_integral_code_unfold + +lemma [code_unfold]: + "measure_pmf.expectation P (f :: 'a :: enum \ real) = (\x \ UNIV. pmf P x * f x)" + by (metis (no_types, lifting) UNIV_I finite_class.finite_UNIV integral_measure_pmf + real_scaleR_def sum.cong) + +lemma [code]: "pmf (return_pmf x) = (\y. indicat_real {y} x) " + by auto + +lemma [code]: + "pmf (bind_pmf N f) = (\i :: 'a. measure_pmf.expectation N (\(x :: 'b ::enum). pmf (f x) i))" + using Probability_Mass_Function.pmf_bind + by fast + +lemma pmf_finite_le: "finite (X :: ('a::finite) set) \ sum (pmf p) X \ 1" + by (subst sum_pmf_eq_1[symmetric, of UNIV p]) (auto intro: sum_mono2) + +lemma mod_less_diff: + assumes "0 < (x::'s::{mod_type})" "x \ y" + shows "y - x < y" +proof - + have "0 \ Rep y - Rep x" + using assms mono_Rep unfolding mono_def + by auto + have 1: "Rep y - Rep x = Rep (y - x)" + unfolding mod_type_class.diff_def Rep_Abs_mod + using Rep_ge_0 + by (auto intro!: mod_pos_pos_trivial[OF \0 \ Rep y - Rep x\ order.strict_trans1[OF _ Rep_less_n, of _ y], symmetric]) + have "Rep y - Rep x < Rep y" + using assms(1) strict_mono_Rep Rep_ge_0 le_less not_less + by (fastforce simp: strict_mono_def) + hence "Rep (y - x) < Rep y" + unfolding 1 by blast + thus ?thesis + by (metis not_less_iff_gr_or_eq strict_mono_Rep strict_mono_def) +qed + +locale inventory = + fixes fixed_cost :: real + and var_cost :: "'s::{mod_type, finite} \ real" + and inv_cost :: "'s \ real" + and demand_prob :: "'s pmf" + and revenue :: "'s \ real" + and discount :: "real" +begin +definition "order_cost u = (if u = 0 then 0 else fixed_cost + var_cost u)" +definition "prob_ge_inv u = 1 - (\jj exp_rev (s + a) - order_cost a - inv_cost (s + a))" +lift_definition transition :: "('s \ 's) \ 's pmf" is "\(s, a) s'. + (if CARD('s) \ Rep s + Rep a + then (if s' = 0 then 1 else 0) + else (if s + a < s' then 0 else + if s' = 0 then prob_ge_inv (s+a) + else pmf demand_prob (s + a - s'))) + " +proof (safe, goal_cases) + case (1 a b x) + then show ?case unfolding prob_ge_inv_def using pmf_finite_le by auto +next + case (2 a b) + then show ?case + proof (cases "int CARD('s) \ Rep a + Rep b") next + case False + hence "(\\<^sup>+ x. ennreal (if int CARD('s) \ Rep a + Rep b then if x = 0 then 1 else 0 else if a + b < x then 0 else if x = 0 then prob_ge_inv (a + b) else pmf demand_prob (a + b - x)) \count_space UNIV) = + (\x \ UNIV. (if a + b < x then 0 else if x = 0 then prob_ge_inv (a + b) else pmf demand_prob (a + b - x)))" + using pmf_nonneg prob_ge_inv_def pmf_finite_le + by (auto simp: nn_integral_count_space_finite intro!: sum_ennreal) + also have "\ =(\x \ UNIV. (if x = 0 then prob_ge_inv (a + b) else if a + b < x then 0 else pmf demand_prob (a + b - x)))" + by (auto intro!: sum.cong ennreal_cong simp add: leD least_mod_type) + also have "\ = prob_ge_inv (a + b) + (\x \ UNIV-{0}. (if a + b < x then 0 else pmf demand_prob (a + b - x)))" + by (auto simp: sum.remove[of UNIV 0]) + also have "\ = prob_ge_inv (a + b) + (\x\{0<..}. (if a + b < x then 0 else pmf demand_prob (a + b - x)))" + by (auto simp add: greaterThan_def le_neq_trans least_mod_type intro!: cong[of "sum _"] ennreal_cong) + also have "\ = prob_ge_inv (a + b) + (\x\{0<..a+b}. (pmf demand_prob (a + b - x)))" + unfolding atMost_def greaterThan_def greaterThanAtMost_def + by (auto simp: Collect_neg_eq[symmetric] not_less sum.If_cases) + also have "\ = 1 - (\j<(a + b). pmf demand_prob j) + (\x\{0<..a+b}. pmf demand_prob (a + b - x))" + unfolding prob_ge_inv_def + by auto + also have "\ = 1 - (\j<(a + b). pmf demand_prob j) + (\x\{..x\{0<..a+b}. pmf demand_prob (a + b - x)) = (\x\{.. 0 < a + b - x" for x + by (metis add.left_neutral diff_add_cancel least_mod_type less_le) + moreover have "x < a + b \ a + b - x \ a + b" for x + by (metis diff_0_right least_mod_type less_le mod_less_diff not_less) + ultimately have "x < a + b \ \xa. x = a + b - xa \ 0 < xa \ xa \ a + b" for x + by (auto simp: algebra_simps intro: exI[of _ "a + b - x"]) + thus "(-) (a + b)  {0<..a + b} = {.. = 1" + by auto + finally show ?thesis. + qed (simp add: nn_integral_count_space_finite) +qed + +definition "A_inv (s::'s) = {a::'s. Rep s + Rep a < CARD('s)}" + +end + +definition "var_cost_lin (c::real) n = c * Rep n" +definition "inv_cost_lin (c::real) n = c * Rep n" +definition "revenue_lin (c::real) n = c * Rep n" + +lift_definition demand_unif :: "('a::finite) pmf" is "\_. 1 / card (UNIV::'a set)" + by (auto simp: ennreal_divide_times divide_ennreal[symmetric] ennreal_of_nat_eq_real_of_nat) + +lift_definition demand_three :: "3 pmf" is "\i. if i = 1 then 1/4 else if i = 2 then 1/2 else 1/4" +proof - + have *: "(UNIV :: (3 set)) = {0,1,2}" + using exhaust_3 + by fastforce + show ?thesis + apply (auto simp: nn_integral_count_space_finite) + unfolding * + by auto +qed + +abbreviation "fixed_cost \ 4" +abbreviation "var_cost \ var_cost_lin 2" +abbreviation "inv_cost \ inv_cost_lin 1" +abbreviation "revenue \ revenue_lin 8" +abbreviation "discount \ 0.99" +type_synonym capacity = "30" + +lemma card_ge_2_imp_ne: "CARD('a) \ 2 \ \(x::'a::finite) y::'a. x \ y" + by (meson card_2_iff' ex_card) + +global_interpretation inventory_ex: inventory fixed_cost "var_cost:: capacity \ real" inv_cost demand_unif revenue discount + defines A_inv = inventory_ex.A_inv + and transition = inventory_ex.transition + and reward = inventory_ex.reward + and prob_ge_inv = inventory_ex.prob_ge_inv + and order_cost = inventory_ex.order_cost + and exp_rev = inventory_ex.exp_rev. + +abbreviation "K \ inventory_ex.transition" +abbreviation "A \ inventory_ex.A_inv" +abbreviation "r \ inventory_ex.reward" +abbreviation "l \ 0.95" +definition "eps = 0.1" + +definition "fun_to_list f = map f (sorted_list_of_set UNIV)" +definition "benchmark_gs (_ :: unit) = map Rep (fun_to_list (vi_policy' K A r l eps 0))" +definition "benchmark_vi (_ :: unit) = map Rep (fun_to_list (vi_gs_policy K A r l eps 0))" +definition "benchmark_mpi (_ :: unit ) = map Rep (fun_to_list (fst (mpi_user K A r l eps (\_ _. 3))))" +definition "benchmark_pi (_ :: unit) = map Rep (fun_to_list (policy_iteration K A r l 0))" + +fun vs_n where + "vs_n 0 v = v" +| "vs_n (Suc n) v = vs_n n (mod_MDP_\\<^sub>b K A r l v)" + +definition "vs_n' n = vs_n n 0" + +definition "benchmark_vi_n n = (fun_to_list (vs_n n 0))" +definition "benchmark_vi_nopol = (fun_to_list (mod_MDP_value_iteration K A r l (1/10) 0))" + +(* +value "benchmark_gs ()" +value "benchmark_vi ()" +value "benchmark_pi ()" +value "benchmark_mpi ()" +*) + + +export_code dist vs_n' benchmark_vi_nopol benchmark_vi_n nat_of_integer integer_of_int benchmark_gs benchmark_vi benchmark_mpi benchmark_pi + in Haskell module_name DP + +export_code integer_of_int benchmark_gs benchmark_vi benchmark_mpi benchmark_pi in SML module_name DP + +end \ No newline at end of file diff --git a/thys/MDP-Algorithms/examples/Examples.thy b/thys/MDP-Algorithms/examples/Examples.thy new file mode 100644 --- /dev/null +++ b/thys/MDP-Algorithms/examples/Examples.thy @@ -0,0 +1,8 @@ +(* Author: Maximilian Schäffeler *) + +theory Examples + imports + Code_Inventory + Code_Gridworld +begin +end \ No newline at end of file diff --git a/thys/MDP-Algorithms/output/document.pdf b/thys/MDP-Algorithms/output/document.pdf new file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..f79aa81fe9ef01b8402802d0652cb0cc4209582b GIT binary patch literal 531590 zc$|#eLvSv@+BWLgwr$&4v2EM7?W}Ocwr$%^-q^Nn{rmgR8JxkcI$uwouAcOytLu91 z>Lynd6{lxrV22@}ULIH)Spy7@!Z35O5-|}u7+b^e@ewhKTiLprIsd2H8o8Q@nwdD5 znhO|n%P^pSx7^voaI?9t*o3>X(zXEP%^7|)GmoUXVnj~-Pa@i=8aqK@qBOqnf ztAdkzon&^pB@@4x#N2p^g;d)gwuHub2pc8iUVR73vC8FCkHsbraw|U8EGHlXKAy#u@|%%xrPN!Z8KA08zR>XRB&A-_IQuB-DBr35vTA2i^XiGd%9)^_E z_B_lv%7_(F9uomJ@7)(LOL>KM!z?>2Ql{O&;mayu-1wqS-QvKdSOyVm*29 z;*0+$?sFgwR9$EOg9A^QriY@&|E0SVD@g5{3lgsJy!f+1x=^^TgJGsFL+ zDd8eO0zv(pBn%+RB3u-&LWX1NrY*0L?oI8I-m%ckrb59*Osl)n51ANQ2%&dl*QG; zCsJuf#YmdkHg*YJBPf|~CZoHC9y#> zI!67Ezp<35pSj{V1x;?f)yR%64nlN*i?ZXum7at8FKTL=@e0r@44(w6q4VTZN0;2>xBYG)sLA=ma3 zV@aj$a<8YZIPh!1PQq$%EOA*Ib+pMTJhc2Qy(aje>$pwR*+IzlbwWAfMJmmjYM)t z(w|?Oj6(?6zj}FkoXnLa{%~%)U_31|5^@PVnBJ_2)O>PR zyEt7l%PK)Q<7#q@igTHN|g*KRg{<(jXA-vaPgi zwi?in8S>^%5WMtGboLfeEA8PuPV-7eZYV>UTuw<_xrl9x^YO7DeWi*mAB$wg*yYMyue7M!fV zwOxIZqZ0A;W=DHB##L)4qX6DH5{E{%DGYL}>n5&yU@Qqn4qDs@mb+$hJUTL>?HJI- zoHq*_*%ZsOvp2X~o$O&(0lMRF{dq zVKuG}S8q_fV)Qrd2tIUhR3td5R#ODX2*%IMt(f{XC2Y!}bk6h~!M|E_g-o2=oDzu z4esBHEOO88XlXANt9sA$yFnpA3JI4{6El5U!Pw4@qGc6Tcp8vXW};zLc*KZp&I^M z@WhvKrM%)22{20-93eod&IC|{OIgVU<-kpSKwDZ(KNbs=jAo))}l1&uIOCNQN~n zx|i$}0xCv{_cJ!hdnV(SOVG@6w6S#P1$PVB5r{e{iH6Ur@;Gfb2#IM5FZDN&Nn%Uj zUUBlz>EuO4ey-&^@&3tCN89OtmcoccKI^xhB(yuRh*#-0?@NVuE=OcGVYYH4@Rd z5azU&$n?U=q6tn8Md}Ibv-$(c(APVd1=m*oFkdHSc%iu5wep>#;@Ep$UgOsfauJ z{TVr;NKK3@&oj9|iOjOr*-8&3irM%|g&l3xJmoc|Ar?9HB-cjj0jp#A@OoIC_)Mu0 z6^Va(6*LxF0>t&UfXVGqB5tk8H?{vUS+{GXNfRKyM-W!Vc|KiCG{$*NbsDiwui16 zyo26a!F+o5G0#0q*qbgRPQ$#lV={w%1ZjtF?8piu;iIu6-di#d^R;>ipCss3w9VUI z(BYMZ+CYvn-ncsJCI(#iJ_)|Zq$YK*&XF^@G7UC6Af%5L&fbaSj0J(kiUA^Wi+Q znpd{dOiFG^i%Z8^iMp2LO7p4CaulliScTgE#%~K zjBk+2pfvx&V_ywWuhR^ZrO^zIX9$N0zS;5Yqx3g&RUH#Ohsg-aLDkQbLV)zKKSS9G zd8J2(2(Du(_Hs27Hsj(~$?T+iYd)$dyYP?D^Ut=eIpFzYltd!TwWT#02w6RLMeh+1 z3)V?;_hcGtyjWaLpK{1p4*J0-*Fx>Vl5~DpLcGVEOBqB@2|YGR z?$LrOF0MpeY%KpjgkfT3)D~*4A>s)jn^(RfncP_rJP20$isp>aRX4DA1t8Q@}-bu!QR zmkmwJ@gw4ZttzUz%)nJDHZ!eWYTPy2*oWE8pc^} zWr!qe6$9pVtQZnDsAw!EQhdr!pPzRq&E%1xtqECWwy*%7v|J2t)!}j}4!=!g{C( zxs*#CB9BoW!^sb)7I%nWoy{^05|d~YW);KMx(g|k|fW!rJnvuH0!kDf; z_yUy(jE^D5pth~pvz3CD3T6j=iCi80*5vjY(6UJ8EhYzM5HZ)gW(BPO4$x(3H|IS zc}UN%*$+uS07?)Th-RKLMx{AyErf_u=okwb0ZAp=&#+RU)z7kW4nVDseO@UDJ32O{ zh7I})K7Ghl54AQXrBJF40LByS7q%Y&p>X!z2hZg$ng=1}JBWGcP>_I%qMMuS)1?* zh7suiN4rvvq$+@mM9ufDmw*jt z)a*|Ug1s=&LULpPCI;OCK?E>eg+ZnVUXk9x%8%zdI%+DoW}61d(&L?H4xocTqIaNZ_2y>)nGKXR(Vm18L}mlYNt;IC$1kkXhpc%ghUZiK7GRFwB#*BsMk zR0uU6-l1PL}f*o*Je&D9P~d*o{IR=91?mk^#ZNEe@1dzlxnDc%?sVhkh_SSCLGO zT#A_h5a$7EXh(mBmwz&0IyOkk_pF>=^Gbx>u4H^?8@(So)PqXE;Ux{Q%}eF;l;? zBWjhQ*TeN+8$cRym7VOt1MGK;+Ne!E3l*UQWb=pSNTSI3=dF#yW$aW;kwJ*9Ry zGt-B+7430k6bT1x7(-O!BQ(K6ptC)3+k$nRQyK)ut|}P{hi6k%yL=_vONThOypi-p zn!2UP7qvG0w9X{4xVzGv z9*OT;6T|6qiJC(|I8kDZH;-&~e z@;0Owp4*H(WuLZ2Rh!gvDeFgl8VQ0p+608X*_@|lFuG7m(GlEtX@P8%*=t$_oo-oK})K(OwJWJvQs(+!YIWdIV)~ zj-c;6JPw~<6v9*%z$m0-WhjqImzAbw4KY&1bdS_EhPlf953(2#L!fhne|C=o@K9_ z>Y()eY49P7gxiSZ1IJI+mQ~~{wV!+=IW}JXw3;n9&ty0T zdmQ+x*_|((Y6ITOFJJ#r6T1X9bS&v3~I%wz~@P<;_2+oSY{Jb8Hp^0-+w z&fDILl@QN$DEoCH;clBD8@esLJ8bC=8|H*%wSEaNnH;5#%lm<^YN1Xo?nPZRIqwSw z1R5Ee67OH_QT=wvuCJXETaZJizb3nqdNl>r(w-5l4ffdC}DR3l=5;Jy2vY6X%dinSNQ1-(H>x*!+ zgfLS*=H=GFasES;yFz$IL-t7$7zewwy)ln^GALOJ5siWd4k>vJU9bI)H5~awR|X- zTR2$Vee!M0RI$M8s&A(Q_YNr@u#X}IE@qR+_ zzF!9_;M;R}oTM*{|8p;JxN6Y5Bgg0=kd20i=YrGB*Zt*1Iq2eqq|7pGT=A?b}=-y z8(~1mCE?NSk$;xud!*~TugmN0@&TVcBuaXwGIal%mN$9gCHFTRJM1v?gSi^lf-XUd zElLcB-9ooA#JxWhJw<#rrGyjj798?!7^OfOaH_#L*i5k=Xn4x5d6Gog+p>ZH(1Gol$_m6ETvi6N0|F;*{tr!jEjcm| zf+fc32}e;HRgug77eUaARnSMuQ?O+Nw7eB=dK%~11Xy*m=ow(f#NR$ZGttIrkDkE zTGt?Yh*gQr{R#G_Z*k4CJ<_hMg}&o)geIu_v1}azB628U!3gcZfkaKDq<9rEfg2 ze*nIh8-0gEE@A<@#v$Puh{fgxlj#MKKOT<1*@KI9?eCPUK}^K@DI;7U^>sber7;Z zgditJu<7Fm&SY<5O8Sq3DHdx6lwqU1h8EcenikCP#yAcJMz@luaT9U#x|k_f{F1F zY}8%pL+B_qw#KUh=%c+=F(zUuvL_7i9YK$sclPW=+2PzK=f*0TVKzi?;RbBO;-}3)5lf~O^WrOoMbCJQ(@cyAmz;TacngdH z@xZVCCKX$+}5JO8sVd^$Oza*xQgOEYag z+2$(&u|ykKB4x~0skU=gK3Y?<6=HGu*cqji9mrcsc-#!T1)MyK%py9NAA~9DaeyA zIQTywG(lii-8dKkS=&F9HN);7{^kDt0i_@Mq>Q+M-hlx9M5t1$)1Y976+DL+gSlt_ zFER9S-7>+8fby(3c<<7O3Y__Y?Yl9(4OY&MOJyAs#8{^7vRgOAd%+PkG*WDAhXdK zIRJ-O(LaVW5h(Mb+zykxr&LLKf?x=Uv-KuzJC=F%rgl+tPIl@@AP2X z>unNNy1N6lQal+ezELnUwOnmb5Hn=uze z_B~rD&oT%=sm;X+O&?FVAFm%Lw9b*rQ2!BQVxko2202tw<<6mJ zf)qbq}BOaxNNAMLF9J0&no2!;gET}^2FfdglBz&(EsWm3j{Z7cPY$dDf6Y; zVOY;ajme_X=PftgOX6K%YEgI<5V+~!3|>Vg{gX35PsQx^A*L6-6d!z-yft4qln1& z{am=V15}L|Yme{GWURTEElW;CtGjTHHae&lAAX~Nf976qTWs|u zC7Zuz??|463BlUbiK17Afi+*0qNWO4x?>RA3tpLp&tH6f_|}dYJ-M$dl{o3aLY&f(u0k1iSqM6XlH;(WRmJCaK5p9{PcPt91qDvF9g_mrc9 ze;Ju@A{c6oBg&awh3{3qTocWwMFtHVw1=iWk_d{ib*N6iw@1Vr!yv6cbLU1WVK~ zWZA+?n}qv)C#CT87Z{k!{mJ$Cl8Ho?=_QLJ$_tGrTM{=8k1Lx@R6Ev+<+DE61JJS(TRYkIX8T zz#q12Jx^B4R-^ZO+DX~evN*^q|7lryy9qlnO=ke4u5$_~R&NDVu6~}zPVKgmVJA0k zlu7DOyWeigT*WIM}E~xv>H64XEu^2t35Qkl=!}RkLg{djkG%=c->aqMK^>x76iHr zG1v%T&X*lTOT)8<*LHTJlZSg!eG~s>3%Q)StSG9UXE$WipV;7+RF;0H=-Xu+qn@;c zAJcItiE0kwjB{EZ@&r@!1g)#m;7r7EjX;x>H741)$fXoexoH zrTY4U$tuiUfvWqi#i4R{QNsUFs!|#CK{-ovM z<%CctQ&Je?VB*Jzglc@UAI&04s($$0{8y=*LQqoZx_%Gb5rX>{zo(+0!@uH@8^* z@8?DSM!=xA=krmcDmNKT=^#L1_vc%FpYj1Z51TewRY4!8pr7CG^LDW=ulVUg2Ivd= zV+!&R{ioERShY{mO8@-tFeSr5pT-yEqe_cFSlyfG;q92EP3xJ^Kl5mr)!|WyuqB5 zyfGVA*^+NI1q{_6+P!hikJ;HMhICJ>^+IHZRN;B9Hu0&&HxvapU<{FkEp@; zF!(>TlifGTRNvx*N;8+zFrav1xKH6(P!w^QjOkCQLB^+dn%ebkuA})WMGzD%{;oU@9cimg^w-gPutiR?n_IgqVMvM40_Z0rfeyD_1!h@ zJMFyizs^ND7F|A8zPS&>;_J1(1ngfU8@0LYoFY+*UgjQx~r2we5=RV{^B>a=R<3Z%xqnaM8G(M!zAsmPXK9q(?++f7=>2X3sz4!G z0cp?Gt5(6tKM(mu^)kfF5pby)IzXp54H!Lo)QP}{H^=sQwh;rTMasluvNeS2!cEs~ z9x}SJKYGG^=iJ!eL@~FrY-OQINR?(xkn0hc6^e^CKLp}4ilPNhw7*@hC}R!DwE~C zf_mBf!e21~2M}r;E+I zmHy%d&tqABKu-yv4NYn%w*NhLsx!~zX(gzq9jypUqq-P+0RoCB7K6pV(Ghp-nucdt zA*RX>dAt2sldU20M_|{fGzD3EER~b+)@zZ74% z9acKa9)s9991#Jmnd5p8#2KpPR)sn^Wpk)?>Fcj|Z<{YQ-oa4#Tim}^xfxsibEXkz z_W9l7lo&nI?X|>~kE!dz%E%7G8M?LrSEL;w#@!y-eDH!}b&BV&0ZDdZ{J6(ja_xvm z9|!=_p<+oY*^}9PmKg~){coN%WydcOwV~t59BnCtfd(*G2c^VE4Lb0buhv4kLE4 zxhP!@P8k~gAWD&lS#iKQi1L23Wjxf&nFv&S@0epQ55at9ZaViJn5v&Ut44VZd* zt)HFZ?KZ)ST7TCrqHhwlS=Ha{vfEI*7s~@FNg@Npj!J5XmXtrt2&i4m<6EmI2BI znAk+qRY*Jt=#ao&C{iJz(##iWM(ujXGn!NgqUz%qVeoNi7Q&X_DhvX4a*}39=)K& zue=+h9aBd=!!Pp6PN|S!#8&hPOZX#BDEYL)jG6ETDea^VZak6;G%h{a0gtE+lD z8+>5GxKbKGEX)jn{qIkOm4zX#>(!a4y+9D~h7gIc)3q#YR!)9r4P|=dm@*7#EBVv_ zJTLCQjummfhbdNTKq)7Q>zAUfn{A46EOirYNDZyoEx~q-Y6)%NDBf|gxDCajTQIs zJ^%>%jo5mLL6#T+Q8-JT3&HV?sJ{|6Wv7-4#PSR?cnSxAI4o+TPvH#GzOK!y1%!u1 znC&bA40(^uYIn-h;us0aL{Bn2zlIsX^^~g3Hp%wd3_z*z3kd;ID8XzxBL+&Sr!?q2 zA|~1!4QW3hCNgft#KZ@qI63PZ96}hB+<-S}bL|NfU#Bmyi~+~mGZI~ja}pnZ<-22Q z*9(S=?jI6wT4j-l#nO-kY!If#%CpO6o7#f2nSSFZ3LS>lerXO0UcoyZV+a-~#K2 z-OP@v9S=^uyQ>JaW%&=ckQsG<#lf3?m8MIIEJDmT<*Jj{>=o&_jHG-a##D2vyH4 z(zIPP0wUM=%WxXHpTeA^o*-Re^Z4B4fpbG67M5Z-N!CoR1IP0NDw!U@=)+uTJY) zn>p9jym9;Xd}zwqn8_eJ2_(jVJmD>Oolf2Cx8=@!4TDJ#N(L;*rW~#w&~2WGZ1>% z!Rrho=&x@1nntTw2{CG}r^tjZ-bc{KlgBHVMdjyynZ5%bigwll~^%8sedi@>l7M zUQ+2bcuWCt?Z@7(D9PmjVKoiV1ZFxfnC2loD#Z!kT?lGfM%||} zEOTI5738sASN;wgdIiVbz(+!!)(%hf|-uUSsq;!0-OI(T%@Rhin@+wiODSxSFC;* zFIy_eM?ylei0Yir4fkYvK#2G(FxN&(lCRoi|1 z#htcgX3tZq?r8}2cd4*VZc7{~&sC8GHPeehRD&^JRxOTZv zkn_8GrQyJmWP-kmaj6v)<0cl)5U3VZoBdSs?6MbH1)0p7HUzgag~ltT|InP4sv zwHaw~rIc3Fe^+1n)J8Uo?e0;U#g^|SnNfm39}F&CMc9H-ct6fz96>iykEW>RO2tl z3IpQFiR58ddsc*qSd&+SHAC0F*d2=_z~R%tTUpI8z5RSQRo(BinY5!d)Tkq0#6 zF{HX8z#;93(EW%r0j60XKtD_VUNFR(%=RIXaBFu3Luf=WiDPXy9Cwbdwm*Cy==CB) zn+bB=)aBOd-Ph(c!pTO?gW@Upo9qJ>tM;SZ6BsvE5eI9AgRvmyq;2b2?wu_Q}4&9IuG&QRJ zeJoI6L!>Thc9n+-Wm4dzdb*j6DxdhPAb_28@I^Y;&rrlkmb|wM1|llZ_}Vd|VR%4Dl zpNUq-3gdtl3Ozf)Ih|J)wxN-AAds}ih3nzDovUH;7<5<-;i4-_0hp@5qF6Y=Nce3oEosJOTIGp= z2}PIOi?F5uQGtyD)tWh!_%SG=leShnU%Ld!n{Nu)zUmHlH<_dk{NUuID%DHUZfh#tofRy45mLQOcGx+ytAcKaMV^cwd{apqk}T?Y-?1J#x|CJ9!U(P^ zM650D7|(gTLVS*8K~;ABs=Vg8(HXcH#&f>t+B5UO6uO?+rIlo9ZDf7vUXu3oLYUE5 z><4qYIaxz0isle22n#iHWtLaCS(~efsthZx5=);t^q7b7F>(q5s!bumoj#LrP zfl^*n#@dU6S?uQYAgOp(Hb}7aCQbjEBzBvTLVn?iVMEx;cU^OJTpvzUy#{D@Gc zwXuhdUZAHkm%LrJJYB;;tyt^KbVzwdpxKOn8vhPcZOpxmg9knm7P8tq@6k||o2g!p+Eac9bwvAM;dMur|T*grjwycm% zKYLJZUaO*zCYA(By28?HQtrIz4zvZ>_1x_pbTEXwFP2Dcfj%{JK4D8t-Xs#T zFOqpZN~_pGT;GWA8*|UOE>G-*XbUFNqvvh=p9YoXl~8r5MQEO2T8Vj(DbaC+<) zu(Zj;)TeU?rh8JsM5>;1Ty3*O*O2p*B0I=s=hd>@(LDC2g7eXggeazS@!pq4MQgw2 z1YeI}lW(}pNO*#oYy2EC|0A5qXOuV4{I-_pOW=8k^J{CDRTXq zGK_F&IF{l8jnt1ngF}9u4B6Cm5rT0zjqkCp(T}&y8osQ#Pb_FAD+v{EBspZaXs6 zx64_HpEs5Heu!FuIpLsYH*vvVK5ZWf8h--M|zn;^xHS4A4WlP9@5pa9Zudyrxb!^sH zZv!5htuDam{+TUX58KcsvK;Ow4Cvn1s|mN928&5Acb-Yy^ zcI7W&9psXlgubeW@@y7FQA@9AAVz6u0@PcsftH;P-N&W~k>UBD5 z8)hPJ{m#@A+&k|85moA!+J5&2k%InuHk7dlE_sNNFz#f* zb*i%i%^IJ>^VqQ}N-8Sam|-JNel z%HyKm{}#D{jVbzX72wMxy-e{g zyLzl9QIr0yETXo;=05TxOaKsnjg;OEiU;O8Sm&%f92b6>g#FM&IH0tMgvoFB zEyT}>kpXRpxrgjB_VDfDx{tBXzhNb*C0k#>W&DkPef4(thr(reh+CQEuQdBunSsA< zc=8Z&2iBz@{}Ey80t?MXnx*ssUZv7VmSQMU#+entb6pDH{zIBzSGsWn=FS@fUfj zu~Sll^&c->vHwaV=64OTX4yK2<%{LvnyGGFOolT*&kLU*qEP{Ti^_tx%aGam0 z06=pw*aLo;nllx4J(F? zEmuL?>YjQWMr~={Uf3?0*HGjc2O%Jl5uZ?)vpozm%VOWV-~Hq{>i&_hKfgo zc)1xZD{3C;)IaDnI%?z%uS=|haDVF}+b7E}!Sz*!xu%n-6Ou8g zp-csp|HnBcOSV;VMXRt3TS^eBqbt09FWNgbdLJctFypKN53+aN0E9}j4xITP;_G? zG*c+6lW#X* zk0fm(gaIE_PF)|C=+a9TTQHajY!a!m2d?|5_1}o!cJtk)9k#&NXycq?A92KdOTljrCQB|RZN*Bb)j#zsmLk%}2AzBEK zAect(7%)C4&=@;-KqKMP>moj7^bL^dGV|~8IG7T z3+}D=Xc#CkoA+P5zC_Z8UQ;0~^(@LB=al{CIyIpIXhH&5h*>AxySg%7VhH?*zG z6fjz{3W3y0f-WGm-&QqdO;8coo2vNNZtfVDG%C}s5-d_^4i>^1Q9%)x4<u9xVV zrua~Q_5l%0E;0_qf?@f-2MH>f9nDi>R{vvvLs7mtV}Z5B#lF^Wl1dL^aLBaCz5R3huyZR5Fw7zuTpA%1mg;6Uf83-6EmN_XYo zhFe7ae#H*Ng)jF!0Okcel9F;~dJv zFFj>od!o}+RS#9>OP6Dr+Gwh-b&^~BZg-x@bqI4mocaaDg%c7#)V5E1AuDcbcqh2 zL&egx&~% zvj_Q1*3i3F=IIjK?UIcrx6{2ZeFUNEWDsm|hqZtjI-8{{h9aLYfuh$$7I;9)F&)Fy ztN=S$mXRscZ|SsyuXAo0PW7?(V3v6u_B&6Q51*dXXa3fNe*aS{ntL!*9T$zER7hjVC4>b>l1JCb z0Bg>O5qvAh;&8>WA56NW@cIxwm17vHs_Bm*V3>i5K{+Kz@kcDNpoo?3I|D!&w{2lP8s_>Bn9{; z9(~CZ+ivQEH>76Ibt*@tA!+rMSnmnwjf{nZY^UJQb$OW^Av$HT%S&udvGHa_>7_m4sV;970R0x7yA(2jAg3N2G zwJmoWz;_$=If^NE9tW%*R0A5@zv#inaGV-h5${6M}wFpNwtE;n@9L%)^jy>0g0to zF8J6+oIMmzl&IuTQRL95b95V^TvrDgE7J?8mJu4KPDjDXp@!13rGAD?T=5e{K%WO*1 za+4}jMGz_B6wgjSgFa=adH-*w6HNoJID7h;Y6N|X+Rn)X9P1R4R9lUj_ z?7y|YhDhRMAMY;7Df@KvxAwkMA-=0fUfPzO0RxMp1{h>CVgec_TJrkB~e{5YoUa8 za-n3PA?Z98q!SNOB|})>^?m|XX3ehN#1#>NUg7M#CDCu6O#*N#WgP-bXT#uFUZll6Y zMGL=xmfawka8Cy6JWl&q{g2Fjl~R>Xt|u*~yT?dW5R8@+=l(}fw#zuFsSsJ)r82Lbh;ie-S2>G7oLYx)&$mNv>@RdJZE6&( zoeu~-WqWP-eFOoez~>S^n7gwT-Rg8XK6N7_{#O9I^twLHp4qLJc);$#DvhlGWX?; zsQS$kol?Do?n|^5O(whQD#dg-Y@<;wrL1F$ib)j6R{3ZHfH=tGm6%>-eK#AXWz z@oPL~R5BOs3vpYl+9Ang2I13QqXKojfz zdYcwOqa>%6b2DyFIVDOkv3)EAggdxs5aE_e>(z0u+%*e^Vdf@6bvG9Y83_*o$t@0r zI=D%_1idIU^~|3i34rx8lad{#i!;<>B?Xip-M*9h!XwGFOtB+;bmWgQ9JNfXg-#XQ z05!jD_IMd6hxj&(+Lks+2bF!wE}e@3j~#SPd*pGsnMNOI!~9!xoq>eS>rThV>-%Lb zLFpTDLYL?|9WN>A+O?NCBH6T@uT=5lWkn)aOhotBBG@mxZ}rTQuZ)#fCM0jF8MCZ0 zi+B(FGO1Ikm4R*#g6LmeLb^4qb#@&3Lyjta0U=L>eVNZ%2|2gKn+GX@A&fsZl(2( ze-Se!$!pmA|;+EYgb@59xsU-Fg6cmE+xK8vcn#&ObfSV$PHEp9v)v*pRI_Ql-jAN!HAdl)M?X^WNW=puGYosMiYf zywQv0eA^H73ZK}O+YpG(&6~x_2X@sM@BJ5R;}0M?=x3{fhPk^WcmH9-lNl=I2;S-I z3~jfhV||66mlKVc9zykYcG4BwaoB$QL?p^;AAE?kfFjkak7%!B2F>*HO%!sdyvyH zG6SMdYvJeO$h0Vmgx7JCkx76*tH~to-PW$HpO)vT@_1wusmd3Smm9?G$qaOB#{r(>z8sI)6fX|)5&)0_s!L_^XTGts zGaE$BjDxmMzJ~U%d-$JA#@V~8xo3RhIse%#w@J&#J0;pcKX+U&vL1oewD+1h6E+&I z>7>q3+#Z8V;7C9ENM?dbg-n1HO^#%x~el|X^7d8ylxif z+SGSf1_afCrR=Y#5=J3MX^})Qk&g$~PRYElroK50)kejnuMn;q6*uwq@bXwcWd@q1 z7Y6EE4Y6vg<%?gTvr3L^XoKqyFRSDmH3A0NlnG!i9eSAAjf0wu+-{%< zAxh99L;Hd1l9P)o2H1lmog_7|yjd$ADsg<->we_8K)nrpTO@0t(NXH;>Sx{*M#_33 zg>R>B)#m9;WcVqu?AuDR%%+lYn09JltmrC%#jrCM;-@L5w)4&Dl@B0(Q=*FMU3&5kuD}2@ zXjikxbaf>2r#8OT&U?*}RLBFq8bfL|tpm7SJri*Yg{g%dM}F zGA?T}NYV)L8v&u2f$;({VX*UDUsPI?rdi?@siREarb1vAq7o^Jr6Tp39Ai%+O@36 z1q$k6aVv2poM35cN$n!4SkWoLqWlmlx$!l(BaeNU$Ye?gA=)mLgQuY7gyn?=3txl- z7v;Ytf+EFm_h24LybV}MH)L4gWcy7L^>WWnDovh1_!7k<<)p+Q07T%9J{NjwAidXu zd)i1PCzN3v9c~kxu&}SuOI&0~>SE4@^OeXTsswkJ(_v*O=;PC#>a0P#ebB#^+(tl4 zpikoBi~y$S&Ivw|May;HCTZZlQ$(a5OMa(Orajjva-ZwB8cqF19Rz0G@y@zA@(Lv zx~YrMD?B?gm?w4T_<0^ZFm zArWw~pItkb=D~r#Ci7OWG?j;asCe@=ddh_(k$U++qP}nwr$(CZQC~XvTfV8t$QBe zAKY0@dQuV9m0w0@Ur>;@)~vERx5g#eTkyd?qhaDPmQ=U9g%C*O%2<)Gj_XrOX^mEr zRQ=Ywvx*Yg$KIwPLVXxf$apWvQ}!^_{Oh-)mbay3$L*PH{WNm6734D3GuK~jbZAg z=k!hoS{I7Qp1ccz;(CG+N#GG>eC;L|;Awj^dX0J~4Pp^n(&afnh3ZIv*>6?}_KEFJ zAQYUMsSxiEt$0jI!2*AR9~f4??I8o>UtS@SZ(&^4*;9qBiaa0!N|EyYP}2%u>Pi zPzl4qb=h1_4z-ywRahK2rKP6m><0#D~5}CsJ852y!oC5fEejit_{xrg^}9@k!r$ zhA2e(--I>k>NnGP(j6Q4w{HFX3kv!D4nLCOo%-ToC|$42vOM)5p^!Fm9RFCb|lek zj!rt_K7o}6=5Qzr6r-STlq(>h60b#8StbT)W#pY@b4@nH6NqfE8X9)=cW_iTP zMKlwDbF(!5xAB?^=*jE#FrNLh-~3GO3(e3lyacZ$z&izSn^8jfBEM462#3ihdP4s zd*!p*TLQcMY-Ssv z{iWn%ucQ^zo0zi7LRz!wRF2&c)opc*B{VSup(YBtncvtX5K-F$uK*%Si-|2vzp0Qf zL|oMYUZS~eb%>)7)39-I6u{R!2pni*A&L-NU=xj}iIh6qdP*2P9os#=Ss4*}>q6 zVQQOY%UaMttImFXvj7E{g$nECJ#Gl#I%UvS@ zCq19zrqR--v4qV@BQ%PDwUg(p!PXEQgHQ7xI{Fzf{wr}o3HGhW(1HD618_T5J+_W zT&6$)KV&wPb*b5Sb|HvuGqlCaUAUHz8<5nSW8Knk-PJO-mOGQGU+pR_yzZik3oB| zD#M=PDlxfVFTl}zO#|{uVaSw=A(sy$E|MsxM2hqm9hZTi%Ma~ZL~CGzOlXBN{(hD z{y|;KL1#nN+->x-;Y5iTQT}T81~ei4o&NCPmi|>q*rw{NpL%SsSI?#@=hwirS*xn zMI(OW@KL*FzM()2kDRf+Dw$icW^EI zhO|^LX-9!8tyhQsT5BZdHDSp^h;qT9{9R~V{4{Tzq(>l7CGdA~YHf9^LwU_+*Evv%!^ z7X(_VIao2WLiM=dv<&c5P*!PFplEVu_b3KWS0Flm3DzK9$As0KfsH7mEMdj=(32E zwxU^x5}fDDr$^#J>-x!Q4jGc<?pUg%$xbJA&ecuOj=j3QT<<)5#m&Zv;nB)3}GM%_8GpUH&E zivu!H6H>$S$9u}%~hwfyeQ5O3YQa_RwkO3s5?)M@%;G9={Ap7o~8?F-MdHyWnIl z!hKE~6McmGWd%m=B-cU0{>qbmb!NP+Rq4B4?t!;Mkm;j^zG9_ ziGJ{q;psie2T!=Fi4Xb2syth8yAPW8Q9(a9S==<#qOFgSslhm;gebekF>dVK)cycA z!d$IvOHx6}l&P@FD!re{oD<4sXh+Rt;BlH5&WmSF1!#Ao7QLO>@XH2#2I&3R zX)>bCBz7&zv&hvUA>q}xI2O&K+1JkV5etguwh#(fd4uS&m2~&+MDli}ZvD072G{%@ z-UqLiGhZzn~km?7u4=sn;NgFp-O@BTc{QQoVMhHpvQ!&pH}2zF-_Qw)6cgzr@yE z!2+o(98k<^lX{qic3iuu);ji}OdV%;(MU_a4a{jcAP}hYWLv{jxNlwZ&!z9W9qCoT z@0R8t#4Tlj!8mu*xHc% z)TlyX)elv%u-(-g5)NT|xvC+QdkP_%i$jQ0s8AV{ynZCei>!a6e zi~tk+Kkz;s=I2Jl5X@c7#&?W{Lu)xgrn&0N7I)}vmD z+;Ow64G%GF6fsNlQ0)^vW?>0K1M0CfBW_dnenrPW4@+brx3 z*(DM9?jo}a+0ntI2ZvbZ3j;Knm^!?YB1tvQpoe+uh|R!#wCkf%>7b%VGI zUjxbv)CRf$8I1HNumMOPcHmr%K21ROquZguV(AX|Mx)9c@7rb*0{2~B^)izsr;$U z$42lf8_B^7m6MqOaPG$-!f?gjF4adx-ex|dImo>Hd~uG<7N5l zmA|I;4x3~t)Z@Lx|p|JJ43eB%?~P1(~S!*@WZ(a5)Mu(6z?GabR=rPVXIv5Bwt(9 zgdDg3@dQ@@nY+b)|Ili;|$;;LvFcd8fK#_aS>X4u5rpX>S^MJk4dA$25&tmaZU+T z)Xd|J*FoHn2Qv$ukW2|%c&ym!9-rpo_a>GI-t)T?n9fsO-FM+gM0R}lTdq!6kVlkr4tEhrNMZtw>*B>_)%fOC@naL< z6GNRZZBx_>hietd#Fjq0=hql|CV&23@AuEj zr8dffrMaEGzn|iJcz0!Xla>V&JAJIev7P;2&(DLbl&pNo!^O^0e7k?yik*Aw6OqVP zP4Az!)(2~SQ&)92(ynu&*wGnik$$Pm#S><}DbnU-Nq^jsFJ&yMR8=;#l0{D*wS?q_&D>MC*RE=?!FGfX0=|EI<8| zRMxK_7ptv{&_hqRj2g}A)8n?+WE6%?v-n=AmMEglaZ)Z-FV0IYBhhH>^C5S;wI?h# z)_Y9t;;D|g_~1|~ueB;zbuJ)ET=6jXiKM(SNXnbdB5vK{ z6U2KjU7zUZVc(*NjVRrRQI3@Os5URgQ+I%Jl*T{PQZ;U{sQy@NX3 zL%ez^xk~EXzXtutn0Y5^UDpi+LSA>9p3XOdasWzCV%ELu6^pc@- zEys&1Lqo94zgrS>h&9}T;virccL8yw=RvU7bV2<{Wf<1Uh8w@=~{f3mri7Pz4YcYKDkd zG~6s!>7s5Gi_pDkDN;o6flKYtK_e~DCLuxaBaZZDE>z4)VtFJf!?T&avf;-WVBW zVslzVjk8aCjU2&8JR5K8xFagk0N}zv3Y#q6AfDRhr?XYQr9vNlDB#gYae*wbFqTN2A=wd3)K+)#YHNp~uv45-lfrrvY zRj-Ug9?KL{q;Cy?J*BdNN>i5!M&OqP?Lk~C^WKIiws57RMT?Rwv!}Tx)q1?MZ5h+@ zh1#)X#y-)Nji~?dZ^ZzGhe!j%L?rr0Y<{|%!kKocIc2?4>|r(|DF8o44UKt*vIx4h zoaw5S2}pk4K^5AGtIMAj@)0N4Y9YROw+>pTXMgj%Qe!KR?3Z=c7(W@XNG{Gp>Mm z&>v%1w4Iz;9q9TW)=rziYB;byQ#xq2s-?ByJg^4&5zD>W0gJ%26Ww)jX9*V|VM{ZP z@xF;nmPBTd-+NQ_~C9_6Zz7BMuTxf?;)Kv+m^acn_lpaoHfZh%;^cJA?&EH(tl+W z8c>%h4NyXW43EPvWegM3u4RA~V<2KS{ieY+uNWQLH6y|As|eCX_dzCwk~v)H@xB>Dru6pXVgf2x+=13E|j_wm!#D| zXogDxhA68v?YhsU7J;dHb#N*bJO@Vmo@QJi|XpjMvVBU&scPwwT|_fY0-ki+#^i zmD%hC6t6fRMc&dg!OEJbZ;3qpihW5OXRpHA9*f20E53O!K8tLGq0)(WT zCwde@1clcn{J@pf3lbYscPY&j=QQV|ZWD1TLn60~<6BMmMt+ZjGNcJ(gzG{6w!8U z|1rgwcKRK%v@Q7C;7p}_q0?f3+|j13TtbgDrB00gTLCfE7K+w?Y&qFx zPbsU8jf&T;D!pHc^CM0VLk2+X-B4yB1WR2c#>gYXtwh~VOL5kauVI7R0V0YW z)Z*3iY&8nBEPAPSzfh%7*kpiA;|Rj9*08WIO>#Jp+wQ)i8mu1uqEC*Ajm>ScDq70 z8GE>xJ#5}Btps!QM>P-y@0HqGhH;6a4de9qGY4iu{HPWa%X zuRyGt#Lqqn=uISNSB6e7e|L9tvPhc^x-4uNoQKzv zzPBnEJ~g~C@rTSDL}h7-kR%#=q=5+Hswd3!vmRP%*8Z+^kg0uAlVP_^rxjNPIm)7p+s4fksq@16*7ouIC-fZozB>?&r7J+6L~~mxcu^!{Ew?ETcc-y)xhI&D(S zXPVY-E&S}sb;;Gd5tMM|Hmq#= zTUG+Wn@&=VWODd6v&r=4rx(~=A|E-!%RUhR8O0gnIYXd)WvtYZ{z%MXRrSYK4YE# z^FK5x>;F5v%gX#eHEFAEmlJ6R?93bWu}xW97#ZMw>xp6kW_7e8{38#Fn0s-pTAYW zj&GMFo4&eJK__#P1P=rdIDy1Pd{~Xdf#++gv+7|IdRuG*rIc}W!_ZRsP|2+Na zNP?M!my8VQG>tlo=Q?iXs=e0a#@b_sAgweZ2L5}%I8Y4?=}RLh?7b^LDvaY*7?NC?6#@ij@}WNe&d6l-5Ld+=?f}mFpe{ z5hZxxasTC z7X^~IYbK#~W|+6JIJOWcl#e@o;6rzw%oO+uVpNaI93Dq#-Oz4Nx!|uYOsSR<9C{a zSr#wbu&gd+gYRBnCLEQ9-o3!&=%kyt9h1n3Ypsd%&*dhaWgeizf?H%dq>lz-0!(!yn@|s}(PL{P zG*JWx+Y^#&TH-AVekY>%RxzV_tkXp2@6x>%mStxvB=)b;OL#jDU<4H=odg- zM0*eA!*yl7_kpGbdI64YA8cb8WJL_*#<;xxSC!w5POZD&-5vKn=iGu>p(g{J&46L zCe{~kR5!Hn#1i-iQUs38FfEIR6i3y3M~tFkPc4LFfmgB?Cgvnof|)ySfSsq(hkE z)w&GG{f-PGN~2gF%aTt^24%g>o{V6J&;GnrB4AWChi+U5INcn(vOafH+=HNU0W5hD zcFV#RIov)hw{{9b;+)8@1cy7foOgA8qs2@YYI7!-8N{Y@(MK_{3cmmLW(0Qcj(U z--W&As7Ra)t&u-9P<)V34x|Q2M_#>!Z^D(5y+XkCsdA!bW_Sx%YM@-lGtzsgTRPx z+s2)5^vQJn>Sjcqsn_o5bMGr}DY_6fS=25yHQd3f2zJF3S^gT*X&&<{k)S%2LD(D7 zoHai9LmhxdiKR%OjT{VOYN>c!%hrgXmd(Mq+v3|^XH9?s|E%wmq&l?jSeS=EK(v_ zkV|lu(Iu#y+>iaa_n2K~L%2RaN+Cz84U>SdfS3EkiOMWQg-z#dpYs0vHlp#y?M zuA#}))d6Y4h#MKtcu+Bv#thSAAr2D3yUlV)Uy>CaA?vh_=keEa=-PXOMdK=TA&m{ zLDPNu!~zn>gAWKtQ_nF8Vno}PI(xOx}-%gT6UVl+M-(Z=}Q^2e9;{+%osOUhG;Bd zk<%(|m5dPtr@r)}D(2%X#6o=qOznZheiqnH1Q?qG5-~MZgWF!vACB5Ok-jCIke?w z4I^S=hq#RBQVRSH>LEl863-Q?&tg;>^~-L;#;7*gljb-_8oiZ6!00n6f9r9(by_ zpPm*Xsg|jdEpE8GJhS7@{ipaG^3&*PjSEqYbW2MLa%Mzh_qUoxaY1+F8UZz(2UEo) zr=-*22+fyb{0{q5qJ9qRFD>!98(mee~JkMe#WVLVQOUFzRfQiQyn0NI9BcaK^C=o5^ro1xB(w;-{EG%=UKsiIjE2qc8cR-> z4uOW?K0cj_k1d++-tLt&_xDy}<@LB;c)ybdZT0Y-bk*G_ zn9ca}cv&}q^Y_1yzm?%=q2B6W%vR#6NTkxAAUZGJ7M?WLeVXHnh9jLC1(&M^nO z+p)|xzZ>wuS}@_n#Pr?FFYk6RV~j#CNf9~{{-1Bp<-dYPqb7eaIy9V0%jiIbd8A( z7tyP)Qrz_R~Q_SJH^?7yQL9nC_&LdYUuE%_2!f-Dv=}oh(sS4!;I2a6f?}_Fq6%k%DASYaR#o2R!vTo;mQ^YFgzqDD&jpuCD#g-0tujCY%w0x1zQv z*_bISGPI{6N^K&mSK0ZzZ06H>%K9v4QFZi-5{*2?SJgrs@spWt?FY~IA4@yaTI zq8zi#o&@ACvSdlNEBPHDYYA_=481uSfMx0!b0)=cz{jIbV8)lxE zGh2@W+QNVZ2eNYmd#3&&jVMolA|4nke~xbUn!1!2t6fAnfF?Q0=D(Z1?>9%e*~n zl6&#ZKc9mFYlk=tV};S9@(&py?cE@F-SwUxFMDJPe{DEq%v@gqZ2t5p|#?J0t1g@ z*D%Q8>Qb7kNyXwiaoSQii8j1v6;^bZY#GX35&wvmgeSb3VI&cgArbU<a6=lOu4% z%wrN#8cFkMLDR*d|>|65+k1o2z}8C_%LncMTaKJXNZ+H=B@_aLf|FSD%C541Bz z9JMtX)IfBmFi~kyPSD1GO=(UfXn=VQs+QFO8qj8OtS*5RW^5kYS*Z|B=YS<#eJ!c z>lK{pPPE-op=lk1|)wpWXVGujtL(+7TCJv4*Up;EGlhT6f+szvynM%5PtOg{f+=OWIa zjUR4?0wSfNFoWNDakO_sz7QZlAIdVzVVNy6e!FFftEW_sR;ojsZ*k2{zZuwkzZ; zPt&P*VU1k(36PDd?#5BGA+nD0zmq8hB4vCyTA(3rfEcMSQIstry19oJxyGi>zIx zpVbOdpPEXE=o-aRxMI+kMWE50!Vj4u#ew6oR)iop2{%mjx=Z zdoWlS35CPz>^8u>!8x}93BydW%pwy*4aCo*kZ0T=GCoJQEkYPm0vy5e3S=j zuZC51FNAs#DH4_wyrOUof-II+hR@ogb(m7jR#|hEHaFZv z%ZD-Mr65n%43)4CLSD8*(*(o^%%veWR(be21@Fs>|yAF)IZy#>HyY2x3SpeBp| z-LVftJ4Uozpxz<3x}G1tY?aP6Yr^PhZWH>GQ~M7+<(P4|_(XR{^K7RNFPwnp*sN z5VneVjdx(Sih#B8sW&}Nr!q6rPljTlDr4qijk+U+^-t?^I){b-lR9ut^==bBx%sP{ zG^|_VS3DjclV9cVAzV5qMNSzmE@AN+4>_)cQ&-Rm6kT+jfH_eSUmwN}gQ-)Q1jzDe ztDs0BR(Q-KxX69hhlerM>sLltepIi3UiC1(CTKV5pc6LES)bxx^n2#nwYM!;b7F zTn9jw5FJ{~Qk;B&zZ%R^ELzGY_E@DToyIM&Oo3G^{bKJ-RgLySxAR*Zh=Erm!%>V zV6R^7tD>m?4k0V8#YnDp8JZGH=7)x5#RYdXYF%CEZN;}|6F~gV2~^iW5z61Ozzb% ziNPP*ZWV8;S3p{Sq53)SZu zL*==w}0q&by-Br-t=4%M?DV5&z(|K>GO;Z=W>li#!V>}d)QID6iG_jP^ZO!g4K2X#}Uz##-W z)EQLAi6aO^DFyBwN?8C!rJ>7eLQ&#*7gZK}9Z2|>t7qG>XLk;%EY=Z*c)ipnlQqVv z)o5x&{A7VXocwFjVb^Uoy*OpoV;}kYv~L!_U8Ku0EB+tz%;8D()h2V(0Sen8eN#> zxWl=s*eP}oDtJvW*eX^f6wcVygEUaVX5pa1Am%92TD( z#Iy}Q%Vse-3|U!Mrg0YQN83L|Eh9+tjGzapA!W~<>vFz&(GKDoXpm?0_Zu5imqJ~99ZHgV{ z#^yW+d%5XFn0pNTW=b*mY>>3Y+Is8MP}q@V7Er=Pm4=}mung4@&I+9qOgS3i%v(| z^tuQNlyhC)l7^fQB^7 zDQ#e{Ct;d1X-Z6%z^i9Ro!+8m?41=Re2G2gkgPGl3Fa*2qURG%Rq_yXIU5nNy_9 zs5cYAY^fN#RAY&8+OPO^az|W|vBW12sL5z{cfXlkl6k8O+bFI(PAeK3G z+%n2GTb#H~!tbX!B=iO@aS z{&N_a-~|>sdy9@r!l%R(mUgJ?M>2NxY%m7)g!8kk*3bFF9yWk29Cn?;n2}V;}eda z{_k)4><#d4-P~9yIrrd3fPakCoagW^p_9JS#}W0JGgUPQ3BGUg7M?#^tofm#Rvk5z z8WkMY#)bsuuaCxvBHQO2zuzam%wcl<|8j!Z{=ZLTnb?{CrxT>pWk=G5IP*q5x}k@W zPNwNYX9Ad4-30~{p1mQ5v!3|J|3c~ZCeyrb(?nZ5W~V;1FjjbJYsdREi z+w=GSSbn#=+cY@<=Kl4NPY;HFp51|IpGxW;y-6v2@OtFm?%!=;>kK?S{uBwnZ2ez zLEbIB_mQmL^v-*El6rmB=l30ZaYp>95zj6wghf?@5f3=Lyt%g9q2&z^X_}4Dqe3 z-CpySD#xlXY{q&C*5RbS4dtFXYz!)nVjbU<~NDyv!d_^G1=F9|Fu6Dp6pYl?d z=+IBC1xiaopROnW5;Z@?-gPiDT5YVMsTk3oP|Gx7(>=Ilq!Q4JCz27BR4bQtDk{}tufO4rf#pNhIl96tA%;sj3t4@Aj6e(@!bd z%k!jfVikL!FL-kBz^|&zSr74BWOHg@oR*<@{*O}UFD-)wsulydj9M>-8b-Rw~QJUp5Z3Pac zCuU@@nO?-NBX7HZJ~al>@RO3@-_|_3LF{DLJcXIxLGe!YI%h(P%AoklXJ7tRoWKg zZKX?l#6o!L#s0XnuC@xybJeIM2z}Ge#SZ@{Z-0RnLWXnm*%gVx80c}x?C+&D-xUs zr_UDYgmr?;1U5*ogDbpmP)KYSOg~Z(4-_gB>4amDL@NCeV#U41ETxGHi2xQ#@g)6 z0G{)eb+EH=h?1Jeulbc>55pM;I(^1@&>!zDY}|7(f=({PJ+o7WKzN6a4?;D{!D zCCp*p1O!N>77OApD|sx5rY_{41h-%X58^MNbGKayZm2)dH1srd|4dfpy*%WhX63%j zyL(LW97QbJ3Q8vQ;#3QFQ@v6^d;b+*j~;a!RJ;6BK%{Z)!7Hl;nqwdY*ly65BOjb z9wG2rfsi9yJ{{n6_q!!dHr_7ahMuA!OXbJfRi_PfbM1ZFIi0L%wo&mDjEQVva0 zQ4gu?&K)7ZLSIW9MT+pP6eGagl-6UzBjMu!TK@v5rscP=e3aF;SE>uL}3(gpTllYWELl0Ivrm6YQ zV!f)eCG@4k*W1+cvBHG6UUzZkmG#c=m)4SWRpig;7%c?_@4@zAN-VnY}H=;<~ zu~BcDHzgd!^TRvRS(@9>S_dYO%X)@#X%syPF(iLO>s#lGE0HXeBw=t!5PA=qU{xh zOaX^LsmnrRQz}mJIY|!nUtp~gSc{61=3!Xsa7kvV)Py&qtl%9j5RIGmJ z-zxQZD^t**2%{yYnEZOIN(-i^{G(5J_HK-Tk87wA*wf_!HmtWF+Bt|h2BnJPlI#Z z61OPGmIgh4eCxZirTd6g#MgBrXOH3#xp4Eg@^=~DcfV90ullLM-Jl2l zRPlPHNKosG*C>fIs^S@53I|CH_5(tWdm|qRmZB0^}0Uiv9fMwv zJz7!2{Sbg8r|47j-jb7c>zVRDp#0MNg)GZMCYHBS}iudz~PhanfOT3tk!oC9L0l zd(tHBq#o5=bJtxOe?|pbse1g_Pq4m&b^MPCh}k>)JZ_tnHBAG~01n47ie#MVu0X zG6hC2!n!rb*)6Mx>HctyITJegcz*4Br=_Yj&tP0J)Bw5R7(kivat}1C(yywbIOM zpedv3<>Q^USK2L+ieqpT@<35qwQgDU+eP*lLxBPoY_4&z4^!4w7o?TPd;|nbG+ntXltb_Pk_@@+TxHpea^?)dOz zqKD>Mtv@vVZeE4S?9YJ2U%lvainyCip!wmnb{mN~c! zV_Ak!diSp48A1!-S4Pe27^5o-Ud>CGiW=ti>PlZN-6Mt&uIrqyD{9IlHT+P)lQH;n zmr8!#igB|M7fa|uP5SZ0RYM--tQvE)Ll3gO(3d5@*hvf#c(>5cKk+Nb{oSuSZcn) zlea9^%QzEj3#2a5-goG(_u|R_CT&HgA_USNFFYd zh#U;(Gc4}_vq1HX7SZCHOL6Pd2=?6X_6%5aG zsLHP^mzhe?9~@F?hlwS*Ts{)S8L_mtP_d@CP_feVh>ky6uH3p0pHkJuvo2t^JMT zyNv3^W?h#ZA8ZE*LMX{Q@tt89q{x*nExJC z>r>)a>qu&go1)3_t4BGydTHsZYHmVmkL{+eNp_%DIn(Fb*UK~BKefP*&kX8YfzPKB2fzV0~{(e+o zUtWn^zhjJE{|5}UwL}ZP;w?rlf)1avGQ(0*D z_?lxnT0RXvK2+ESHkIth3*z0{Z1X{ESrL(>#38U%3GL0Yol?ue=aBB>I^XO(eaDr zr~nki7W5i%+}Wp+o0Bdo5-*d9Zt9v2w@~d#=@-N^VPY@?7sx0F&J*)vN-9RsV8ens z_;Ga^Ybn^6(TI*jf{3fuZY}S3Kt4kCyYfjbB3oHuoa3nWw9GWQM3|U*db6zig! zyWMqqgC)E9SAQfSSNx(QLrJ!Js=HX#rK>;^rlwQ%(uJ3S0R;4d_o?w81?27PF2Y zKEoDES4rTD!=na=9dIZ8mIaI!9NOqA#svWGIXWjcxWdxORo4jGIF+4@_Dt*Tbj^8 zN%PoZVoUNvSAyttb-;)>qyMvfzi&Z7M?ip5K6pO2x=ZxsvdB)Id*G~r}{{fvRS>> zgZ}6Rtp&}z(&iuDi&p+bMcq&E(T%49ng+8n>W_Lf|HNAQbh>@;VrX|EP=D+~~6 zSri7;TPL}mOgwYr6=8On{=kFIB#2b2{r?_L{+wO!IsOM4u&Grf4KhNXr#*>PtpN9 zd#4L%#A@FU6L1OAKVcRICNmfM-F#A{>RT+S#?P#Sz5H*0|yp-gBw{Qk%&(zWy}Bj zWXS*fnY-if@Aq>n-{<)~P6iHhdvw*;2mA~F1&Fj4G4|Dzy*xL*Zcc+gzw+D%55Ed zvUh(XF7yFVQ~G|Aum3ly9C}q*#E07y7JcdM8a=V3!jiCW&(+xJ}t6q z>E+MKMS_gfR{S%!dT2j3?!d{E2q>{Ke&_#rkfz}_s@w26!QZXXZ6cx{(Bh6KQ{Md zZmvj&DYOcrRfYMvF7 zxmy*QATz2*l#tIkCAUftrcT?1N1HY?+1+l#QixZ9#Wpq0-FEV9vR>5L;KzRg9c1B zaJcUNjv&X{IPt~_qAfk;j6t*f-&A2nwWRX3c#ap^{||>- zYj1q2(a|WPny1vD-*9S){n1He9BKleFi4V;MelYqU4ezqWwrVPX^dO>TZh=^M7r+6 zbGOhlRmZ@IaOK#q^KCfsqNEg8;F=~|0clNQi-XKaat#|1U+Zp zKP;2XcK0repip13nb?oU330qfY_hDnrrp|f)|-^XUW!keu}n zLssEdV6ZmmnwR-FFAM7!O&&;EYo8~F|mD?MpS#CXdb zZlmWonRn~uDJ=2Q8hl&?jjE9DB_J) zL?XKkT*n|YD=Rl) zjfELN%qa}kFUEw2c7ZZH-@yv+#g-qxu+(gT6Mi4KFj|v*nZu&~fdFscy6f)id69n zJ2M#1th}dre8T@Emyby)@Y%GcWsnyS_1FIcr&10p;j4c-&vC6OOXr*pPr=PHtPy zUw9&W{hV;;nl#M>qHv5^WXk*W1|4bP@TiMM57IzM(2!t#n*Hd7bY+ z-mXXrC-4bGQ%l}#(g7g!)kOfWg9LS!TLOnvsOT()V0n&3^K5^&O(bZjTGK0%MFgX z7+^gsE~(_Mt;u2ggg}7HHD~ONIA@;3P2JBf?<@4>1P_6Py)M=dn%D+X8K>3&e z((tVPVXRr2SMYV{O*d-&rF-hd<=_hXV{10dogER{HJo1C^Wk>}WyWK6xvEEE2PIrW!#|x{_p7q?eWJ%gfjdq7B& zA2WfVoWdaN380iiY4=Wyk|7YiEt*zh^O|G^g6w&^8x(nicG~zlgj-0>^)qN%F?~^ zSKyDy>x6Mn^bjZ>0pKnvTsEj0f4>T1Q@znx-a0LrQOL=L%^~Bt(_q%4S{(xTW_Q8 zdb=}COx;esrgqbt82^&*(sw-ji6;)B=@#ij<{UXuH#Qp8E3-#D-D{RC5GCuxjQ ztQIYEKnf<-q}F;ow;XAj^JO=Vky|l4c=GNHTfl^w+se)5pkB@YDInQE-?UWOtuU z|9++(fB)E-PBVfl!kM0&K(nD7KcG9_Jw1l~b#N)<8-!B7(=*ZDzkLt|9n7VWIYY zhgTOk&C2JWJTGw7hdzY3lzFixG92^jj1Wrx54rqZS_2TZdZmKODTM6gN zTCf;jD|n4|9< zG)FxTXK}IWiR22%PU_?B7HUa7ZLH-^xZ-O_q;fEO^O;v#v4wAouWjbj>&8dT>o!)I zDD{}52+wQeqOko@GgtmoyW#D&kZV@~9i!TQ!zUIUXMElmDXc7~um>8b-5km*f>*^? zHyVzCeGi4{?3?h+1O9K#D2e8%n3^(gEn)S&3Y}(w2&I z0n7(wY?LB>Buw1k#+9-3sX(maP*@OZ0amS{Fc?%|v5c@oB@R(|QX^Sa__xvLdfcLr zpTRr4P_(5PJL+Sp#F-QIzRYwwlG&IcWb}e83%Ufqd!4#kG*(*iNRo?*vm}ysvkDR4 zza)~^qMb0o2_6CD(A^aPVkCrZ8+9|eU60)+;A*1TWVj_Lxke!vKlsQ5w;-2*-tt zNM1%XyR((tpvD}LT5M)h&X*^8?dIm3Zjg4PnMGvGiTd#~gji_Dg&bV5BV(KiH+X|w zXtEH|Cmt(iIvZJd=TpcHgy-Q*7Y|POkf7Sl!w@m|72ITCYx&2rW(%dLulD zs+P{idbFF&E^a8& zIO%m0>X0>9%%Cy{@aV9ZB~f5|{uBv}qR7m(Sj>!kZX6fVaJ^SDb{R|sDH6uYcrFPH zx6v#7vd6p6Z98akmqCk6qlS>L{Y-Bdiisc;7|m#Y6v(t90_~Zm*!*a;#ACLA#jG1 z0D7>tF>xli)|zQR-I*c_vJA;O#}@flW0Z=!1QLI6&=yslewBTxj=YJjfZ=BUyLi z@8&%KeTRwNq5^(Mp~ctuv>LwzTmj@Pbo4gpkXG3N%iEzPUo##L-3o^j>?qQ;Fa&;@jWCOgqP)+~4MHpd9OF^|d zwP1uj6jgkTQJ0CK@J6DZf1*x9*%UyZrom7#!EY{P^K(CrV?7aP_7F-gUDW9I2& zlW32m9dA>93_}PLU!6zB6IC+yFztp1)V=p=R;^XJ>g!GC}PPTLbGsTe7Wqh>jb?=&{ia@gBg5pX2{oqHz++9MbGlDYXT#Ex zwiQ|xh@sVOl^w^tD#R@)o9j&~AaDo9(N?!V9Vd@O-H;2^LKVlu0_aY-=}i&MxL1;2D2Tn+vdW=8ZVe7k4Jd%QbrOx zR4vUDu5Lpwl%}PnHBPw-;;gAFQ~{~nqSG41pau7^;>-b&c7jV6(pk}lvmv%qw4a^< zYfO7Swb^W1Chchqr<%dA_HuU0M+mce;;<4;s(!20u#KOL@k(ZR z4O5~c2zO^rF-z+VxF7yM#@-=H6JUwfEvw76?JnE4ZQHhO+qV6e%V%vt#|I5+{w9X zol%Z*m=XJnohx<{q!w?Qzi2cNEksp(70jH)gyY|>)bOPFLJ;{dhz~h6#UE?Mie|| zB-~X_xJ^2w8=BA8)>H}h^G-1HdFU;ogTTs~V5oPUJCY3JP=9(o!79-V{IUsw(9K z3Z6>rl3bz?T@JMfgy!9Bi#TkmS-p;jF9;V(8=cbfsn1bo|SP^(Im9Mqomvvc5LGs zeE%<}-xwX*+v?o+uV}5J^ztd|q-r}FAo+)zOY<38U!z(cxtReG6_+egX3ntp1tdk{ zjLL7#Z76SXTDQQICuU=9qJAB)rrc7>byqm9tX*bJmm{U~86%WdnyYWL)z>pm6 zF6sK>iNn6EJKiyd*YYiCET6#^10*K*-i+T^M8(U|LG)39<=ta?M763wL0W*)lQBz zrC>BpWSmY{M_o_W#UWd1#m#BTwV4LPuwTO?H=QSEXj_HDO0MWJcw@wR=Nr7>^Q+KJt*obbO;u*d^U)cR7AggH;ezD-7!j!>t94fWvw^I>F z?I60rrJrvw!(Scl^U}OizvpzUohrseeJ~Ik^HcVfl2u^qX&1xqjlWMleDsuiZdhG zm}+BSLgK~pg5FB97RUHWnYoKZn|^4Zc%9ZfM6aoNMapjaYyrn;UPAY2f>(sli# z-QAaqpAAFL=Cb#ooIize@mLCZ*%;wdn^B!=G3NB?HBVCyPZthB;mOe_!v&}1->xi! z>3A*KE{X}LQEd(4^LFb6UP&xhA4}}6U|y4emd{MPPVvnVLN32C6Ro>iIsu?;HQ~ z>dSPHgNf!%CUh{8kFdLX)T2CJ(QE>3YvR z&BZ8xn16u>gg|fE?tr|!>za9=CiQqIfvpal@VS(o*c(KsgtJFdFnJv*L*WH6wfl+F83CHFb zI!A((MUVrcdVg-)dN4DPQ(4cQ6%c(xb@|zo3jdk7>Ex*2GKJq zmL&gi-a>JjP)k9DkC?OXrVYAcvVEp}Mq!MIn!kpVUK#ZAh6tGX|QV@VMCnAb4~)~ z!&~H{3i*~SZ?N-|C{B@p&(s1f#Kp^45DNNFQv%yCWxuZ=ndK;A{A{(e##TuRhWY1S z6!KwHHH-=1{#GDBpqx#sQJR9A>@nw^uskUxJt>hW~umcfkZfHqL#2(znK%L}CSG zPc6~QfpH-#H3X&Mpdvws4c6;<9hQg}+WcDTa=jBY+U8zvC<(*LL;D?z8qd6fzGfO z%j9x(Zw5j(SDd}(i8kjlJbfV6C>%mBg znJ82jrprkFMIf1-WgdNhiW{K=MH{7|zNY+M}Bz#snN8E>B*NNElmX|1C&{WC^ow zmxcW6?Vc*h;;prl{;~%6M!xUn|DhCV!nk;84fmw}{(Ethq^%aw{R%)g5U^OdHb z=D^xr*?P7p-uDaT)wcK0w)4JINFSH!imvQmKJmc-!t3q-)EItVEMBEi}Ma;<5sCy zj12FS9!?CBB}5Wd#cxNCg-mW^g7{8?^Gjz_!8pA{m=d=c@5f346UWt_fS|Vd&whex zK(LTbj!w%9U6~et7!{*q;kgYL91W#}c{6*?m-GtD4dxSDfa4ZBOPT_WW?Op#Lp0^U zH3;?7hgb@WhG!kTr#;=A&o1K3yD*mJetLVubAc|M}vS-(rOP5)LM_hX?_S%6{A z;%{b<-4H6h&unIrDH=SypH>q5O2Z7A*St=(U5jjRGs~L>ZG_XUrkOCXPm3nHI*MYL zk8F44iunf3?Q)8G%{ZVmhzgj>W%{>_tN?k~vYj@3i8b_TDG?_7I-4CvwdCcW0-vm zC0~TJT8zig7J-S1JvutP%9e{Af{HXMeWAHU?HFdchRKXSVxCrhA}WFy(9-rYR%52 z!=QVoE8BE*EI1BI5;CT4r;Ys1sG5JlkJP4op0zwE3l}lf7-fE%~*15lTb)ZpnG! zEu7r4-bY30nj*@kwO;fNzf3b#RIQ>TLCi<;JBlzbr8!cIT-9dnE)45E5P~Bm9i%n%SJM5FP1lH)3IMi?t^S< zobLLFNe>3iztTpZi5Qf(lLzuHN9kc|lU!*Vn+JP8F98Q@W&JpGt_8R@S1D)V62m! zivr9OZh#hNd+rK8e6N_ljk^S>aE7_KWIhU(y_P~u?Q7uvvd1k2CPob2J9PF^O(F z|Agp91mZG*2yz&iCU-wv@RTcRxF_sb<7Jwp9!e?iX-{BVjRe5?#5!c!(PJ#=C0 zKR(3y%|;8@9G{Jg5SBcU>Ei-{h{Ldkg@g0;5z^CP?YY4nC|H5wOdMy(kM_ zN7njEbT+|LvSVr2(r_vsZlt4jLAwt9=0mD&=xx8BV(2y&Af-s){&3Xh&Rv&F{Iox z%Pa0yt%BZu=iu*GMkLG9wrs#b31lAtaw-C&(PWyuOjUpoJI82T}_wJ_zJ>?Rva zZ&@gmB|>Ka&mQc(6r9oYxzsf)m7t))Evo>pN(B%^9EM2y!6iU5y|%4=xe5y|jCH zEt5V7yS!fxVZ8w9U_B^kX;JQoLitgQ^Hb{RNa95&iIP5*T{4s#X0xZ>YVNWALTOLma9zVDQ&rnW}2NTn@Obh;u4T!eDIkmaB z49wMRzuD&7%isudu+FFc!yOZ?+vkpj8;1cYP|2;ovAqHH9SclUcnFHWf!2Jf&{U zJ%ee(H)hLk33%IobIbd#+0iIs&zUE^dsxkQhXykL62)uV@b%~d8mogxBA>YQm6ipGnN1G1e5{Hh%kNSRI5#mHgQ}qe|CB< zro8jxoZQ~lkooh^F5TtJx)rgvaUpE!c^qd%VAMP(eCba6rqjka|C>2J^9D^UqZ zxe=8L{&gAqjCdt2RsCu|&ENf8T{ID1!MaN^f8r=5!C|x(aDJtIEnj<4pGsx6d6F zCA2VUAp9=uM%~_DHil~>mo&NuP>=Jj4eig@BV8y);DKXSh~A=!s+&^u(fai!T@{M zgq?mG*3tRfSZjOSst}T}zuvD?el6RZiXhaitfn1&t0>^wP#U+H*r7{iJ3od*>yA zETB69PF}fOqv5CYnH;5<0N&U^jXeaK)WG+&85e@km|;nhuvCTi(!-u))y{9;PR-yJ2Bx_q?v-~ zxP^W%r*klFa&wNOPJ_6a<^5e!{7~89RyfTq z8^+B4X9A5af1(-cn3=4an!<2%xIMoDGdsIaiW5kGml;OqL1>@QIfKI@SOTdu28h-W z-Lyue(v@@MPqfQA5=GuyrW+ugqFRBq^Cb-;5{P>}q{=qC#@E~?b2{Zsgf<+LjaGX z^{FAd1G z(cI2#VXJGpN55p!=WHQqoO_7d~1| zE1(6_ntiCFI;8234Kn5ky#!gPl40C!M27v)1FrSu_7c zV6wIg7dK2*SrH!}haW48pZ(@lDk|>c-ZblLr2@Kf)ssLS5w#|1kZg^0JE>ud5h zM>azxEP(+^NNfq9pEiI&8%u zOu1Q85?sa!AWd#uGfUi%z2 zEj-@TyqZkWrtQlxaCN-z#i(8w5OTcB>ChhCT6EDOuRb1_UEvs2K?lIe#Q>3T8oM zb=z!ym;hGTR|Fw(b%iYLgD(Z4;3WaNQdBxO^mKp*ymUdRrMgoL9&}Z>DpH8WC@p_ z95qRi-ra>aL1XZ0mF~H-rs)7q5OsdZa(@C25R z7;xK-PDMoyG6sj#%bzdtGtF9J09JN#IA!R#>t*B=TOr#!_@ zH#dSfXsV^Ory~Nwbbc_YzJuTWL|>8?yu{gqLSJ4~tGxl=d=ybgBAVuQOV;Q;uaG zYQ)qsJ3fC~PlO8fd{0)03kH|J^X&WDfVxGwCqlvIO!npar{rfP#&(ThMnZe}GP- zuc0D{;tHO*}r;Y6pBTT{nE0k95%r|yb?N+Fp83*K!Ti z4iVVuI8WtN&8K1&q1Q-K>~2_0#1>c^{0ol21SQm2E3{_haB&Z|oFpkU)MNW@l zYQ9%^6hSDE1lg*mz-T|hgYozXBEBV0{T5Q#0NjwUWcZt;>Xlqe-*(fBT=n~b@4ZV z*v!1E)bgRicp%{37O&Tb2o}5nf4zrQN~&QKvDW+PM3neh{5>7{7t*2J=5OoqdFP zXG|P_D&BeEslS|2^6Jbg?|MEW3xay0lA7#M%p1qL^gLXRuQl-4FdBL>j1l~L=m!8ezl8C@(FymSE9yp;5~LHj+{& zIA;-=rGjD4q?L)9mlZIR?54e+^t|ZhP1HLvTRXfT*(hVDtA^%dv4k&gS-+cVwN0Y@Cu%yX|Pa3@)-P67J9i|yY;kSG}h zxx0g>g-2s((r#K*zu%vIe{f1=vHzk;+5hKV>x@h+|4owubfn{PJK=gi)t~0Y39wv zw>%)%+IWG3aDxo&^z>gEIFEnzg;i7DoIu6OyGmtyrYOmR>hU7=G%X_o~~~Ro*q2? z-|tq>Km(CzxH60qorg;WOT~}*28G^2I=khNz)3kxndUuaDUjN7? znWtIj>&m=oU%KeHE2Jd1>4cJ~2AR!wP*eqRPcWad!aP62#2zQ}882Ah*Q_J+{T z=tM&h-cNST5?P&8OU5lwy>hva^=n&2m^=xS{~vRr8^vI2H9ubEYC z%Ftr=x{BDwh3W!F)8uqyK-WjOSG(^zf4a5_#kEh9L3^tq**)Kn*gsQ**Fmoti>Wl zg;C*XjjI_EE%JzyAtvNp8{WmyP;Na4%u2h!UX3c<3(dAfb)cqi+(r3Ej z!xD?byP!jVz#5l&REa68fN(*O>y7k@{fzhSt3UD}uHEJ>5DjF?Y%o6Hw{Kq8xCL5vnBp?|Mwp?_(VL%hs z3700*!azPNaWRaR47rVA^zt2?a-!x?B_w1e0bQjwA+UdZekFfZKJN5kgl8wRl}y z>@CXL4?^nj@8zjzmN==nvwlSQ3d7l7SDE z6+*s7s>!~ss^<;k59O|b*-_Vumhm%Mm-axYco?AWhRwb^lTXawv0?qpbcza&Ktq zvn8@?7SzOupedoCZ+7LCw6-+OCAcwy9enk-Df%+Xdu3JU|wC4RbQUuOb(llH@gC zf<>|Fm!1{IQ|={D3Ckx)oN)%E^c7nB5xWX)7;!z5KNtCG}9m-(G#o5aRT*M~&V08& zk_KoV(-|=5<&;aq&x=r#3yLe9iHP3^ko_swveJhS^-H1&Pvurr%B3nMd~7q@7PmohWnD=X(RT*%FUHJ602B5G-PslJQ@kc7{ zq2f(Su=n=ZQaB(zyr>tKJ+d?L+7lQ@appZ4l)vO4%;ui8rQpK;~R8_eMTdfUfSBq}bx z1*62l+0rUQ;^}j?QmJ8aa2eyn*v|UH1j*ZA_^Qf0aurwsYv zhK-BZ(+;_sdAEbG2WUAbb49q8{I_#LeZH50)WYtX?S9Uv(PBr5(pD)tfOh^K1OG}wkq%m*|Xdr>smmwV+BijTX;|_ z>f39_mh zrpK8sqmGp)jeR2++sTqaTLR7pe@D4kx}z_G06IajMsR6l_8At2=rfCtR8!T+&f& zFfcj7tflU7))po&ydV88Z(@sk&dWKP8Ndgeog@u7yf7zM?9ZQ_TqwhbN2#h=^;u~D z&<#~ce!jM7+0g{sS_mwSb1w;3%9XA3k0-;hIOT5c5_n}>pn|8?&a{_1rg*L#crbd z8km)Cxyk3tI_O(rTaS_kp7_!0H**V^o!3gtk;j<11i{mbO0giEZ6<8+wuKnMlyS zdO0QfyP1JN)~1@axoB*jJ@&FQS-*l)Y_FD>pWodujtva+xppEU8&h{%ci_cDJ%NV z{rRREDoHX)ToPd%f(v-rAD|MD;^hOU^vL@3%C_859gC}=JH_i6*An1j0V8msv0kr z@7&rbxlJ9JfEjR*3e!*Ra_vKHP3;RNj8vYa^#+hYgQiyaZeZbsJKx0J&o%U!u%4 zqfAe9Do=WVBTcGat-6--@;EQH|Q-6 zlFu;A*f|4yRBQ^|Hv>LpZwt2i-()Z33i?@07*aQ&lUNa)X9c^vXOkLVpvm~DBPiWz zsgIa;m)-J{+aUg=V!hu!NV9j*DN z%P-8kS7T26=LKz25KhLNW+vy)AJlN42~Ta{)l&?#0&U5D2|mga|AD~R8%vj-Ayx2 z;O;-WOOyRZ(i_R!16Vxnw+TWYsOcg}Sd4#P9zSYr)L^3FFm5JtOEcy8HPchDk?%VN zvu|XB@Y80qkL9LjuKAQPs(G9=?el->kMT}Sm+KVr7!O<<5-zS7dS9=kcR%(G729};J;j^&N8@25x zhKZf-Q|ee48zY=l=j4?YI2~rWt=!!V<%;RBT%%>u)g3srYAf?XQ&Wq&t1lTpg7zgA&s9qHX~FCq8q=Vdu=O^-giF^@Q0c zqX;bNsVymw=838222T?auyJGL|vUS2USRMv#X6uEaNfWK|n4xLH({CQuG&<{+a~ z>9-2ysK#y;(!b!{D;-pY5P3u*@+UOy(gbP@5RjpFv?e1Xvix&S0EVX?7ba({Gdd) z+E23IIngQXetfQE_voZY|>)8rBe zKkz+8>-a8yw5=%+drwkLQsIhc5tt7W@AI(5ybE615IfA> zj28Z#&x95u=bAE-_9nbMdg2ylyO?#z#*;y~2?X%zyb!7oCz)^oNi9dzc+bVIRG9ro>(vu)7-|79M+1jXwGo_9_ zKOrDkWjQ0WF-Xn0nb>}7=g+;+8-sbbN|~J+=v@kN8|G!1Pkj|}o3e(V286p1JyQ@PHiO)ckH z1_BB4@C771AD@cq|Gbry&s~0&_w{LdM^baSoILy-qTFG8fM7HzCO%@+AUPezy9j? z0xaNGS8s2BQK2|9<-47b(MXWH?~?v+eNmI&WWj^5-MV4RqV1UC@~5PygnX|Jxd&2 z9I3rv+6tfxh?O;tiwUTnoRwPyCc_k>Pa5d7z)b&MMYIi3d6kM@B%c()7_PWpLpRc zpC43ck^XR6Vy_jdvaI=Ae5@yF8XKmR8qI}IJ=9fTht0QFQEC@UPSquP&-ShpA|ju z{eX876Qb2rx|YN(hL?%Q*{BzK~u zDk&9)OVBX=iao%qAO0Dy2pCrfEv+U?_*i10y&);%5f} zZ}zh)SjUTvIdI2e#S-5+iieHzx1o2hNgQcVd0)o!OO#U>Fdpw&cU5&A?DjCLn4 z-aQIsks1BOq)Ulx)4|1FRk4)c&Hq|dB%e)^0f#b^^jg0dn=Id5$$0Wd9(wC6T0gEbzin=;$p=QhjwnZw0gvtsw5mu9ih|QxlzsK>d4ygH}b#836={3GSN)3-vgoJmpte}LU7l-h zqn6?{s0zIlKyHaiypB6~F<@nMtA23?Qfq!iyuPqIP3XbO0tIKVVTRGsv~FG)LigLP zwkE^r@Cv@l-P>AZ$kkeQ6okMzCPZy++}pkAOYpZ*%f!OY}g)cz9fc033CA8GPAs z!e>>&{;GRh^T8kgaDtMw8Pv!(;9W$5#_DNC_0KAXZo>q_tz|dzyo^3-XqO)lmL^ zZqcltmr!6PG1|QpkGowPNO0cHis&0B6x;J0@;JvCU;YcbalpTm+LWr#&N+z$Xgh| zU-CIN=%nvWKZe&y8?ZVYcS+B)E@FUwv=H2F%(^B9hM6Z)J^KR2vfaU6_u|kfy@V zodAT!1604MucAdWSn^9b0lKXY0z>b zKZ2@N)=9H={*rQP#2r(qTfZuFSS99g;y6i@9_3$SDGLP$HV$w+d*Pzm&!biZr$S+ z^{0)5bHOP!ST?Qe>x%8S%{J=~ZWY;{l{<^9 zP%{4ghC70s@q${F+ByS&TcGP_3sz!NB%jyDF*QQoE^T()RgvE>m~QjzL zdGy5DhJi&t;ub-d*Miip&F*lIwI%21OB#p?dX!FEU#T2p^0+GG%U7R(CQ#r0_D znQNtbR+l9=K-wwh%RTzMG@pCb>=B=bs(9Z7QJ$!v2pweX=a3Mb9BPB{BPYqLk*?X zHQOm%3fQJ&-r4r)5biL^94E{~6t$Wro*hk#{ohsSjcoJ1c*1QRJ8&m_$2x{EgDbL ziEZx%vggd?yefoXwe{nuWx4irlrufvT0uI;R~OJUj{Y~RMBUtr6r-@q9XI3DMMl&= z?uBJKOPh6+*J03yGM$4n>py2L&I&>{f{q2*B&=?=Z0^=W_VJU7jRWQS+yLB8=W;Ew zuG;thk#x4_uoKu0I&EBYSUFsESmE>vroN%SZ_mux9(65%u!QL!eTR3&NjLIGmBc1j zeiWDiE~-RXLhRoLgWMF#GwTa+ZTKBVObkUq77~&cTrz)TQX*YMCpqwd^V&Fv0jT z{kjK(R*-MwhV4oH-mEVR#j!1Ox54Ts3+}Nx+==bP+h0N$v(|yAS!ve^#t>d@q? z;rI_#Yh;Vv4|eMxOg51<AeD>*aCX zP+D4j#BUVFtbgG_Mqj_-p!xz_>e0HeU3hTD14<$Q3ckAY?vxX!cS?jNM?EwBHM~s zMZj{DisP>p_zDhJ8ADe?dIbiD1r?ku*_uWy%tnEm zkgkHQxwNE)A%i#EG&#z-eMj5gJthrYjTJZ>sXNZ@QW5DI7cpjVWdM^})ApqM9E^z@3p3(^~_JI z(sO6q04^UOxD$W*S^MpJPv&{F7Gw&Np7er&fDthQ(-3y?kA;n$G=~%FT?wKFuxH+ z8a57wR{8lTKZN)yAWd8T1t7>3DHss+{@mTi&O1~n9CI(U$h!@5=Z2eM8n8$*JFi z)ye{*1}KW(+KWf4OkxqaQWl)*D9%O7LG=?+n%!7;5Kqy zz4*=c7O1~!mp2h^U)Jq zcn(uQbKFkRhOoEzgxOOtq!_42>nkGA1BSI2LRah?iMh_aQlST^3xFsC(4S~L3QH{& zkPng;djATSgiu4B z(!)SYN^ErXCdhHdk&@aLaxU9#CZ!%O#)wg>AUn=REp+Af%OwJs|9dDR>pi*vagr zp(~!2YmYgU=!h0#qu|=azE*)L)BsHtw|M%#$DEZ@-pwqv9UDAY;f+Ygj(1+^&p3( zbH7@u+TRG_bVeVD2bOP?qR^^8akWqP&PHvkistL9T7+&(tVnlq~>uYE#s1sikrv3 zSx@VUUfug7F;$H3Y-!}=;~$1D_YRgj9Q4$pEi8-Vg)kfI*M*py>ml(vpNMg2qRG@ zVknBForDMuNr)?Zlr$!aqWp0;k}RlD%Vj$EOzu;ZZx)#1P?G!yLgnY_TVOh{Ty227 z(H2ip$GY#yeEo{lmrZ!Gw+>G*UngdPvuAnRA z(+su5!YOXS!AZ!)DNIdY8GFRNkHws;(FP&78_j?S;k#3>); zgJN*)tdz9ErIZ*Bl*-Zem>AF~zUriqGGnf~%tIAJOH!A$SQEUGfM?XWy664W6!h6 z66R<^y|O4$Xb)6vq~w1Rr+gX89uLEiaN-7}k>J#g(kX5PRAZMXJb0N?7fcYTr-*Rg zIu*QQsiy_0p<9PpV6lJV=WFj<)t{dlAQd-LgyVln9yvsu%nu3XRa>|f$crvI!zv> z19OiOsVW$hpc88YV!pdpMXHeS-+C$B?-XJh?G#P3D1&1=f(j(ib zq6Wm>V$vbMfYSHRiAz2}3)ZoW^y6NKlP;gf2oQ+U2li$1J*-J}AHdZx?qhs-Xi53S zxHFyn6-NYT0&NSJ9ElO8LCsSk)HfZ8aT0~j#@ixaK>@eg-oyTMfJ;HqtZ+OY_~2ML zOU*Kepav_XG-Jk(n4@trhJ4V@;X7ju^_CXNxm3!_)JeNf55rXI9~;4!OF)ao_F{ zeRS({T32S^my8HFL32(7uJEF0h zi{0iV2yg5>N`S3%fI}6>xglN+fM+4rF~qUJGcNpN0}7K*3DP~@Mid-FysM3LKUHEE zw%J)1@vqytH+9t}N6>cGTLwz=4^*n(FBjG3_*pER=*5bm-P#=|k-XyYaD;N!p-\$t% z7{9EFhI1^((wcR