diff --git a/thys/Distributed_Distinct_Elements/Distributed_Distinct_Elements_Accuracy.thy b/thys/Distributed_Distinct_Elements/Distributed_Distinct_Elements_Accuracy.thy --- a/thys/Distributed_Distinct_Elements/Distributed_Distinct_Elements_Accuracy.thy +++ b/thys/Distributed_Distinct_Elements/Distributed_Distinct_Elements_Accuracy.thy @@ -1,327 +1,315 @@ section \Accuracy with cutoff\label{sec:accuracy}\ text \This section verifies that each of the $l$ estimate have the required accuracy with high probability assuming as long as the cutoff is below @{term "q_max"}, generalizing the result from Section~\ref{sec:accuracy_wo_cutoff}.\ theory Distributed_Distinct_Elements_Accuracy imports Distributed_Distinct_Elements_Accuracy_Without_Cutoff Distributed_Distinct_Elements_Cutoff_Level begin unbundle intro_cong_syntax lemma (in semilattice_set) Union: assumes "finite I" "I \ {}" assumes "\i. i \ I \ finite (Z i)" assumes "\i. i \ I \ Z i \ {}" shows "F (\ (Z ` I)) = F ((\i. (F (Z i))) ` I)" using assms(1,2,3,4) proof (induction I rule:finite_ne_induct) case (singleton x) then show ?case by simp next case (insert x I) have "F (\ (Z ` insert x I)) = F ((Z x) \ (\ (Z ` I)))" by simp also have "... = f (F (Z x)) (F (\ (Z ` I)))" using insert by (intro union finite_UN_I) auto also have "... = f (F {F (Z x)}) (F ((\i. F (Z i)) ` I))" using insert(5,6) by (subst insert(4)) auto also have "... = F ({F (Z x)} \ (\i. F (Z i)) ` I)" using insert(1,2) by (intro union[symmetric] finite_imageI) auto also have "... = F ((\i. F (Z i)) ` insert x I)" by simp finally show ?case by simp qed text \This is similar to the existing @{thm [source] hom_Max_commute} with the crucial difference that it works even if the function is a homomorphism between distinct lattices. An example application is @{term "Max (int ` A) = int (Max A)"}.\ lemma hom_Max_commute': assumes "finite A" "A \ {}" assumes "\x y. x \ A \ y \ A \ max (f x) (f y) = f (max x y)" shows "Max (f ` A) = f (Max A)" using assms by (induction A rule:finite_ne_induct) auto context inner_algorithm_fix_A begin definition t\<^sub>c where "t\<^sub>c \ \ = (Max ((\j. \\<^sub>1 \ A \ j + \) ` {..c (* tilde t *) where "s\<^sub>c \ \ = nat (t\<^sub>c \ \)" definition p\<^sub>c (* tilde p *) where "p\<^sub>c \ \ = card {j\ {..\<^sub>1 \ A \ j + \ \ s\<^sub>c \ \}" definition Y\<^sub>c (* tilde A* *) where "Y\<^sub>c \ \ = 2 ^ s\<^sub>c \ \ * \_inv (p\<^sub>c \ \)" lemma s\<^sub>c_eq_s: assumes "(f,g,h) \ sample_pro \" assumes "\ \ s f" shows "s\<^sub>c (f,g,h) \ = s f" proof - have "int (Max (f ` A)) - int b_exp + 9 \ int (Max (f ` A)) - 26 + 9" using b_exp_ge_26 by (intro add_mono diff_left_mono) auto also have "... \ int (Max (f ` A))" by simp finally have 1:"int (Max (f ` A)) - int b_exp + 9 \ int (Max (f ` A))" by simp have "\ \ int (s f)" using assms(2) by simp also have "... = max 0 (t f)" unfolding s_def by simp also have "... \ max 0 (int (Max (f ` A)))" unfolding t_def using 1 by simp also have "... = int (Max (f ` A))" by simp finally have "\ \ int (Max (f ` A))" by simp hence 0: "int \ - 1 \ int (Max (f ` A))" by simp have c:"h \ sample_pro (\ k (C\<^sub>7 * b\<^sup>2) (\ b))" using assms(1) sample_set_\ by auto hence h_range: "h x < b" for x using h_range_1 by simp have "(MAX j\{..\<^sub>1 (f, g, h) A \ j + int \) = (MAX x\{.. A \ h (g a) = x} \ {-1} \ {int \ -1}))" using fin_f[OF assms(1)] by (simp add:max_add_distrib_left max.commute \\<^sub>1_def) also have "... = Max (\x A \ h (g a) = x} \ {- 1} \ {int \ - 1})" using fin_f[OF assms(1)] b_ne by (intro Max.Union[symmetric]) auto also have "... = Max ({int (f a) |a. a \ A} \ {- 1, int \ - 1})" using h_range by (intro arg_cong[where f="Max"]) auto also have "... = max (Max (int ` f ` A)) (int \ - 1)" using A_nonempty fin_A unfolding Setcompr_eq_image image_image by (subst Max.union) auto also have "... = max (int (Max (f ` A))) (int \ - 1)" using fin_A A_nonempty by (subst hom_Max_commute') auto also have "... = int (Max (f ` A))" by (intro max_absorb1 0) finally have "(MAX j\{..\<^sub>1 (f, g, h) A \ j + int \) = Max (f ` A)" by simp thus ?thesis unfolding s\<^sub>c_def t\<^sub>c_def s_def t_def by simp qed lemma p\<^sub>c_eq_p: assumes "(f,g,h) \ sample_pro \" assumes "\ \ s f" shows "p\<^sub>c (f,g,h) \ = p (f,g,h)" proof - have "{j \ {.. max (\\<^sub>0 (f, g, h) A j) (int \ - 1)} = {j \ {.. max (\\<^sub>0 (f, g, h) A j) (- 1)}" using assms(2) unfolding le_max_iff_disj by simp thus ?thesis unfolding p\<^sub>c_def p_def s\<^sub>c_eq_s[OF assms] by (simp add:max_add_distrib_left \\<^sub>1_def del:\\<^sub>0.simps) qed lemma Y\<^sub>c_eq_Y: assumes "(f,g,h) \ sample_pro \" assumes "\ \ s f" shows "Y\<^sub>c (f,g,h) \ = Y (f,g,h)" unfolding Y\<^sub>c_def Y_def s\<^sub>c_eq_s[OF assms] p\<^sub>c_eq_p[OF assms] by simp lemma accuracy_single: "measure \ {\. \\ \ q_max. \Y\<^sub>c \ \ - real X\ > \ * X} \ 1/2^4" (is "?L \ ?R") proof - have "measure \ {\. \\ \ q_max. \Y\<^sub>c \ \ - real X\ > \ * real X} \ measure \ {(f,g,h). \Y (f,g,h) - real X\ > \ * real X \ s f < q_max}" proof (rule pmf_mono) fix \ assume a:"\ \ {\. \\\q_max. \ * real X < \Y\<^sub>c \ \ - real X\}" assume d:"\ \ set_pmf (sample_pro \)" obtain \ where b:"\ \ q_max" and c:" \ * real X < \Y\<^sub>c \ \ - real X\" using a by auto obtain f g h where \_def: "\ = (f,g,h)" by (metis prod_cases3) hence e:"(f,g,h) \ sample_pro \" using d by simp show "\ \ {(f, g, h). \ * real X < \Y (f, g, h) - real X\ \ s f < q_max}" proof (cases "s f \ q_max") case True hence f:"\ \ s f" using b by simp have "\ * real X < \Y \ - real X\" using Y\<^sub>c_eq_Y[OF e f] c unfolding \_def by simp then show ?thesis unfolding \_def by simp next case False then show ?thesis unfolding \_def by simp qed qed also have "... \ 1/2^4" using accuracy_without_cutoff by simp finally show ?thesis by simp qed lemma estimate1_eq: assumes "j < l" shows "estimate1 (\\<^sub>2 \ A \, \) j = Y\<^sub>c (\ j) \" (is "?L = ?R") proof - define t where "t = max 0 (Max ((\\<^sub>2 \ A \ j) ` {.. - \log 2 b\ + 9)" define p where "p = card { k. k \ {.. (\\<^sub>2 \ A \ j k) + \ \ t }" have 0: "int (nat x) = max 0 x" for x by simp have 1: "\log 2 b\ = b_exp" unfolding b_def by simp have "b > 0" using b_min by simp hence 2: " {.. {}" by auto have "t = int (nat (Max ((\\<^sub>2 \ A \ j) ` {.. - b_exp + 9))" unfolding t_def 0 1 by (rule refl) also have "... = int (nat (Max ((\x. x + \) ` (\\<^sub>2 \ A \ j) ` {..\<^sub>1 int,\\<^sub>1 nat,\\<^sub>2(+),\\<^sub>2(-)]" more:hom_Max_commute) (simp_all add:2) also have "... = int (s\<^sub>c (\ j) \)" using assms unfolding s\<^sub>c_def t\<^sub>c_def \\<^sub>2_def image_image by simp finally have 3:"t = int (s\<^sub>c (\ j) \)" by simp have 4: "p = p\<^sub>c (\ j) \" using assms unfolding p_def p\<^sub>c_def 3 \\<^sub>2_def by simp have "?L = 2 powr t * ln (1-p/b) / ln(1-1/b)" unfolding estimate1.simps \_def \\<^sub>3_def by (simp only:t_def p_def Let_def) also have "... = 2 powr (s\<^sub>c (\ j) \) * \_inv p" unfolding 3 \_inv_def by (simp) also have "... = ?R" unfolding Y\<^sub>c_def 3 4 by (simp add:powr_realpow) finally show ?thesis by blast qed lemma estimate_result_1: "measure \ {\. (\\\q_max. \*X < \estimate (\\<^sub>2 \ A \,\)-X\) } \ \/2" (is "?L \ ?R") proof - define I :: "real set" where "I = {x. \x - real X\ \ \*X}" define \ where "\ = measure \ {\. \\\q_max. Y\<^sub>c \ \\I}" have int_I: "interval I" unfolding interval_def I_def by auto have "\ = measure \ {\. \\ \ q_max. \Y\<^sub>c \ \ - real X\ > \ * X}" unfolding \_def I_def by (simp add:not_le) also have "... \ 1 / 2 ^ 4" by (intro accuracy_single) also have "... = 1/ 16" by simp finally have 1:"\ \ 1 / 16" by simp have "(\ + \) \ 1/16 + 1/16" unfolding \_def by (intro add_mono 1) auto also have "... \ 1/8" by simp finally have 2:"(\ + \) \ 1/8" by simp hence 0: "(\ + \) \ 1/2" by simp have "\ \ 0" unfolding \_def by simp hence 3: "\ + \ > 0" by (intro add_nonneg_pos \_gt_0) have "?L = measure \ {\. (\\\q_max. \*X < \median l (estimate1 (\\<^sub>2 \ A \,\))-X\) }" by simp also have "... = measure \ {\. (\\\q_max. median l (estimate1 (\\<^sub>2 \ A \,\)) \ I)}" unfolding I_def by (intro measure_pmf_cong) auto also have "... \ measure \ {\. real(card{i\{..\\q_max. Y\<^sub>c (\ i) \\I)})\ real l/2}" proof (rule pmf_mono) fix \ assume "\ \ set_pmf \" "\ \ {\. \\\q_max. median l (estimate1 (\\<^sub>2 \ A \, \)) \ I}" then obtain \ where \_def: "median l (estimate1 (\\<^sub>2 \ A \, \)) \ I" "\\q_max" by auto - - have "real l = 2 * real l - real l" + hence "real l \ real (2 * card {i. i estimate1 (\\<^sub>2 \ A \, \) i \ I})" + by (intro of_nat_mono median_est_rev[OF int_I]) + also have "... = 2 * real (card {i\{..\<^sub>2 \ A \, \) i \ I})" by simp - also have "... \ 2 * real l - 2 * card {i. i < l \ estimate1 (\\<^sub>2 \ A \, \) i \ I}" - using \_def median_est[OF int_I, where n="l"] not_less - by (intro diff_left_mono Nat.of_nat_mono) (auto simp del:estimate1.simps) - also have "... = 2 * (real (card {.. estimate1 (\\<^sub>2 \ A \, \) i \ I})" - by (simp del:estimate1.simps) - also have "... = 2 * real (card {.. estimate1 (\\<^sub>2 \ A \, \) i \ I})" - by (intro_cong "[\\<^sub>2 (*)]" more:of_nat_diff[symmetric] card_mono) - (auto simp del:estimate1.simps) - also have "... = 2 * real (card ({.. estimate1 (\\<^sub>2 \ A \, \) i \ I}))" - by (intro_cong "[\\<^sub>2 (*), \\<^sub>1 of_nat]" more:card_Diff_subset[symmetric]) - (auto simp del:estimate1.simps) - also have "... = 2 * real (card {i\{..\<^sub>2 \ A \, \) i \ I})" - by (intro_cong "[\\<^sub>2 (*), \\<^sub>1 of_nat, \\<^sub>1 card]") (auto simp del:estimate1.simps) also have "... = 2 * real (card {i \ {..c (\ i) \ \ I})" using estimate1_eq by (intro_cong "[\\<^sub>2 (*), \\<^sub>1 of_nat, \\<^sub>1 card]" more:restr_Collect_cong) auto also have "... \ 2 * real (card {i \ {..\\q_max. Y\<^sub>c (\ i) \ \ I)})" using \_def(2) by (intro mult_left_mono Nat.of_nat_mono card_mono) auto finally have "real l \ 2 * real (card {i \ {..\\q_max. Y\<^sub>c (\ i) \ \ I)})" by simp thus "\ \ {\. real l/2 \ real (card {i \ {..\\q_max. Y\<^sub>c (\ i) \ \ I})}" by simp qed also have "... = measure \ {\. real (card{i\{..\\q_max. Y\<^sub>c (\ i) \\I)}) \ (1/2)*real l}" unfolding p_def by simp also have "... \ exp (- real l * ((1/2) * ln (1 / (\ + \)) - 2 * exp (- 1)))" using 0 unfolding \_def by (intro walk_tail_bound l_gt_0 \_gt_0) auto also have "... = exp (- (real l * ((1/2) * ln (1 / (\ + \)) - 2 * exp (- 1))))" by simp also have "... \ exp (- (real l * ((1/2) * ln 8 - 2 * exp (- 1))))" using 2 3 l_gt_0 by (intro iffD2[OF exp_le_cancel_iff] le_imp_neg_le mult_left_mono diff_mono) (auto simp add:divide_simps) also have "... \ exp (- (real l * (1/4)))" by (intro iffD2[OF exp_le_cancel_iff] le_imp_neg_le mult_left_mono of_nat_0_le_iff) (approximation 5) also have "... \ exp (- (C\<^sub>6 * ln (2/ \)*(1/4)))" by (intro iffD2[OF exp_le_cancel_iff] le_imp_neg_le mult_right_mono l_lbound) auto also have "... = exp ( - ln (2/ \))" unfolding C\<^sub>6_def by simp also have "... = ?R" using \_gt_0 by (subst ln_inverse[symmetric]) auto finally show ?thesis by simp qed theorem estimate_result: "measure \ {\. \estimate (\ \ A)- X\ > \ * X} \ \" (is "?L \ ?R") proof - let ?P = "measure \" have "?L \ ?P {\. (\\\q_max. \*real X<\estimate (\\<^sub>2 \ A \, \)-real X\)\q \ A> q_max}" unfolding \_def \\<^sub>3_def not_le[symmetric] by (intro pmf_mono) auto also have "...\ ?P {\. (\\\q_max. \*real X<\estimate (\\<^sub>2 \ A \,\)-X\)} + ?P {\. q \ A> q_max}" by (intro pmf_add) auto also have "...\ \/2 + \/2" by (intro add_mono cutoff_level estimate_result_1) also have "... = \" by simp finally show ?thesis by simp qed end lemma (in inner_algorithm) estimate_result: assumes "A \ {.. {}" shows "measure \ {\. \estimate (\ \ A)- real (card A)\ > \ * real (card A)} \ \" (is "?L \ ?R") proof - interpret inner_algorithm_fix_A using assms by unfold_locales auto have "?L = measure \ {\. \estimate (\ \ A)- X\ > \ * X}" unfolding X_def by simp also have "... \ ?R" by (intro estimate_result) finally show ?thesis by simp qed unbundle no_intro_cong_syntax end \ No newline at end of file diff --git a/thys/Distributed_Distinct_Elements/Distributed_Distinct_Elements_Outer_Algorithm.thy b/thys/Distributed_Distinct_Elements/Distributed_Distinct_Elements_Outer_Algorithm.thy --- a/thys/Distributed_Distinct_Elements/Distributed_Distinct_Elements_Outer_Algorithm.thy +++ b/thys/Distributed_Distinct_Elements/Distributed_Distinct_Elements_Outer_Algorithm.thy @@ -1,807 +1,795 @@ section \Outer Algorithm\label{sec:outer_algorithm}\ text \This section introduces the final solution with optimal size space usage. Internally it relies on the inner algorithm described in Section~\ref{sec:inner_algorithm}, dependending on the paramaters $n$, $\varepsilon$ and $\delta$ it either uses the inner algorithm directly or if $\varepsilon^{-1}$ is larger than $\ln n$ it runs $\frac{\varepsilon^{-1}}{\ln \ln n}$ copies of the inner algorithm (with the modified failure probability $\frac{1}{\ln n}$) using an expander to select its seeds. The theorems below verify that the probability that the relative accuracy of the median of the copies is too large is below $\varepsilon$.\ theory Distributed_Distinct_Elements_Outer_Algorithm imports Distributed_Distinct_Elements_Accuracy Prefix_Free_Code_Combinators.Prefix_Free_Code_Combinators Frequency_Moments.Landau_Ext Landau_Symbols.Landau_More begin unbundle intro_cong_syntax text \The following are non-asymptotic hard bounds on the space usage for the sketches and seeds repsectively. The end of this section contains a proof that the sum is asymptotically in $\mathcal O(\ln( \varepsilon^{-1}) \delta^{-1} + \ln n)$.\ definition "state_space_usage = (\(n,\,\). 2^40 * (ln(1/\)+1)/ \^2 + log 2 (log 2 n + 3))" definition "seed_space_usage = (\(n,\,\). 2^30+2^23*ln n+48*(log 2(1/\)+16)\<^sup>2+336*ln (1/\))" locale outer_algorithm = fixes n :: nat fixes \ :: real fixes \ :: real assumes n_gt_0: "n > 0" assumes \_gt_0: "\ > 0" and \_lt_1: "\ < 1" assumes \_gt_0: "\ > 0" and \_lt_1: "\ < 1" begin definition n\<^sub>0 where "n\<^sub>0 = max (real n) (exp (exp 5))" definition stage_two where "stage_two = (\ < (1/ln n\<^sub>0))" definition \\<^sub>i :: real where "\\<^sub>i = (if stage_two then (1/ln n\<^sub>0) else \)" definition m :: nat where "m = (if stage_two then nat \4 * ln (1/ \)/ln (ln n\<^sub>0)\ else 1)" definition \ where "\ = (if stage_two then (1/ln n\<^sub>0) else 1)" lemma m_lbound: assumes "stage_two" shows "m \ 4 * ln (1/ \)/ln(ln n\<^sub>0)" proof - have "m = real (nat \4 * ln (1 / \) / ln (ln n\<^sub>0)\)" using assms unfolding m_def by simp also have "... \ 4 * ln (1 / \) / ln (ln n\<^sub>0)" by linarith finally show ?thesis by simp qed lemma n_lbound: "n\<^sub>0 \ exp (exp 5)" "ln n\<^sub>0 \ exp 5" "5 \ ln (ln n\<^sub>0)" "ln n\<^sub>0 > 1" "n\<^sub>0 > 1" proof - show 0:"n\<^sub>0 \ exp (exp 5)" unfolding n\<^sub>0_def by simp have "(1::real) \ exp (exp 5)" by (approximation 5) hence "n\<^sub>0 \ 1" using 0 by argo thus 1:"ln n\<^sub>0 \ exp 5" using 0 by (intro iffD2[OF ln_ge_iff]) auto moreover have "1 < exp (5::real)" by (approximation 5) ultimately show 2:"ln n\<^sub>0 > 1" by argo show "5 \ ln (ln n\<^sub>0)" using 1 2 by (subst ln_ge_iff) simp have "(1::real) < exp (exp 5)" by (approximation 5) thus "n\<^sub>0 > 1" using 0 by argo qed lemma \1_gt_0: "0 < \\<^sub>i" using n_lbound(4) \_gt_0 unfolding \\<^sub>i_def by (cases "stage_two") simp_all lemma \1_lt_1: "\\<^sub>i < 1" using n_lbound(4) \_lt_1 unfolding \\<^sub>i_def by (cases "stage_two") simp_all lemma m_gt_0_aux: assumes "stage_two" shows "1 \ ln (1 / \) / ln (ln n\<^sub>0)" proof - have "ln n\<^sub>0 \ 1 / \" using n_lbound(4) using assms unfolding pos_le_divide_eq[OF \_gt_0] stage_two_def by (simp add:divide_simps ac_simps) hence "ln (ln n\<^sub>0) \ ln (1 / \)" using n_lbound(4) \_gt_0 by (intro iffD2[OF ln_le_cancel_iff] divide_pos_pos) auto thus "1 \ ln (1 / \) / ln (ln n\<^sub>0)" using n_lbound(3) by (subst pos_le_divide_eq) auto qed lemma m_gt_0: "m > 0" proof (cases "stage_two") case True have "0 < 4 * ln (1/ \)/ln(ln n\<^sub>0)" using m_gt_0_aux[OF True] by simp also have "... \ m" using m_lbound[OF True] by simp finally have "0 < real m" by simp then show ?thesis by simp next case False then show ?thesis unfolding m_def by simp qed lemma \_gt_0: "\ > 0" using n_lbound(4) unfolding \_def by (cases "stage_two") auto lemma \_le_1: "\ \ 1" using n_lbound(4) unfolding \_def by (cases "stage_two") simp_all sublocale I: inner_algorithm "n" "\\<^sub>i" "\" unfolding inner_algorithm_def using n_gt_0 \_gt_0 \_lt_1 \1_gt_0 \1_lt_1 by auto abbreviation \ where "\ \ \ m \ I.\" lemma \: "m > 0" "\ > 0" using \_gt_0 m_gt_0 by auto type_synonym state = "inner_algorithm.state list" fun single :: "nat \ nat \ state" where "single \ x = map (\j. I.single (pro_select \ \ j) x) [0.. state \ state" where "merge x y = map (\(x,y). I.merge x y) (zip x y)" fun estimate :: "state \ real" where "estimate x = median m (\i. I.estimate (x ! i))" definition \ :: "nat \ nat set \ state" where "\ \ A = map (\i. I.\ (pro_select \ \ i) A) [0..The following three theorems verify the correctness of the algorithm. The term @{term "\"} is a mathematical description of the sketch for a given subset, while @{term "single"}, @{term "merge"} are the actual functions that compute the sketches.\ theorem merge_result: "merge (\ \ A) (\ \ B) = \ \ (A \ B)" (is "?L = ?R") proof - have 0: "zip [0..x. (x,x)) [0..x. I.merge (I.\ (pro_select \ \ x) A) (I.\ (pro_select \ \ x) B)) [0.._def by (simp add:zip_map_map 0 comp_def case_prod_beta) also have "... = map (\x. I.\ (pro_select \ \ x) (A \ B)) [0..]) also have "... = ?R" unfolding \_def by simp finally show ?thesis by simp qed theorem single_result: "single \ x = \ \ {x}" (is "?L = ?R") proof - have "?L = map (\j. I.single (pro_select \ \ j) x) [0.._def by (intro map_cong I.single_result expander_pro_range[OF \]) auto finally show ?thesis by simp qed theorem estimate_result: assumes "A \ {.. {}" defines "p \ (pmf_of_set {..})" shows "measure p {\. \estimate (\ \ A)- real (card A)\ > \ * real (card A)} \ \" (is "?L \ ?R") proof (cases "stage_two") case True define I where "I = {x. \x - real (card A)\ \ \ * real (card A)}" have int_I: "interval I" unfolding interval_def I_def by auto define \ where "\ = measure I.\ {\. I.estimate (I.\ \ A) \ I}" have 0:"\ + \ > 0" unfolding \_def by (intro add_nonneg_pos \_gt_0) auto have "\ \ \\<^sub>i" unfolding \_def I_def using I.estimate_result[OF assms(1,2)] by (simp add: not_le del:I.estimate.simps) also have "... = 1/ln n\<^sub>0" using True unfolding \\<^sub>i_def by simp finally have "\ \ 1/ln n\<^sub>0" by simp hence "\ + \ \ 1/ln n\<^sub>0 + 1/ln n\<^sub>0" unfolding \_def using True by (intro add_mono) auto also have "... = 2/ln n\<^sub>0" by simp finally have 1:"\ + \ \ 2 / ln n\<^sub>0" by simp hence 2:"ln n\<^sub>0 \ 2 / (\ + \)" using 0 n_lbound by (simp add:field_simps) have "\ + \ \ 2/ln n\<^sub>0" by (intro 1) also have "... \ 2/exp 5" using n_lbound by (intro divide_left_mono) simp_all also have "... \ 1/2" by (approximation 5) finally have 3:"\ + \ \ 1/2" by simp have 4: "2 * ln 2 + 8 * exp (- 1) \ (5::real)" by (approximation 5) have "?L = measure p {\. median m (\i. I.estimate (\ \ A ! i)) \ I}" unfolding I_def by (simp add:not_le) also have "... \ measure p {\. real (card {i \ {.. (pro_select \ \ i) A) \ I})\ real m/2}" proof (rule pmf_mono) fix \ assume "\ \ set_pmf p" assume a:"\ \ {\. median m (\i. I.estimate (\ \ A ! i)) \ I}" - have "real m = 2 * real m - real m" + hence "real m \ real (2*card {i. i < m \ I.estimate (\ \ A ! i) \ I})" + by (intro of_nat_mono median_est_rev int_I) auto + also have "... = 2 * real (card {i\{.. \ A ! i) \ I})" by simp - also have "... \ 2 * real m - 2 * card {i. i < m \ I.estimate (\ \ A ! i) \ I}" - using median_est[OF int_I, where n="m"] a - by (intro diff_left_mono Nat.of_nat_mono) - (auto simp add:not_less[symmetric] simp del:I.estimate.simps) - also have "... = 2 * (real (card {.. I.estimate (\ \ A ! i) \ I})" - by (simp del:I.estimate.simps) - also have "... = 2 * real (card {.. I.estimate (\ \ A ! i) \ I})" - by (intro_cong "[\\<^sub>2 (*)]" more:of_nat_diff[symmetric] card_mono) - (auto simp del:I.estimate.simps) - also have "... = 2 * real (card ({.. I.estimate (\ \ A ! i) \ I}))" - by (intro_cong "[\\<^sub>2 (*), \\<^sub>1 of_nat]" more:card_Diff_subset[symmetric]) - (auto simp del:I.estimate.simps) - also have "... = 2 * real (card {i\{.. \ A ! i) \ I})" - by (intro_cong "[\\<^sub>2 (*), \\<^sub>1 of_nat, \\<^sub>1 card]") (auto simp del:I.estimate.simps) also have "... = 2 * real (card {i \ {.. (pro_select \ \ i) A) \ I})" unfolding \_def by (intro_cong "[\\<^sub>2 (*), \\<^sub>1 of_nat, \\<^sub>1 card]" more:restr_Collect_cong) (simp del:I.estimate.simps) finally have "real m \ 2 * real (card {i \ {.. (pro_select \ \ i) A) \ I})" by simp thus "\ \ {\. real m / 2 \ real (card {i \ {.. (pro_select \ \ i) A) \ I})}" by simp qed also have "...=measure \{\. real(card {i \ {.. (\ i) A) \ I})\(1/2)*real m}" unfolding sample_pro_alt p_def by (simp del:I.estimate.simps) also have "... \ exp (-real m * ((1/2) * ln (1/ (\ + \)) - 2*exp (-1)))" using 3 m_gt_0 \_gt_0 unfolding \_def by (intro walk_tail_bound) force+ also have "... \ exp (-real m * ((1/2) * ln (ln n\<^sub>0 / 2) - 2*exp (-1)))" using 0 2 3 n_lbound by (intro iffD2[OF exp_le_cancel_iff] mult_right_mono mult_left_mono_neg[where c="-real m"] diff_mono mult_left_mono iffD2[OF ln_le_cancel_iff]) (simp_all) also have "... = exp (-real m * (ln (ln n\<^sub>0) / 2 - (ln 2/2 + 2*exp (-1))))" using n_lbound by (subst ln_div) (simp_all add:algebra_simps) also have "... \ exp (-real m * (ln (ln n\<^sub>0) / 2 - (ln (ln (exp(exp 5))) / 4)))" using 4 by (intro iffD2[OF exp_le_cancel_iff] mult_left_mono_neg[where c="-real m"] diff_mono) simp_all also have "... \ exp (-real m * (ln (ln n\<^sub>0) / 2 - (ln (ln n\<^sub>0) / 4)))" using n_lbound by (intro iffD2[OF exp_le_cancel_iff] mult_left_mono_neg[where c="-real m"] diff_mono) simp_all also have "... = exp (- real m * (ln (ln n\<^sub>0)/ 4) )" by (simp add:algebra_simps) also have "... \ exp (- (4 * ln (1/ \)/ln(ln n\<^sub>0)) * (ln (ln n\<^sub>0)/4))" using m_lbound[OF True] n_lbound by (intro iffD2[OF exp_le_cancel_iff] mult_right_mono divide_nonneg_pos) simp_all also have "... = exp (- ln (1/ \))" using n_lbound by simp also have "... = \" using \_gt_0 by (subst ln_inverse[symmetric]) auto finally show ?thesis by simp next case False have m_eq: "m = 1" unfolding m_def using False by simp hence "?L = measure p {\. \ * real (card A) < \I.estimate (\ \ A ! 0) - real (card A)\}" unfolding estimate.simps m_eq median_def by simp also have "... = measure p {\. \*card A<\I.estimate (I.\ (pro_select \ \ 0) A)-real(card A)\}" unfolding \_def m_eq by (simp del: I.estimate.simps) also have "... = measure \ {\. \*real(card A) < \I.estimate (I.\ (\ 0) A)-real(card A)\}" unfolding sample_pro_alt p_def by (simp del:I.estimate.simps) also have "...= measure (map_pmf (\\. \ 0) \) {\. \*real(card A) < \I.estimate (I.\ \ A)-real(card A)\}" by simp also have "... = measure I.\ {\. \*real(card A) < \I.estimate (I.\ \ A)-real(card A)\}" using m_eq by (subst expander_uniform_property[OF \]) auto also have "... \ \\<^sub>i" by (intro I.estimate_result[OF assms(1,2)]) also have "... = ?R" unfolding \\<^sub>i_def using False by simp finally show ?thesis by simp qed text \The function @{term "encode_state"} can represent states as bit strings. This enables verification of the space usage.\ definition encode_state where "encode_state = Lf\<^sub>e I.encode_state m" lemma encode_state: "is_encoding encode_state" unfolding encode_state_def by (intro fixed_list_encoding I.encode_state) lemma state_bit_count: "bit_count (encode_state (\ \ A)) \ state_space_usage (real n, \, \)" (is "?L \ ?R") proof - have 0: "length (\ \ A) = m" unfolding \_def by simp have "?L = (\x\\ \ A. bit_count (I.encode_state x))" using 0 unfolding encode_state_def fixed_list_bit_count by simp also have "... = (\x\[0.. (pro_select \ \ x) A)))" unfolding \_def by (simp add:comp_def) also have "... \ (\x\[0..\<^sub>i)+ 1)/\\<^sup>2 + log 2 (log 2 (real n) + 3)))" using I.state_bit_count by (intro sum_list_mono I.state_bit_count expander_pro_range[OF \]) also have "... = ereal ( real m * (2^36 *(ln (1/\\<^sub>i)+ 1)/\\<^sup>2 + log 2 (log 2 (real n) + 3)))" unfolding sum_list_triv_ereal by simp also have "... \ 2^40 * (ln(1/\)+1)/ \^2 + log 2 (log 2 n + 3)" (is "?L1 \ ?R1") proof (cases "stage_two") case True have "\4*ln (1/\)/ln(ln n\<^sub>0)\ \ 4*ln (1/\)/ln(ln n\<^sub>0) + 1" by simp also have "... \ 4*ln (1/\)/ln(ln n\<^sub>0) + ln (1/\)/ln(ln n\<^sub>0)" using m_gt_0_aux[OF True] by (intro add_mono) auto also have "... = 5 * ln (1/\)/ln(ln n\<^sub>0)" by simp finally have 3: "\4*ln (1/\)/ln(ln n\<^sub>0)\ \ 5 * ln (1/\)/ln(ln n\<^sub>0)" by simp have 4: "0 \ log 2 (log 2 (real n) + 3)" using n_gt_0 by (intro iffD2[OF zero_le_log_cancel_iff] add_nonneg_pos) auto have 5: "1 / ln 2 + 3 / exp 5 \ exp (1::real)" "1.2 / ln 2 \ (2::real)" by (approximation 5)+ have "log 2(log 2 (real n)+3) \ log 2 (log 2 n\<^sub>0 + 3)" using n_gt_0 by (intro iffD2[OF log_le_cancel_iff] add_mono add_nonneg_pos iffD2[OF zero_le_log_cancel_iff]) (simp_all add:n\<^sub>0_def) also have "... = ln (ln n\<^sub>0 / ln 2 + 3) / ln 2" unfolding log_def by simp also have "... \ ln (ln n\<^sub>0/ln 2 + (3 / exp 5) * ln n\<^sub>0) / ln 2" using n_lbound by (intro divide_right_mono iffD2[OF ln_le_cancel_iff] add_mono add_nonneg_pos) (simp_all add:divide_simps) also have "... = ln ( ln n\<^sub>0 * (1 /ln 2 + 3/exp 5)) / ln 2" by (simp add:algebra_simps) also have "... \ ln ( ln n\<^sub>0 * exp 1) / ln 2" using n_lbound by (intro divide_right_mono iffD2[OF ln_le_cancel_iff] add_mono mult_left_mono 5 Rings.mult_pos_pos add_pos_nonneg) auto also have "... = (ln (ln n\<^sub>0) + 1) / ln 2" using n_lbound by (subst ln_mult) simp_all also have "... \ (ln (ln n\<^sub>0) + 0.2 * ln (ln n\<^sub>0)) / ln 2" using n_lbound by (intro divide_right_mono add_mono) auto also have "... = (1.2/ ln 2) * ln (ln n\<^sub>0)" by simp also have "... \ 2 * ln (ln n\<^sub>0)" using n_lbound by (intro mult_right_mono 5) simp finally have "log 2(log 2 (real n)+3) \ 2 * ln (ln n\<^sub>0)" by simp hence 6: "log 2(log 2 (real n)+3)/ln(ln n\<^sub>0) \ 2" using n_lbound by (subst pos_divide_le_eq) simp_all have "?L1 = real(nat \4*ln (1/\)/ln(ln n\<^sub>0)\)*(2^36*(ln (ln n\<^sub>0)+1)/\^2+log 2(log 2 (real n)+3))" using True unfolding m_def \\<^sub>i_def by simp also have "... = \4*ln (1/\)/ln(ln n\<^sub>0)\*(2^36*(ln (ln n\<^sub>0)+1)/\^2+log 2(log 2 (real n)+3))" using m_gt_0_aux[OF True] by (subst of_nat_nat) simp_all also have "... \ (5*ln (1/\)/ln(ln n\<^sub>0)) *(2^36*(ln (ln n\<^sub>0)+1)/\^2+log 2(log 2 (real n)+3))" using n_lbound(3) \_gt_0 4 by (intro ereal_mono mult_right_mono add_nonneg_nonneg divide_nonneg_pos mult_nonneg_nonneg 3) simp_all also have "... \ (5 * ln (1/\)/ln(ln n\<^sub>0))*((2^36+2^36)*ln (ln n\<^sub>0)/\^2+log 2(log 2 (real n)+3))" using n_lbound \_gt_0 \_lt_1 by (intro ereal_mono mult_left_mono add_mono divide_right_mono divide_nonneg_pos) auto also have "... = 5*(2^37)* ln (1/\)/ \^2 + (5*ln (1/\)) * (log 2(log 2 (real n)+3)/ln(ln n\<^sub>0))" using n_lbound by (simp add:algebra_simps) also have "... \ 5*(2^37)* ln (1/\)/ \^2 + (5*ln(1/ \)) * 2" using \_gt_0 \_lt_1 by (intro add_mono ereal_mono order.refl mult_left_mono 6) auto also have "... = 5*(2^37)* ln (1/\)/ \^2 + 5*2*ln(1/ \) / 1" by simp also have "... \ 5*(2^37)* ln (1/\)/ \^2 + 5*2*ln(1/ \) / \^2" using \_gt_0 \_lt_1 \_gt_0 \_lt_1 by (intro add_mono ereal_mono divide_left_mono Rings.mult_pos_pos power_le_one) auto also have "... = (5*(2^37+2))* (ln (1/\)+0)/ \^2 + 0" by (simp add:algebra_simps) also have "... \ 2^40 * (ln (1 / \)+1) / \^2 + log 2 (log 2 (real n) + 3)" using \_gt_0 \_lt_1 \_gt_0 \_lt_1 n_gt_0 by (intro add_mono ereal_mono divide_right_mono mult_right_mono iffD2[OF zero_le_log_cancel_iff] add_nonneg_pos) auto finally show ?thesis by simp next case False have "?L1 = 2^36 *(ln (1/\)+ 1)/\\<^sup>2 + log 2 (log 2 (real n) + 3)" using False unfolding \\<^sub>i_def m_def by simp also have "... \ ?R1" using \_gt_0 \_lt_1 \_gt_0 \_lt_1 by (intro ereal_mono add_mono divide_right_mono mult_right_mono add_nonneg_nonneg) auto finally show ?thesis by simp qed finally show ?thesis unfolding state_space_usage_def by simp qed text \Encoding function for the seeds which are just natural numbers smaller than @{term "pro_size \"}.\ definition encode_seed where "encode_seed = Nb\<^sub>e (pro_size \)" lemma encode_seed: "is_encoding encode_seed" unfolding encode_seed_def by (intro bounded_nat_encoding) lemma random_bit_count: assumes "\ < pro_size \" shows "bit_count (encode_seed \) \ seed_space_usage (real n, \, \)" (is "?L \ ?R") proof - have 0: "pro_size \ > 0" by (intro pro_size_gt_0) have 1: "pro_size I.\ > 0" by (intro pro_size_gt_0) have "(55+60*ln (ln n\<^sub>0))^3 \ (180+60*ln (ln n\<^sub>0))^3" using n_lbound by (intro power_mono add_mono) auto also have "... = 180^3 * (1+ln (ln n\<^sub>0)/real 3)^3" unfolding power_mult_distrib[symmetric] by simp also have "... \ 180^3 * exp (ln (ln n\<^sub>0))" using n_lbound by (intro mult_left_mono exp_ge_one_plus_x_over_n_power_n) auto also have "... = 180^3 * ln n\<^sub>0" using n_lbound by (subst exp_ln) auto also have "... \ 180^3 * max (ln n) (ln (exp (exp 5)))" using n_gt_0 unfolding n\<^sub>0_def by (subst ln_max_swap) auto also have "... \ 180^3 * (ln n + exp 5)" using n_gt_0 unfolding ln_exp by (intro mult_left_mono) auto finally have 2:"(55+60*ln (ln n\<^sub>0))^3 \ 180^3 * ln n + 180^3*exp 5" by simp have 3:"(1::real)+180^3*exp 5 \ 2^30" "(4::real)/ln 2 + 180^3 \ 2^23" by (approximation 10)+ have "?L = ereal (real (floorlog 2 (pro_size \ - 1)))" using assms unfolding encode_seed_def bounded_nat_bit_count by simp also have "... \ ereal (real (floorlog 2 (pro_size \)))" by (intro ereal_mono Nat.of_nat_mono floorlog_mono) auto also have "... = ereal (1 + of_int \log 2 (real (pro_size \))\)" using 0 unfolding floorlog_def by simp also have "... \ ereal (1 + log 2 (real (pro_size \)))" by (intro add_mono ereal_mono) auto also have "... = 1 + log 2 (real (pro_size I.\) * (2^4) ^ ((m - 1) * nat \ln \ / ln 0.95\))" unfolding expander_pro_size[OF \] by simp also have "... = 1 + log 2 (real (pro_size I.\) * 2^ (4 * (m - 1) * nat \ln \ / ln 0.95\))" unfolding power_mult by simp also have "... = 1 + log 2 (real (pro_size I.\)) + (4*(m-1)* nat\ln \ / ln 0.95\)" using 1 by (subst log_mult) simp_all also have "... \ 1+log 2(2 powr (4*log 2 n + 48 * (log 2 (1/\)+16)\<^sup>2+ (55+60*ln (1/\\<^sub>i))^3))+ (4*(m-1)* nat\ln \ / ln 0.95\)" using 1 by (intro ereal_mono add_mono iffD2[OF log_le_cancel_iff] I.random_bit_count) auto also have "...=1+4*log 2 n+48*(log 2(1/\)+16)\<^sup>2+(55+60*ln (1/\\<^sub>i))^3+(4*(m-1)*nat\ln \/ln 0.95\)" by (subst log_powr_cancel) auto also have "... \ 2^30 + 2^23*ln n+48*(log 2(1/\)+16)\<^sup>2 + 336*ln (1/\)" (is "?L1 \ ?R1") proof (cases "stage_two") case True have "-1 < (0::real)" by simp also have "... \ ln \ / ln 0.95" using \_gt_0 \_le_1 by (intro divide_nonpos_neg) auto finally have 4: "- 1 < ln \ / ln 0.95" by simp have 5: "- 1 / ln 0.95 \ (20::real)" by (approximation 10) have "(4*(m-1)*nat\ln \/ln 0.95\) = 4 * (real m-1) * of_int \ln \/ln 0.95\" using 4 m_gt_0 unfolding of_nat_mult by (subst of_nat_nat) auto also have "... \ 4 * (real m-1) * (ln \/ln 0.95 + 1)" using m_gt_0 by (intro mult_left_mono) auto also have "... = 4 * (real m-1) * (-ln (ln n\<^sub>0)/ln 0.95 + 1)" using n_lbound True unfolding \_def by (subst ln_inverse[symmetric]) (simp_all add:inverse_eq_divide) also have "... = 4 * (real m - 1 ) * (ln (ln n\<^sub>0) * (-1/ln 0.95) + 1)" by simp also have "... \ 4 * (real m - 1 ) * (ln (ln n\<^sub>0) * 20 + 1)" using n_lbound m_gt_0 by (intro mult_left_mono add_mono 5) auto also have "... = 4 * (real (nat \4 * ln (1 / \) / ln (ln n\<^sub>0)\)-1) * (ln (ln n\<^sub>0) * 20 + 1)" using True unfolding m_def by simp also have "... = 4 * (real_of_int \4 * ln (1 / \) / ln (ln n\<^sub>0)\-1) * (ln (ln n\<^sub>0) * 20 + 1)" using m_gt_0_aux[OF True] by (subst of_nat_nat) simp_all also have "... \ 4 * (4 * ln (1 / \) / ln (ln n\<^sub>0)) * (ln (ln n\<^sub>0) * 20 + 1)" using n_lbound by (intro mult_left_mono mult_right_mono) auto also have "... \ 4 * (4 * ln (1 / \) / ln (ln n\<^sub>0)) * (ln (ln n\<^sub>0) * 20 + ln (ln n\<^sub>0))" using \_gt_0 \_lt_1 n_lbound by (intro mult_left_mono mult_right_mono add_mono divide_nonneg_pos Rings.mult_nonneg_nonneg) simp_all also have "... = 336 * ln (1 / \)" using n_lbound by simp finally have 6: "4 * (m-1) * nat \ln \/ln 0.95\ \ 336 * ln (1/\)" by simp have "?L1 =1+4*log 2 n+48*(log 2(1/\)+16)\<^sup>2+(55+60*ln (ln n\<^sub>0))^3+(4*(m-1)*nat\ln \/ln 0.95\)" using True unfolding \\<^sub>i_def by simp also have "... \ 1+4*log 2 n+48*(log 2(1/\)+16)\<^sup>2+(180^3 * ln n + 180^3*exp 5) + 336 * ln (1/\)" by (intro add_mono 6 2 ereal_mono order.refl) also have "... = (1+180^3*exp 5)+ (4/ln 2 + 180^3)*ln n+48*(log 2(1/\)+16)\<^sup>2+ 336 * ln (1/\)" by (simp add:log_def algebra_simps) also have "... \ 2^30 + 2^23*ln n+48*(log 2(1/\)+16)\<^sup>2+ 336 * ln (1/\)" using n_gt_0 by (intro add_mono ereal_mono 3 order.refl mult_right_mono) auto finally show ?thesis by simp next case False hence "1 / \ \ ln n\<^sub>0" using \_gt_0 n_lbound unfolding stage_two_def not_less by (simp add:divide_simps ac_simps) hence 7: "ln (1 / \) \ ln (ln n\<^sub>0)" using n_lbound \_gt_0 \_lt_1 by (intro iffD2[OF ln_le_cancel_iff]) auto have 8: "0 \ 336*ln (1/\) " using \_gt_0 \_lt_1 by auto have "?L1 = 1 + 4 * log 2 (real n) + 48 * (log 2 (1 / \) + 16)\<^sup>2 + (55 + 60 * ln (1 / \)) ^ 3" using False unfolding \\<^sub>i_def m_def by simp also have "... \ 1 + 4 * log 2 (real n) + 48 * (log 2 (1 / \) + 16)\<^sup>2 + (55 + 60 * ln (ln n\<^sub>0))^3" using \_gt_0 \_lt_1 by (intro add_mono order.refl ereal_mono power_mono mult_left_mono add_nonneg_nonneg 7) auto also have "... \ 1+4*log 2(real n)+48*(log 2 (1 / \)+16)\<^sup>2+(180^3*ln (real n) + 180 ^ 3 * exp 5)" by (intro add_mono ereal_mono 2 order.refl) also have "... = (1+180^3*exp 5)+ (4/ln 2 + 180^3)*ln n+48*(log 2(1/\)+16)\<^sup>2+ 0" by (simp add:log_def algebra_simps) also have "... \ 2^30 + 2^23*ln n+48*(log 2(1/\)+16)\<^sup>2 + 336*ln (1/\)" using n_gt_0 by (intro add_mono ereal_mono 3 order.refl mult_right_mono 8) auto finally show ?thesis by simp qed also have "... = seed_space_usage (real n, \, \)" unfolding seed_space_usage_def by simp finally show ?thesis by simp qed text \The following is an alternative form expressing the correctness and space usage theorems. If @{term "x"} is expression formed by @{term "single"} and @{term "merge"} operations. Then @{term "x"} requires @{term "state_space_usage (real n, \, \)"} bits to encode and @{term "estimate x"} approximates the count of the distinct universe elements in the expression. For example: @{term "estimate (merge (single \ 1) (merge (single \ 5) (single \ 1)))"} approximates the cardinality of @{term "{1::nat, 5, 1}"} i.e. $2$.\ datatype sketch_tree = Single nat | Merge sketch_tree sketch_tree fun eval :: "nat \ sketch_tree \ state" where "eval \ (Single x) = single \ x" | "eval \ (Merge x y) = merge (eval \ x) (eval \ y)" fun sketch_tree_set :: "sketch_tree \ nat set" where "sketch_tree_set (Single x) = {x}" | "sketch_tree_set (Merge x y) = sketch_tree_set x \ sketch_tree_set y" theorem correctness: fixes X assumes "sketch_tree_set t \ {.. pmf_of_set {..}" defines "X \ real (card (sketch_tree_set t))" shows "measure p {\. \estimate (eval \ t) - X\ > \ * X} \ \" (is "?L \ ?R") proof - define A where "A = sketch_tree_set t" have X_eq: "X = real (card A)" unfolding X_def A_def by simp have 0:"eval \ t = \ \ A" for \ unfolding A_def using single_result merge_result by (induction t) (auto simp del:merge.simps single.simps) have 1: "A \ {.. {}" unfolding A_def by (induction t) auto show ?thesis unfolding 0 X_eq p_def by (intro estimate_result 1 2) qed theorem space_usage: assumes "\ < pro_size \" shows "bit_count (encode_state (eval \ t)) \ state_space_usage (real n, \, \)" (is "?A") "bit_count (encode_seed \) \ seed_space_usage (real n, \, \)" (is "?B") proof- define A where "A = sketch_tree_set t" have 0:"eval \ t = \ \ A" for \ unfolding A_def using single_result merge_result by (induction t) (auto simp del:merge.simps single.simps) show ?A unfolding 0 by (intro state_bit_count) show ?B using random_bit_count[OF assms] by simp qed end text \The functions @{term "state_space_usage"} and @{term "seed_space_usage"} are exact bounds on the space usage for the state and the seed. The following establishes asymptotic bounds with respect to the limit $n, \delta^{-1}, \varepsilon^{-1} \rightarrow \infty$.\ context begin text \Some local notation to ease proofs about the asymptotic space usage of the algorithm:\ private definition n_of :: "real \ real \ real \ real" where "n_of = (\(n, \, \). n)" private definition \_of :: "real \ real \ real \ real" where "\_of = (\(n, \, \). \)" private definition \_of :: "real \ real \ real \ real" where "\_of = (\(n, \, \). \)" private abbreviation F :: "(real \ real \ real) filter" where "F \ (at_top \\<^sub>F at_right 0 \\<^sub>F at_right 0)" private lemma var_simps: "n_of = fst" "\_of = (\x. fst (snd x))" "\_of = (\x. snd (snd x))" unfolding n_of_def \_of_def \_of_def by (auto simp add:case_prod_beta) private lemma evt_n: "eventually (\x. n_of x \ n) F" unfolding var_simps by (intro eventually_prod1' eventually_prod2' eventually_ge_at_top) (simp add:prod_filter_eq_bot) private lemma evt_n_1: "\\<^sub>F x in F. 0 \ ln (n_of x)" by (intro eventually_mono[OF evt_n[of "1"]] ln_ge_zero) simp private lemma evt_n_2: "\\<^sub>F x in F. 0 \ ln (ln (n_of x))" using order_less_le_trans[OF exp_gt_zero] by (intro eventually_mono[OF evt_n[of "exp 1"]] ln_ge_zero iffD2[OF ln_ge_iff]) auto private lemma evt_\: "eventually (\x. 1/\_of x \ \ \ \_of x > 0) F" unfolding var_simps by (intro eventually_prod1' eventually_prod2' eventually_conj real_inv_at_right_0_inf eventually_at_right_less) (simp_all add:prod_filter_eq_bot) private lemma evt_\: "eventually (\x. 1/\_of x \ \ \ \_of x > 0) F" unfolding var_simps by (intro eventually_prod1' eventually_prod2' eventually_conj real_inv_at_right_0_inf eventually_at_right_less) (simp_all add:prod_filter_eq_bot) private lemma evt_\_1: "\\<^sub>F x in F. 0 \ ln (1 / \_of x)" by (intro eventually_mono[OF evt_\[of "1"]] ln_ge_zero) simp theorem asymptotic_state_space_complexity: "state_space_usage \ O[F](\(n, \, \). ln (1/\)/\^2 + ln (ln n))" (is "_ \ O[?F](?rhs)") proof - have 0:"(\x. 1) \ O[?F](\x. ln (1 / \_of x))" using order_less_le_trans[OF exp_gt_zero] by (intro landau_o.big_mono eventually_mono[OF evt_\[of "exp 1"]]) (auto intro!: iffD2[OF ln_ge_iff] simp add:abs_ge_iff) have 1:"(\x. 1) \ O[?F](\x. ln (n_of x))" using order_less_le_trans[OF exp_gt_zero] by (intro landau_o.big_mono eventually_mono[OF evt_n[of "exp 1"]]) (auto intro!:iffD2[OF ln_ge_iff] simp add:abs_ge_iff) have "(\x. ((ln (1/\_of x)+1)* (1/\_of x)\<^sup>2))\ O[?F](\x. ln(1/\_of x)* (1/\_of x)\<^sup>2)" by (intro landau_o.mult sum_in_bigo 0) simp_all hence 2: "(\x. 2^40*((ln (1/\_of x)+1)* (1/\_of x)\<^sup>2))\ O[?F](\x. ln(1/\_of x)* (1/\_of x)\<^sup>2)" unfolding cmult_in_bigo_iff by simp have 3: "(1::real) \ exp 2" by (approximation 5) have "(\x. ln (n_of x) / ln 2 + 3) \ O[?F](\x. ln (n_of x))" using 1 by (intro sum_in_bigo) simp_all hence "(\x. ln (ln (n_of x) / ln 2 + 3)) \ O[?F](\x. ln (ln (n_of x)))" using order_less_le_trans[OF exp_gt_zero] order_trans[OF 3] by (intro landau_ln_2[where a="2"] eventually_mono[OF evt_n[of "exp 2"]]) (auto intro!:iffD2[OF ln_ge_iff] add_nonneg_nonneg divide_nonneg_pos) hence 4: "(\x. log 2 (log 2 (n_of x) + 3))\ O[?F](\x. ln(ln(n_of x)))" unfolding log_def by simp have 5: "\\<^sub>F x in ?F. 0 \ ln (1 / \_of x) * (1 / \_of x)\<^sup>2" by (intro eventually_mono[OF eventually_conj[OF evt_\_1 evt_\[of "1"]]]) auto have "state_space_usage = (\x. state_space_usage (n_of x, \_of x, \_of x))" by (simp add:case_prod_beta' n_of_def \_of_def \_of_def) also have "... = (\x. 2 ^ 40 * ((ln (1 / (\_of x)) + 1)* (1/\_of x)\<^sup>2) + log 2 (log 2 (n_of x)+3))" unfolding state_space_usage_def by (simp add:divide_simps) also have "... \ O[?F](\x. ln (1/\_of x)* (1/\_of x)\<^sup>2 + ln (ln (n_of x)))" by (intro landau_sum 2 4 5 evt_n_2) also have "... = O[?F](?rhs)" by (simp add:case_prod_beta' n_of_def \_of_def \_of_def divide_simps) finally show ?thesis by simp qed theorem asymptotic_seed_space_complexity: "seed_space_usage \ O[F](\(n, \, \). ln (1/\)+ln (1/\)^2 + ln n)" (is "_ \ O[?F](?rhs)") proof - have 0: "\\<^sub>F x in ?F. 0 \ (ln (1 / \_of x))\<^sup>2" by simp have 1: "\\<^sub>F x in ?F. 0 \ ln (1 / \_of x) + (ln (1 / \_of x))\<^sup>2" by (intro eventually_mono[OF eventually_conj[OF evt_\_1 0]] add_nonneg_nonneg) auto have 2: "(\x. 1) \ O[?F](\x. ln (1 / \_of x))" using order_less_le_trans[OF exp_gt_zero] by (intro landau_o.big_mono eventually_mono[OF evt_\[of "exp 1"]]) (auto intro!:iffD2[OF ln_ge_iff] simp add:abs_ge_iff) have "(\x. 1) \ O[at_top \\<^sub>F at_right 0 \\<^sub>F at_right 0](\x. ln (n_of x))" using order_less_le_trans[OF exp_gt_zero] by (intro landau_o.big_mono eventually_mono[OF evt_n[of "exp 1"]]) (auto intro!:iffD2[OF ln_ge_iff] simp add:abs_ge_iff) hence 3: "(\x. 1) \ O[?F](\x. ln (1 / \_of x) + (ln (1 / \_of x))\<^sup>2 + ln (n_of x))" by (intro landau_sum_2 1 evt_n_1 0 evt_\_1) simp have 4: "(\x. ln (n_of x)) \ O[?F](\x. ln (1 / \_of x) + (ln (1 / \_of x))\<^sup>2 + ln (n_of x))" by (intro landau_sum_2 1 evt_n_1) simp have "(\x. log 2 (1 / \_of x) + 16) \ O[?F](\x. ln (1 / \_of x))" using 2 unfolding log_def by (intro sum_in_bigo) simp_all hence 5: "(\x. (log 2 (1 / \_of x) + 16)\<^sup>2) \ O[?F](\x. ln (1/\_of x)+(ln (1/\_of x))\<^sup>2)" using 0 unfolding power2_eq_square by (intro landau_sum_2 landau_o.mult evt_\_1) simp_all have 6: "(\x. (log 2 (1 / \_of x) + 16)\<^sup>2) \ O[?F](\x. ln (1/\_of x)+(ln (1/\_of x))\<^sup>2+ln (n_of x))" by (intro landau_sum_1[OF _ _ 5] 1 evt_n_1) have 7: "(\x. ln (1/\_of x)) \ O[?F](\x. ln (1/\_of x)+(ln (1/\_of x))\<^sup>2+ln (n_of x))" by (intro landau_sum_1 1 evt_\_1 0 evt_n_1) simp have "seed_space_usage = (\x. seed_space_usage (n_of x, \_of x, \_of x))" by (simp add:case_prod_beta' n_of_def \_of_def \_of_def) also have "... = (\x. 2^30+2^23*ln (n_of x)+48*(log 2 (1/(\_of x))+16)\<^sup>2 + 336 * ln (1 / \_of x))" unfolding seed_space_usage_def by (simp add:divide_simps) also have "... \ O[?F](\x. ln (1/\_of x)+ln (1/\_of x)^2 + ln (n_of x))" using 3 4 6 7 by (intro sum_in_bigo) simp_all also have "... = O[?F](?rhs)" by (simp add:case_prod_beta' n_of_def \_of_def \_of_def) finally show ?thesis by simp qed definition "space_usage x = state_space_usage x + seed_space_usage x" theorem asymptotic_space_complexity: "space_usage \ O[at_top \\<^sub>F at_right 0 \\<^sub>F at_right 0](\(n, \, \). ln (1/\)/\^2 + ln n)" proof - let ?f1 = "(\x. ln (1/\_of x)*(1/\_of x^2)+ln (ln (n_of x)))" let ?f2 = "(\x. ln(1/\_of x)+ln(1/\_of x)^2+ln (n_of x))" have 0: "\\<^sub>F x in F. 0 \ (1 / (\_of x)\<^sup>2)" unfolding var_simps by (intro eventually_prod1' eventually_prod2' eventually_inv) (simp_all add:prod_filter_eq_bot eventually_nonzero_simps) have 1: "\\<^sub>F x in F. 0 \ ln (1 / \_of x) * (1 / (\_of x)\<^sup>2)" by (intro eventually_mono[OF eventually_conj[OF evt_\_1 0]] mult_nonneg_nonneg) auto have 2: "\\<^sub>F x in F. 0 \ ln (1 / \_of x) * (1 / (\_of x)\<^sup>2) + ln (ln (n_of x))" by (intro eventually_mono[OF eventually_conj[OF 1 evt_n_2]] add_nonneg_nonneg) auto have 3: "\\<^sub>F x in F. 0 \ ln (1 / (\_of x)\<^sup>2)" unfolding power_one_over[symmetric] by (intro eventually_mono[OF evt_\[of "1"]] ln_ge_zero) simp have 4: "\\<^sub>F x in F. 0 \ ln (1 / \_of x) + (ln (1 / \_of x))\<^sup>2 + ln (n_of x)" by (intro eventually_mono[OF eventually_conj[OF evt_n_1 eventually_conj[OF evt_\_1 3]]] add_nonneg_nonneg) auto have 5: "(\_. 1) \ O[F](\x. 1 / (\_of x)\<^sup>2)" unfolding var_simps by (intro bigo_prod_1 bigo_prod_2 bigo_inv) (simp_all add:power_divide prod_filter_eq_bot) have 6: "(\_. 1) \ O[F](\x. ln (1 / \_of x))" unfolding var_simps by (intro bigo_prod_1 bigo_prod_2 bigo_inv) (simp_all add:prod_filter_eq_bot) have 7: "state_space_usage \ O[F](\x. ln (1 / \_of x) * (1 / (\_of x)\<^sup>2) + ln (ln (n_of x)))" using asymptotic_state_space_complexity unfolding \_of_def \_of_def n_of_def by (simp add:case_prod_beta') have 8: "seed_space_usage \ O[F](\x. ln (1 / \_of x) + (ln (1 / \_of x))\<^sup>2 + ln (n_of x))" using asymptotic_seed_space_complexity unfolding \_of_def \_of_def n_of_def by (simp add:case_prod_beta') have 9: "(\x. ln (n_of x)) \ O[F](\x. ln (1 / \_of x) * (1 / (\_of x)\<^sup>2) + ln (n_of x))" by (intro landau_sum_2 evt_n_1 1) simp have "(\x. (ln (1 / \_of x))\<^sup>2) \ O[F](\x. 1 / \_of x^2)" unfolding var_simps by (intro bigo_prod_1 bigo_prod_2 bigo_inv) (simp_all add:power_divide prod_filter_eq_bot) hence 10: "(\x. (ln (1 / \_of x))\<^sup>2) \ O[F](\x. ln (1 / \_of x) * (1 / \_of x^2) + ln (n_of x))" by (intro landau_sum_1 evt_n_1 1 landau_o.big_mult_1' 6) have 11: "(\x. ln (1 / \_of x)) \ O[F](\x. ln (1 / \_of x) * (1 / \_of x^2) + ln (n_of x))" by (intro landau_sum_1 evt_n_1 1 landau_o.big_mult_1 5) simp have 12: "(\x. ln (1/\_of x) * (1/\_of x^2)) \ O[F](\x. ln (1/\_of x)*(1/\_of x^2)+ln (n_of x))" by (intro landau_sum_1 1 evt_n_1) simp have "(\x. ln (ln (n_of x))) \ O[F](\x. ln (n_of x))" unfolding var_simps by (intro bigo_prod_1 bigo_prod_2) (simp_all add:prod_filter_eq_bot) hence 13: "(\x. ln (ln (n_of x))) \ O[F](\x. ln (1 / \_of x) * (1 / \_of x^2) + ln (n_of x))" by (intro landau_sum_2 evt_n_1 1) have "space_usage = (\x. state_space_usage x + seed_space_usage x)" unfolding space_usage_def by simp also have "... \ O[F](\x. ?f1 x + ?f2 x)" by (intro landau_sum 2 4 7 8) also have "... \ O[F](\x. ln (1 / \_of x) * (1/\_of x^2) + ln (n_of x))" by (intro landau_o.big.subsetI sum_in_bigo 9 10 11 12 13) also have "... = O[F](\(n, \, \). ln (1/\)/\^2 + ln n)" unfolding \_of_def \_of_def n_of_def by (simp add:case_prod_beta') finally show ?thesis by simp qed end unbundle no_intro_cong_syntax end diff --git a/thys/Median_Method/Median.thy b/thys/Median_Method/Median.thy --- a/thys/Median_Method/Median.thy +++ b/thys/Median_Method/Median.thy @@ -1,887 +1,885 @@ section \Intervals are Borel measurable\ theory Median imports "HOL-Probability.Probability" "HOL-Library.Multiset" "Universal_Hash_Families.Universal_Hash_Families_More_Independent_Families" begin text \This section contains a proof that intervals are Borel measurable, where an interval is defined as a convex subset of linearly ordered space, more precisely, a set is an interval, if for each triple of points $x < y < z$: If $x$ and $z$ are in the set so is $y$. This includes ordinary intervals like @{term "{a..b}"}, @{term "{a<.. definition interval :: "('a :: linorder) set \ bool" where "interval I = (\x y z. x \ I \ z \ I \ x \ y \ y \ z \ y \ I)" definition up_ray :: "('a :: linorder) set \ bool" where "up_ray I = (\x y. x \ I \ x \ y \ y \ I)" lemma up_ray_borel: assumes "up_ray (I :: (('a :: linorder_topology) set))" shows "I \ borel" proof (cases "closed I") case True then show ?thesis using borel_closed by blast next case False hence b:"\ closed I" by blast have "open I" proof (rule Topological_Spaces.openI) fix x assume c:"x \ I" show "\T. open T \ x \ T \ T \ I" proof (cases "\y. y < x \ y \ I") case True then obtain y where a:"y < x \ y \ I" by blast have "open {y<..}" by simp moreover have "x \ {y<..}" using a by simp moreover have "{y<..} \ I" using a assms(1) by (auto simp: up_ray_def) ultimately show ?thesis by blast next case False hence "I \ {x..}" using linorder_not_less by auto moreover have "{x..} \ I" using c assms(1) unfolding up_ray_def by blast ultimately have "I = {x..}" by (rule order_antisym) moreover have "closed {x..}" by simp ultimately have "False" using b by auto then show ?thesis by simp qed qed then show ?thesis by simp qed definition down_ray :: "('a :: linorder) set \ bool" where "down_ray I = (\x y. y \ I \ x \ y \ x \ I)" lemma down_ray_borel: assumes "down_ray (I :: (('a :: linorder_topology) set))" shows "I \ borel" proof - have "up_ray (-I)" using assms by (simp add: up_ray_def down_ray_def, blast) hence "(-I) \ borel" using up_ray_borel by blast thus "I \ borel" by (metis borel_comp double_complement) qed text \Main result of this section:\ lemma interval_borel: assumes "interval (I :: (('a :: linorder_topology) set))" shows "I \ borel" proof (cases "I = {}") case True then show ?thesis by simp next case False then obtain x where a:"x \ I" by blast have "\y z. y \ I \ {x..} \ y \ z \ z \ I \ {x..}" by (metis assms a interval_def IntE UnE Un_Int_eq(1) Un_Int_eq(2) atLeast_iff nle_le order.trans) hence "up_ray (I \ {x..})" using up_ray_def by blast hence b:"I \ {x..} \ borel" using up_ray_borel by blast have "\y z. y \ I \ {..x} \ z \ y \ z \ I \ {..x}" by (metis assms a interval_def UnE UnI1 UnI2 atMost_iff dual_order.trans linorder_le_cases) hence "down_ray (I \ {..x})" using down_ray_def by blast hence c:"I \ {..x} \ borel" using down_ray_borel by blast have "I = (I \ {x..}) \ (I \ {..x})" using a by fastforce then show ?thesis using b c by (metis sets.Int) qed section \Order statistics are Borel measurable\ text \This section contains a proof that order statistics of Borel measurable random variables are themselves Borel measurable. The proof relies on the existence of branch-free comparison-sort algorithms. Given a sequence length these algorithms perform compare-swap operations on predefined pairs of positions. In particular the result of a comparison does not affect future operations. An example for a branch-free comparison sort algorithm is shell-sort and also bubble-sort without the early exit. The advantage of using such a comparison-sort algorithm is that it can be lifted to work on random variables, where the result of a comparison-swap operation on two random variables @{term"X"} and @{term"Y"} can be represented as the expressions @{term "\\. min (X \) (Y \)"} and @{term "\\. max (X \) (Y \)"}. Because taking the point-wise minimum (resp. maximum) of two random variables is still Borel measurable, and because the entire sorting operation can be represented using such compare-swap operations, we can show that all order statistics are Borel measuable.\ fun sort_primitive where "sort_primitive i j f k = (if k = i then min (f i) (f j) else (if k = j then max (f i) (f j) else f k))" fun sort_map where "sort_map f n = fold id [sort_primitive j i. i <- [0.. 'b :: linorder" shows "j < n \ i < j \ sort_map f n i \ sort_map f n j" proof (induction n arbitrary: i j) case 0 then show ?case by simp next case (Suc n) define g where "g = (\k. fold id [sort_primitive j n. j <- [0..i j. j < n \ i < j \ g k i \ g k j) \ (\l. l < k \ g k l \ g k n)" proof (induction k) case 0 then show ?case using Suc by (simp add:g_def del:sort_map.simps) next case (Suc k) have "g (Suc k) = sort_primitive k n (g k)" by (simp add:g_def) then show ?case using Suc apply (cases "g k k \ g k n") apply (simp add:min_def max_def) using less_antisym apply blast apply (cases "g k n \ g k k") apply (simp add:min_def max_def) apply (metis less_antisym max.coboundedI2 max.orderE) by simp qed hence "\i j. j < Suc n \ i < j \ g n i \ g n j" apply (simp add:k_def) using less_antisym by blast moreover have "sort_map f (Suc n) = g n" by (simp add:sort_map_ind g_def del:sort_map.simps) ultimately show ?case using Suc by (simp del:sort_map.simps) qed lemma sort_map_mono: fixes f :: "nat \ 'b :: linorder" shows "j < n \ i \ j \ sort_map f n i \ sort_map f n j" by (metis sort_map_strict_mono eq_iff le_imp_less_or_eq) lemma sort_map_perm: fixes f :: "nat \ 'b :: linorder" shows "image_mset (sort_map f n) (mset [0..(ts :: ((nat \ 'b) \ nat \ 'b)). \i < n. \j < n. ts = sort_primitive i j)" define t :: "((nat \ 'b) \ nat \ 'b) list" where "t = [sort_primitive j i. i <- [0..x f. is_swap x \ image_mset (x f) (mset_set {0.. 'b :: linorder" assume "is_swap x" then obtain i j where x_def: "x = sort_primitive i j" and i_bound: "i < n" and j_bound:"j < n" using is_swap_def by blast define inv where "inv = mset_set {k. k < n \ k \ i \ k \ j}" have b:"{0.. k \ i \ k \ j} \ {i,j}" apply (rule order_antisym, rule subsetI, simp, blast, rule subsetI, simp) using i_bound j_bound by meson have c:"\k. k \# inv \ (x f) k = f k" by (simp add:x_def inv_def) have "image_mset (x f) inv = image_mset f inv" apply (rule multiset_eqI) using c multiset.map_cong0 by force moreover have "image_mset (x f) (mset_set {i,j}) = image_mset f (mset_set {i,j})" apply (cases "i = j") by (simp add:x_def max_def min_def)+ moreover have "mset_set {0..x \ set t. is_swap x) \ image_mset (fold id t f) (mset [0..x. x \ set t \ is_swap x" apply (simp add:t_def is_swap_def) by (meson atLeastLessThan_iff imageE less_imp_le less_le_trans) ultimately have "image_mset (fold id t f) (mset [0.. ('b :: linorder)" shows "map (sort_map f n) [0.. 'a \ ('b :: {linorder_topology, second_countable_topology})" assumes "n \ 1" assumes "j < n" assumes "\i. i < n \ X i \ measurable M borel" shows "(\x. (sort_map (\i. X i x) n) j) \ measurable M borel" proof - have n_ge_0:"n > 0" using assms by simp define is_swap where "is_swap = (\(ts :: ((nat \ 'b) \ nat \ 'b)). \i < n. \j < n. ts = sort_primitive i j)" define t :: "((nat \ 'b) \ nat \ 'b) list" where "t = [sort_primitive j i. i <- [0.. 'a \ 'b) \ bool" where "meas_ptw = (\f. (\k. k < n \ f k \ borel_measurable M))" have ind_step: "\x (g :: nat \ 'a \ 'b). meas_ptw g \ is_swap x \ meas_ptw (\k \. x (\i. g i \) k)" proof - fix x g assume "meas_ptw g" hence a:"\k. k < n \ g k \ borel_measurable M" by (simp add:meas_ptw_def) assume "is_swap x" then obtain i j where x_def:"x=sort_primitive i j" and i_le:"i < n" and j_le:"j < n" by (simp add:is_swap_def, blast) have "\k. k < n \ (\\. x (\i. g i \) k) \ borel_measurable M" proof - fix k assume "k < n" thus " (\\. x (\i. g i \) k) \ borel_measurable M" apply (simp add:x_def) apply (cases "k = i", simp) using a i_le j_le borel_measurable_min apply blast apply (cases "k = j", simp) using a i_le j_le borel_measurable_max apply blast using a by simp qed thus "meas_ptw (\k \. x (\i. g i \) k)" by (simp add:meas_ptw_def) qed have "(\x \ set t. is_swap x) \ meas_ptw (\ k \. (fold id t (\k. X k \)) k)" proof (induction t rule:rev_induct) case Nil then show ?case using assms by (simp add:meas_ptw_def) next case (snoc x xs) have a:"meas_ptw (\k \. fold (\a. a) xs (\k. X k \) k)" using snoc by simp have b:"is_swap x" using snoc by simp show ?case using ind_step[OF a b] by simp qed moreover have "\x. x \ set t \ is_swap x" apply (simp add:t_def is_swap_def) by (meson atLeastLessThan_iff imageE less_imp_le less_le_trans) ultimately show ?thesis using assms by (simp add:t_def[symmetric] meas_ptw_def) qed text \Main results of this section:\ lemma order_statistics_measurable: fixes X :: "nat \ 'a \ ('b :: {linorder_topology, second_countable_topology})" assumes "n \ 1" assumes "j < n" assumes "\i. i < n \ X i \ measurable M borel" shows "(\x. (sort (map (\i. X i x) [0.. measurable M borel" apply (subst sort_map_eq_sort[symmetric]) using assms by (simp add:order_statistics_measurable_aux del:sort_map.simps) definition median :: "nat \ (nat \ ('a :: linorder)) \ 'a" where "median n f = sort (map f [0.. 'a \ ('b :: {linorder_topology, second_countable_topology})" assumes "n \ 1" assumes "\i. i < n \ X i \ measurable M borel" shows "(\x. median n (\i. X i x)) \ measurable M borel" apply (simp add:median_def) apply (rule order_statistics_measurable[OF assms(1) _ assms(2)]) using assms(1) by force+ section \The Median Method\ text \This section contains the proof for the probability that the median of independent random variables will be in an interval with high probability if the individual variables are in the same interval with probability larger than $\frac{1}{2}$. The proof starts with the elementary observation that the median of a seqeuence with $n$ elements is in an interval $I$ if at least half of them are in $I$. This works because after sorting the sequence the elements that will be in the interval must necessarily form a consecutive subsequence, if its length is larger than $\frac{n}{2}$ the median must be in it. The remainder follows the proof in \<^cite>\\\textsection 2.1\ in "alon1999"\ using the Hoeffding inequality to estimate the probability that at least half of the sequence elements will be in the interval $I$.\ lemma interval_rule: assumes "interval I" assumes "a \ x" "x \ b" assumes "a \ I" assumes "b \ I" shows "x \ I" using assms(1) apply (simp add:interval_def) using assms by blast lemma sorted_int: assumes "interval I" assumes "sorted xs" assumes "k < length xs" "i \ j" "j \ k " assumes "xs ! i \ I" "xs ! k \ I" shows "xs ! j \ I" apply (rule interval_rule[where a="xs ! i" and b="xs ! k"]) using assms by (simp add: sorted_nth_mono)+ lemma mid_in_interval: assumes "2*length (filter (\x. x \ I) xs) > length xs" assumes "interval I" assumes "sorted xs" shows "xs ! (length xs div 2) \ I" proof - have "length (filter (\x. x \ I) xs) > 0" using assms(1) by linarith then obtain v where v_1: "v < length xs" and v_2: "xs ! v \ I" by (metis filter_False in_set_conv_nth length_greater_0_conv) define J where "J = {k. k < length xs \ xs ! k \ I}" have card_J_min: "2*card J > length xs" using assms(1) by (simp add:J_def length_filter_conv_card) consider (a) "xs ! (length xs div 2) \ I" | (b) "xs ! (length xs div 2) \ I \ v > (length xs div 2)" | (c) "xs ! (length xs div 2) \ I \ v < (length xs div 2)" by (metis linorder_cases v_2) thus ?thesis proof (cases) case a then show ?thesis by simp next case b have p:"\k. k \ length xs div 2 \ xs ! k \ I" using b v_2 sorted_int[OF assms(2) assms(3) v_1, where j="length xs div 2"] apply simp by blast - have "card J \ card {Suc (length xs div 2).. card {Suc (length xs div 2).. length xs - (Suc (length xs div 2))" using card_atLeastLessThan by metis hence "length xs \ 2*( length xs - (Suc (length xs div 2)))" using card_J_min by linarith - hence "False" - apply (simp add:nat_distrib) - apply (subst (asm) le_diff_conv2) using b v_1 apply linarith - by simp + hence "False" using b v_1 by auto then show ?thesis by simp next case c - have p:"\k. k \ length xs div 2 \ k < length xs \ xs ! k \ I" - using c v_1 v_2 sorted_int[OF assms(2) assms(3), where i ="v" and j="length xs div 2"] apply simp by blast - have "card J \ card {0..<(length xs div 2)}" - apply (rule card_mono, simp) - apply (rule subsetI, simp add:J_def not_less_eq_eq[symmetric]) - using p linorder_le_less_linear by blast + have "\k. k \ length xs div 2 \ k < length xs \ xs ! k \ I" + using c v_1 v_2 sorted_int[OF assms(2,3), where i ="v" and j="length xs div 2"] + by simp blast + hence "card J \ card {0..<(length xs div 2)}" + unfolding J_def using linorder_le_less_linear by (intro card_mono subsetI) auto hence "card J \ (length xs div 2)" using card_atLeastLessThan by simp then show ?thesis using card_J_min by linarith qed qed lemma median_est: assumes "interval I" assumes "2*card {k. k < n \ f k \ I} > n" shows "median n f \ I" proof - - have a:"{k. k < n \ f k \ I} = {i. i < n \ map f [0.. I}" - apply (rule order_antisym, rule subsetI, simp) - by (rule subsetI, simp, metis add_0 diff_zero nth_map_upt) + have "{k. k < n \ f k \ I} = {i. i < n \ map f [0.. I}" by auto + thus ?thesis using assms unfolding median_def + by (intro mid_in_interval[OF _ assms(1),where xs="sort (map f [0.. I" + shows "2*card {k. k < n \ f k \ I} \ n" +proof (rule ccontr) + assume a: "\(2*card {k. k < n \ f k \ I} \ n)" + + have "2 * n = 2 * card {k. k < n}" by simp + also have "... = 2 * card ({k. k < n \ f k \ I} \ {k. k < n \ f k \ I})" + by (intro arg_cong2[where f="(*)"] refl arg_cong[where f="card"]) auto + also have "... = 2 * card {k. k < n \ f k \ I} + 2 * card {k. k < n \ f k \ I}" + by (subst card_Un_disjoint) auto + also have "... \ n + 2 * card {k. k < n \ f k \ I}" + using median_est[OF assms(1)] assms(2) not_less by (intro add_mono) auto + also have "... < n + n" + using a by (intro add_strict_left_mono) auto + finally show "False" by auto qed lemma prod_pmf_bernoulli_mono: assumes "finite I" assumes "\i. i \ I \ 0 \ f i \ f i \ g i \ g i \ 1" assumes "\x y. x \ A \ (\i \ I. x i \ y i) \ y \ A" shows "measure (Pi_pmf I d (bernoulli_pmf \ f)) A \ measure (Pi_pmf I d (bernoulli_pmf \ g)) A" (is "?L \ ?R") proof - define q where "q i = pmf_of_list [(0::nat, f i), (1, g i - f i), (2, 1 - g i)]" for i have wf:"pmf_of_list_wf [(0::nat, f i), (1, g i - f i), (2, 1 - g i)]" if "i \ I" for i using assms(2)[OF that] by (intro pmf_of_list_wfI) auto have 0: "bernoulli_pmf (f i) = map_pmf (\x. x = 0) (q i)" (is "?L1 = ?R1") if "i \ I" for i proof - have "0 \ f i" "f i \ 1" using assms(2)[OF that] by auto hence "pmf ?L1 x = pmf ?R1 x" for x unfolding q_def pmf_map measure_pmf_of_list[OF wf[OF that]] by (cases x;simp_all add:vimage_def) thus ?thesis by (intro pmf_eqI) auto qed have 1: "bernoulli_pmf (g i) = map_pmf (\x. x \ {0,1}) (q i)" (is "?L1 = ?R1") if "i \ I" for i proof - have "0 \ g i" "g i \ 1" using assms(2)[OF that] by auto hence "pmf ?L1 x = pmf ?R1 x" for x unfolding q_def pmf_map measure_pmf_of_list[OF wf[OF that]] by (cases x;simp_all add:vimage_def) thus ?thesis by (intro pmf_eqI) auto qed have 2: "(\k. x k = 0) \ A \ (\k. x k = 0 \ x k = Suc 0) \ A" for x by (erule assms(3)) auto have "?L = measure (Pi_pmf I d (\i. map_pmf (\x. x = 0) (q i))) A" unfolding comp_def by (simp add:0 cong: Pi_pmf_cong) also have "... = measure (map_pmf ((\) (\x. x = 0)) (Pi_pmf I (if d then 0 else 2) q)) A" by (intro arg_cong2[where f="measure_pmf.prob"] Pi_pmf_map[OF assms(1)]) auto also have "... = measure (Pi_pmf I (if d then 0 else 2) q) {x. (\k. x k = 0) \ A}" by (simp add:comp_def vimage_def) also have "... \ measure (Pi_pmf I (if d then 0 else 2) q) {x. (\k. x k \ {0,1}) \ A}" using 2 by (intro measure_pmf.finite_measure_mono subsetI) auto also have "... = measure (map_pmf ((\) (\x. x \ {0,1})) (Pi_pmf I (if d then 0 else 2) q)) A" by (simp add:vimage_def comp_def) also have "... = measure (Pi_pmf I d (\i. map_pmf (\x. x \ {0,1}) (q i))) A" by (intro arg_cong2[where f="measure_pmf.prob"] Pi_pmf_map[OF assms(1), symmetric]) auto also have "... = ?R" unfolding comp_def by (simp add:1 cong: Pi_pmf_cong) finally show ?thesis by simp qed lemma discrete_measure_eqI: assumes "sets M = count_space UNIV" assumes "sets N = count_space UNIV" assumes "countable \" assumes "\x. x \ \ \ emeasure M {x} = emeasure N {x} \ emeasure M {x} \ \" assumes "AE x in M. x \ \" assumes "AE x in N. x \ \" shows "M = N" proof - define E where "E = insert {} ((\x. {x}) ` \)" have 0: "Int_stable E" unfolding E_def by (intro Int_stableI) auto have 1: "countable E" using assms(3) unfolding E_def by simp have "E \ Pow \" unfolding E_def by auto have "emeasure M A = emeasure N A" if A_range: "A \ E" for A using that assms(4) unfolding E_def by auto moreover have "sets M = sets N" using assms(1,2) by simp moreover have "\ \ sets M" using assms(1) by simp moreover have "E \ {}" unfolding E_def by simp moreover have "\E = \" unfolding E_def by simp moreover have "emeasure M a \ \" if "a \ E" for a using that assms(4) unfolding E_def by auto moreover have "sets (restrict_space M \) = Pow \" using assms(1) by (simp add:sets_restrict_space range_inter) moreover have "sets (restrict_space N \) = Pow \" using assms(2) by (simp add:sets_restrict_space range_inter) moreover have "sigma_sets \ E = Pow \" unfolding E_def by (intro sigma_sets_singletons_and_empty assms(3)) ultimately show ?thesis by (intro measure_eqI_restrict_generator[OF 0 _ _ _ _ _ _ assms(5,6) 1]) auto qed text \Main results of this section:\ text \The next theorem establishes a bound for the probability of the median of independent random variables using the binomial distribution. In a follow-up step, we will establish tail bounds for the binomial distribution and corresponding median bounds. This two-step strategy was suggested by Yong Kiam Tan. In a previous version, I only had verified the exponential tail bound (see theorem \verb+median_bound+ below).\ theorem (in prob_space) median_bound_raw: fixes I :: "('b :: {linorder_topology, second_countable_topology}) set" assumes "n > 0" "p \ 0" assumes "interval I" assumes "indep_vars (\_. borel) X {0..i. i < n \ \

(\ in M. X i \ \ I) \ p" shows "\

(\ in M. median n (\i. X i \) \ I) \ 1 - measure (binomial_pmf n p) {..n div 2}" (is "?L \ ?R") proof - let ?pi = "Pi_pmf {..(\ in M. X i \ \ I)" for i have n_ge_1: "n \ 1" using assms(1) by simp have 0: "{k. k < n \ (k < n \ X k \ \ I)} = {k. k < n \ X k \ \ I}" for \ by auto have "countable ({..\<^sub>E (UNIV :: bool set))" by (intro countable_PiE) auto hence countable_ext: "countable (extensional {.. bool) set)" unfolding PiE_def by auto have m0: "I \ sets borel" using interval_borel[OF assms(3)] by simp have m1: "random_variable borel (\x. X k x)" if "k \ {..x. x \ I) \ borel \\<^sub>M (measure_pmf ((bernoulli_pmf \ q) k))" for k using m0 by measurable hence m3: "random_variable (measure_pmf ((bernoulli_pmf \ q) k)) (\x. X k x \ I)" if "k \ {.. q)) (\\. \k\{.. \ I)" by (intro measurable_restrict) auto moreover have "A \ sets (Pi\<^sub>M {..x. measure_pmf (bernoulli_pmf (q x))))" if "A \ extensional {..a \ A. {a})" by auto also have "... = (\a \ A. PiE {..k. {a k}))" using that by (intro arg_cong[where f="Union"] image_cong refl PiE_singleton[symmetric]) auto also have "... \ sets (Pi\<^sub>M {..x. measure_pmf (bernoulli_pmf (q x))))" using that countable_ext countable_subset by (intro sets.countable_Union countable_image image_subsetI sets_PiM_I_finite) auto finally show ?thesis by simp qed hence m5: "id \ (PiM {.. q)) \\<^sub>M (count_space UNIV)" by (intro measurableI) (simp_all add:vimage_def space_PiM PiE_def) ultimately have "random_variable (count_space UNIV) (id \ (\\. \k\{.. \ I))" by (rule measurable_comp) hence m6: "random_variable (count_space UNIV) (\\. \k\{.. \ I)" by simp have indep: "indep_vars (bernoulli_pmf \ q) (\i x. X i x \ I) {0.. space M. (X k x \ I) = \} = measure (bernoulli_pmf (q k)) {\}" if "k < n" for \ k proof (cases "\") case True then show ?thesis unfolding q_def by (simp add:measure_pmf_single) next case False have "{x \ space M. X k x \ I} \ events" using that m0 by (intro measurable_sets_Collect[OF m1]) auto hence "prob {x \ space M. X k x \ I} = 1 - prob {\ \ space M. X k \ \ I}" by (subst prob_neg) auto thus ?thesis using False unfolding q_def by (simp add:measure_pmf_single) qed hence 1: "emeasure M {x \ space M. (X k x \ I) = \} = emeasure (bernoulli_pmf (q k)) {\}" if "k < n" for \ k using that unfolding emeasure_eq_measure measure_pmf.emeasure_eq_measure by simp interpret product_sigma_finite "(bernoulli_pmf \ q)" by standard have "distr M (count_space UNIV) (\\. (\k\{.. \ I)) = distr (distr M (PiM {.. q)) (\\. \k\{.. \ I)) (count_space UNIV) id" by (subst distr_distr[OF m5 m4]) (simp add:comp_def) also have "... = distr (PiM {..i. (distr M ((bernoulli_pmf \ q) i) (\\. X i \ \ I) ))) (count_space UNIV) id" using assms(1) indep atLeast0LessThan by (intro arg_cong2[where f="\x y. distr x y id"] iffD1[OF indep_vars_iff_distr_eq_PiM'] m3) auto also have "... = distr (PiM {.. q)) (count_space UNIV) id" using m3 1 by (intro distr_cong PiM_cong refl discrete_measure_eqI[where \="UNIV"]) (simp_all add:emeasure_distr vimage_def Int_def conj_commute) also have "... = ?pi (bernoulli_pmf \ q)" proof (rule discrete_measure_eqI[where \="extensional {..M {.. q)) {x} = emeasure (Pi\<^sub>M {.. q)) (PiE {..k. {x k}))" using PiE_singleton[OF 4] by simp also have "... = (\iq)) (PiE_dflt {..k. {x k}))" unfolding measure_pmf.emeasure_eq_measure by (subst measure_Pi_pmf_PiE_dflt) (simp_all add:prod_ennreal) also have "... = emeasure (Pi_pmf {..q)) {x}" using 4 by (intro arg_cong2[where f="emeasure"]) (auto simp add:PiE_dflt_def extensional_def) finally have "emeasure (Pi\<^sub>M {.. q)) {x} = emeasure (Pi_pmf {.. q)) {x}" by simp thus ?case using 4 by (subst (1 2) emeasure_distr[OF m5]) (simp_all add:vimage_def space_PiM PiE_def) next case 5 have "AE x in Pi\<^sub>M {.. q). x \ extensional {..\. (\k\{.. \ I)) = ?pi (bernoulli_pmf\q)" by simp have 3: "n < 2 * card {k. k < n \ y k}" if "n < 2 * card {k. k < n \ x k}" "\i. i < n \ x i \ y i" for x y proof - have "2 * card {k. k < n \ x k} \ 2 * card {k. k < n \ y k}" using that(2) by (intro mult_left_mono card_mono) auto thus ?thesis using that(1) by simp qed have 4: "0 \ p \ p \ q i \ q i \ 1" if "i < n" for i unfolding q_def using assms(2,5) that by auto have p_range: "p \ {0..1}" using 4[OF assms(1)] by auto have "?R = 1 - measure_pmf.prob (binomial_pmf n p) {k. 2 * k \ n}" by (intro arg_cong2[where f="(-)"] arg_cong2[where f="measure_pmf.prob"]) auto also have "... = measure (binomial_pmf n p) {k. n < 2 * k}" by (subst measure_pmf.prob_compl[symmetric]) (simp_all add:set_diff_eq not_le) also have "... = measure (?pi (bernoulli_pmf \ (\_. p))) {\. n < 2 * card {k. k < n \ \ k}}" using p_range by (subst binomial_pmf_altdef'[where A="{.. measure (?pi (bernoulli_pmf \ q)) {\. n < 2 * card {k. k < n \ \ k}}" using 3 4 by (intro prod_pmf_bernoulli_mono) auto also have "... = \

(\ in distr M (count_space UNIV) (\\. \k\{.. \ I). n<2*card {k. k < n \ \ k})" unfolding 2 by simp also have "... = \

(\ in M. n < 2*card {k. k < n \ X k \ \ I})" by (subst measure_distr[OF m6]) (simp_all add:vimage_def Int_def conj_commute 0) also have "... \ ?L" using median_est[OF assms(3)] m0 m1 by (intro finite_measure_mono measurable_sets_Collect[OF median_measurable[OF n_ge_1]]) auto finally show "?R \ ?L" by simp qed text \Cumulative distribution of the binomial distribution (contributed by Yong Kiam Tan):\ lemma prob_binomial_pmf_upto: assumes "0 \ p" "p \ 1" shows "measure_pmf.prob (binomial_pmf n p) {..m} = sum (\i. real (n choose i) * p^i * (1 - p) ^(n-i)) {0..m}" by (auto simp: pmf_binomial[OF assms] measure_measure_pmf_finite intro!: sum.cong) text \A tail bound for the binomial distribution using Hoeffding's inequality:\ lemma binomial_pmf_tail: assumes "p \ {0..1}" "real k \ real n * p" shows "measure (binomial_pmf n p) {..k} \ exp (- 2 * real n * (p - real k / n)^2)" (is "?L \ ?R") proof (cases "n = 0") case True then show ?thesis by simp next case False let ?A = "{.. where "\ = (\ix. (of_bool (x i) :: real) \ ?pi))" define \ :: real where "\ = \ - k" (* eps \ 0 <-> k \ mu *) have "\ = (\ix. (of_bool x :: real) \ (map_pmf (\\. \ i) ?pi)))" unfolding \_def by simp also have "... = (\ix. (of_bool x :: real) \ (bernoulli_pmf p)))" by (simp add: Pi_pmf_component) also have "... = real n * p" using assms(1) by simp finally have \_alt: "\ = real n * p" by simp have \_ge_0: "\ \ 0" using assms(2) unfolding \_def \_alt by auto have indep: "prob_space.indep_vars ?pi (\_. borel) (\k \. of_bool (\ k)) {..k \. of_bool (\ k)" "\_.0" "\_.1" \ using indep unfolding \_def by (unfold_locales) simp_all have "?L = measure (map_pmf (\f. card {x \ ?A. f x}) ?pi) {..k}" by (intro arg_cong2[where f="measure_pmf.prob"] binomial_pmf_altdef' assms(1)) auto also have "... = \

(\ in ?pi. (\i i)) \ \ - \)" unfolding \_def by (simp add:vimage_def Int_def) also have "... \ exp (- 2 * \\<^sup>2 / (\i2))" using False by (intro Hoeffding_ineq_le \_ge_0) auto also have "... = ?R" unfolding \_def \_alt by (simp add:power2_eq_square field_simps) finally show ?thesis by simp qed theorem (in prob_space) median_bound: fixes n :: nat fixes I :: "('b :: {linorder_topology, second_countable_topology}) set" assumes "interval I" assumes "\ > 0" assumes "\ \ {0<..<1}" assumes "indep_vars (\_. borel) X {0.. - ln \ / (2 * \\<^sup>2)" assumes "\i. i < n \ \

(\ in M. X i \ \ I) \ 1/2+\" shows "\

(\ in M. median n (\i. X i \) \ I) \ 1-\" proof - have "0 < -ln \ / (2 * \\<^sup>2)" using assms by (intro divide_pos_pos) auto also have "... \ real n" using assms by simp finally have "real n > 0" by simp hence n_ge_0:"n > 0" by simp have d0: "real_of_int \real n / 2\ * 2 / real n \ 1" using n_ge_0 by simp linarith hence d1: "real (nat \real n / 2\) \ real n * (1 / 2)" using n_ge_0 by (simp add:field_simps) also have "... \ real n * (1 / 2 + \)" using assms(2) by (intro mult_left_mono) auto finally have d1: "real (nat \real n / 2\) \ real n * (1 / 2 + \)" by simp have "1/2 + \ \ \

(\ in M. X 0 \ \ I)" using n_ge_0 by (intro assms(6)) also have "... \ 1" by simp finally have d2: "1 / 2 + \ \ 1" by simp have d3: "nat \real n / 2\ = n div 2" by linarith have "1 - \ \ 1 - exp (- 2 * real n * \\<^sup>2)" using assms(2,3,5) by (intro diff_mono order.refl iffD1[OF ln_ge_iff]) (auto simp:field_simps) also have "... \ 1 - exp (- 2 * real n * ((1/2+\) - real (nat \real n/2\) / real n)\<^sup>2)" using d0 n_ge_0 assms(2) by (intro diff_mono order.refl iffD2[OF exp_le_cancel_iff] mult_left_mono_neg power_mono) auto also have "... \ 1 - measure (binomial_pmf n (1/2+\)) {..nat \real n/2\}" using assms(2) d1 d2 by (intro diff_mono order.refl binomial_pmf_tail) auto also have "... = 1 - measure (binomial_pmf n (1/2+\)) {..n div 2}" by (simp add:d3) also have "... \ \

(\ in M. median n (\i. X i \) \ I)" using assms(2) by (intro median_bound_raw n_ge_0 assms(1,4,6) add_nonneg_nonneg) auto finally show ?thesis by simp qed text \This is a specialization of the above to closed real intervals.\ corollary (in prob_space) median_bound_1: assumes "\ > 0" assumes "\ \ {0<..<1}" assumes "indep_vars (\_. borel) X {0.. - ln \ / (2 * \\<^sup>2)" assumes "\i \ {0..(\ in M. X i \ \ ({a..b} :: real set)) \ 1/2+\" shows "\

(\ in M. median n (\i. X i \) \ {a..b}) \ 1-\" using assms(5) by (intro median_bound[OF _ assms(1,2,3,4)]) (auto simp:interval_def) text \This is a specialization of the above, where $\alpha = \frac{1}{6}$ and the interval is described using a mid point @{term "\"} and radius @{term "\"}. The choice of $\alpha = \frac{1}{6}$ implies a success probability per random variable of $\frac{2}{3}$. It is a commonly chosen success probability for Monte-Carlo algorithms (cf. \<^cite>\\\textsection 4\ in "baryossef2002"\ or \<^cite>\\\textsection 1\ in "kane2010"\).\ corollary (in prob_space) median_bound_2: fixes \ \ :: real assumes "\ \ {0<..<1}" assumes "indep_vars (\_. borel) X {0.. -18 * ln \" assumes "\i. i < n \ \

(\ in M. abs (X i \ - \) > \) \ 1/3" shows "\

(\ in M. abs (median n (\i. X i \) - \) \ \) \ 1-\" proof - - have b:"\i. i < n \ space M - {\ \ space M. X i \ \ {\ - \..\ + \}} = {\ \ space M. abs (X i \ - \) > \}" - apply (rule order_antisym, rule subsetI, simp, linarith) - by (rule subsetI, simp, linarith) + have b:"space M - {\ \ space M. X i \ \ {\-\..\+\}} = {\ \ space M. abs (X i \ - \) > \}" for i + by auto have "\i. i < n \ 1 - \

(\ in M. X i \ \ {\- \..\+\}) \ 1/3" + using assms apply (subst prob_compl[symmetric]) - apply (measurable) - using assms(2) apply (simp add:indep_vars_def) - apply (subst b, simp) - using assms(4) by simp + apply (measurable, simp add:indep_vars_def) + by (subst b, simp) hence a:"\i. i < n \ \

(\ in M. X i \ \ {\- \..\+\}) \ 2/3" by simp - have "1-\ \ \

(\ in M. median n (\i. X i \) \ {\-\..\+\})" - apply (rule median_bound_1[OF _ assms(1) assms(2), where \="1/6"], simp) - using assms(3) apply (simp add:power2_eq_square) - using a by simp + have "1-\ \ \

(\ in M. median n (\i. X i \) \ {\-\..\+\})" using a assms(3) + by (intro median_bound_1[OF _ assms(1,2), where \="1/6"]) (simp_all add:power2_eq_square) also have "... = \

(\ in M. abs (median n (\i. X i \) - \) \ \)" - apply (rule arg_cong2[where f="measure"], simp) - apply (rule order_antisym, rule subsetI, simp, linarith) - by (rule subsetI, simp, linarith) + by (intro arg_cong2[where f="measure"] Collect_cong) auto finally show ?thesis by simp qed section \Some additional results about the median\ lemma sorted_mono_map: assumes "sorted xs" assumes "mono f" shows "sorted (map f xs)" - using assms apply (simp add:sorted_wrt_map) - apply (rule sorted_wrt_mono_rel[where P="(\)"]) - by (simp add:mono_def, simp) + using assms(2) unfolding sorted_wrt_map + by (intro sorted_wrt_mono_rel[OF _ assms(1)]) (simp add:mono_def) text \This could be added to @{theory "HOL.List"}:\ lemma map_sort: assumes "mono f" shows "sort (map f xs) = map f (sort xs)" using assms by (intro properties_for_sort sorted_mono_map) auto lemma median_cong: assumes "\i. i < n \ f i = g i" shows "median n f = median n g" - apply (cases "n = 0", simp add:median_def) - apply (simp add:median_def) - apply (rule arg_cong2[where f="(!)"]) - apply (rule arg_cong[where f="sort"], rule map_cong, simp, simp add:assms) - by simp + unfolding median_def using assms + by (intro arg_cong2[where f="(!)"] arg_cong[where f="sort"] map_cong) auto lemma median_restrict: "median n (\i \ {0.. 0" assumes "mono g" shows "g (median n f) = median n (g \ f)" apply (simp add: median_def del:map_map) apply (subst map_map[symmetric]) apply (subst map_sort[OF assms(2)]) apply (subst nth_map, simp) using assms apply fastforce by simp lemma median_rat: assumes "n > 0" shows "real_of_rat (median n f) = median n (\i. real_of_rat (f i))" apply (subst (2) comp_def[where g="f", symmetric]) apply (rule median_commute_mono[OF assms(1)]) by (simp add: mono_def of_rat_less_eq) lemma median_const: assumes "k > 0" shows "median k (\i \ {0.._. a) [0.._. a) [0.._. a) [0..i \ {0.._. a)" by (subst median_restrict, simp) also have "... = a" using assms by (simp add:median_def a) finally show ?thesis by simp qed end