diff --git a/thys/Distributed_Distinct_Elements/Distributed_Distinct_Elements_Accuracy_Without_Cutoff.thy b/thys/Distributed_Distinct_Elements/Distributed_Distinct_Elements_Accuracy_Without_Cutoff.thy --- a/thys/Distributed_Distinct_Elements/Distributed_Distinct_Elements_Accuracy_Without_Cutoff.thy +++ b/thys/Distributed_Distinct_Elements/Distributed_Distinct_Elements_Accuracy_Without_Cutoff.thy @@ -1,1007 +1,1007 @@ section \Accuracy without cutoff\label{sec:accuracy_wo_cutoff}\ text \This section verifies that each of the $l$ estimate have the required accuracy with high probability assuming that there was no cut-off, i.e., that $s=0$. Section~\ref{sec:accuracy} will then show that this remains true as long as the cut-off is below @{term "t f"} the subsampling threshold.\ theory Distributed_Distinct_Elements_Accuracy_Without_Cutoff imports Distributed_Distinct_Elements_Inner_Algorithm Distributed_Distinct_Elements_Balls_and_Bins begin no_notation Polynomials.var ("X\") locale inner_algorithm_fix_A = inner_algorithm + fixes A assumes A_range: "A \ {.. A" begin definition X :: nat where "X = card A" definition q_max where "q_max = nat (\log 2 X\ - b_exp)" definition t :: "(nat \ nat) \ int" where "t f = int (Max (f ` A)) - b_exp + 9" definition s :: "(nat \ nat) \ nat" where "s f = nat (t f)" definition R :: "(nat \ nat) \ nat set" where "R f = {a. a \ A \ f a \ s f}" definition r :: "nat \ (nat \ nat) \ nat" where "r x f = card {a. a \ A \ f a \ x}" definition p where "p = (\(f,g,h). card {j\ {..\<^sub>1 (f,g,h) A 0 j \ s f})" definition Y where "Y = (\(f,g,h). 2 ^ s f * \_inv (p (f,g,h)))" lemma fin_A: "finite A" using A_range finite_nat_iff_bounded by auto lemma X_le_n: "X \ n" proof - have "card A \ card {.. 1" unfolding X_def using fin_A A_nonempty by (simp add: leI) lemma of_bool_square: "(of_bool x)\<^sup>2 = ((of_bool x)::real)" by (cases x, auto) lemma r_eq: "r x f = (\ a \ A.( of_bool( x \ f a) :: real))" unfolding r_def of_bool_def sum.If_cases[OF fin_A] by (simp add: Collect_conj_eq) lemma shows r_exp: "(\\. real (r x \) \ \\<^sub>1) = real X * (of_bool (x \ max (nat \log 2 n\) 1) / 2^x)" and r_var: "measure_pmf.variance \\<^sub>1 (\\. real (r x \)) \ (\\. real (r x \) \ \\<^sub>1)" proof - define V :: "nat \ (nat \ nat) \ real" where "V = (\a f. of_bool (x \ f a))" have V_exp: "(\\. V a \ \\\<^sub>1) = of_bool (x \ max (nat \log 2 n\) 1)/2^x" (is "?L = ?R") if "a \ A" for a proof - have a_le_n: "a < n" using that A_range by auto have "?L = (\\. indicator {f. x \ f a} \ \ \\<^sub>1)" unfolding V_def by (intro integral_cong_AE) auto also have "... = measure (map_pmf (\\. \ a) (sample_pmf \\<^sub>1)) {f. x \ f}" by simp also have "... = measure (\ n_exp) {f. x \ f}" unfolding \\<^sub>1.single[OF a_le_n] by simp also have "... = of_bool (x \ max (nat \log 2 n\) 1)/2^x" unfolding \_prob n_exp_def by simp finally show ?thesis by simp qed have b:"(\\. real (r x \) \ \\<^sub>1) = (\ a \ A. (\\. V a \ \\\<^sub>1))" unfolding r_eq V_def using \\<^sub>1.sample_space by (intro Bochner_Integration.integral_sum) auto also have "... = (\ a \ A. of_bool (x \ max (nat \log 2 n\) 1)/2^x)" using V_exp by (intro sum.cong) auto also have "... = X * (of_bool (x \ max (nat \log 2 n\) 1) / 2^x)" using X_def by simp finally show "(\\. real (r x \) \ \\<^sub>1) = real X * (of_bool (x \ max (nat \log 2 n\) 1)/ 2^x)" by simp have "(\\. (V a \)^2 \ \\<^sub>1) = (\\. V a \ \ \\<^sub>1)" for a unfolding V_def of_bool_square by simp hence a:"measure_pmf.variance \\<^sub>1 (V a) \ measure_pmf.expectation \\<^sub>1 (V a)" for a using \\<^sub>1.sample_space by (subst measure_pmf.variance_eq) auto have "J \ A \ card J = 2 \ prob_space.indep_vars \\<^sub>1 (\_. borel) V J" for J unfolding V_def using A_range finite_subset[OF _ fin_A] by (intro prob_space.indep_vars_compose2[where Y="\i y. of_bool(x \ y)" and M'="\_. discrete"] prob_space.k_wise_indep_vars_subset[OF _ \\<^sub>1.indep]) (auto simp:prob_space_measure_pmf) hence "measure_pmf.variance \\<^sub>1 (\\. real (r x \)) = (\ a \ A. measure_pmf.variance \\<^sub>1 (V a))" unfolding r_eq V_def using \\<^sub>1.sample_space - by (intro measure_pmf.var_sum_pairwise_indep_2 fin_A) (simp_all) + by (intro measure_pmf.bienaymes_identity_pairwise_indep_2 fin_A) (simp_all) also have "... \ (\ a \ A. (\\. V a \ \ \\<^sub>1))" by (intro sum_mono a) also have "... = (\\. real (r x \) \ \\<^sub>1)" unfolding b by simp finally show "measure_pmf.variance \\<^sub>1 (\\. real (r x \)) \ (\\. real (r x \) \ \\<^sub>1)" by simp qed definition E\<^sub>1 where "E\<^sub>1 = (\(f,g,h). 2 powr (-t f) * X \ {b/2^16..b/2})" lemma t_low: "measure \\<^sub>1 {f. of_int (t f) < log 2 (real X) + 1 - b_exp} \ 1/2^7" (is "?L \ ?R") proof (cases "log 2 (real X) \ 8") case True define Z :: "(nat \ nat) \ real" where "Z = r (nat \log 2 (real X) - 8\)" have "log 2 (real X) \ log 2 (real n)" using X_le_n X_ge_1 by (intro log_mono) auto hence "nat \log 2 (real X) - 8\ \ nat \log 2 (real n)\" by (intro nat_mono ceiling_mono) simp hence a:"(nat \log 2 (real X) - 8\ \ max (nat \log 2 (real n)\) 1)" by simp have b:"real (nat (\log 2 (real X)\ - 8)) \ log 2 (real X) - 7" using True by linarith have "2 ^ 7 = real X / (2 powr (log 2 X) * 2 powr (-7))" using X_ge_1 by simp also have "... = real X / (2 powr (log 2 X - 7))" by (subst powr_add[symmetric]) simp also have "... \ real X / (2 powr (real (nat \log 2 (real X) - 8\)))" using b by (intro divide_left_mono powr_mono) auto also have "... = real X / 2 ^ nat \log 2 (real X) - 8\" by (subst powr_realpow) auto finally have "2 ^ 7 \ real X / 2 ^ nat \log 2 (real X) - 8\" by simp hence exp_Z_gt_2_7: "(\\. Z \ \\\<^sub>1) \ 2^7" using a unfolding Z_def r_exp by simp have var_Z_le_exp_Z: "measure_pmf.variance \\<^sub>1 Z \ (\\. Z \ \\\<^sub>1)" unfolding Z_def by (intro r_var) have "?L \ measure \\<^sub>1 {f. of_nat (Max (f ` A)) < log 2 (real X) - 8}" unfolding t_def by (intro pmf_mono) (auto simp add:int_of_nat_def) also have "... \ measure \\<^sub>1 {f \ space \\<^sub>1. (\\. Z \ \\\<^sub>1) \ \Z f - (\\. Z \ \\\<^sub>1) \}" proof (rule pmf_mono) fix f assume "f \ set_pmf (sample_pmf \\<^sub>1)" have fin_f_A: "finite (f ` A)" using fin_A finite_imageI by blast assume " f \ {f. real (Max (f ` A)) < log 2 (real X) - 8}" hence "real (Max (f ` A)) < log 2 (real X) - 8" by auto hence "real (f a) < log 2 (real X) - 8" if "a \ A" for a using Max_ge[OF fin_f_A] imageI[OF that] order_less_le_trans by fastforce hence "of_nat (f a) < \log 2 (real X) - 8\" if "a \ A" for a using that by (subst less_ceiling_iff) auto hence "f a < nat \log 2 (real X) - 8\" if "a \ A" for a using that True by fastforce hence "r (nat \log 2 (real X) - 8\) f = 0" unfolding r_def card_eq_0_iff using not_less by auto hence "Z f = 0" unfolding Z_def by simp thus "f \ {f \ space \\<^sub>1. (\\. Z \ \\\<^sub>1) \ \Z f - (\\. Z \ \\\<^sub>1)\}" by auto qed also have "... \ measure_pmf.variance \\<^sub>1 Z / (\\. Z \ \\\<^sub>1)^2" using exp_Z_gt_2_7 \\<^sub>1.sample_space by (intro measure_pmf.second_moment_method) simp_all also have "... \ (\\. Z \ \\\<^sub>1) / (\\. Z \ \\\<^sub>1)^2" by (intro divide_right_mono var_Z_le_exp_Z) simp also have "... = 1 / (\\. Z \ \\\<^sub>1)" using exp_Z_gt_2_7 by (simp add:power2_eq_square) also have "... \ ?R" using exp_Z_gt_2_7 by (intro divide_left_mono) auto finally show ?thesis by simp next case "False" have "?L \ measure \\<^sub>1 {f. of_nat (Max (f ` A)) < log 2 (real X) - 8}" unfolding t_def by (intro pmf_mono) (auto simp add:int_of_nat_def) also have "... \ measure \\<^sub>1 {}" using False by (intro pmf_mono) simp also have "... = 0" by simp also have "... \ ?R" by simp finally show ?thesis by simp qed lemma t_high: "measure \\<^sub>1 {f. of_int (t f) > log 2 (real X) + 16 - b_exp} \ 1/2^7" (is "?L \ ?R") proof - define Z :: "(nat \ nat) \ real" where "Z = r (nat \log 2 (real X) + 8\)" have Z_nonneg: "Z f \ 0" for f unfolding Z_def r_def by simp have "(\\. Z \ \\\<^sub>1) \ real X / (2 ^ nat \log 2 (real X) + 8\)" unfolding Z_def r_exp by simp also have "... \ real X / (2 powr (real (nat \log 2 (real X) + 8\)))" by (subst powr_realpow) auto also have "... \ real X / (2 powr \log 2 (real X) + 8\)" by (intro divide_left_mono powr_mono) auto also have "... \ real X / (2 powr (log 2 (real X) + 7))" by (intro divide_left_mono powr_mono, linarith) auto also have "... = real X / 2 powr (log 2 (real X)) / 2 powr 7" by (subst powr_add) simp also have "... \ 1/2 powr 7" using X_ge_1 by (subst powr_log_cancel) auto finally have Z_exp: "(\\. Z \ \\\<^sub>1) \ 1/2^7" by simp have "?L \ measure \\<^sub>1 {f. of_nat (Max (f ` A)) > log 2 (real X) + 7}" unfolding t_def by (intro pmf_mono) (auto simp add:int_of_nat_def) also have "... \ measure \\<^sub>1 {f. Z f \ 1}" proof (rule pmf_mono) fix f assume "f \ set_pmf (sample_pmf \\<^sub>1)" assume " f \ {f. real (Max (f ` A)) > log 2 (real X) + 7}" hence "real (Max (f ` A)) > log 2 (real X) + 7" by simp hence "int (Max (f ` A)) \ \log 2 (real X) + 8\" by linarith hence "Max (f ` A) \ nat \log 2 (real X) + 8\" by simp moreover have "f ` A \ {}" "finite (f ` A)" using fin_A finite_imageI A_nonempty by auto ultimately obtain fa where "fa \ f ` A" " fa \ nat \log 2 (real X) + 8\" using Max_in by auto then obtain ae where ae_def: "ae \ A" "nat \log 2 (real X) + 8\ \ f ae" by auto hence "r (nat \log 2 (real X) + 8\) f > 0" unfolding r_def card_gt_0_iff using fin_A by auto hence "Z f \ 1" unfolding Z_def by simp thus "f \ {f. 1 \ Z f}" by simp qed also have "... \ (\\. Z \ \\\<^sub>1) / 1" using Z_nonneg using \\<^sub>1.sample_space by (intro pmf_markov) auto also have "... \ ?R" using Z_exp by simp finally show ?thesis by simp qed lemma e_1: "measure \ {\. \E\<^sub>1 \} \ 1/2^6" proof - have "measure \\<^sub>1 {f. 2 powr (of_int (-t f)) * real X \ {real b/2^16..real b/2}} \ measure \\<^sub>1 {f. 2 powr (of_int (-t f)) * real X < real b/2^16} + measure \\<^sub>1 {f. 2 powr (of_int (-t f)) * real X > real b/2}" by (intro pmf_add) auto also have "... \ measure \\<^sub>1 {f. of_int (t f) > log 2 X + 16 - b_exp} + measure \\<^sub>1 {f. of_int (t f) < log 2 X + 1 - b_exp}" proof (rule add_mono) show "measure \\<^sub>1 {f. 2 powr (of_int (-t f)) * real X < real b/2^16} \ measure \\<^sub>1 {f. of_int (t f) > log 2 X + 16 - b_exp}" proof (rule pmf_mono) fix f assume "f \ {f. 2 powr real_of_int (-t f) * real X < real b / 2 ^ 16}" hence "2 powr real_of_int (-t f) * real X < real b / 2 ^ 16" by simp hence "log 2 (2 powr of_int (-t f) * real X) < log 2 (real b / 2^16)" using b_min X_ge_1 by (intro iffD2[OF log_less_cancel_iff]) auto hence "of_int (-t f) + log 2 (real X) < log 2 (real b / 2^16)" using X_ge_1 by (subst (asm) log_mult) auto also have "... = real b_exp - log 2 (2 powr 16)" unfolding b_def by (subst log_divide) auto also have "... = real b_exp - 16" by (subst log_powr_cancel) auto finally have "of_int (-t f) + log 2 (real X) < real b_exp - 16" by simp thus "f \ {f. of_int (t f) > log 2 (real X) + 16 - b_exp}" by simp qed next show "measure \\<^sub>1 {f. 2 powr of_int (-t f) * real X > real b/2} \ measure \\<^sub>1 {f. of_int (t f) < log 2 X + 1 - b_exp}" proof (rule pmf_mono) fix f assume "f \ {f. 2 powr real_of_int (-t f) * real X > real b / 2}" hence "2 powr real_of_int (-t f) * real X > real b / 2" by simp hence "log 2 (2 powr of_int (-t f) * real X) > log 2 (real b / 2)" using b_min X_ge_1 by (intro iffD2[OF log_less_cancel_iff]) auto hence "of_int (-t f) + log 2 (real X) > log 2 (real b / 2)" using X_ge_1 by (subst (asm) log_mult) auto hence "of_int (-t f) + log 2 (real X) > real b_exp - 1" unfolding b_def by (subst (asm) log_divide) auto hence "of_int (t f) < log 2 (real X) + 1 - b_exp" by simp thus "f \ {f. of_int (t f) < log 2 (real X) + 1 - b_exp}" by simp qed qed also have "... \ 1/2^7 + 1/2^7" by (intro add_mono t_low t_high) also have "... = 1/2^6" by simp finally have "measure \\<^sub>1 {f. 2 powr of_int (-t f) * real X \ {real b/2^16..real b/2}} \ 1/2^6" by simp thus ?thesis unfolding sample_pmf_\ E\<^sub>1_def case_prod_beta by (subst pair_pmf_prob_left) qed definition E\<^sub>2 where "E\<^sub>2 = (\(f,g,h). \card (R f) - X / 2^(s f)\ \ \/3 * X / 2^(s f))" lemma e_2: "measure \ {\. E\<^sub>1 \ \ \E\<^sub>2 \} \ 1/2^6" (is "?L \ ?R") proof - define t\<^sub>m :: int where "t\<^sub>m = \log 2 (real X)\ + 16 - b_exp" have t_m_bound: "t\<^sub>m \ \log 2 (real X)\ - 10" unfolding t\<^sub>m_def using b_exp_ge_26 by simp have "real b / 2^16 = (real X * (1/ X)) * (real b / 2^16)" using X_ge_1 by simp also have "... = (real X * 2 powr (-log 2 X)) * (real b / 2^16)" using X_ge_1 by (subst powr_minus_divide) simp also have "... \ (real X * 2 powr (- \log 2 (real X)\)) * (2 powr b_exp / 2^16)" unfolding b_def using powr_realpow by (intro mult_mono powr_mono) auto also have "... = real X * (2 powr (- \log 2 (real X)\) * 2 powr(real b_exp-16))" by (subst powr_diff) simp also have "... = real X * 2 powr (- \log 2 (real X)\ + (int b_exp - 16))" by (subst powr_add[symmetric]) simp also have "... = real X * 2 powr (-t\<^sub>m)" unfolding t\<^sub>m_def by (simp add:algebra_simps) finally have c:"real b / 2^16 \ real X * 2 powr (-t\<^sub>m)" by simp define T :: "nat set" where "T = {x. (real X / 2^x \ real b / 2^16)}" have "x \ T \ int x \ t\<^sub>m" for x proof - have "x \ T \ 2^ x \ real X * 2^16 / b" using b_min by (simp add: field_simps T_def) also have "... \ log 2 (2^x) \ log 2 (real X * 2^16 / b)" using X_ge_1 b_min by (intro log_le_cancel_iff[symmetric] divide_pos_pos) auto also have "... \ x \ log 2 (real X * 2^16) - log 2 b" using X_ge_1 b_min by (subst log_divide) auto also have "... \ x \ log 2 (real X) + log 2 (2 powr 16) - b_exp" unfolding b_def using X_ge_1 by (subst log_mult) auto also have "... \ x \ \log 2 (real X) + log 2 (2 powr 16) - b_exp\" by linarith also have "... \ x \ \log 2 (real X) + 16 - real_of_int (int b_exp)\" by (subst log_powr_cancel) auto also have "... \ x \ t\<^sub>m" unfolding t\<^sub>m_def by linarith finally show ?thesis by simp qed hence T_eq: "T = {x. int x \ t\<^sub>m}" by auto have "T = {x. int x < t\<^sub>m+1}" unfolding T_eq by simp also have "... = {x. x < nat (t\<^sub>m + 1)}" unfolding zless_nat_eq_int_zless by simp finally have T_eq_2: "T = {x. x < nat (t\<^sub>m + 1)}" by simp have inj_1: "inj_on ((-) (nat t\<^sub>m)) T" unfolding T_eq by (intro inj_onI) simp have fin_T: "finite T" unfolding T_eq_2 by simp have r_exp: "(\\. real (r t \) \\\<^sub>1) = real X / 2^t" if "t \ T" for t proof - have "t \ t\<^sub>m" using that unfolding T_eq by simp also have "... \ \log 2 (real X)\ - 10" using t_m_bound by simp also have "... \ \log 2 (real X)\" by simp also have "... \ \log 2 (real n)\" using X_le_n X_ge_1 by (intro floor_mono log_mono) auto also have "... \ \log 2 (real n)\" by simp finally have "t \ \log 2 (real n)\" by simp hence "t \ max (nat \log 2 (real n)\) 1"by simp thus ?thesis unfolding r_exp by simp qed have r_var: "measure_pmf.variance \\<^sub>1 (\\. real (r t \)) \ real X / 2^t" if "t \ T" for t using r_exp[OF that] r_var by metis have "9 = C\<^sub>4 / \\<^sup>2 * \^2/2^23" using \_gt_0 by (simp add:C\<^sub>4_def) also have "... = 2 powr (log 2 (C\<^sub>4 / \\<^sup>2)) * \^2/2^23" using \_gt_0 C\<^sub>4_def by (subst powr_log_cancel) auto also have "... \ 2 powr b_exp * \^2/2^23" unfolding b_exp_def by (intro divide_right_mono mult_right_mono powr_mono, linarith) auto also have "... = b * \^2/2^23" using powr_realpow unfolding b_def by simp also have "... = (b/2^16) * (\^2/2^7)" by simp also have "... \ (X * 2 powr (-t\<^sub>m)) * (\^2/2^7)" by (intro mult_mono c) auto also have "... = X * (2 powr (-t\<^sub>m) * 2 powr (-7)) * \^2" using powr_realpow by simp also have "... = 2 powr (-t\<^sub>m-7) * (\^2 * X)" by (subst powr_add[symmetric]) (simp ) finally have "9 \ 2 powr (-t\<^sub>m-7) * (\^2 * X)" by simp hence b: "9/ (\^2 * X) \ 2 powr (-t\<^sub>m -7)" using \_gt_0 X_ge_1 by (subst pos_divide_le_eq) auto have a: "measure \\<^sub>1 {f.\real (r t f)-real X/2^t\> \/3 *real X/2^t} \ 2 powr (real t-t\<^sub>m-7)" (is"?L1 \ ?R1") if "t \ T" for t proof - have "?L1 \ \

(f in \\<^sub>1. \real (r t f) - real X / 2^t\ \ \/3 * real X / 2^t)" by (intro pmf_mono) auto also have "... = \

(f in \\<^sub>1. \real (r t f)-(\\. real (r t \) \ \\<^sub>1)\ \ \/3 * real X/2^t)" by (simp add: r_exp[OF that]) also have "... \ measure_pmf.variance \\<^sub>1 (\\. real (r t \)) / (\/3 * real X / 2^t)^2" using X_ge_1 \_gt_0 \\<^sub>1.sample_space by (intro measure_pmf.Chebyshev_inequality divide_pos_pos mult_pos_pos) auto also have "... \ (X / 2^t) / (\/3 * X / 2^t)^2" by (intro divide_right_mono r_var[OF that]) simp also have "... = 2^t*(9/ ( \^2 * X))" by (simp add:power2_eq_square algebra_simps) also have "... \ 2^t*(2 powr (-t\<^sub>m-7))" by (intro mult_left_mono b) simp also have "... = 2 powr t * 2 powr (-t\<^sub>m-7)" by (subst powr_realpow[symmetric]) auto also have "... = ?R1" by (subst powr_add[symmetric]) (simp add:algebra_simps) finally show "?L1 \ ?R1" by simp qed have "\ym + 1). x = nat t\<^sub>m - y" if "x < nat (t\<^sub>m+1)" for x using that by (intro exI[where x="nat t\<^sub>m - x"]) simp hence T_reindex: "(-) (nat t\<^sub>m) ` {x. x < nat (t\<^sub>m + 1)} = {..m + 1)}" by (auto simp add: set_eq_iff image_iff) have "?L \ measure \ {\. (\t \ T. \real (r t (fst \))-real X/2^t\ > \/3 * real X / 2^t)}" proof (rule pmf_mono) fix \ assume "\ \ set_pmf (sample_pmf \)" obtain f g h where \_def: "\ = (f,g,h)" by (metis prod_cases3) assume "\ \ {\. E\<^sub>1 \ \ \ E\<^sub>2 \}" hence a:"2 powr ( -real_of_int (t f)) * real X \ {real b/2^16..real b/2}" and b:"\card (R f) - real X / 2^(s f)\ > \/3 * X / 2^(s f)" unfolding E\<^sub>1_def E\<^sub>2_def by (auto simp add:\_def) have "\card (R f) - X / 2^(s f)\ = 0" if "s f= 0" using that by (simp add:R_def X_def) moreover have "( \/3) * (X / 2^s f) \ 0" using \_gt_0 X_ge_1 by (intro mult_nonneg_nonneg) auto ultimately have "False" if "s f = 0" using b that by simp hence "s f > 0" by auto hence "t f = s f" unfolding s_def by simp hence "2 powr (-real (s f)) * X \ b / 2^16" using a by simp hence "X / 2 powr (real (s f)) \ b / 2^16" by (simp add: divide_powr_uminus mult.commute) hence "real X / 2 ^ (s f) \ b / 2^16" by (subst (asm) powr_realpow, auto) hence "s f \ T" unfolding T_def by simp moreover have "\r (s f) f - X / 2^s f\ > \/3 * X / 2^s f" using R_def r_def b by simp ultimately have "\t \ T. \r t (fst \) - X / 2^t\ > \/3 * X / 2^t" using \_def by (intro bexI[where x="s f"]) simp thus "\ \ {\. (\t \ T. \r t (fst \) - X / 2^t\ > \/3 * X / 2^t)}" by simp qed also have "... = measure \\<^sub>1 {f. (\t \ T. \real (r t f)-real X / 2^t\ > \/3 * real X/2^t)}" unfolding sample_pmf_\ by (intro pair_pmf_prob_left) also have "... = measure \\<^sub>1 (\t \ T. {f. \real (r t f)-real X / 2^t\ > \/3 * real X/2^t})" by (intro measure_pmf_cong) auto also have "... \ (\t \ T. measure \\<^sub>1 {f.\real (r t f)-real X / 2^t\ > \/3 * real X/2^t})" by (intro measure_UNION_le fin_T) (simp) also have "... \ (\t \ T. 2 powr (real t - of_int t\<^sub>m - 7))" by (intro sum_mono a) also have "... = (\t \ T. 2 powr (-int (nat t\<^sub>m-t) - 7))" unfolding T_eq by (intro sum.cong refl arg_cong2[where f="(powr)"]) simp also have "... = (\x \ (\x. nat t\<^sub>m - x) ` T. 2 powr (-real x - 7))" by (subst sum.reindex[OF inj_1]) simp also have "... = (\x \ (\x. nat t\<^sub>m - x) ` T. 2 powr (-7) * 2 powr (-real x))" by (subst powr_add[symmetric]) (simp add:algebra_simps) also have "... = 1/2^7 * (\x \ (\x. nat t\<^sub>m - x) ` T. 2 powr (-real x))" by (subst sum_distrib_left) simp also have "... = 1/2^7 * (\x m+1). 2 powr (-real x))" unfolding T_eq_2 T_reindex by (intro arg_cong2[where f="(*)"] sum.cong) auto also have "... = 1/2^7 * (\x m+1). (2 powr (-1)) powr (real x))" by (subst powr_powr) simp also have "... = 1/2^7 * (\x m+1). (1/2)^x)" using powr_realpow by simp also have "... \ 1/2^7 * 2" by(subst geometric_sum) auto also have "... = 1/2^6" by simp finally show ?thesis by simp qed definition E\<^sub>3 where "E\<^sub>3 = (\(f,g,h). inj_on g (R f))" lemma R_bound: fixes f g h assumes "E\<^sub>1 (f,g,h)" assumes "E\<^sub>2 (f,g,h)" shows "card (R f) \ 2/3 * b" proof - have "real (card (R f)) \ ( \ / 3) * (real X / 2 ^ s f) + real X / 2 ^ s f" using assms(2) unfolding E\<^sub>2_def by simp also have "... \ (1/3) * (real X / 2 ^ s f) + real X / 2 ^ s f" using \_lt_1 by (intro add_mono mult_right_mono) auto also have "... = (4/3) * (real X / 2 powr s f)" using powr_realpow by simp also have "... \ (4/3) * (real X / 2 powr t f)" unfolding s_def by (intro mult_left_mono divide_left_mono powr_mono) auto also have "... = (4/3) * (2 powr (-(of_int (t f))) * real X)" by (subst powr_minus_divide) simp also have "... = (4/3) * (2 powr (- t f) * real X)" by simp also have "... \ (4/3) * (b/2)" using assms(1) unfolding E\<^sub>1_def by (intro mult_left_mono) auto also have "... \ (2/3) * b" by simp finally show ?thesis by simp qed lemma e_3: "measure \ {\. E\<^sub>1 \ \ E\<^sub>2 \ \ \E\<^sub>3 \} \ 1/2^6" (is "?L \ ?R") proof - let ?\ = "(\(z,x,y) f. z < C\<^sub>7*b^2 \ x \ R f \ y \ R f \ x < y)" let ?\ = "(\(z,x,y) g. g x = z \ g y = z)" have \_prob: "measure \\<^sub>2 {g. ?\ \ g} \ (1/real (C\<^sub>7*b^2)^2)" if "?\ \ f" for \ f proof - obtain x y z where \_def: "\ = (z,x,y)" by (metis prod_cases3) have a:"prob_space.k_wise_indep_vars \\<^sub>2 2 (\i. discrete) (\x \. \ x = z) {..\<^sub>2.indep]) (simp_all add:prob_space_measure_pmf) have "u \ R f \ u < n" for u unfolding R_def using A_range by auto hence b: "x < n" "y < n" "card {x, y} = 2" using that \_def by auto have c: "z < C\<^sub>7*b\<^sup>2" using \_def that by simp have "measure \\<^sub>2 {g. ?\ \ g} = measure \\<^sub>2 {g. (\\ \ {x,y}. g \ = z)}" by (simp add:\_def) also have "... = (\\ \ {x,y}. measure \\<^sub>2 {g. g \ = z})" using b by (intro measure_pmf.split_indep_events[OF refl, where I="{x,y}"] prob_space.k_wise_indep_vars_subset[OF _ a]) (simp_all add:prob_space_measure_pmf) also have "... = (\\ \ {x,y}. measure (map_pmf (\\. \ \) (sample_pmf \\<^sub>2)) {g. g = z}) " by (simp add:vimage_def) also have "... = (\\ \ {x,y}. measure [C\<^sub>7 * b\<^sup>2]\<^sub>S {g. g=z})" using b \\<^sub>2.single by (intro prod.cong) fastforce+ also have "... = (\\ \ {x,y}. measure (pmf_of_set {..7 * b\<^sup>2}) {z})" by (subst nat_sample_pmf) simp also have "... = (measure (pmf_of_set {..7 * b\<^sup>2}) {z})^2" using b by simp also have "... \ (1 /(C\<^sub>7*b\<^sup>2))^2" using c by (subst measure_pmf_of_set) auto also have "... = (1 /(C\<^sub>7*b\<^sup>2)^2)" by (simp add:algebra_simps power2_eq_square) finally show ?thesis by simp qed have \_card: "card {\. ?\ \ f} \ (C\<^sub>7*b^2) * (card (R f) * (card (R f)-1)/2)" (is "?TL \ ?TR") and fin_\: "finite {\. ?\ \ f}" (is "?T2") for f proof - have t1: "{\. ?\ \ f} \ {..7*b^2} \ {(x,y) \ R f \ R f. x < y}" by (intro subsetI) auto moreover have "card ({..7*b^2} \ {(x,y) \ R f \ R f. x < y}) = ?TR" using card_ordered_pairs'[where M="R f"] by (simp add: card_cartesian_product) moreover have "finite (R f)" unfolding R_def using fin_A finite_subset by simp hence "finite {(x, y). (x, y) \ R f \ R f \ x < y}" by (intro finite_subset[where B="R f \ R f", OF _ finite_cartesian_product]) auto hence t2: "finite ({..7*b^2} \ {(x,y) \ R f \ R f. x < y})" by (intro finite_cartesian_product) auto ultimately show "?TL \ ?TR" using card_mono of_nat_le_iff by (metis (no_types, lifting)) show ?T2 using finite_subset[OF t1 t2] by simp qed have "?L \ measure \ {(f,g,h). card (R f) \ b \ (\ x y z. ?\ (x,y,z) f \ ?\ (x,y,z) g)}" proof (rule pmf_mono) fix \ assume b:"\ \ set_pmf (sample_pmf \)" obtain f g h where \_def:"\ = (f,g,h)" by (metis prod_cases3) have "(f,g,h) \ sample_set \" using sample_space_alt[OF sample_space_\] b \_def by simp hence c:"g x < C\<^sub>7*b^2" for x using g_range by simp assume a:"\ \ {\. E\<^sub>1 \ \ E\<^sub>2 \ \ \ E\<^sub>3 \}" hence "card (R f) \ 2/3 * b" using R_bound \_def by force moreover have "\a b. a \ R f \ b \ R f \ a \ b \ g a = g b" using a unfolding \_def E\<^sub>3_def inj_on_def by auto hence "\x y. x \ R f \ y \ R f \ x < y \ g x = g y" by (metis not_less_iff_gr_or_eq) hence "\x y z. ?\ (x,y,z) f \ ?\ (x,y,z) g" using c by blast ultimately show "\ \ {(f, g, h). card (R f) \ b \ (\ x y z. ?\ (x,y,z) f \ ?\ (x,y,z) g)}" unfolding \_def by auto qed also have "... = (\f. measure (pair_pmf \\<^sub>2 \\<^sub>3) {g. card (R f) \ b \ (\x y z. ?\ (x,y,z) f \ ?\ (x,y,z) (fst g))} \\\<^sub>1)" unfolding sample_pmf_\ split_pair_pmf by (simp add: case_prod_beta) also have "... = (\f. measure \\<^sub>2 {g. card (R f) \ b \ (\x y z. ?\ (x,y,z) f \ ?\ (x,y,z) g)} \\\<^sub>1)" by (subst pair_pmf_prob_left) simp also have "... \ (\f. 1/real (2*C\<^sub>7) \\\<^sub>1)" proof (rule pmf_exp_mono[OF integrable_sample_pmf[OF \\<^sub>1.sample_space] integrable_sample_pmf[OF \\<^sub>1.sample_space]]) fix f assume "f \ set_pmf (sample_pmf \\<^sub>1)" show "measure \\<^sub>2 {g. card (R f) \ b \ (\x y z. ?\ (x,y,z) f \ ?\ (x,y,z) g)} \ 1 / real (2 * C\<^sub>7)" (is "?L1 \ ?R1") proof (cases "card (R f) \ b") case True have "?L1 \ measure \\<^sub>2 (\ \ \ {\. ?\ \ f}. {g. ?\ \ g})" by (intro pmf_mono) auto also have "... \ (\\ \ {\. ?\ \ f}. measure \\<^sub>2 {g. ?\ \ g})" by (intro measure_UNION_le fin_\) auto also have "... \ (\\ \ {\. ?\ \ f}. (1/real (C\<^sub>7*b^2)^2))" by (intro sum_mono \_prob) auto also have "... = card {\. ?\ \ f} /(C\<^sub>7*b^2)^2" by simp also have "... \ (C\<^sub>7*b^2) * (card (R f) * (card (R f)-1)/2) / (C\<^sub>7*b^2)^2" by (intro \_card divide_right_mono) simp also have "... \ (C\<^sub>7*b^2) * (b * b / 2) / (C\<^sub>7*b^2)^2" unfolding C\<^sub>7_def using True by (intro divide_right_mono Nat.of_nat_mono mult_mono) auto also have "... = 1/(2*C\<^sub>7)" using b_min by (simp add:algebra_simps power2_eq_square) finally show ?thesis by simp next case False then show ?thesis by simp qed qed also have "... \ 1/2^6" unfolding C\<^sub>7_def by simp finally show ?thesis by simp qed definition E\<^sub>4 where "E\<^sub>4 = (\(f,g,h). \p (f,g,h) - \ (card (R f))\ \ \/12 * card (R f))" lemma e_4_h: "9 / sqrt b \ \ / 12" proof - have "108 \ sqrt (C\<^sub>4)" unfolding C\<^sub>4_def by (approximation 5) also have "... \ sqrt( \^2 * real b)" using b_lower_bound \_gt_0 by (intro real_sqrt_le_mono) (simp add: pos_divide_le_eq algebra_simps) also have "... = \ * sqrt b" using \_gt_0 by (simp add:real_sqrt_mult) finally have "108 \ \ * sqrt b" by simp thus ?thesis using b_min by (simp add:pos_divide_le_eq) qed lemma e_4: "measure \ {\. E\<^sub>1 \ \ E\<^sub>2 \ \ E\<^sub>3 \ \ \E\<^sub>4 \} \ 1/2^6" (is "?L \ ?R") proof - have a: "measure \\<^sub>3 {h. E\<^sub>1 (f,g,h) \ E\<^sub>2 (f,g,h) \ E\<^sub>3 (f,g,h) \ \E\<^sub>4 (f,g,h)} \ 1/2^6" (is "?L1 \ ?R1") if "f \ set_pmf (sample_pmf \\<^sub>1)" "g \ set_pmf(sample_pmf \\<^sub>2)" for f g proof (cases "card (R f) \ b \ inj_on g (R f)") case True have g_inj: "inj_on g (R f)" using True by simp have fin_R: "finite (g ` R f)" unfolding R_def using fin_A by (intro finite_imageI) simp interpret B:balls_and_bins_abs "g ` R f" "{.. {..7 * b\<^sup>2}" using g_range_1 that(2) unfolding sample_space_alt[OF \\<^sub>2.sample_space] by auto hence g_ran: "g ` R f \ {..7 * b\<^sup>2}" by auto have "sample_pmf [b]\<^sub>S = pmf_of_set {..\. \ x) (sample_pmf (\ k (C\<^sub>7 * b\<^sup>2) [b]\<^sub>S)) = pmf_of_set {.. g ` R f" for x using g_ran \\<^sub>3.single that by auto moreover have "prob_space.k_wise_indep_vars \\<^sub>3 k (\_. discrete) (\x \. \ x) (g ` R f)" by (intro prob_space.k_wise_indep_subset[OF _ _ \\<^sub>3.indep] g_ran prob_space_measure_pmf) ultimately have lim_balls_and_bins: "B.lim_balls_and_bins k (sample_pmf (\ k (C\<^sub>7 * b\<^sup>2) [b]\<^sub>S))" unfolding B.lim_balls_and_bins_def by auto have card_g_R: "card (g ` R f) = card (R f)" using True card_image by auto hence b_mu: "\ (card (R f)) = B.\" unfolding B.\_def \_def using b_min by (simp add:powr_realpow) have card_g_le_b: "card (g ` R f) \ card {.. measure \\<^sub>3 {h. \B.Y h - B.\\ > 9 * real (card (g ` R f)) / sqrt (card {.. {h. E\<^sub>1 (f,g,h) \ E\<^sub>2 (f,g,h) \ E\<^sub>3 (f,g,h) \ \E\<^sub>4 (f,g,h)}" hence b: "\p (f,g,h) -\ (card (R f))\ > \/12 * card (R f)" unfolding E\<^sub>4_def by simp assume "h \ set_pmf (sample_pmf \\<^sub>3)" hence h_range: "h x < b" for x unfolding sample_space_alt[OF \\<^sub>3.sample_space,symmetric] using h_range_1 by simp have "{j \ {.. \\<^sub>1 (f, g, h) A 0 j} = {j \ {.. max (Max ({int (f a) |a. a \ A \ h (g a) = j} \ {-1})) (- 1)}" unfolding \\<^sub>1_def by simp also have "... = {j \ {.. Max ({int (f a) |a. a \ A \ h (g a) = j} \ {-1})}" using fin_A by (subst max_absorb1) (auto intro: Max_ge) also have "... = {j \ {..a \ R f. h (g a) = j)}" unfolding R_def using fin_A by (subst Max_ge_iff) auto also have "... = {j. \a \ R f. h (g a) = j}" using h_range by auto also have "... = (h \ g) ` (R f)" by (auto simp add:set_eq_iff image_iff) also have "... = h ` (g ` (R f))" by (simp add:image_image) finally have c:"{j \ {.. \\<^sub>1 (f, g, h) A 0 j} = h ` (g ` R f)" by simp have "9 * real (card (g ` R f)) / sqrt (card {.. \/12 * card (R f)" by (intro mult_right_mono e_4_h) simp also have "... < \B.Y h - B.\\" using b c unfolding B.Y_def p_def b_mu by simp finally show "h \ {h. \B.Y h - B.\\ > 9 * real (card (g ` R f)) / sqrt (card {.. 1/2^6" using k_min by (intro B.devitation_bound[OF card_g_le_b lim_balls_and_bins]) auto finally show ?thesis by simp next case False have "?L1 \ measure \\<^sub>3 {}" proof (rule pmf_mono) fix h assume b:"h \ {h. E\<^sub>1 (f, g, h) \ E\<^sub>2 (f, g, h) \ E\<^sub>3 (f, g, h) \ \ E\<^sub>4 (f, g, h)}" hence "card (R f) \ (2/3)*b" by (auto intro!: R_bound[simplified]) hence "card (R f) \ b" by simp moreover have "inj_on g (R f)" using b by (simp add:E\<^sub>3_def) ultimately have "False" using False by simp thus "h \ {}" by simp qed also have "... = 0" by simp finally show ?thesis by simp qed have "?L = (\f. (\g. measure \\<^sub>3 {h. E\<^sub>1 (f,g,h) \ E\<^sub>2 (f,g,h) \ E\<^sub>3 (f,g,h) \ \E\<^sub>4 (f,g,h)} \\\<^sub>2) \\\<^sub>1)" unfolding sample_pmf_\ split_pair_pmf by simp also have "... \ (\f. (\g. 1/2^6 \\\<^sub>2) \\\<^sub>1)" using a \\<^sub>1.sample_space \\<^sub>2.sample_space by (intro integral_mono_AE AE_pmfI) simp_all also have "... = 1/2^6" by simp finally show ?thesis by simp qed lemma \_inverse: "\_inv (\ x) = x" proof - have a:"1-1/b \ 0" using b_min by simp have "\ x = b * (1-(1-1/b) powr x)" unfolding \_def by simp hence "\ x / real b = 1-(1-1/b) powr x" by simp hence "ln (1 - \ x / real b) = ln ((1-1/b) powr x)" by simp also have "... = x * ln (1 - 1/ b)" using a by (intro ln_powr) finally have "ln (1 - \ x / real b) = x * ln (1- 1/ b)" by simp moreover have "ln (1-1/b) < 0" using b_min by (subst ln_less_zero_iff) auto ultimately show ?thesis using \_inv_def by simp qed lemma rho_mono: assumes "x \ y" shows "\ x \ \ y" proof- have "(1 - 1 / real b) powr y \ (1 - 1 / real b) powr x" using b_min by (intro powr_mono_rev assms) auto thus ?thesis unfolding \_def by (intro mult_left_mono) auto qed lemma rho_two_thirds: "\ (2/3 * b) \ 3/5 *b" proof - have "1/3 \ exp ( - 13 / 12::real )" by (approximation 8) also have "... \ exp ( - 1 - 2 / real b )" using b_min by (intro iffD2[OF exp_le_cancel_iff]) (simp add:algebra_simps) also have "... \ exp ( b * (-(1/real b)-2*(1/real b)^2))" using b_min by (simp add:algebra_simps power2_eq_square) also have "... \ exp ( b * ln (1-1/real b))" using b_min by (intro iffD2[OF exp_le_cancel_iff] mult_left_mono ln_one_minus_pos_lower_bound) auto also have "... = exp ( ln ( (1-1/real b) powr b))" using b_min by (subst ln_powr) auto also have "... = (1-1/real b) powr b" using b_min by (subst exp_ln) auto finally have a:"1/3 \ (1-1/real b) powr b" by simp have "2/5 \ (1/3) powr (2/3::real)" by (approximation 5) also have "... \ ((1-1/real b) powr b) powr (2/3)" by (intro powr_mono2 a) auto also have "... = (1-1/real b) powr (2/3 * real b)" by (subst powr_powr) (simp add:algebra_simps) finally have "2/5 \ (1 - 1 / real b) powr (2 / 3 * real b)" by simp hence "1 - (1 - 1 / real b) powr (2 / 3 * real b) \ 3/5" by simp hence "\ (2/3 * b) \ b * (3/5)" unfolding \_def by (intro mult_left_mono) auto thus ?thesis by simp qed definition \_inv' :: "real \ real" where "\_inv' x = -1 / (real b * (1-x / real b) * ln (1 - 1 / real b))" lemma \_inv'_bound: assumes "x \ 0" assumes "x \ 59/90*b" shows "\\_inv' x\ \ 4" proof - have c:"ln (1 - 1 / real b) < 0" using b_min by (subst ln_less_zero_iff) auto hence d:"real b * (1 - x / real b) * ln (1 - 1 / real b) < 0" using b_min assms by (intro Rings.mult_pos_neg) auto have "(1::real) \ 31/30" by simp also have "... \ (31/30) * (b * -(- 1 / real b))" using b_min by simp also have "... \ (31/30) * (b * -ln (1 + (- 1 / real b)))" using b_min by (intro mult_left_mono le_imp_neg_le ln_add_one_self_le_self2) auto also have "... \ 3 * (31/90) * (- b * ln (1 - 1 / real b))" by simp also have "... \ 3 * (1 - x / real b) * (- b * ln (1 - 1 / real b))" using assms b_min pos_divide_le_eq[where c="b"] c by (intro mult_right_mono mult_left_mono mult_nonpos_nonpos) auto also have "... \ 3 * (real b * (1 - x / real b) * (-ln (1 - 1 / real b)))" by (simp add:algebra_simps) finally have "3 * (real b * (1 - x / real b) * (-ln (1 - 1 / real b))) \ 1" by simp hence "3 * (real b * (1 - x / real b) * ln (1 - 1 / real b)) \ -1" by simp hence "\_inv' x \ 3" unfolding \_inv'_def using d by (subst neg_divide_le_eq) auto moreover have "\_inv' x > 0" unfolding \_inv'_def using d by (intro divide_neg_neg) auto ultimately show ?thesis by simp qed lemma \_inv': fixes x :: real assumes "x < b" shows "DERIV \_inv x :> \_inv' x" proof - have "DERIV (ln \ (\x. (1 - x / real b))) x :> 1 / (1-x / real b) * (0 -1/b)" using assms b_min by (intro DERIV_chain DERIV_ln_divide DERIV_cdivide derivative_intros) auto hence "DERIV \_inv x :> (1 / (1-x / real b) * (-1/b)) / ln (1-1/real b)" unfolding comp_def \_inv_def by (intro DERIV_cdivide) auto thus ?thesis by (simp add:\_inv'_def algebra_simps) qed lemma accuracy_without_cutoff: "measure \ {(f,g,h). \Y (f,g,h) - real X\ > \ * X \ s f < q_max} \ 1/2^4" (is "?L \ ?R") proof - have "?L \ measure \ {\. \E\<^sub>1 \ \ \E\<^sub>2 \ \ \E\<^sub>3 \ \ \E\<^sub>4 \}" proof (rule pmf_rev_mono) fix \ assume "\ \ set_pmf (sample_pmf \)" obtain f g h where \_def: "\ = (f,g,h)" by (metis prod_cases3) assume "\ \ {\. \ E\<^sub>1 \ \ \ E\<^sub>2 \ \ \ E\<^sub>3 \ \ \ E\<^sub>4 \}" hence assms: "E\<^sub>1 (f,g,h)" "E\<^sub>2 (f,g,h)" "E\<^sub>3 (f,g,h)" "E\<^sub>4 (f,g,h)" unfolding \_def by auto define I :: "real set" where "I = {0..59/90*b}" have "p (f,g,h) \ \ (card (R f)) + \/12 * card (R f)" using assms(4) E\<^sub>4_def unfolding abs_le_iff by simp also have "... \ \(2/3*b) + 1/12* (2/3*b)" using \_lt_1 R_bound[OF assms(1,2)] by (intro add_mono rho_mono mult_mono) auto also have "... \ 3/5 * b + 1/18*b" by (intro add_mono rho_two_thirds) auto also have "... \ 59/90 * b" by simp finally have "p (f,g,h) \ 59/90 * b" by simp hence p_in_I: "p (f,g,h) \ I" unfolding I_def by simp have "\ (card (R f)) \ \(2/3 * b)" using R_bound[OF assms(1,2)] by (intro rho_mono) auto also have "... \ 3/5 * b" using rho_two_thirds by simp also have "... \ b * 59/90" by simp finally have "\ (card (R f)) \ b * 59/90" by simp moreover have "(1 - 1 / real b) powr (real (card (R f))) \ 1 powr (real (card (R f)))" using b_min by (intro powr_mono2) auto hence "\ (card (R f)) \ 0" unfolding \_def by (intro mult_nonneg_nonneg) auto ultimately have "\ (card (R f)) \ I" unfolding I_def by simp moreover have "interval I" unfolding I_def interval_def by simp moreover have "59 / 90 * b < b" using b_min by simp hence "DERIV \_inv x :> \_inv' x" if "x \ I" for x using that I_def by (intro \_inv') simp ultimately obtain \ :: real where \_def: "\ \ I" "\_inv (p(f,g,h)) - \_inv (\ (card (R f))) = (p (f,g,h) - \(card (R f))) * \_inv' \" using p_in_I MVT_interval by blast have "\\_inv(p (f,g,h)) - card (R f)\ = \\_inv(p (f,g,h)) - \_inv(\(card (R f)))\" by (subst \_inverse) simp also have "... = \(p (f,g,h) - \ (card (R f)))\ * \\_inv' \ \" using \_def(2) abs_mult by simp also have "... \ \p (f,g,h) - \ (card (R f))\ * 4" using \_def(1) I_def by (intro mult_left_mono \_inv'_bound) auto also have "... \ ( \/12 * card (R f)) * 4" using assms(4) E\<^sub>4_def by (intro mult_right_mono) auto also have "... = \/3 * card (R f)" by simp finally have b: "\\_inv(p (f,g,h)) - card (R f)\ \ \/3 * card (R f)" by simp have "\\_inv(p (f,g,h)) - X / 2 ^ (s f)\ \ \\_inv(p (f,g,h)) - card (R f)\ + \card (R f) - X / 2 ^ (s f)\" by simp also have "... \ \/3 * card (R f) + \card (R f) - X / 2 ^ (s f)\" by (intro add_mono b) auto also have "... = \/3 * \X / 2 ^ (s f) + (card (R f) - X / 2 ^ (s f))\ + \card (R f) - X / 2 ^ (s f)\" by simp also have "... \ \/3 * (\X / 2 ^ (s f)\ + \card (R f) - X / 2 ^ (s f)\) + \card (R f) - X / 2 ^ (s f)\" using \_gt_0 by (intro mult_left_mono add_mono abs_triangle_ineq) auto also have "... \ \/3 * \X / 2 ^ (s f)\ + (1+ \/3) * \card (R f) - X / 2 ^ (s f)\" using \_gt_0 \_lt_1 by (simp add:algebra_simps) also have "... \ \/3 * \X / 2 ^ s f\ + (4/3) * ( \ / 3 * real X / 2 ^ s f)" using assms(2) \_gt_0 \_lt_1 unfolding E\<^sub>2_def by (intro add_mono mult_mono) auto also have "... = (7/9) * \ * real X / 2^s f" using X_ge_1 by (subst abs_of_nonneg) auto also have "... \ 1 * \ * real X / 2^s f" using \_gt_0 by (intro mult_mono divide_right_mono) auto also have "... = \ * real X / 2^s f" by simp finally have a:"\\_inv(p (f,g,h)) - X / 2 ^ (s f)\ \ \ * X / 2 ^ (s f)" by simp have "\Y (f, g, h) - real X\ = \2 ^ (s f)\ * \\_inv(p (f,g,h)) - real X / 2 ^ (s f)\" unfolding Y_def by (subst abs_mult[symmetric]) (simp add:algebra_simps powr_add[symmetric]) also have "... \ 2 ^ (s f) * (\ * X / 2 ^ (s f))" by (intro mult_mono a) auto also have "... = \ * X" by (simp add:algebra_simps powr_add[symmetric]) finally have "\Y (f, g, h) - real X\ \ \ * X" by simp moreover have "2 powr (\log 2 (real X)\ - t f) \ 2 powr b_exp" (is "?L1 \ ?R1") proof - have "?L1 \ 2 powr (1 + log 2 (real X)- t f)" by (intro powr_mono, linarith) auto also have "... = 2 powr 1 * 2 powr (log 2 (real X)) * 2 powr (- t f)" unfolding powr_add[symmetric] by simp also have "... = 2 * (2 powr (-t f) * X)" using X_ge_1 by simp also have "... \ 2 * (b/2)" using assms(1) unfolding E\<^sub>1_def by (intro mult_left_mono) auto also have "... = b" by simp also have "... = ?R1" unfolding b_def by (simp add: powr_realpow) finally show ?thesis by simp qed hence "\log 2 (real X)\ - t f \ real b_exp" unfolding not_less[symmetric] using powr_less_mono[where x="2"] by simp hence "s f \ q_max" unfolding s_def q_max_def by (intro nat_mono) auto ultimately show "\ \ {(f, g, h). \ * X < \Y (f, g, h) - real X\ \ s f < q_max}" unfolding \_def by auto qed also have "... \ measure \ {\. \E\<^sub>1 \ \ \E\<^sub>2 \ \ \E\<^sub>3 \} + measure \ {\. E\<^sub>1 \ \ E\<^sub>2 \ \ E\<^sub>3 \ \ \E\<^sub>4 \}" by (intro pmf_add) auto also have "... \ (measure \ {\. \E\<^sub>1 \ \ \E\<^sub>2 \} + measure \ {\. E\<^sub>1 \ \ E\<^sub>2 \ \ \E\<^sub>3 \}) + 1/2^6" by (intro add_mono e_4 pmf_add) auto also have "... \ ((measure \ {\. \E\<^sub>1 \} + measure \ {\. E\<^sub>1 \ \ \E\<^sub>2 \}) + 1/2^6) + 1/2^6" by (intro add_mono e_3 pmf_add) auto also have "... \ ((1/2^6 + 1/2^6) + 1/2^6) + 1/2^6" by (intro add_mono e_2 e_1) auto also have "... = ?R" by simp finally show ?thesis by simp qed end end diff --git a/thys/Frequency_Moments/Frequency_Moment_0.thy b/thys/Frequency_Moments/Frequency_Moment_0.thy --- a/thys/Frequency_Moments/Frequency_Moment_0.thy +++ b/thys/Frequency_Moments/Frequency_Moment_0.thy @@ -1,1314 +1,1314 @@ section \Frequency Moment $0$\label{sec:f0}\ theory Frequency_Moment_0 imports Frequency_Moments_Preliminary_Results Median_Method.Median K_Smallest Universal_Hash_Families.Carter_Wegman_Hash_Family Frequency_Moments Landau_Ext Product_PMF_Ext Universal_Hash_Families.Universal_Hash_Families_More_Finite_Fields begin text \This section contains a formalization of a new algorithm for the zero-th frequency moment inspired by ideas described in \<^cite>\"baryossef2002"\. It is a KMV-type ($k$-minimum value) algorithm with a rounding method and matches the space complexity of the best algorithm described in \<^cite>\"baryossef2002"\. In addition to the Isabelle proof here, there is also an informal hand-written proof in Appendix~\ref{sec:f0_proof}.\ type_synonym f0_state = "nat \ nat \ nat \ nat \ (nat \ nat list) \ (nat \ float set)" definition hash where "hash p = ring.hash (mod_ring p)" fun f0_init :: "rat \ rat \ nat \ f0_state pmf" where "f0_init \ \ n = do { let s = nat \-18 * ln (real_of_rat \)\; let t = nat \80 / (real_of_rat \)\<^sup>2\; let p = prime_above (max n 19); let r = nat (4 * \log 2 (1 / real_of_rat \)\ + 23); h \ prod_pmf {.._. pmf_of_set (bounded_degree_polynomials (mod_ring p) 2)); return_pmf (s, t, p, r, h, (\_ \ {0.. f0_state \ f0_state pmf" where "f0_update x (s, t, p, r, h, sketch) = return_pmf (s, t, p, r, h, \i \ {.. rat pmf" where "f0_result (s, t, p, r, h, sketch) = return_pmf (median s (\i \ {.. rat \ rat) \ real" where "f0_space_usage (n, \, \) = ( let s = nat \-18 * ln (real_of_rat \)\ in let r = nat (4 * \log 2 (1 / real_of_rat \)\ + 23) in let t = nat \80 / (real_of_rat \)\<^sup>2 \ in 6 + 2 * log 2 (real s + 1) + 2 * log 2 (real t + 1) + 2 * log 2 (real n + 21) + 2 * log 2 (real r + 1) + real s * (5 + 2 * log 2 (21 + real n) + real t * (13 + 4 * r + 2 * log 2 (log 2 (real n + 13)))))" definition encode_f0_state :: "f0_state \ bool list option" where "encode_f0_state = N\<^sub>e \\<^sub>e (\s. N\<^sub>e \\<^sub>e ( N\<^sub>e \\<^sub>e (\p. N\<^sub>e \\<^sub>e ( ([0..\<^sub>e (P\<^sub>e p 2)) \\<^sub>e ([0..\<^sub>e (S\<^sub>e F\<^sub>e))))))" lemma "inj_on encode_f0_state (dom encode_f0_state)" proof - have "is_encoding encode_f0_state" unfolding encode_f0_state_def by (intro dependent_encoding exp_golomb_encoding poly_encoding fun_encoding set_encoding float_encoding) thus ?thesis by (rule encoding_imp_inj) qed context fixes \ \ :: rat fixes n :: nat fixes as :: "nat list" fixes result assumes \_range: "\ \ {0<..<1}" assumes \_range: "\ \ {0<..<1}" assumes as_range: "set as \ {.. fold (\a state. state \ f0_update a) as (f0_init \ \ n) \ f0_result" begin private definition t where "t = nat \80 / (real_of_rat \)\<^sup>2\" private lemma t_gt_0: "t > 0" using \_range by (simp add:t_def) private definition s where "s = nat \-(18 * ln (real_of_rat \))\" private lemma s_gt_0: "s > 0" using \_range by (simp add:s_def) private definition p where "p = prime_above (max n 19)" private lemma p_prime:"Factorial_Ring.prime p" using p_def prime_above_prime by presburger private lemma p_ge_18: "p \ 18" proof - have "p \ 19" by (metis p_def prime_above_lower_bound max.bounded_iff) thus ?thesis by simp qed private lemma p_gt_0: "p > 0" using p_ge_18 by simp private lemma p_gt_1: "p > 1" using p_ge_18 by simp private lemma n_le_p: "n \ p" proof - have "n \ max n 19" by simp also have "... \ p" unfolding p_def by (rule prime_above_lower_bound) finally show ?thesis by simp qed private lemma p_le_n: "p \ 2*n + 40" proof - have "p \ 2 * (max n 19) + 2" by (subst p_def, rule prime_above_upper_bound) also have "... \ 2 * n + 40" by (cases "n \ 19", auto) finally show ?thesis by simp qed private lemma as_lt_p: "\x. x \ set as \ x < p" using as_range atLeastLessThan_iff by (intro order_less_le_trans[OF _ n_le_p]) blast private lemma as_subset_p: "set as \ {..log 2 (1 / real_of_rat \)\ + 23)" private lemma r_bound: "4 * log 2 (1 / real_of_rat \) + 23 \ r" proof - have "0 \ log 2 (1 / real_of_rat \)" using \_range by simp hence "0 \ \log 2 (1 / real_of_rat \)\" by simp hence "0 \ 4 * \log 2 (1 / real_of_rat \)\ + 23" by (intro add_nonneg_nonneg mult_nonneg_nonneg, auto) thus ?thesis by (simp add:r_def) qed private lemma r_ge_23: "r \ 23" proof - have "(23::real) = 0 + 23" by simp also have "... \ 4 * log 2 (1 / real_of_rat \) + 23" using \_range by (intro add_mono mult_nonneg_nonneg, auto) also have "... \ r" using r_bound by simp finally show "23 \ r" by simp qed private lemma two_pow_r_le_1: "0 < 1 - 2 powr - real r" proof - have a: "2 powr (0::real) = 1" by simp show ?thesis using r_ge_23 by (simp, subst a[symmetric], intro powr_less_mono, auto) qed interpretation carter_wegman_hash_family "mod_ring p" 2 rewrites "ring.hash (mod_ring p) = Frequency_Moment_0.hash p" using carter_wegman_hash_familyI[OF mod_ring_is_field mod_ring_finite] using hash_def p_prime by auto private definition tr_hash where "tr_hash x \ = truncate_down r (hash x \)" private definition sketch_rv where "sketch_rv \ = least t ((\x. float_of (tr_hash x \)) ` set as)" private definition estimate where "estimate S = (if card S < t then of_nat (card S) else of_nat t * of_nat p / rat_of_float (Max S))" private definition sketch_rv' where "sketch_rv' \ = least t ((\x. tr_hash x \) ` set as)" private definition estimate' where "estimate' S = (if card S < t then real (card S) else real t * real p / Max S)" private definition \\<^sub>0 where "\\<^sub>0 = prod_pmf {.._. pmf_of_set space)" private lemma f0_alg_sketch: defines "sketch \ fold (\a state. state \ f0_update a) as (f0_init \ \ n)" shows "sketch = map_pmf (\x. (s,t,p,r, x, \i \ {..\<^sub>0" unfolding sketch_rv_def proof (subst sketch_def, induction as rule:rev_induct) case Nil then show ?case by (simp add:s_def p_def[symmetric] map_pmf_def t_def r_def Let_def least_def restrict_def space_def \\<^sub>0_def) next case (snoc x xs) let ?sketch = "\\ xs. least t ((\a. float_of (tr_hash a \)) ` set xs)" have "fold (\a state. state \ f0_update a) (xs @ [x]) (f0_init \ \ n) = (map_pmf (\\. (s, t, p, r, \, \i \ {.. i) xs)) \\<^sub>0) \ f0_update x" by (simp add: restrict_def snoc del:f0_init.simps) also have "... = \\<^sub>0 \ (\\. f0_update x (s, t, p, r, \, \i\{.. i) xs)) " by (simp add:map_pmf_def bind_assoc_pmf bind_return_pmf del:f0_update.simps) also have "... = map_pmf (\\. (s, t, p, r, \, \i\{.. i) (xs@[x]))) \\<^sub>0" by (simp add:least_insert map_pmf_def tr_hash_def cong:restrict_cong) finally show ?case by blast qed private lemma card_nat_in_ball: fixes x :: nat fixes q :: real assumes "q \ 0" defines "A \ {k. abs (real x - real k) \ q \ k \ x}" shows "real (card A) \ 2 * q" and "finite A" proof - have a: "of_nat x \ {\real x-q\..\real x+q\}" using assms by (simp add: ceiling_le_iff) have "card A = card (int ` A)" by (rule card_image[symmetric], simp) also have "... \ card ({\real x-q\..\real x+q\} - {of_nat x})" by (intro card_mono image_subsetI, simp_all add:A_def abs_le_iff, linarith) also have "... = card {\real x-q\..\real x+q\} - 1" by (rule card_Diff_singleton, rule a) also have "... = int (card {\real x-q\..\real x+q\}) - int 1" by (intro of_nat_diff) (metis a card_0_eq empty_iff finite_atLeastAtMost_int less_one linorder_not_le) also have "... \ \q+real x\+1 -\real x-q\ - 1" using assms by (simp, linarith) also have "... \ 2*q" by linarith finally show "card A \ 2 * q" by simp have "A \ {..x + nat \q\}" by (rule subsetI, simp add:A_def abs_le_iff, linarith) thus "finite A" by (rule finite_subset, simp) qed private lemma prob_degree_lt_1: "prob {\. degree \ < 1} \ 1/real p" proof - have "space \ {\. length \ \ Suc 0} = bounded_degree_polynomials (mod_ring p) 1" by (auto simp:set_eq_iff bounded_degree_polynomials_def space_def) moreover have "field_size = p" by (simp add:mod_ring_def) hence "real (card (bounded_degree_polynomials (mod_ring p) (Suc 0))) / real (card space) = 1 / real p" by (simp add:space_def bounded_degree_polynomials_card power2_eq_square) ultimately show ?thesis by (simp add:M_def measure_pmf_of_set) qed private lemma collision_prob: assumes "c \ 1" shows "prob {\. \x \ set as. \y \ set as. x \ y \ tr_hash x \ \ c \ tr_hash x \ = tr_hash y \} \ (5/2) * (real (card (set as)))\<^sup>2 * c\<^sup>2 * 2 powr -(real r) / (real p)\<^sup>2 + 1/real p" (is "prob {\. ?l \} \ ?r1 + ?r2") proof - define \ :: real where "\ = 9/8" have rho_c_ge_0: "\ * c \ 0" unfolding \_def using assms by simp have c_ge_0: "c\0" using assms by simp have "degree \ \ 1 \ \ \ space \ degree \ = 1" for \ by (simp add:bounded_degree_polynomials_def space_def) (metis One_nat_def Suc_1 le_less_Suc_eq less_imp_diff_less list.size(3) pos2) hence a: "\\ x y. x < p \ y < p \ x \ y \ degree \ \ 1 \ \ \ space \ hash x \ \ hash y \" using inj_onD[OF inj_if_degree_1] mod_ring_carr by blast have b: "prob {\. degree \ \ 1 \ tr_hash x \ \ c \ tr_hash x \ = tr_hash y \} \ 5 * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2" if b_assms: "x \ set as" "y \ set as" "x < y" for x y proof - have c: "real u \ \ * c \ \real u - real v\ \ \ * c * 2 powr (-real r)" if c_assms:"truncate_down r (real u) \ c" "truncate_down r (real u) = truncate_down r (real v)" for u v proof - have "9 * 2 powr - real r \ 9 * 2 powr (- real 23)" using r_ge_23 by (intro mult_left_mono powr_mono, auto) also have "... \ 1" by simp finally have "9 * 2 powr - real r \ 1" by simp hence "1 \ \ * (1 - 2 powr (- real r))" by (simp add:\_def) hence d: "(c*1) / (1 - 2 powr (-real r)) \ c * \" using assms two_pow_r_le_1 by (simp add: pos_divide_le_eq) have "\x. truncate_down r (real x) \ c \ real x * (1 - 2 powr - real r) \ c * 1" using truncate_down_pos[OF of_nat_0_le_iff] order_trans by (simp, blast) hence "\x. truncate_down r (real x) \ c \ real x \ c * \" using two_pow_r_le_1 by (intro order_trans[OF _ d], simp add: pos_le_divide_eq) hence e: "real u \ c * \" "real v \ c * \" using c_assms by auto have " \real u - real v\ \ (max \real u\ \real v\) * 2 powr (-real r)" using c_assms by (intro truncate_down_eq, simp) also have "... \ (c * \) * 2 powr (-real r)" using e by (intro mult_right_mono, auto) finally have "\real u - real v\ \ \ * c * 2 powr (-real r)" by (simp add:algebra_simps) thus ?thesis using e by (simp add:algebra_simps) qed have "prob {\. degree \ \ 1 \ tr_hash x \ \ c \ tr_hash x \ = tr_hash y \} \ prob (\ i \ {(u,v) \ {.. {.. v \ truncate_down r u \ c \ truncate_down r u = truncate_down r v}. {\. hash x \ = fst i \ hash y \ = snd i})" using a by (intro pmf_mono[OF M_def], simp add:tr_hash_def) (metis hash_range mod_ring_carr b_assms as_subset_p lessThan_iff nat_neq_iff subset_eq) also have "... \ (\ i\ {(u,v) \ {.. {.. v \ truncate_down r u \ c \ truncate_down r u = truncate_down r v}. prob {\. hash x \ = fst i \ hash y \ = snd i})" by (intro measure_UNION_le finite_cartesian_product finite_subset[where B="{0.. {0.. (\ i\ {(u,v) \ {.. {.. v \ truncate_down r u \ c \ truncate_down r u = truncate_down r v}. prob {\. (\u \ {x,y}. hash u \ = (if u = x then (fst i) else (snd i)))})" by (intro sum_mono pmf_mono[OF M_def]) force also have "... \ (\ i\ {(u,v) \ {.. {.. v \ truncate_down r u \ c \ truncate_down r u = truncate_down r v}. 1/(real p)\<^sup>2)" using assms as_subset_p b_assms by (intro sum_mono, subst hash_prob) (auto simp add: mod_ring_def power2_eq_square) also have "... = 1/(real p)\<^sup>2 * card {(u,v) \ {0.. {0.. v \ truncate_down r u \ c \ truncate_down r u = truncate_down r v}" by simp also have "... \ 1/(real p)\<^sup>2 * card {(u,v) \ {.. {.. v \ real u \ \ * c \ abs (real u - real v) \ \ * c * 2 powr (-real r)}" using c by (intro mult_mono of_nat_mono card_mono finite_cartesian_product finite_subset[where B="{..{.. 1/(real p)\<^sup>2 * card (\u' \ {u. u < p \ real u \ \ * c}. {(u::nat,v::nat). u = u' \ abs (real u - real v) \ \ * c * 2 powr (-real r) \ v < p \ v \ u'})" by (intro mult_left_mono of_nat_mono card_mono finite_cartesian_product finite_subset[where B="{..{.. 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. card {(u,v). u = u' \ abs (real u - real v) \ \ * c * 2 powr (-real r) \ v < p \ v \ u'})" by (intro mult_left_mono of_nat_mono card_UN_le, auto) also have "... = 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. card ((\x. (u' ,x)) ` {v. abs (real u' - real v) \ \ * c * 2 powr (-real r) \ v < p \ v \ u'}))" by (intro arg_cong2[where f="(*)"] arg_cong[where f="real"] sum.cong arg_cong[where f="card"]) (auto simp add:set_eq_iff) also have "... \ 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. card {v. abs (real u' - real v) \ \ * c * 2 powr (-real r) \ v < p \ v \ u'})" by (intro mult_left_mono of_nat_mono sum_mono card_image_le, auto) also have "... \ 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. card {v. abs (real u' - real v) \ \ * c * 2 powr (-real r) \ v \ u'})" by (intro mult_left_mono sum_mono of_nat_mono card_mono card_nat_in_ball subsetI) auto also have "... \ 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. real (card {v. abs (real u' - real v) \ \ * c * 2 powr (-real r) \ v \ u'}))" by simp also have "... \ 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. 2 * (\ * c * 2 powr (-real r)))" by (intro mult_left_mono sum_mono card_nat_in_ball(1), auto) also have "... = 1/(real p)\<^sup>2 * (real (card {u. u < p \ real u \ \ * c}) * (2 * (\ * c * 2 powr (-real r))))" by simp also have "... \ 1/(real p)\<^sup>2 * (real (card {u. u \ nat (\\ * c \)}) * (2 * (\ * c * 2 powr (-real r))))" using rho_c_ge_0 le_nat_floor by (intro mult_left_mono mult_right_mono of_nat_mono card_mono subsetI) auto also have "... \ 1/(real p)\<^sup>2 * ((1+\ * c) * (2 * (\ * c * 2 powr (-real r))))" using rho_c_ge_0 by (intro mult_left_mono mult_right_mono, auto) also have "... \ 1/(real p)\<^sup>2 * (((1+\) * c) * (2 * (\ * c * 2 powr (-real r))))" using assms by (intro mult_mono, auto simp add:distrib_left distrib_right \_def) also have "... = (\ * (2 + \ * 2)) * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2" by (simp add:ac_simps power2_eq_square) also have "... \ 5 * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2" by (intro divide_right_mono mult_right_mono) (auto simp add:\_def) finally show ?thesis by simp qed have "prob {\. ?l \ \ degree \ \ 1} \ prob (\ i \ {(x,y) \ (set as) \ (set as). x < y}. {\. degree \ \ 1 \ tr_hash (fst i) \ \ c \ tr_hash (fst i) \ = tr_hash (snd i) \})" by (rule pmf_mono[OF M_def], simp, metis linorder_neqE_nat) also have "... \ (\ i \ {(x,y) \ (set as) \ (set as). x < y}. prob {\. degree \ \ 1 \ tr_hash (fst i) \ \ c \ tr_hash (fst i) \ = tr_hash (snd i) \})" unfolding M_def by (intro measure_UNION_le finite_cartesian_product finite_subset[where B="(set as) \ (set as)"]) auto also have "... \ (\ i \ {(x,y) \ (set as) \ (set as). x < y}. 5 * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2)" using b by (intro sum_mono, simp add:case_prod_beta) also have "... = ((5/2) * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2) * (2 * card {(x,y) \ (set as) \ (set as). x < y})" by simp also have "... = ((5/2) * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2) * (card (set as) * (card (set as) - 1))" by (subst card_ordered_pairs, auto) also have "... \ ((5/2) * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2) * (real (card (set as)))\<^sup>2" by (intro mult_left_mono) (auto simp add:power2_eq_square mult_left_mono) also have "... = (5/2) * (real (card (set as)))\<^sup>2 * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2" by (simp add:algebra_simps) finally have f:"prob {\. ?l \ \ degree \ \ 1} \ ?r1" by simp have "prob {\. ?l \} \ prob {\. ?l \ \ degree \ \ 1} + prob {\. degree \ < 1}" by (rule pmf_add[OF M_def], auto) also have "... \ ?r1 + ?r2" by (intro add_mono f prob_degree_lt_1) finally show ?thesis by simp qed private lemma of_bool_square: "(of_bool x)\<^sup>2 = ((of_bool x)::real)" by (cases x, auto) private definition Q where "Q y \ = card {x \ set as. int (hash x \) < y}" private definition m where "m = card (set as)" private lemma assumes "a \ 0" assumes "a \ int p" shows exp_Q: "expectation (\\. real (Q a \)) = real m * (of_int a) / p" and var_Q: "variance (\\. real (Q a \)) \ real m * (of_int a) / p" proof - have exp_single: "expectation (\\. of_bool (int (hash x \) < a)) = real_of_int a /real p" if a:"x \ set as" for x proof - have x_le_p: "x < p" using a as_lt_p by simp have "expectation (\\. of_bool (int (hash x \) < a)) = expectation (indicat_real {\. int (Frequency_Moment_0.hash p x \) < a})" by (intro arg_cong2[where f="integral\<^sup>L"] ext, simp_all) also have "... = prob {\. hash x \ \ {k. int k < a}}" by (simp add:M_def) also have "... = card ({k. int k < a} \ {..\. of_bool (int (hash x \) < a)) = real_of_int a /real p" by simp qed have "expectation(\\. real (Q a \)) = expectation (\\. (\x \ set as. of_bool (int (hash x \) < a)))" by (simp add:Q_def Int_def) also have "... = (\x \ set as. expectation (\\. of_bool (int (hash x \) < a)))" by (rule Bochner_Integration.integral_sum, simp) also have "... = (\ x \ set as. a /real p)" by (rule sum.cong, simp, subst exp_single, simp, simp) also have "... = real m * real_of_int a / real p" by (simp add:m_def) finally show "expectation (\\. real (Q a \)) = real m * real_of_int a / p" by simp have indep: "J \ set as \ card J = 2 \ indep_vars (\_. borel) (\i x. of_bool (int (hash i x) < a)) J" for J using as_subset_p mod_ring_carr by (intro indep_vars_compose2[where Y="\i x. of_bool (int x < a)" and M'="\_. discrete"] k_wise_indep_vars_subset[OF k_wise_indep] finite_subset[OF _ finite_set]) auto have rv: "\x. x \ set as \ random_variable borel (\\. of_bool (int (hash x \) < a))" by (simp add:M_def) have "variance (\\. real (Q a \)) = variance (\\. (\x \ set as. of_bool (int (hash x \) < a)))" by (simp add:Q_def Int_def) also have "... = (\x \ set as. variance (\\. of_bool (int (hash x \) < a)))" - by (intro var_sum_pairwise_indep_2 indep rv) auto + by (intro bienaymes_identity_pairwise_indep_2 indep rv) auto also have "... \ (\ x \ set as. a / real p)" by (rule sum_mono, simp add: variance_eq of_bool_square, simp add: exp_single) also have "... = real m * real_of_int a /real p" by (simp add:m_def) finally show "variance (\\. real (Q a \)) \ real m * real_of_int a / p" by simp qed private lemma t_bound: "t \ 81 / (real_of_rat \)\<^sup>2" proof - have "t \ 80 / (real_of_rat \)\<^sup>2 + 1" using t_def t_gt_0 by linarith also have "... \ 80 / (real_of_rat \)\<^sup>2 + 1 / (real_of_rat \)\<^sup>2" using \_range by (intro add_mono, simp, simp add:power_le_one) also have "... = 81 / (real_of_rat \)\<^sup>2" by simp finally show ?thesis by simp qed private lemma t_r_bound: "18 * 40 * (real t)\<^sup>2 * 2 powr (-real r) \ 1" proof - have "720 * (real t)\<^sup>2 * 2 powr (-real r) \ 720 * (81 / (real_of_rat \)\<^sup>2)\<^sup>2 * 2 powr (-4 * log 2 (1 / real_of_rat \) - 23)" using r_bound t_bound by (intro mult_left_mono mult_mono power_mono powr_mono, auto) also have "... \ 720 * (81 / (real_of_rat \)\<^sup>2)\<^sup>2 * (2 powr (-4 * log 2 (1 / real_of_rat \)) * 2 powr (-23))" using \_range by (intro mult_left_mono mult_mono power_mono add_mono) (simp_all add:power_le_one powr_diff) also have "... = 720 * (81\<^sup>2 / (real_of_rat \)^4) * (2 powr (log 2 ((real_of_rat \)^4)) * 2 powr (-23))" using \_range by (intro arg_cong2[where f="(*)"]) (simp_all add:power2_eq_square power4_eq_xxxx log_divide log_powr[symmetric]) also have "... = 720 * 81\<^sup>2 * 2 powr (-23)" using \_range by simp also have "... \ 1" by simp finally show ?thesis by simp qed private lemma m_eq_F_0: "real m = of_rat (F 0 as)" by (simp add:m_def F_def) private lemma estimate'_bounds: "prob {\. of_rat \ * real_of_rat (F 0 as) < \estimate' (sketch_rv' \) - of_rat (F 0 as)\} \ 1/3" proof (cases "card (set as) \ t") case True define \' where "\' = 3 * real_of_rat \ / 4" define u where "u = \real t * p / (m * (1+\'))\" define v where "v = \real t * p / (m * (1-\'))\" define has_no_collision where "has_no_collision = (\\. \x\ set as. \y \ set as. (tr_hash x \ = tr_hash y \ \ x = y) \ tr_hash x \ > v)" have "2 powr (-real r) \ 2 powr (-(4 * log 2 (1 / real_of_rat \) + 23))" using r_bound by (intro powr_mono, linarith, simp) also have "... = 2 powr (-4 * log 2 (1 /real_of_rat \) -23)" by (rule arg_cong2[where f="(powr)"], auto simp add:algebra_simps) also have "... \ 2 powr ( -1 * log 2 (1 /real_of_rat \) -4)" using \_range by (intro powr_mono diff_mono, auto) also have "... = 2 powr ( -1 * log 2 (1 /real_of_rat \)) / 16" by (simp add: powr_diff) also have "... = real_of_rat \ / 16" using \_range by (simp add:log_divide) also have "... < real_of_rat \ / 8" using \_range by (subst pos_divide_less_eq, auto) finally have r_le_\: "2 powr (-real r) < real_of_rat \ / 8" by simp have \'_gt_0: "\' > 0" using \_range by (simp add:\'_def) have "\' < 3/4" using \_range by (simp add:\'_def)+ also have "... < 1" by simp finally have \'_lt_1: "\' < 1" by simp have "t \ 81 / (real_of_rat \)\<^sup>2" using t_bound by simp also have "... = (81*9/16) / (\')\<^sup>2" by (simp add:\'_def power2_eq_square) also have "... \ 46 / \'\<^sup>2" by (intro divide_right_mono, simp, simp) finally have t_le_\': "t \ 46/ \'\<^sup>2" by simp have "80 \ (real_of_rat \)\<^sup>2 * (80 / (real_of_rat \)\<^sup>2)" using \_range by simp also have "... \ (real_of_rat \)\<^sup>2 * t" by (intro mult_left_mono, simp add:t_def of_nat_ceiling, simp) finally have "80 \ (real_of_rat \)\<^sup>2 * t" by simp hence t_ge_\': "45 \ t * \' * \'" by (simp add:\'_def power2_eq_square) have "m \ card {.. p" using n_le_p by simp finally have m_le_p: "m \ p" by simp hence t_le_m: "t \ card (set as)" using True by simp have m_ge_0: "real m > 0" using m_def True t_gt_0 by simp have "v \ real t * real p / (real m * (1 - \'))" by (simp add:v_def) also have "... \ real t * real p / (real m * (1/4))" using \'_lt_1 m_ge_0 \_range by (intro divide_left_mono mult_left_mono mult_nonneg_nonneg mult_pos_pos, simp_all add:\'_def) finally have v_ubound: "v \ 4 * real t * real p / real m" by (simp add:algebra_simps) have a_ge_1: "u \ 1" using \'_gt_0 p_gt_0 m_ge_0 t_gt_0 by (auto intro!:mult_pos_pos divide_pos_pos simp add:u_def) hence a_ge_0: "u \ 0" by simp have "real m * (1 - \') < real m" using \'_gt_0 m_ge_0 by simp also have "... \ 1 * real p" using m_le_p by simp also have "... \ real t * real p" using t_gt_0 by (intro mult_right_mono, auto) finally have " real m * (1 - \') < real t * real p" by simp hence v_gt_0: "v > 0" using mult_pos_pos m_ge_0 \'_lt_1 by (simp add:v_def) hence v_ge_1: "real_of_int v \ 1" by linarith have "real t \ real m" using True m_def by linarith also have "... < (1 + \') * real m" using \'_gt_0 m_ge_0 by force finally have a_le_p_aux: "real t < (1 + \') * real m" by simp have "u \ real t * real p / (real m * (1 + \'))+1" by (simp add:u_def) also have "... < real p + 1" using m_ge_0 \'_gt_0 a_le_p_aux a_le_p_aux p_gt_0 by (simp add: pos_divide_less_eq ac_simps) finally have "u \ real p" by (metis int_less_real_le not_less of_int_le_iff of_int_of_nat_eq) hence u_le_p: "u \ int p" by linarith have "prob {\. Q u \ \ t} \ prob {\ \ Sigma_Algebra.space M. abs (real (Q u \) - expectation (\\. real (Q u \))) \ 3 * sqrt (m * real_of_int u / p)}" proof (rule pmf_mono[OF M_def]) fix \ assume "\ \ {\. t \ Q u \}" hence t_le: "t \ Q u \" by simp have "real m * real_of_int u / real p \ real m * (real t * real p / (real m * (1 + \'))+1) / real p" using m_ge_0 p_gt_0 by (intro divide_right_mono mult_left_mono, simp_all add: u_def) also have "... = real m * real t * real p / (real m * (1+\') * real p) + real m / real p" by (simp add:distrib_left add_divide_distrib) also have "... = real t / (1+\') + real m / real p" using p_gt_0 m_ge_0 by simp also have "... \ real t / (1+\') + 1" using m_le_p p_gt_0 by (intro add_mono, auto) finally have "real m * real_of_int u / real p \ real t / (1 + \') + 1" by simp hence "3 * sqrt (real m * of_int u / real p) + real m * of_int u / real p \ 3 * sqrt (t / (1+\')+1)+(t/(1+\')+1)" by (intro add_mono mult_left_mono real_sqrt_le_mono, auto) also have "... \ 3 * sqrt (real t+1) + ((t * (1 - \' / (1+\'))) + 1)" using \'_gt_0 t_gt_0 by (intro add_mono mult_left_mono real_sqrt_le_mono) (simp_all add: pos_divide_le_eq left_diff_distrib) also have "... = 3 * sqrt (real t+1) + (t - \' * t / (1+\')) + 1" by (simp add:algebra_simps) also have "... \ 3 * sqrt (46 / \'\<^sup>2 + 1 / \'\<^sup>2) + (t - \' * t/2) + 1 / \'" using \'_gt_0 t_gt_0 \'_lt_1 add_pos_pos t_le_\' by (intro add_mono mult_left_mono real_sqrt_le_mono add_mono) (simp_all add: power_le_one pos_le_divide_eq) also have "... \ (21 / \' + (t - 45 / (2*\'))) + 1 / \'" using \'_gt_0 t_ge_\' by (intro add_mono) (simp_all add:real_sqrt_divide divide_le_cancel real_le_lsqrt pos_divide_le_eq ac_simps) also have "... \ t" using \'_gt_0 by simp also have "... \ Q u \" using t_le by simp finally have "3 * sqrt (real m * of_int u / real p) + real m * of_int u / real p \ Q u \" by simp hence " 3 * sqrt (real m * real_of_int u / real p) \ \real (Q u \) - expectation (\\. real (Q u \))\" using a_ge_0 u_le_p True by (simp add:exp_Q abs_ge_iff) thus "\ \ {\ \ Sigma_Algebra.space M. 3 * sqrt (real m * real_of_int u / real p) \ \real (Q u \) - expectation (\\. real (Q u \))\}" by (simp add: M_def) qed also have "... \ variance (\\. real (Q u \)) / (3 * sqrt (real m * of_int u / real p))\<^sup>2" using a_ge_1 p_gt_0 m_ge_0 by (intro Chebyshev_inequality, simp add:M_def, auto) also have "... \ (real m * real_of_int u / real p) / (3 * sqrt (real m * of_int u / real p))\<^sup>2" using a_ge_0 u_le_p by (intro divide_right_mono var_Q, auto) also have "... \ 1/9" using a_ge_0 by simp finally have case_1: "prob {\. Q u \ \ t} \ 1/9" by simp have case_2: "prob {\. Q v \ < t} \ 1/9" proof (cases "v \ p") case True have "prob {\. Q v \ < t} \ prob {\ \ Sigma_Algebra.space M. abs (real (Q v \) - expectation (\\. real (Q v \))) \ 3 * sqrt (m * real_of_int v / p)}" proof (rule pmf_mono[OF M_def]) fix \ assume "\ \ set_pmf (pmf_of_set space)" have "(real t + 3 * sqrt (real t / (1 - \') )) * (1 - \') = real t - \' * t + 3 * ((1-\') * sqrt( real t / (1-\') ))" by (simp add:algebra_simps) also have "... = real t - \' * t + 3 * sqrt ( (1-\')\<^sup>2 * (real t / (1-\')))" using \'_lt_1 by (subst real_sqrt_mult, simp) also have "... = real t - \' * t + 3 * sqrt ( real t * (1- \'))" by (simp add:power2_eq_square distrib_left) also have "... \ real t - 45/ \' + 3 * sqrt ( real t )" using \'_gt_0 t_ge_\' \'_lt_1 by (intro add_mono mult_left_mono real_sqrt_le_mono) (simp_all add:pos_divide_le_eq ac_simps left_diff_distrib power_le_one) also have "... \ real t - 45/ \' + 3 * sqrt ( 46 / \'\<^sup>2)" using t_le_\' \'_lt_1 \'_gt_0 by (intro add_mono mult_left_mono real_sqrt_le_mono, simp_all add:pos_divide_le_eq power_le_one) also have "... = real t + (3 * sqrt(46) - 45)/ \'" using \'_gt_0 by (simp add:real_sqrt_divide diff_divide_distrib) also have "... \ t" using \'_gt_0 by (simp add:pos_divide_le_eq real_le_lsqrt) finally have aux: "(real t + 3 * sqrt (real t / (1 - \'))) * (1 - \') \ real t " by simp assume "\ \ {\. Q v \ < t}" hence "Q v \ < t" by simp hence "real (Q v \) + 3 * sqrt (real m * real_of_int v / real p) \ real t - 1 + 3 * sqrt (real m * real_of_int v / real p)" using m_le_p p_gt_0 by (intro add_mono, auto simp add: algebra_simps add_divide_distrib) also have "... \ (real t-1) + 3 * sqrt (real m * (real t * real p / (real m * (1- \'))) / real p)" by (intro add_mono mult_left_mono real_sqrt_le_mono divide_right_mono) (auto simp add:v_def) also have "... \ real t + 3 * sqrt(real t / (1-\')) - 1" using m_ge_0 p_gt_0 by simp also have "... \ real t / (1-\')-1" using \'_lt_1 aux by (simp add: pos_le_divide_eq) also have "... \ real m * (real t * real p / (real m * (1-\'))) / real p - 1" using p_gt_0 m_ge_0 by simp also have "... \ real m * (real t * real p / (real m * (1-\'))) / real p - real m / real p" using m_le_p p_gt_0 by (intro diff_mono, auto) also have "... = real m * (real t * real p / (real m * (1-\'))-1) / real p" by (simp add: left_diff_distrib right_diff_distrib diff_divide_distrib) also have "... \ real m * real_of_int v / real p" by (intro divide_right_mono mult_left_mono, simp_all add:v_def) finally have "real (Q v \) + 3 * sqrt (real m * real_of_int v / real p) \ real m * real_of_int v / real p" by simp hence " 3 * sqrt (real m * real_of_int v / real p) \ \real (Q v \) -expectation (\\. real (Q v \))\" using v_gt_0 True by (simp add: exp_Q abs_ge_iff) thus "\ \ {\\ Sigma_Algebra.space M. 3 * sqrt (real m * real_of_int v / real p) \ \real (Q v \) - expectation (\\. real (Q v \))\}" by (simp add:M_def) qed also have "... \ variance (\\. real (Q v \)) / (3 * sqrt (real m * real_of_int v / real p))\<^sup>2" using v_gt_0 p_gt_0 m_ge_0 by (intro Chebyshev_inequality, simp add:M_def, auto) also have "... \ (real m * real_of_int v / real p) / (3 * sqrt (real m * real_of_int v / real p))\<^sup>2" using v_gt_0 True by (intro divide_right_mono var_Q, auto) also have "... = 1/9" using p_gt_0 v_gt_0 m_ge_0 by (simp add:power2_eq_square) finally show ?thesis by simp next case False have "prob {\. Q v \ < t} \ prob {\. False}" proof (rule pmf_mono[OF M_def]) fix \ assume a:"\ \ {\. Q v \ < t}" assume "\ \ set_pmf (pmf_of_set space)" hence b:"\x. x < p \ hash x \ < p" using hash_range mod_ring_carr by (simp add:M_def measure_pmf_inverse) have "t \ card (set as)" using True by simp also have "... \ Q v \" unfolding Q_def using b False as_lt_p by (intro card_mono subsetI, simp, force) also have "... < t" using a by simp finally have "False" by auto thus "\ \ {\. False}" by simp qed also have "... = 0" by auto finally show ?thesis by simp qed have "prob {\. \has_no_collision \} \ prob {\. \x \ set as. \y \ set as. x \ y \ tr_hash x \ \ real_of_int v \ tr_hash x \ = tr_hash y \}" by (rule pmf_mono[OF M_def]) (simp add:has_no_collision_def M_def, force) also have "... \ (5/2) * (real (card (set as)))\<^sup>2 * (real_of_int v)\<^sup>2 * 2 powr - real r / (real p)\<^sup>2 + 1 / real p" using collision_prob v_ge_1 by blast also have "... \ (5/2) * (real m)\<^sup>2 * (real_of_int v)\<^sup>2 * 2 powr - real r / (real p)\<^sup>2 + 1 / real p" by (intro divide_right_mono add_mono mult_right_mono mult_mono power_mono, simp_all add:m_def) also have "... \ (5/2) * (real m)\<^sup>2 * (4 * real t * real p / real m)\<^sup>2 * (2 powr - real r) / (real p)\<^sup>2 + 1 / real p" using v_def v_ge_1 v_ubound by (intro add_mono divide_right_mono mult_right_mono mult_left_mono, auto) also have "... = 40 * (real t)\<^sup>2 * (2 powr -real r) + 1 / real p" using p_gt_0 m_ge_0 t_gt_0 by (simp add:algebra_simps power2_eq_square) also have "... \ 1/18 + 1/18" using t_r_bound p_ge_18 by (intro add_mono, simp_all add: pos_le_divide_eq) also have "... = 1/9" by simp finally have case_3: "prob {\. \has_no_collision \} \ 1/9" by simp have "prob {\. real_of_rat \ * of_rat (F 0 as) < \estimate' (sketch_rv' \) - of_rat (F 0 as)\} \ prob {\. Q u \ \ t \ Q v \ < t \ \(has_no_collision \)}" proof (rule pmf_mono[OF M_def], rule ccontr) fix \ assume "\ \ set_pmf (pmf_of_set space)" assume "\ \ {\. real_of_rat \ * real_of_rat (F 0 as) < \estimate' (sketch_rv' \) - real_of_rat (F 0 as)\}" hence est: "real_of_rat \ * real_of_rat (F 0 as) < \estimate' (sketch_rv' \) - real_of_rat (F 0 as)\" by simp assume "\ \ {\. t \ Q u \ \ Q v \ < t \ \ has_no_collision \}" hence "\( t \ Q u \ \ Q v \ < t \ \ has_no_collision \)" by simp hence lb: "Q u \ < t" and ub: "Q v \ \ t" and no_col: "has_no_collision \" by simp+ define y where "y = nth_mset (t-1) {#int (hash x \). x \# mset_set (set as)#}" define y' where "y' = nth_mset (t-1) {#tr_hash x \. x \# mset_set (set as)#}" have rank_t_lb: "u \ y" unfolding y_def using True t_gt_0 lb by (intro nth_mset_bound_left, simp_all add:count_less_def swap_filter_image Q_def) have rank_t_ub: "y \ v - 1" unfolding y_def using True t_gt_0 ub by (intro nth_mset_bound_right, simp_all add:Q_def swap_filter_image count_le_def) have y_ge_0: "real_of_int y \ 0" using rank_t_lb a_ge_0 by linarith have "mono (\x. truncate_down r (real_of_int x))" by (metis truncate_down_mono mono_def of_int_le_iff) hence y'_eq: "y' = truncate_down r y" unfolding y_def y'_def using True t_gt_0 by (subst nth_mset_commute_mono[where f="(\x. truncate_down r (of_int x))"]) (simp_all add: multiset.map_comp comp_def tr_hash_def) have "real_of_int u * (1 - 2 powr -real r) \ real_of_int y * (1 - 2 powr (-real r))" using rank_t_lb of_int_le_iff two_pow_r_le_1 by (intro mult_right_mono, auto) also have "... \ y'" using y'_eq truncate_down_pos[OF y_ge_0] by simp finally have rank_t_lb': "u * (1 - 2 powr -real r) \ y'" by simp have "y' \ real_of_int y" by (subst y'_eq, rule truncate_down_le, simp) also have "... \ real_of_int (v-1)" using rank_t_ub of_int_le_iff by blast finally have rank_t_ub': "y' \ v-1" by simp have "0 < u * (1-2 powr -real r)" using a_ge_1 two_pow_r_le_1 by (intro mult_pos_pos, auto) hence y'_pos: "y' > 0" using rank_t_lb' by linarith have no_col': "\x. x \ y' \ count {#tr_hash x \. x \# mset_set (set as)#} x \ 1" using rank_t_ub' no_col by (simp add:vimage_def card_le_Suc0_iff_eq count_image_mset has_no_collision_def) force have h_1: "Max (sketch_rv' \) = y'" using True t_gt_0 no_col' by (simp add:sketch_rv'_def y'_def nth_mset_max) have "card (sketch_rv' \) = card (least ((t-1)+1) (set_mset {#tr_hash x \. x \# mset_set (set as)#}))" using t_gt_0 by (simp add:sketch_rv'_def) also have "... = (t-1) +1" using True t_gt_0 no_col' by (intro nth_mset_max(2), simp_all add:y'_def) also have "... = t" using t_gt_0 by simp finally have "card (sketch_rv' \) = t" by simp hence h_3: "estimate' (sketch_rv' \) = real t * real p / y'" using h_1 by (simp add:estimate'_def) have "(real t) * real p \ (1 + \') * real m * ((real t) * real p / (real m * (1 + \')))" using \'_lt_1 m_def True t_gt_0 \'_gt_0 by auto also have "... \ (1+\') * m * u" using \'_gt_0 by (intro mult_left_mono, simp_all add:u_def) also have "... < ((1 + real_of_rat \)*(1-real_of_rat \/8)) * m * u" using True m_def t_gt_0 a_ge_1 \_range by (intro mult_strict_right_mono, auto simp add:\'_def right_diff_distrib) also have "... \ ((1 + real_of_rat \)*(1-2 powr (-r))) * m * u" using r_le_\ \_range a_ge_0 by (intro mult_right_mono mult_left_mono, auto) also have "... = (1 + real_of_rat \) * m * (u * (1-2 powr -real r))" by simp also have "... \ (1 + real_of_rat \) * m * y'" using \_range by (intro mult_left_mono rank_t_lb', simp) finally have "real t * real p < (1 + real_of_rat \) * m * y'" by simp hence f_1: "estimate' (sketch_rv' \) < (1 + real_of_rat \) * m" using y'_pos by (simp add: h_3 pos_divide_less_eq) have "(1 - real_of_rat \) * m * y' \ (1 - real_of_rat \) * m * v" using \_range rank_t_ub' y'_pos by (intro mult_mono rank_t_ub', simp_all) also have "... = (1-real_of_rat \) * (real m * v)" by simp also have "... < (1-\') * (real m * v)" using \_range m_ge_0 v_ge_1 by (intro mult_strict_right_mono mult_pos_pos, simp_all add:\'_def) also have "... \ (1-\') * (real m * (real t * real p / (real m * (1-\'))))" using \'_gt_0 \'_lt_1 by (intro mult_left_mono, auto simp add:v_def) also have "... = real t * real p" using \'_gt_0 \'_lt_1 t_gt_0 p_gt_0 m_ge_0 by auto finally have "(1 - real_of_rat \) * m * y' < real t * real p" by simp hence f_2: "estimate' (sketch_rv' \) > (1 - real_of_rat \) * m" using y'_pos by (simp add: h_3 pos_less_divide_eq) have "abs (estimate' (sketch_rv' \) - real_of_rat (F 0 as)) < real_of_rat \ * (real_of_rat (F 0 as))" using f_1 f_2 by (simp add:abs_less_iff algebra_simps m_eq_F_0) thus "False" using est by linarith qed also have "... \ 1/9 + (1/9 + 1/9)" by (intro pmf_add_2[OF M_def] case_1 case_2 case_3) also have "... = 1/3" by simp finally show ?thesis by simp next case False have "prob {\. real_of_rat \ * of_rat (F 0 as) < \estimate' (sketch_rv' \) - of_rat (F 0 as)\} \ prob {\. \x \ set as. \y \ set as. x \ y \ tr_hash x \ \ real p \ tr_hash x \ = tr_hash y \}" proof (rule pmf_mono[OF M_def]) fix \ assume a:"\ \ {\. real_of_rat \ * real_of_rat (F 0 as) < \estimate' (sketch_rv' \) - real_of_rat (F 0 as)\}" assume b:"\ \ set_pmf (pmf_of_set space)" have c: "card (set as) < t" using False by auto hence "card ((\x. tr_hash x \) ` set as) < t" using card_image_le order_le_less_trans by blast hence d:"card (sketch_rv' \) = card ((\x. tr_hash x \) ` (set as))" by (simp add:sketch_rv'_def card_least) have "card (sketch_rv' \) < t" by (metis List.finite_set c d card_image_le order_le_less_trans) hence "estimate' (sketch_rv' \) = card (sketch_rv' \)" by (simp add:estimate'_def) hence "card (sketch_rv' \) \ real_of_rat (F 0 as)" using a \_range by simp (metis abs_zero cancel_comm_monoid_add_class.diff_cancel of_nat_less_0_iff pos_prod_lt zero_less_of_rat_iff) hence "card (sketch_rv' \) \ card (set as)" using m_def m_eq_F_0 by linarith hence "\inj_on (\x. tr_hash x \) (set as)" using card_image d by auto moreover have "tr_hash x \ \ real p" if a:"x \ set as" for x proof - have "hash x \ < p" using hash_range as_lt_p a b by (simp add:mod_ring_carr M_def) thus "tr_hash x \ \ real p" using truncate_down_le by (simp add:tr_hash_def) qed ultimately show "\ \ {\. \x \ set as. \y \ set as. x \ y \ tr_hash x \ \ real p \ tr_hash x \ = tr_hash y \}" by (simp add:inj_on_def, blast) qed also have "... \ (5/2) * (real (card (set as)))\<^sup>2 * (real p)\<^sup>2 * 2 powr - real r / (real p)\<^sup>2 + 1 / real p" using p_gt_0 by (intro collision_prob, auto) also have "... = (5/2) * (real (card (set as)))\<^sup>2 * 2 powr (- real r) + 1 / real p" using p_gt_0 by (simp add:ac_simps power2_eq_square) also have "... \ (5/2) * (real t)\<^sup>2 * 2 powr (-real r) + 1 / real p" using False by (intro add_mono mult_right_mono mult_left_mono power_mono, auto) also have "... \ 1/6 + 1/6" using t_r_bound p_ge_18 by (intro add_mono, simp_all) also have "... \ 1/3" by simp finally show ?thesis by simp qed private lemma median_bounds: "\

(\ in measure_pmf \\<^sub>0. \median s (\i. estimate (sketch_rv (\ i))) - F 0 as\ \ \ * F 0 as) \ 1 - real_of_rat \" proof - have "strict_mono_on A real_of_float" for A by (meson less_float.rep_eq strict_mono_onI) hence real_g_2: "\\. sketch_rv' \ = real_of_float ` sketch_rv \" by (simp add: sketch_rv'_def sketch_rv_def tr_hash_def least_mono_commute image_comp) moreover have "inj_on real_of_float A" for A using real_of_float_inject by (simp add:inj_on_def) ultimately have card_eq: "\\. card (sketch_rv \) = card (sketch_rv' \)" using real_g_2 by (auto intro!: card_image[symmetric]) have "Max (sketch_rv' \) = real_of_float (Max (sketch_rv \))" if a:"card (sketch_rv' \) \ t" for \ proof - have "mono real_of_float" using less_eq_float.rep_eq mono_def by blast moreover have "finite (sketch_rv \)" by (simp add:sketch_rv_def least_def) moreover have " sketch_rv \ \ {}" using card_eq[symmetric] card_gt_0_iff t_gt_0 a by (simp, force) ultimately show ?thesis by (subst mono_Max_commute[where f="real_of_float"], simp_all add:real_g_2) qed hence real_g: "\\. estimate' (sketch_rv' \) = real_of_rat (estimate (sketch_rv \))" by (simp add:estimate_def estimate'_def card_eq of_rat_divide of_rat_mult of_rat_add real_of_rat_of_float) have indep: "prob_space.indep_vars (measure_pmf \\<^sub>0) (\_. borel) (\i \. estimate' (sketch_rv' (\ i))) {0..\<^sub>0_def by (rule indep_vars_restrict_intro', auto simp add:restrict_dfl_def lessThan_atLeast0) moreover have "- (18 * ln (real_of_rat \)) \ real s" using of_nat_ceiling by (simp add:s_def) blast moreover have "i < s \ measure \\<^sub>0 {\. of_rat \ * of_rat (F 0 as) < \estimate' (sketch_rv' (\ i)) - of_rat (F 0 as)\} \ 1/3" for i using estimate'_bounds unfolding \\<^sub>0_def M_def by (subst prob_prod_pmf_slice, simp_all) ultimately have "1-real_of_rat \ \ \

(\ in measure_pmf \\<^sub>0. \median s (\i. estimate' (sketch_rv' (\ i))) - real_of_rat (F 0 as)\ \ real_of_rat \ * real_of_rat (F 0 as))" using \_range prob_space_measure_pmf by (intro prob_space.median_bound_2) auto also have "... = \

(\ in measure_pmf \\<^sub>0. \median s (\i. estimate (sketch_rv (\ i))) - F 0 as\ \ \ * F 0 as)" using s_gt_0 median_rat[symmetric] real_g by (intro arg_cong2[where f="measure"]) (simp_all add:of_rat_diff[symmetric] of_rat_mult[symmetric] of_rat_less_eq) finally show "\

(\ in measure_pmf \\<^sub>0. \median s (\i. estimate (sketch_rv (\ i))) - F 0 as\ \ \ * F 0 as) \ 1-real_of_rat \" by blast qed lemma f0_alg_correct': "\

(\ in measure_pmf result. \\ - F 0 as\ \ \ * F 0 as) \ 1 - of_rat \" proof - have f0_result_elim: "\x. f0_result (s, t, p, r, x, \i\{..i. estimate (sketch_rv (x i))))" by (simp add:estimate_def, rule median_cong, simp) have "result = map_pmf (\x. (s, t, p, r, x, \i\{..\<^sub>0 \ f0_result" by (subst result_def, subst f0_alg_sketch, simp) also have "... = \\<^sub>0 \ (\x. return_pmf (s, t, p, r, x, \i\{.. f0_result" by (simp add:t_def p_def r_def s_def map_pmf_def) also have "... = \\<^sub>0 \ (\x. return_pmf (median s (\i. estimate (sketch_rv (x i)))))" by (subst bind_assoc_pmf, subst bind_return_pmf, subst f0_result_elim) simp finally have a:"result = \\<^sub>0 \ (\x. return_pmf (median s (\i. estimate (sketch_rv (x i)))))" by simp show ?thesis using median_bounds by (simp add: a map_pmf_def[symmetric]) qed private lemma f_subset: assumes "g ` A \ h ` B" shows "(\x. f (g x)) ` A \ (\x. f (h x)) ` B" using assms by auto lemma f0_exact_space_usage': defines "\ \ fold (\a state. state \ f0_update a) as (f0_init \ \ n)" shows "AE \ in \. bit_count (encode_f0_state \) \ f0_space_usage (n, \, \)" proof - have log_2_4: "log 2 4 = 2" by (metis log2_of_power_eq mult_2 numeral_Bit0 of_nat_numeral power2_eq_square) have a: "bit_count (F\<^sub>e (float_of (truncate_down r y))) \ ereal (12 + 4 * real r + 2 * log 2 (log 2 (n+13)))" if a_1:"y \ {.. 1") case True have aux_1: " 0 < 2 + log 2 (real y)" using True by (intro add_pos_nonneg, auto) have aux_2: "0 < 2 + log 2 (real p)" using p_gt_1 by (intro add_pos_nonneg, auto) have "bit_count (F\<^sub>e (float_of (truncate_down r y))) \ ereal (10 + 4 * real r + 2 * log 2 (2 + \log 2 \real y\\))" by (rule truncate_float_bit_count) also have "... = ereal (10 + 4 * real r + 2 * log 2 (2 + (log 2 (real y))))" using True by simp also have "... \ ereal (10 + 4 * real r + 2 * log 2 (2 + log 2 p))" using aux_1 aux_2 True p_gt_0 a_1 by simp also have "... \ ereal (10 + 4 * real r + 2 * log 2 (log 2 4 + log 2 (2 * n + 40)))" using log_2_4 p_le_n p_gt_0 by (intro ereal_mono add_mono mult_left_mono log_mono of_nat_mono add_pos_nonneg, auto) also have "... = ereal (10 + 4 * real r + 2 * log 2 (log 2 (8 * n + 160)))" by (simp add:log_mult[symmetric]) also have "... \ ereal (10 + 4 * real r + 2 * log 2 (log 2 ((n+13) powr 2)))" by (intro ereal_mono add_mono mult_left_mono log_mono of_nat_mono add_pos_nonneg) (auto simp add:power2_eq_square algebra_simps) also have "... = ereal (10 + 4 * real r + 2 * log 2 (log 2 4 * log 2 (n + 13)))" by (subst log_powr, simp_all add:log_2_4) also have "... = ereal (12 + 4 * real r + 2 * log 2 (log 2 (n + 13)))" by (subst log_mult, simp_all add:log_2_4) finally show ?thesis by simp next case False hence "y = 0" using a_1 by simp then show ?thesis by (simp add:float_bit_count_zero) qed have "bit_count (encode_f0_state (s, t, p, r, x, \i\{.. f0_space_usage (n, \, \)" if b: "x \ {..\<^sub>E space" for x proof - have c: "x \ extensional {.. (\k. float_of (truncate_down r k)) ` {.. (\xa. float_of (truncate_down r (hash xa (x y)))) ` set as" using least_subset by (auto simp add:sketch_rv_def tr_hash_def) also have "... \ (\k. float_of (truncate_down r (real k))) ` {..y. y < s \ finite (sketch_rv (x y))" unfolding sketch_rv_def by (rule finite_subset[OF least_subset], simp) moreover have card_sketch: "\y. y < s \ card (sketch_rv (x y)) \ t " by (simp add:sketch_rv_def card_least) moreover have "\y z. y < s \ z \ sketch_rv (x y) \ bit_count (F\<^sub>e z) \ ereal (12 + 4 * real r + 2 * log 2 (log 2 (real n + 13)))" using a d by auto ultimately have e: "\y. y < s \ bit_count (S\<^sub>e F\<^sub>e (sketch_rv (x y))) \ ereal (real t) * (ereal (12 + 4 * real r + 2 * log 2 (log 2 (real (n + 13)))) + 1) + 1" using float_encoding by (intro set_bit_count_est, auto) have f: "\y. y < s \ bit_count (P\<^sub>e p 2 (x y)) \ ereal (real 2 * (log 2 (real p) + 1))" using p_gt_1 b by (intro bounded_degree_polynomial_bit_count) (simp_all add:space_def PiE_def Pi_def) have "bit_count (encode_f0_state (s, t, p, r, x, \i\{..e s) + bit_count (N\<^sub>e t) + bit_count (N\<^sub>e p) + bit_count (N\<^sub>e r) + bit_count (([0..\<^sub>e P\<^sub>e p 2) x) + bit_count (([0..\<^sub>e S\<^sub>e F\<^sub>e) (\i\{.. ereal (2* log 2 (real s + 1) + 1) + ereal (2* log 2 (real t + 1) + 1) + ereal (2* log 2 (real p + 1) + 1) + ereal (2 * log 2 (real r + 1) + 1) + (ereal (real s) * (ereal (real 2 * (log 2 (real p) + 1)))) + (ereal (real s) * ((ereal (real t) * (ereal (12 + 4 * real r + 2 * log 2 (log 2 (real (n + 13)))) + 1) + 1)))" using c e f by (intro add_mono exp_golomb_bit_count fun_bit_count_est[where xs="[0.. ereal ( 4 + 2 * log 2 (real s + 1) + 2 * log 2 (real t + 1) + 2 * log 2 (2 * (21 + real n)) + 2 * log 2 (real r + 1) + real s * (3 + 2 * log 2 (2 * (21 + real n)) + real t * (13 + (4 * real r + 2 * log 2 (log 2 (real n + 13))))))" using p_le_n p_gt_0 by (intro ereal_mono add_mono mult_left_mono, auto) also have "... = ereal (6 + 2 * log 2 (real s + 1) + 2 * log 2 (real t + 1) + 2 * log 2 (21 + real n) + 2 * log 2 (real r + 1) + real s * (5 + 2 * log 2 (21 + real n) + real t * (13 + (4 * real r + 2 * log 2 (log 2 (real n + 13))))))" by (subst (1 2) log_mult, auto) also have "... \ f0_space_usage (n, \, \)" by (simp add:s_def[symmetric] r_def[symmetric] t_def[symmetric] Let_def) (simp add:algebra_simps) finally show "bit_count (encode_f0_state (s, t, p, r, x, \i\{.. f0_space_usage (n, \, \)" by simp qed hence "\x. x \ set_pmf \\<^sub>0 \ bit_count (encode_f0_state (s, t, p, r, x, \i\{.. ereal (f0_space_usage (n, \, \))" by (simp add:\\<^sub>0_def set_prod_pmf del:f0_space_usage.simps) hence "\y. y \ set_pmf \ \ bit_count (encode_f0_state y) \ ereal (f0_space_usage (n, \, \))" by (simp add: \_def f0_alg_sketch del:f0_space_usage.simps f0_init.simps) (metis (no_types, lifting) image_iff pmf.set_map) thus ?thesis by (simp add: AE_measure_pmf_iff del:f0_space_usage.simps) qed end text \Main results of this section:\ theorem f0_alg_correct: assumes "\ \ {0<..<1}" assumes "\ \ {0<..<1}" assumes "set as \ {.. \ fold (\a state. state \ f0_update a) as (f0_init \ \ n) \ f0_result" shows "\

(\ in measure_pmf \. \\ - F 0 as\ \ \ * F 0 as) \ 1 - of_rat \" using f0_alg_correct'[OF assms(1-3)] unfolding \_def by blast theorem f0_exact_space_usage: assumes "\ \ {0<..<1}" assumes "\ \ {0<..<1}" assumes "set as \ {.. \ fold (\a state. state \ f0_update a) as (f0_init \ \ n)" shows "AE \ in \. bit_count (encode_f0_state \) \ f0_space_usage (n, \, \)" using f0_exact_space_usage'[OF assms(1-3)] unfolding \_def by blast theorem f0_asymptotic_space_complexity: "f0_space_usage \ O[at_top \\<^sub>F at_right 0 \\<^sub>F at_right 0](\(n, \, \). ln (1 / of_rat \) * (ln (real n) + 1 / (of_rat \)\<^sup>2 * (ln (ln (real n)) + ln (1 / of_rat \))))" (is "_ \ O[?F](?rhs)") proof - define n_of :: "nat \ rat \ rat \ nat" where "n_of = (\(n, \, \). n)" define \_of :: "nat \ rat \ rat \ rat" where "\_of = (\(n, \, \). \)" define \_of :: "nat \ rat \ rat \ rat" where "\_of = (\(n, \, \). \)" define t_of where "t_of = (\x. nat \80 / (real_of_rat (\_of x))\<^sup>2\)" define s_of where "s_of = (\x. nat \-(18 * ln (real_of_rat (\_of x)))\)" define r_of where "r_of = (\x. nat (4 * \log 2 (1 / real_of_rat (\_of x))\ + 23))" define g where "g = (\x. ln (1 / of_rat (\_of x)) * (ln (real (n_of x)) + 1 / (of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / of_rat (\_of x)))))" have evt: "(\x. 0 < real_of_rat (\_of x) \ 0 < real_of_rat (\_of x) \ 1/real_of_rat (\_of x) \ \ \ 1/real_of_rat (\_of x) \ \ \ real (n_of x) \ n \ P x) \ eventually P ?F" (is "(\x. ?prem x \ _) \ _") for \ \ n P apply (rule eventually_mono[where P="?prem" and Q="P"]) apply (simp add:\_of_def case_prod_beta' \_of_def n_of_def) apply (intro eventually_conj eventually_prod1' eventually_prod2' sequentially_inf eventually_at_right_less inv_at_right_0_inf) by (auto simp add:prod_filter_eq_bot) have exp_pos: "exp k \ real x \ x > 0" for k x using exp_gt_zero gr0I by force have exp_gt_1: "exp 1 \ (1::real)" by simp have 1: "(\_. 1) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" by (auto intro!:landau_o.big_mono evt[where \="exp 1"] iffD2[OF ln_ge_iff] simp add:abs_ge_iff) have 2: "(\_. 1) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" by (auto intro!:landau_o.big_mono evt[where \="exp 1"] iffD2[OF ln_ge_iff] simp add:abs_ge_iff) have 3: " (\x. 1) \ O[?F](\x. ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x)))" using exp_pos by (intro landau_sum_2 2 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff], auto) have 4: "(\_. 1) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" using one_le_power by (intro landau_o.big_mono evt[where \="1"], auto simp add:power_one_over[symmetric]) have "(\x. 80 * (1 / (real_of_rat (\_of x))\<^sup>2)) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" by (subst landau_o.big.cmult_in_iff, auto) hence 5: "(\x. real (t_of x)) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" unfolding t_of_def by (intro landau_real_nat landau_ceil 4, auto) have "(\x. ln (real_of_rat (\_of x))) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" by (intro landau_o.big_mono evt[where \="1"], auto simp add:ln_div) hence 6: "(\x. real (s_of x)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" unfolding s_of_def by (intro landau_nat_ceil 1, simp) have 7: " (\x. 1) \ O[?F](\x. ln (real (n_of x)))" using exp_pos by (auto intro!: landau_o.big_mono evt[where n="exp 1"] iffD2[OF ln_ge_iff] simp: abs_ge_iff) have 8:" (\_. 1) \ O[?F](\x. ln (real (n_of x)) + 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" using order_trans[OF exp_gt_1] exp_pos by (intro landau_sum_1 7 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff] mult_nonneg_nonneg add_nonneg_nonneg) auto have "(\x. ln (real (s_of x) + 1)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" by (intro landau_ln_3 sum_in_bigo 6 1, simp) hence 9: "(\x. log 2 (real (s_of x) + 1)) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1 8, auto simp:log_def) have 10: "(\x. 1) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1 8 1) have "(\x. ln (real (t_of x) + 1)) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" using 5 by (intro landau_o.big_mult_1 3 landau_ln_3 sum_in_bigo 4, simp_all) hence " (\x. log 2 (real (t_of x) + 1)) \ O[?F](\x. ln (real (n_of x)) + 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" using order_trans[OF exp_gt_1] exp_pos by (intro landau_sum_2 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff] mult_nonneg_nonneg add_nonneg_nonneg) (auto simp add:log_def) hence 11: "(\x. log 2 (real (t_of x) + 1)) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1' 1, auto) have " (\x. 1) \ O[?F](\x. real (n_of x))" by (intro landau_o.big_mono evt[where n="1"], auto) hence "(\x. ln (real (n_of x) + 21)) \ O[?F](\x. ln (real (n_of x)))" by (intro landau_ln_2[where a="2"] evt[where n="2"] sum_in_bigo, auto) hence 12: "(\x. log 2 (real (n_of x) + 21)) \ O[?F](g)" unfolding g_def using exp_pos order_trans[OF exp_gt_1] by (intro landau_o.big_mult_1' 1 landau_sum_1 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff] mult_nonneg_nonneg add_nonneg_nonneg) (auto simp add:log_def) have "(\x. ln (1 / real_of_rat (\_of x))) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" by (intro landau_ln_3 evt[where \="1"] landau_o.big_mono) (auto simp add:power_one_over[symmetric] self_le_power) hence " (\x. real (nat (4*\log 2 (1 / real_of_rat (\_of x))\+23))) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" using 4 by (auto intro!: landau_real_nat sum_in_bigo landau_ceil simp:log_def) hence " (\x. ln (real (r_of x) + 1)) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" unfolding r_of_def by (intro landau_ln_3 sum_in_bigo 4, auto) hence " (\x. log 2 (real (r_of x) + 1)) \ O[?F](\x. (1 / (real_of_rat (\_of x))\<^sup>2) * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" by (intro landau_o.big_mult_1 3, simp add:log_def) hence " (\x. log 2 (real (r_of x) + 1)) \ O[?F](\x. ln (real (n_of x)) + 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" using exp_pos order_trans[OF exp_gt_1] by (intro landau_sum_2 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff] add_nonneg_nonneg mult_nonneg_nonneg) (auto) hence 13: "(\x. log 2 (real (r_of x) + 1)) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1' 1, auto) have 14: "(\x. 1) \ O[?F](\x. real (n_of x))" by (intro landau_o.big_mono evt[where n="1"], auto) have "(\x. ln (real (n_of x) + 13)) \ O[?F](\x. ln (real (n_of x)))" using 14 by (intro landau_ln_2[where a="2"] evt[where n="2"] sum_in_bigo, auto) hence "(\x. ln (log 2 (real (n_of x) + 13))) \ O[?F](\x. ln (ln (real (n_of x))))" using exp_pos by (intro landau_ln_2[where a="2"] iffD2[OF ln_ge_iff] evt[where n="exp 2"]) (auto simp add:log_def) hence "(\x. log 2 (log 2 (real (n_of x) + 13))) \ O[?F](\x. ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x)))" using exp_pos by (intro landau_sum_1 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff]) (auto simp add:log_def) moreover have "(\x. real (r_of x)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" unfolding r_of_def using 2 by (auto intro!: landau_real_nat sum_in_bigo landau_ceil simp:log_def) hence "(\x. real (r_of x)) \ O[?F](\x. ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x)))" using exp_pos by (intro landau_sum_2 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff], auto) ultimately have 15:" (\x. real (t_of x) * (13 + 4 * real (r_of x) + 2 * log 2 (log 2 (real (n_of x) + 13)))) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" using 5 3 by (intro landau_o.mult sum_in_bigo, auto) have "(\x. 5 + 2 * log 2 (21 + real (n_of x)) + real (t_of x) * (13 + 4 * real (r_of x) + 2 * log 2 (log 2 (real (n_of x) + 13)))) \ O[?F](\x. ln (real (n_of x)) + 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" proof - have "\\<^sub>F x in ?F. 0 \ ln (real (n_of x))" by (intro evt[where n="1"] ln_ge_zero, auto) moreover have "\\<^sub>F x in ?F. 0 \ 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x)))" using exp_pos by (intro evt[where n="exp 1" and \="1"] mult_nonneg_nonneg add_nonneg_nonneg ln_ge_zero iffD2[OF ln_ge_iff]) auto moreover have " (\x. ln (21 + real (n_of x))) \ O[?F](\x. ln (real (n_of x)))" using 14 by (intro landau_ln_2[where a="2"] sum_in_bigo evt[where n="2"], auto) hence "(\x. 5 + 2 * log 2 (21 + real (n_of x))) \ O[?F](\x. ln (real (n_of x)))" using 7 by (intro sum_in_bigo, auto simp add:log_def) ultimately show ?thesis using 15 by (rule landau_sum) qed hence 16: "(\x. real (s_of x) * (5 + 2 * log 2 (21 + real (n_of x)) + real (t_of x) * (13 + 4 * real (r_of x) + 2 * log 2 (log 2 (real (n_of x) + 13))))) \ O[?F](g)" unfolding g_def by (intro landau_o.mult 6, auto) have "f0_space_usage = (\x. f0_space_usage (n_of x, \_of x, \_of x))" by (simp add:case_prod_beta' n_of_def \_of_def \_of_def) also have "... \ O[?F](g)" using 9 10 11 12 13 16 by (simp add:fun_cong[OF s_of_def[symmetric]] fun_cong[OF t_of_def[symmetric]] fun_cong[OF r_of_def[symmetric]] Let_def) (intro sum_in_bigo, auto) also have "... = O[?F](?rhs)" by (simp add:case_prod_beta' g_def n_of_def \_of_def \_of_def) finally show ?thesis by simp qed end diff --git a/thys/Frequency_Moments/Frequency_Moment_2.thy b/thys/Frequency_Moments/Frequency_Moment_2.thy --- a/thys/Frequency_Moments/Frequency_Moment_2.thy +++ b/thys/Frequency_Moments/Frequency_Moment_2.thy @@ -1,721 +1,721 @@ section \Frequency Moment $2$\ theory Frequency_Moment_2 imports Universal_Hash_Families.Carter_Wegman_Hash_Family Universal_Hash_Families.Universal_Hash_Families_More_Finite_Fields Equivalence_Relation_Enumeration.Equivalence_Relation_Enumeration Landau_Ext Median_Method.Median Product_PMF_Ext Frequency_Moments begin text \This section contains a formalization of the algorithm for the second frequency moment. It is based on the algorithm described in \<^cite>\\\textsection 2.2\ in "alon1999"\. The only difference is that the algorithm is adapted to work with prime field of odd order, which greatly reduces the implementation complexity.\ fun f2_hash where "f2_hash p h k = (if even (ring.hash (mod_ring p) k h) then int p - 1 else - int p - 1)" type_synonym f2_state = "nat \ nat \ nat \ (nat \ nat \ nat list) \ (nat \ nat \ int)" fun f2_init :: "rat \ rat \ nat \ f2_state pmf" where "f2_init \ \ n = do { let s\<^sub>1 = nat \6 / \\<^sup>2\; let s\<^sub>2 = nat \-(18 * ln (real_of_rat \))\; let p = prime_above (max n 3); h \ prod_pmf ({..1} \ {..2}) (\_. pmf_of_set (bounded_degree_polynomials (mod_ring p) 4)); return_pmf (s\<^sub>1, s\<^sub>2, p, h, (\_ \ {..1} \ {..2}. (0 :: int))) }" fun f2_update :: "nat \ f2_state \ f2_state pmf" where "f2_update x (s\<^sub>1, s\<^sub>2, p, h, sketch) = return_pmf (s\<^sub>1, s\<^sub>2, p, h, \i \ {..1} \ {..2}. f2_hash p (h i) x + sketch i)" fun f2_result :: "f2_state \ rat pmf" where "f2_result (s\<^sub>1, s\<^sub>2, p, h, sketch) = return_pmf (median s\<^sub>2 (\i\<^sub>2 \ {..2}. (\i\<^sub>1\{..1} . (rat_of_int (sketch (i\<^sub>1, i\<^sub>2)))\<^sup>2) / (((rat_of_nat p)\<^sup>2-1) * rat_of_nat s\<^sub>1)))" fun f2_space_usage :: "(nat \ nat \ rat \ rat) \ real" where "f2_space_usage (n, m, \, \) = ( let s\<^sub>1 = nat \6 / \\<^sup>2 \ in let s\<^sub>2 = nat \-(18 * ln (real_of_rat \))\ in 3 + 2 * log 2 (s\<^sub>1 + 1) + 2 * log 2 (s\<^sub>2 + 1) + 2 * log 2 (9 + 2 * real n) + s\<^sub>1 * s\<^sub>2 * (5 + 4*log 2 (8 + 2 * real n) + 2 * log 2 (real m * (18 + 4 * real n) + 1 )))" definition encode_f2_state :: "f2_state \ bool list option" where "encode_f2_state = N\<^sub>e \\<^sub>e (\s\<^sub>1. N\<^sub>e \\<^sub>e (\s\<^sub>2. N\<^sub>e \\<^sub>e (\p. (List.product [0..1] [0..2] \\<^sub>e P\<^sub>e p 4) \\<^sub>e (List.product [0..1] [0..2] \\<^sub>e I\<^sub>e))))" lemma "inj_on encode_f2_state (dom encode_f2_state)" proof - have " is_encoding encode_f2_state" unfolding encode_f2_state_def by (intro dependent_encoding exp_golomb_encoding fun_encoding list_encoding int_encoding poly_encoding) thus ?thesis by (rule encoding_imp_inj) qed context fixes \ \ :: rat fixes n :: nat fixes as :: "nat list" fixes result assumes \_range: "\ \ {0<..<1}" assumes \_range: "\ > 0" assumes as_range: "set as \ {.. fold (\a state. state \ f2_update a) as (f2_init \ \ n) \ f2_result" begin private definition s\<^sub>1 where "s\<^sub>1 = nat \6 / \\<^sup>2\" lemma s1_gt_0: "s\<^sub>1 > 0" using \_range by (simp add:s\<^sub>1_def) private definition s\<^sub>2 where "s\<^sub>2 = nat \-(18* ln (real_of_rat \))\" lemma s2_gt_0: "s\<^sub>2 > 0" using \_range by (simp add:s\<^sub>2_def) private definition p where "p = prime_above (max n 3)" lemma p_prime: "Factorial_Ring.prime p" unfolding p_def using prime_above_prime by blast lemma p_ge_3: "p \ 3" unfolding p_def by (meson max.boundedE prime_above_lower_bound) lemma p_gt_0: "p > 0" using p_ge_3 by linarith lemma p_gt_1: "p > 1" using p_ge_3 by simp lemma p_ge_n: "p \ n" unfolding p_def by (meson max.boundedE prime_above_lower_bound ) interpretation carter_wegman_hash_family "mod_ring p" 4 using carter_wegman_hash_familyI[OF mod_ring_is_field mod_ring_finite] using p_prime by auto definition sketch where "sketch = fold (\a state. state \ f2_update a) as (f2_init \ \ n)" private definition \ where"\ = prod_pmf ({..1} \ {..2}) (\_. pmf_of_set space)" private definition \\<^sub>p where"\\<^sub>p = measure_pmf \" private definition sketch_rv where "sketch_rv \ = of_int (sum_list (map (f2_hash p \) as))^2" private definition mean_rv where "mean_rv \ = (\i\<^sub>2. (\i\<^sub>1 = 0..1. sketch_rv (\ (i\<^sub>1, i\<^sub>2))) / (((of_nat p)\<^sup>2 - 1) * of_nat s\<^sub>1))" private definition result_rv where "result_rv \ = median s\<^sub>2 (\i\<^sub>2\{..2}. mean_rv \ i\<^sub>2)" lemma mean_rv_alg_sketch: "sketch = \ \ (\\. return_pmf (s\<^sub>1, s\<^sub>2, p, \, \i \ {..1} \ {..2}. sum_list (map (f2_hash p (\ i)) as)))" proof - have "sketch = fold (\a state. state \ f2_update a) as (f2_init \ \ n)" by (simp add:sketch_def) also have "... = \ \ (\\. return_pmf (s\<^sub>1, s\<^sub>2, p, \, \i \ {..1} \ {..2}. sum_list (map (f2_hash p (\ i)) as)))" proof (induction as rule:rev_induct) case Nil then show ?case by (simp add:s\<^sub>1_def s\<^sub>2_def space_def p_def[symmetric] \_def restrict_def Let_def) next case (snoc a as) have "fold (\a state. state \ f2_update a) (as @ [a]) (f2_init \ \ n) = \ \ (\\. return_pmf (s\<^sub>1, s\<^sub>2, p, \, \s \ {..1} \ {..2}. (\x \ as. f2_hash p (\ s) x)) \ f2_update a)" using snoc by (simp add: bind_assoc_pmf restrict_def del:f2_hash.simps f2_init.simps) also have "... = \ \ (\\. return_pmf (s\<^sub>1, s\<^sub>2, p, \, \i \ {..1} \ {..2}. (\x \ as@[a]. f2_hash p (\ i) x)))" by (subst bind_return_pmf) (simp add: add.commute del:f2_hash.simps cong:restrict_cong) finally show ?case by blast qed finally show ?thesis by auto qed lemma distr: "result = map_pmf result_rv \" proof - have "result = sketch \ f2_result" by (simp add:result_def sketch_def) also have "... = \ \ (\x. f2_result (s\<^sub>1, s\<^sub>2, p, x, \i\{..1} \ {..2}. sum_list (map (f2_hash p (x i)) as)))" by (simp add: mean_rv_alg_sketch bind_assoc_pmf bind_return_pmf) also have "... = map_pmf result_rv \" by (simp add:map_pmf_def result_rv_def mean_rv_def sketch_rv_def lessThan_atLeast0 cong:restrict_cong) finally show ?thesis by simp qed private lemma f2_hash_pow_exp: assumes "k < p" shows "expectation (\\. real_of_int (f2_hash p \ k) ^m) = ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)" proof - have "odd p" using p_prime p_ge_3 prime_odd_nat assms by simp then obtain t where t_def: "p=2*t+1" using oddE by blast have "Collect even \ {..<2 * t + 1} \ (*) 2 ` {.. Collect even \ {..<2 * t + 1}" by (rule image_subsetI, simp) ultimately have "card ({k. even k} \ {..x. 2*x) ` {.. {.. {... hash k \ \ Collect even} = (real p + 1)/(2*real p)" using assms by (subst prob_range, auto simp:frac_eq_eq p_gt_0 mod_ring_def) have "p = card {.. {.. ({k. even k} \ {.. {.. {.. {.. {.. {... hash k \ \ Collect odd} = (real p - 1)/(2*real p)" using assms by (subst prob_range, auto simp add: frac_eq_eq mod_ring_def) have "expectation (\x. real_of_int (f2_hash p x k) ^ m) = expectation (\\. indicator {\. even (hash k \)} \ * (real p - 1)^m + indicator {\. odd (hash k \)} \ * (-real p - 1)^m)" by (rule Bochner_Integration.integral_cong, simp, simp) also have "... = prob {\. hash k \ \ Collect even} * (real p - 1) ^ m + prob {\. hash k \ \ Collect odd} * (-real p - 1) ^ m " by (simp, simp add:M_def) also have "... = (real p + 1) * (real p - 1) ^ m / (2 * real p) + (real p - 1) * (- real p - 1) ^ m / (2 * real p)" by (subst prob_even, subst prob_odd, simp) also have "... = ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)" by (simp add:add_divide_distrib ac_simps) finally show "expectation (\x. real_of_int (f2_hash p x k) ^ m) = ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)" by simp qed lemma shows var_sketch_rv:"variance sketch_rv \ 2*(real_of_rat (F 2 as)^2) * ((real p)\<^sup>2-1)\<^sup>2" (is "?A") and exp_sketch_rv:"expectation sketch_rv = real_of_rat (F 2 as) * ((real p)\<^sup>2-1)" (is "?B") proof - define h where "h = (\\ x. real_of_int (f2_hash p \ x))" define c where "c = (\x. real (count_list as x))" define r where "r = (\(m::nat). ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p))" define h_prod where "h_prod = (\as \. prod_list (map (h \) as))" define exp_h_prod :: "nat list \ real" where "exp_h_prod = (\as. (\i \ set as. r (count_list as i)))" have f_eq: "sketch_rv = (\\. (\x \ set as. c x * h \ x)^2)" by (rule ext, simp add:sketch_rv_def c_def h_def sum_list_eval del:f2_hash.simps) have r_one: "r (Suc 0) = 0" by (simp add:r_def algebra_simps) have r_two: "r 2 = (real p^2-1)" using p_gt_0 unfolding r_def power2_eq_square by (simp add:nonzero_divide_eq_eq, simp add:algebra_simps) have"(real p)^2 \ 2^2" by (rule power_mono, use p_gt_1 in linarith, simp) hence p_square_ge_4: "(real p)\<^sup>2 \ 4" by simp have "r 4 = (real p)^4+2*(real p)\<^sup>2 - 3" using p_gt_0 unfolding r_def by (subst nonzero_divide_eq_eq, auto simp:power4_eq_xxxx power2_eq_square algebra_simps) also have "... \ (real p)^4+2*(real p)\<^sup>2 + 3" by simp also have "... \ 3 * r 2 * r 2" using p_square_ge_4 by (simp add:r_two power4_eq_xxxx power2_eq_square algebra_simps mult_left_mono) finally have r_four_est: "r 4 \ 3 * r 2 * r 2" by simp have exp_h_prod_elim: "exp_h_prod = (\as. prod_list (map (r \ count_list as) (remdups as)))" by (simp add:exp_h_prod_def prod.set_conv_list[symmetric]) have exp_h_prod: "\x. set x \ set as \ length x \ 4 \ expectation (h_prod x) = exp_h_prod x" proof - fix x assume "set x \ set as" hence x_sub_p: "set x \ {..k. k \ set x \ k < p" by auto assume "length x \ 4" hence card_x: "card (set x) \ 4" using card_length dual_order.trans by blast have "set x \ carrier (mod_ring p) " using x_sub_p by (simp add:mod_ring_def) hence h_indep: "indep_vars (\_. borel) (\i \. h \ i ^ count_list x i) (set x)" using k_wise_indep_vars_subset[OF k_wise_indep] card_x as_range h_def by (auto intro:indep_vars_compose2[where X="hash" and M'=" (\_. discrete)"]) have "expectation (h_prod x) = expectation (\\. \ i \ set x. h \ i^(count_list x i))" by (simp add:h_prod_def prod_list_eval) also have "... = (\i \ set x. expectation (\\. h \ i^(count_list x i)))" by (simp add: indep_vars_lebesgue_integral[OF _ h_indep]) also have "... = (\i \ set x. r (count_list x i))" using f2_hash_pow_exp x_le_p by (simp add:h_def r_def M_def[symmetric] del:f2_hash.simps) also have "... = exp_h_prod x" by (simp add:exp_h_prod_def) finally show "expectation (h_prod x) = exp_h_prod x" by simp qed have "\x y. kernel_of x = kernel_of y \ exp_h_prod x = exp_h_prod y" proof - fix x y :: "nat list" assume a:"kernel_of x = kernel_of y" then obtain f where b:"bij_betw f (set x) (set y)" and c:"\z. z \ set x \ count_list x z = count_list y (f z)" using kernel_of_eq_imp_bij by blast have "exp_h_prod x = prod ( (\i. r(count_list y i)) \ f) (set x)" by (simp add:exp_h_prod_def c) also have "... = (\i \ f ` (set x). r(count_list y i))" by (metis b bij_betw_def prod.reindex) also have "... = exp_h_prod y" unfolding exp_h_prod_def by (rule prod.cong, metis b bij_betw_def) simp finally show "exp_h_prod x = exp_h_prod y" by simp qed hence exp_h_prod_cong: "\p x. of_bool (kernel_of x = kernel_of p) * exp_h_prod p = of_bool (kernel_of x = kernel_of p) * exp_h_prod x" by (metis (full_types) of_bool_eq_0_iff vector_space_over_itself.scale_zero_left) have c:"(\p\enum_rgfs n. of_bool (kernel_of xs = kernel_of p) * r) = r" if a:"length xs = n" for xs :: "nat list" and n and r :: real proof - have "(\p\enum_rgfs n. of_bool (kernel_of xs = kernel_of p) * 1) = (1::real)" using equiv_rels_2[OF a[symmetric]] by (simp add:equiv_rels_def comp_def) thus "(\p\enum_rgfs n. of_bool (kernel_of xs = kernel_of p) * r) = (r::real)" by (simp add:sum_list_mult_const) qed have "expectation sketch_rv = (\i\set as. (\j\set as. c i * c j * expectation (h_prod [i,j])))" by (simp add:f_eq h_prod_def power2_eq_square sum_distrib_left sum_distrib_right Bochner_Integration.integral_sum algebra_simps) also have "... = (\i\set as. (\j\set as. c i * c j * exp_h_prod [i,j]))" by (simp add:exp_h_prod) also have "... = (\i \ set as. (\j \ set as. c i * c j * (sum_list (map (\p. of_bool (kernel_of [i,j] = kernel_of p) * exp_h_prod p) (enum_rgfs 2)))))" by (subst exp_h_prod_cong, simp add:c) also have "... = (\i\set as. c i * c i * r 2)" by (simp add: numeral_eq_Suc kernel_of_eq All_less_Suc exp_h_prod_elim r_one distrib_left sum.distrib sum_collapse) also have "... = real_of_rat (F 2 as) * ((real p)^2-1)" by (simp add: sum_distrib_right[symmetric] c_def F_def power2_eq_square of_rat_sum of_rat_mult r_two) finally show b:?B by simp have "expectation (\x. (sketch_rv x)\<^sup>2) = (\i1 \ set as. (\i2 \ set as. (\i3 \ set as. (\i4 \ set as. c i1 * c i2 * c i3 * c i4 * expectation (h_prod [i1, i2, i3, i4])))))" by (simp add:f_eq h_prod_def power4_eq_xxxx sum_distrib_left sum_distrib_right Bochner_Integration.integral_sum algebra_simps) also have "... = (\i1 \ set as. (\i2 \ set as. (\i3 \ set as. (\i4 \ set as. c i1 * c i2 * c i3 * c i4 * exp_h_prod [i1,i2,i3,i4]))))" by (simp add:exp_h_prod) also have "... = (\i1 \ set as. (\i2 \ set as. (\i3 \ set as. (\i4 \ set as. c i1 * c i2 * c i3 * c i4 * (sum_list (map (\p. of_bool (kernel_of [i1,i2,i3,i4] = kernel_of p) * exp_h_prod p) (enum_rgfs 4)))))))" by (subst exp_h_prod_cong, simp add:c) also have "... = 3 * (\i \ set as. (\j \ set as. c i^2 * c j^2 * r 2 * r 2)) + ((\ i \ set as. c i^4 * r 4) - 3 * (\ i \ set as. c i ^ 4 * r 2 * r 2))" apply (simp add: numeral_eq_Suc exp_h_prod_elim r_one) (* large intermediate terms *) apply (simp add: kernel_of_eq All_less_Suc numeral_eq_Suc distrib_left sum.distrib sum_collapse neq_commute of_bool_not_iff) apply (simp add: algebra_simps sum_subtractf sum_collapse) apply (simp add: sum_distrib_left algebra_simps) done also have "... = 3 * (\i \ set as. c i^2 * r 2)^2 + (\ i \ set as. c i ^ 4 * (r 4 - 3 * r 2 * r 2))" by (simp add:power2_eq_square sum_distrib_left algebra_simps sum_subtractf) also have "... = 3 * (\i \ set as. c i^2)^2 * (r 2)^2 + (\i \ set as. c i ^ 4 * (r 4 - 3 * r 2 * r 2))" by (simp add:power_mult_distrib sum_distrib_right[symmetric]) also have "... \ 3 * (\i \ set as. c i^2)^2 * (r 2)^2 + (\i \ set as. c i ^ 4 * 0)" using r_four_est by (auto intro!: sum_nonpos simp add:mult_nonneg_nonpos) also have "... = 3 * (real_of_rat (F 2 as)^2) * ((real p)\<^sup>2-1)\<^sup>2" by (simp add:c_def r_two F_def of_rat_sum of_rat_power) finally have "expectation (\x. (sketch_rv x)\<^sup>2) \ 3 * (real_of_rat (F 2 as)^2) * ((real p)\<^sup>2-1)\<^sup>2" by simp thus "variance sketch_rv \ 2*(real_of_rat (F 2 as)^2) * ((real p)\<^sup>2-1)\<^sup>2" by (simp add: variance_eq, simp add:power_mult_distrib b) qed lemma space_omega_1 [simp]: "Sigma_Algebra.space \\<^sub>p = UNIV" by (simp add:\\<^sub>p_def) interpretation \: prob_space "\\<^sub>p" by (simp add:\\<^sub>p_def prob_space_measure_pmf) lemma integrable_\: fixes f :: "((nat \ nat) \ (nat list)) \ real" shows "integrable \\<^sub>p f" unfolding \\<^sub>p_def \_def by (rule integrable_measure_pmf_finite, auto intro:finite_PiE simp:set_prod_pmf) lemma sketch_rv_exp: assumes "i\<^sub>2 < s\<^sub>2" assumes "i\<^sub>1 \ {0..1}" shows "\.expectation (\\. sketch_rv (\ (i\<^sub>1, i\<^sub>2))) = real_of_rat (F 2 as) * ((real p)\<^sup>2 - 1)" proof - have "\.expectation (\\. (sketch_rv (\ (i\<^sub>1, i\<^sub>2))) :: real) = expectation sketch_rv" using integrable_\ integrable_M assms unfolding \_def \\<^sub>p_def M_def by (subst expectation_Pi_pmf_slice, auto) also have "... = (real_of_rat (F 2 as)) * ((real p)\<^sup>2 - 1)" using exp_sketch_rv by simp finally show ?thesis by simp qed lemma sketch_rv_var: assumes "i\<^sub>2 < s\<^sub>2" assumes "i\<^sub>1 \ {0..1}" shows "\.variance (\\. sketch_rv (\ (i\<^sub>1, i\<^sub>2))) \ 2 * (real_of_rat (F 2 as))\<^sup>2 * ((real p)\<^sup>2 - 1)\<^sup>2" proof - have "\.variance (\\. (sketch_rv (\ (i\<^sub>1, i\<^sub>2)) :: real)) = variance sketch_rv" using integrable_\ integrable_M assms unfolding \_def \\<^sub>p_def M_def by (subst variance_prod_pmf_slice, auto) also have "... \ 2 * (real_of_rat (F 2 as))\<^sup>2 * ((real p)\<^sup>2 - 1)\<^sup>2" using var_sketch_rv by simp finally show ?thesis by simp qed lemma mean_rv_exp: assumes "i < s\<^sub>2" shows "\.expectation (\\. mean_rv \ i) = real_of_rat (F 2 as)" proof - have a:"(real p)\<^sup>2 > 1" using p_gt_1 by simp have "\.expectation (\\. mean_rv \ i) = (\i\<^sub>1 = 0..1. \.expectation (\\. sketch_rv (\ (i\<^sub>1, i)))) / (((real p)\<^sup>2 - 1) * real s\<^sub>1)" using assms integrable_\ by (simp add:mean_rv_def) also have "... = (\i\<^sub>1 = 0..1. real_of_rat (F 2 as) * ((real p)\<^sup>2 - 1)) / (((real p)\<^sup>2 - 1) * real s\<^sub>1)" using sketch_rv_exp[OF assms] by simp also have "... = real_of_rat (F 2 as)" using s1_gt_0 a by simp finally show ?thesis by simp qed lemma mean_rv_var: assumes "i < s\<^sub>2" shows "\.variance (\\. mean_rv \ i) \ (real_of_rat (\ * F 2 as))\<^sup>2 / 3" proof - have a: "\.indep_vars (\_. borel) (\i\<^sub>1 x. sketch_rv (x (i\<^sub>1, i))) {0..1}" using assms unfolding \\<^sub>p_def \_def by (intro indep_vars_restrict_intro'[where f="fst"]) (auto simp add: restrict_dfl_def case_prod_beta lessThan_atLeast0) have p_sq_ne_1: "(real p)^2 \ 1" by (metis p_gt_1 less_numeral_extra(4) of_nat_power one_less_power pos2 semiring_char_0_class.of_nat_eq_1_iff) have s1_bound: " 6 / (real_of_rat \)\<^sup>2 \ real s\<^sub>1" unfolding s\<^sub>1_def by (metis (mono_tags, opaque_lifting) of_rat_ceiling of_rat_divide of_rat_numeral_eq of_rat_power real_nat_ceiling_ge) have "\.variance (\\. mean_rv \ i) = \.variance (\\. \i\<^sub>1 = 0..1. sketch_rv (\ (i\<^sub>1, i))) / (((real p)\<^sup>2 - 1) * real s\<^sub>1)\<^sup>2" unfolding mean_rv_def by (subst \.variance_divide[OF integrable_\], simp) also have "... = (\i\<^sub>1 = 0..1. \.variance (\\. sketch_rv (\ (i\<^sub>1, i)))) / (((real p)\<^sup>2 - 1) * real s\<^sub>1)\<^sup>2" - by (subst \.var_sum_all_indep[OF _ _ integrable_\ a]) (auto simp: \_def \\<^sub>p_def) + by (subst \.bienaymes_identity_full_indep[OF _ _ integrable_\ a]) (auto simp: \_def \\<^sub>p_def) also have "... \ (\i\<^sub>1 = 0..1. 2*(real_of_rat (F 2 as)^2) * ((real p)\<^sup>2-1)\<^sup>2) / (((real p)\<^sup>2 - 1) * real s\<^sub>1)\<^sup>2" by (rule divide_right_mono, rule sum_mono[OF sketch_rv_var[OF assms]], auto) also have "... = 2 * (real_of_rat (F 2 as)^2) / real s\<^sub>1" using p_sq_ne_1 s1_gt_0 by (subst frac_eq_eq, auto simp:power2_eq_square) also have "... \ 2 * (real_of_rat (F 2 as)^2) / (6 / (real_of_rat \)\<^sup>2)" using s1_gt_0 \_range by (intro divide_left_mono mult_pos_pos s1_bound) auto also have "... = (real_of_rat (\ * F 2 as))\<^sup>2 / 3" by (simp add:of_rat_mult algebra_simps) finally show ?thesis by simp qed lemma mean_rv_bounds: assumes "i < s\<^sub>2" shows "\.prob {\. real_of_rat \ * real_of_rat (F 2 as) < \mean_rv \ i - real_of_rat (F 2 as)\} \ 1/3" proof (cases "as = []") case True then show ?thesis using assms by (subst mean_rv_def, subst sketch_rv_def, simp add:F_def) next case False hence "F 2 as > 0" using F_gr_0 by auto hence a: "0 < real_of_rat (\ * F 2 as)" using \_range by simp have [simp]: "(\\. mean_rv \ i) \ borel_measurable \\<^sub>p" by (simp add:\_def \\<^sub>p_def) have "\.prob {\. real_of_rat \ * real_of_rat (F 2 as) < \mean_rv \ i - real_of_rat (F 2 as)\} \ \.prob {\. real_of_rat (\ * F 2 as) \ \mean_rv \ i - real_of_rat (F 2 as)\}" by (rule \.pmf_mono[OF \\<^sub>p_def], simp add:of_rat_mult) also have "... \ \.variance (\\. mean_rv \ i) / (real_of_rat (\ * F 2 as))\<^sup>2" using \.Chebyshev_inequality[where a="real_of_rat (\ * F 2 as)" and f="\\. mean_rv \ i",simplified] a prob_space_measure_pmf[where p="\"] mean_rv_exp[OF assms] integrable_\ by simp also have "... \ ((real_of_rat (\ * F 2 as))\<^sup>2/3) / (real_of_rat (\ * F 2 as))\<^sup>2" by (rule divide_right_mono, rule mean_rv_var[OF assms], simp) also have "... = 1/3" using a by force finally show ?thesis by blast qed lemma f2_alg_correct': "\

(\ in measure_pmf result. \\ - F 2 as\ \ \ * F 2 as) \ 1-of_rat \" proof - have a: "\.indep_vars (\_. borel) (\i \. mean_rv \ i) {0..2}" using s1_gt_0 unfolding \\<^sub>p_def \_def by (intro indep_vars_restrict_intro'[where f="snd"]) (auto simp: \\<^sub>p_def \_def mean_rv_def restrict_dfl_def) have b: "- 18 * ln (real_of_rat \) \ real s\<^sub>2" unfolding s\<^sub>2_def using of_nat_ceiling by auto have "1 - of_rat \ \ \.prob {\. \median s\<^sub>2 (mean_rv \) - real_of_rat (F 2 as) \ \ of_rat \ * of_rat (F 2 as)}" using \_range \.median_bound_2[OF _ a b, where \="real_of_rat \ * real_of_rat (F 2 as)" and \="real_of_rat (F 2 as)"] mean_rv_bounds by simp also have "... = \.prob {\. \real_of_rat (result_rv \) - of_rat (F 2 as) \ \ of_rat \ * of_rat (F 2 as)}" by (simp add:result_rv_def median_restrict lessThan_atLeast0 median_rat[OF s2_gt_0] mean_rv_def sketch_rv_def of_rat_divide of_rat_sum of_rat_mult of_rat_diff of_rat_power) also have "... = \.prob {\. \result_rv \ - F 2 as\ \ \ * F 2 as} " by (simp add:of_rat_less_eq of_rat_mult[symmetric] of_rat_diff[symmetric] set_eq_iff) finally have "\.prob {y. \result_rv y - F 2 as\ \ \ * F 2 as} \ 1-of_rat \ " by simp thus ?thesis by (simp add: distr \\<^sub>p_def) qed lemma f2_exact_space_usage': "AE \ in sketch . bit_count (encode_f2_state \) \ f2_space_usage (n, length as, \, \)" proof - have "p \ 2 * max n 3 + 2" by (subst p_def, rule prime_above_upper_bound) also have "... \ 2 * n + 8" by (cases "n \ 2", simp_all) finally have p_bound: "p \ 2 * n + 8" by simp have "bit_count (N\<^sub>e p) \ ereal (2 * log 2 (real p + 1) + 1)" by (rule exp_golomb_bit_count) also have "... \ ereal (2 * log 2 (2 * real n + 9) + 1)" using p_bound by simp finally have p_bit_count: "bit_count (N\<^sub>e p) \ ereal (2 * log 2 (2 * real n + 9) + 1)" by simp have a: "bit_count (encode_f2_state (s\<^sub>1, s\<^sub>2, p, y, \i\{..1} \ {..2}. sum_list (map (f2_hash p (y i)) as))) \ ereal (f2_space_usage (n, length as, \, \))" if a:"y\{..1} \ {..2} \\<^sub>E bounded_degree_polynomials (mod_ring p) 4" for y proof - have "y \ extensional ({..1} \ {..2})" using a PiE_iff by blast hence y_ext: "y \ extensional (set (List.product [0..1] [0..2]))" by (simp add:lessThan_atLeast0) have h_bit_count_aux: "bit_count (P\<^sub>e p 4 (y x)) \ ereal (4 + 4 * log 2 (8 + 2 * real n))" if b:"x \ set (List.product [0..1] [0..2])" for x proof - have "y x \ bounded_degree_polynomials (mod_ring p) 4" using b a by force hence "bit_count (P\<^sub>e p 4 (y x)) \ ereal (real 4 * (log 2 (real p) + 1))" by (rule bounded_degree_polynomial_bit_count[OF p_gt_1] ) also have "... \ ereal (real 4 * (log 2 (8 + 2 * real n) + 1) )" using p_gt_0 p_bound by simp also have "... \ ereal (4 + 4 * log 2 (8 + 2 * real n))" by simp finally show ?thesis by blast qed have h_bit_count: "bit_count ((List.product [0..1] [0..2] \\<^sub>e P\<^sub>e p 4) y) \ ereal (real s\<^sub>1 * real s\<^sub>2 * (4 + 4 * log 2 (8 + 2 * real n)))" using fun_bit_count_est[where e="P\<^sub>e p 4", OF y_ext h_bit_count_aux] by simp have sketch_bit_count_aux: "bit_count (I\<^sub>e (sum_list (map (f2_hash p (y x)) as))) \ ereal (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1))" (is "?lhs \ ?rhs") if " x \ {0..1} \ {0..2}" for x proof - have "\sum_list (map (f2_hash p (y x)) as)\ \ sum_list (map (abs \ (f2_hash p (y x))) as)" by (subst map_map[symmetric]) (rule sum_list_abs) also have "... \ sum_list (map (\_. (int p+1)) as)" by (rule sum_list_mono) (simp add:p_gt_0) also have "... = int (length as) * (int p+1)" by (simp add: sum_list_triv) also have "... \ int (length as) * (9+2*(int n))" using p_bound by (intro mult_mono, auto) finally have "\sum_list (map (f2_hash p (y x)) as)\ \ int (length as) * (9 + 2 * int n)" by simp hence "?lhs \ ereal (2 * log 2 (real_of_int (2* (int (length as) * (9 + 2 * int n)) + 1)) + 1)" by (rule int_bit_count_est) also have "... = ?rhs" by (simp add:algebra_simps) finally show "?thesis" by simp qed have "bit_count ((List.product [0..1] [0..2] \\<^sub>e I\<^sub>e) (\i\{..1} \ {..2}. sum_list (map (f2_hash p (y i)) as))) \ ereal (real (length (List.product [0..1] [0..2]))) * (ereal (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))" by (intro fun_bit_count_est) (simp_all add:extensional_def lessThan_atLeast0 sketch_bit_count_aux del:f2_hash.simps) also have "... = ereal (real s\<^sub>1 * real s\<^sub>2 * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))" by simp finally have sketch_bit_count: "bit_count ((List.product [0..1] [0..2] \\<^sub>e I\<^sub>e) (\i\{..1} \ {..2}. sum_list (map (f2_hash p (y i)) as))) \ ereal (real s\<^sub>1 * real s\<^sub>2 * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))" by simp have "bit_count (encode_f2_state (s\<^sub>1, s\<^sub>2, p, y, \i\{..1} \ {..2}. sum_list (map (f2_hash p (y i)) as))) \ bit_count (N\<^sub>e s\<^sub>1) + bit_count (N\<^sub>e s\<^sub>2) +bit_count (N\<^sub>e p) + bit_count ((List.product [0..1] [0..2] \\<^sub>e P\<^sub>e p 4) y) + bit_count ((List.product [0..1] [0..2] \\<^sub>e I\<^sub>e) (\i\{..1} \ {..2}. sum_list (map (f2_hash p (y i)) as)))" by (simp add:Let_def s\<^sub>1_def s\<^sub>2_def encode_f2_state_def dependent_bit_count add.assoc) also have "... \ ereal (2 * log 2 (real s\<^sub>1 + 1) + 1) + ereal (2 * log 2 (real s\<^sub>2 + 1) + 1) + ereal (2 * log 2 (2 * real n + 9) + 1) + (ereal (real s\<^sub>1 * real s\<^sub>2) * (4 + 4 * log 2 (8 + 2 * real n))) + (ereal (real s\<^sub>1 * real s\<^sub>2) * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1) ))" by (intro add_mono exp_golomb_bit_count p_bit_count, auto intro: h_bit_count sketch_bit_count) also have "... = ereal (f2_space_usage (n, length as, \, \))" by (simp add:distrib_left add.commute s\<^sub>1_def[symmetric] s\<^sub>2_def[symmetric] Let_def) finally show "bit_count (encode_f2_state (s\<^sub>1, s\<^sub>2, p, y, \i\{..1} \ {..2}. sum_list (map (f2_hash p (y i)) as))) \ ereal (f2_space_usage (n, length as, \, \))" by simp qed have "set_pmf \ = {..1} \ {..2} \\<^sub>E bounded_degree_polynomials (mod_ring p) 4" by (simp add: \_def set_prod_pmf) (simp add: space_def) thus ?thesis by (simp add:mean_rv_alg_sketch AE_measure_pmf_iff del:f2_space_usage.simps, metis a) qed end text \Main results of this section:\ theorem f2_alg_correct: assumes "\ \ {0<..<1}" assumes "\ > 0" assumes "set as \ {.. \ fold (\a state. state \ f2_update a) as (f2_init \ \ n) \ f2_result" shows "\

(\ in measure_pmf \. \\ - F 2 as\ \ \ * F 2 as) \ 1-of_rat \" using f2_alg_correct'[OF assms(1,2,3)] \_def by auto theorem f2_exact_space_usage: assumes "\ \ {0<..<1}" assumes "\ > 0" assumes "set as \ {.. fold (\a state. state \ f2_update a) as (f2_init \ \ n)" shows "AE \ in M. bit_count (encode_f2_state \) \ f2_space_usage (n, length as, \, \)" using f2_exact_space_usage'[OF assms(1,2,3)] by (subst (asm) sketch_def[OF assms(1,2,3)], subst M_def, simp) theorem f2_asymptotic_space_complexity: "f2_space_usage \ O[at_top \\<^sub>F at_top \\<^sub>F at_right 0 \\<^sub>F at_right 0](\ (n, m, \, \). (ln (1 / of_rat \)) / (of_rat \)\<^sup>2 * (ln (real n) + ln (real m)))" (is "_ \ O[?F](?rhs)") proof - define n_of :: "nat \ nat \ rat \ rat \ nat" where "n_of = (\(n, m, \, \). n)" define m_of :: "nat \ nat \ rat \ rat \ nat" where "m_of = (\(n, m, \, \). m)" define \_of :: "nat \ nat \ rat \ rat \ rat" where "\_of = (\(n, m, \, \). \)" define \_of :: "nat \ nat \ rat \ rat \ rat" where "\_of = (\(n, m, \, \). \)" define g where "g = (\x. (1/ (of_rat (\_of x))\<^sup>2) * (ln (1 / of_rat (\_of x))) * (ln (real (n_of x)) + ln (real (m_of x))))" have evt: "(\x. 0 < real_of_rat (\_of x) \ 0 < real_of_rat (\_of x) \ 1/real_of_rat (\_of x) \ \ \ 1/real_of_rat (\_of x) \ \ \ real (n_of x) \ n \ real (m_of x) \ m\ P x) \ eventually P ?F" (is "(\x. ?prem x \ _) \ _") for \ \ n m P apply (rule eventually_mono[where P="?prem" and Q="P"]) apply (simp add:\_of_def case_prod_beta' \_of_def n_of_def m_of_def) apply (intro eventually_conj eventually_prod1' eventually_prod2' sequentially_inf eventually_at_right_less inv_at_right_0_inf) by (auto simp add:prod_filter_eq_bot) have unit_1: "(\_. 1) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" using one_le_power by (intro landau_o.big_mono evt[where \="1"], auto simp add:power_one_over[symmetric]) have unit_2: "(\_. 1) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" by (intro landau_o.big_mono evt[where \="exp 1"]) (auto intro!:iffD2[OF ln_ge_iff] simp add:abs_ge_iff) have unit_3: "(\_. 1) \ O[?F](\x. real (n_of x))" by (intro landau_o.big_mono evt, auto) have unit_4: "(\_. 1) \ O[?F](\x. real (m_of x))" by (intro landau_o.big_mono evt, auto) have unit_5: "(\_. 1) \ O[?F](\x. ln (real (n_of x)))" by (auto intro!: landau_o.big_mono evt[where n="exp 1"]) (metis abs_ge_self linorder_not_le ln_ge_iff not_exp_le_zero order.trans) have unit_6: "(\_. 1) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" by (intro landau_sum_1 evt unit_5 iffD2[OF ln_ge_iff], auto) have unit_7: "(\_. 1) \ O[?F](\x. 1 / real_of_rat (\_of x))" by (intro landau_o.big_mono evt[where \="1"], auto) have unit_8: "(\_. 1) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1 unit_1 unit_2 unit_6) have unit_9: "(\_. 1) \ O[?F](\x. real (n_of x) * real (m_of x))" by (intro landau_o.big_mult_1 unit_3 unit_4) have " (\x. 6 * (1 / (real_of_rat (\_of x))\<^sup>2)) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" by (subst landau_o.big.cmult_in_iff, simp_all) hence l1: "(\x. real (nat \6 / (\_of x)\<^sup>2\)) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" by (intro landau_real_nat landau_rat_ceil[OF unit_1]) (simp_all add:of_rat_divide of_rat_power) have "(\x. - ( ln (real_of_rat (\_of x)))) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" by (intro landau_o.big_mono evt) (subst ln_div, auto) hence l2: "(\x. real (nat \- (18 * ln (real_of_rat (\_of x)))\)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" by (intro landau_real_nat landau_ceil[OF unit_2], simp) have l3_aux: " (\x. real (m_of x) * (18 + 4 * real (n_of x)) + 1) \ O[?F](\x. real (n_of x) * real (m_of x))" by (rule sum_in_bigo[OF _unit_9], subst mult.commute) (intro landau_o.mult sum_in_bigo, auto simp:unit_3) have "(\x. ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1)) \ O[?F](\x. ln (real (n_of x) * real (m_of x)))" apply (rule landau_ln_2[where a="2"], simp, simp) apply (rule evt[where m="2" and n="1"]) apply (metis dual_order.trans mult_left_mono mult_of_nat_commute of_nat_0_le_iff verit_prod_simplify(1)) using l3_aux by simp also have "(\x. ln (real (n_of x) * real (m_of x))) \ O[?F](\x. ln (real (n_of x)) + ln(real (m_of x)))" by (intro landau_o.big_mono evt[where m="1" and n="1"], auto simp add:ln_mult) finally have l3: "(\x. ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1)) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" using landau_o.big_trans by simp have l4: "(\x. ln (8 + 2 * real (n_of x))) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" by (intro landau_sum_1 evt[where n="2"] landau_ln_2[where a="2"] iffD2[OF ln_ge_iff]) (auto intro!: sum_in_bigo simp add:unit_3) have l5: "(\x. ln (9 + 2 * real (n_of x))) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" by (intro landau_sum_1 evt[where n="2"] landau_ln_2[where a="2"] iffD2[OF ln_ge_iff]) (auto intro!: sum_in_bigo simp add:unit_3) have l6: "(\x. ln (real (nat \6 / (\_of x)\<^sup>2\) + 1)) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1 landau_ln_3 sum_in_bigo unit_6 unit_2 l1 unit_1, simp) have l7: "(\x. ln (9 + 2 * real (n_of x))) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1' unit_1 unit_2 l5) have l8: "(\x. ln (real (nat \- (18 * ln (real_of_rat (\_of x)))\) + 1) ) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1 unit_6 landau_o.big_mult_1' unit_1 landau_ln_3 sum_in_bigo l2 unit_2) simp have l9: "(\x. 5 + 4 * ln (8 + 2 * real (n_of x)) / ln 2 + 2 * ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1) / ln 2) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" by (intro sum_in_bigo, auto simp: l3 l4 unit_6) have l10: "(\x. real (nat \6 / (\_of x)\<^sup>2\) * real (nat \- (18 * ln (real_of_rat (\_of x)))\) * (5 + 4 * ln (8 + 2 * real (n_of x)) / ln 2 + 2 * ln(real (m_of x) * (18 + 4 * real (n_of x)) + 1) / ln 2)) \ O[?F](g)" unfolding g_def by (intro landau_o.mult, auto simp: l1 l2 l9) have "f2_space_usage = (\x. f2_space_usage (n_of x, m_of x, \_of x, \_of x))" by (simp add:case_prod_beta' n_of_def \_of_def \_of_def m_of_def) also have "... \ O[?F](g)" by (auto intro!:sum_in_bigo simp:Let_def log_def l6 l7 l8 l10 unit_8) also have "... = O[?F](?rhs)" by (simp add:case_prod_beta' g_def n_of_def \_of_def \_of_def m_of_def) finally show ?thesis by simp qed end diff --git a/thys/Frequency_Moments/Frequency_Moment_k.thy b/thys/Frequency_Moments/Frequency_Moment_k.thy --- a/thys/Frequency_Moments/Frequency_Moment_k.thy +++ b/thys/Frequency_Moments/Frequency_Moment_k.thy @@ -1,1015 +1,1015 @@ section \Frequency Moment $k$\ theory Frequency_Moment_k imports Frequency_Moments Landau_Ext Lp.Lp Median_Method.Median Product_PMF_Ext begin text \This section contains a formalization of the algorithm for the $k$-th frequency moment. It is based on the algorithm described in \<^cite>\\\textsection 2.1\ in "alon1999"\.\ type_synonym fk_state = "nat \ nat \ nat \ nat \ (nat \ nat \ (nat \ nat))" fun fk_init :: "nat \ rat \ rat \ nat \ fk_state pmf" where "fk_init k \ \ n = do { let s\<^sub>1 = nat \3 * real k * n powr (1-1/real k) / (real_of_rat \)\<^sup>2\; let s\<^sub>2 = nat \-18 * ln (real_of_rat \)\; return_pmf (s\<^sub>1, s\<^sub>2, k, 0, (\_ \ {0..1} \ {0..2}. (0,0))) }" fun fk_update :: "nat \ fk_state \ fk_state pmf" where "fk_update a (s\<^sub>1, s\<^sub>2, k, m, r) = do { coins \ prod_pmf ({0..1} \ {0..2}) (\_. bernoulli_pmf (1/(real m+1))); return_pmf (s\<^sub>1, s\<^sub>2, k, m+1, \i \ {0..1} \ {0..2}. if coins i then (a,0) else ( let (x,l) = r i in (x, l + of_bool (x=a)) ) ) }" fun fk_result :: "fk_state \ rat pmf" where "fk_result (s\<^sub>1, s\<^sub>2, k, m, r) = return_pmf (median s\<^sub>2 (\i\<^sub>2 \ {0..2}. (\i\<^sub>1\{0..1}. rat_of_nat (let t = snd (r (i\<^sub>1, i\<^sub>2)) + 1 in m * (t^k - (t - 1)^k))) / (rat_of_nat s\<^sub>1)) )" lemma bernoulli_pmf_1: "bernoulli_pmf 1 = return_pmf True" by (rule pmf_eqI, simp add:indicator_def) fun fk_space_usage :: "(nat \ nat \ nat \ rat \ rat) \ real" where "fk_space_usage (k, n, m, \, \) = ( let s\<^sub>1 = nat \3*real k* (real n) powr (1-1/ real k) / (real_of_rat \)\<^sup>2 \ in let s\<^sub>2 = nat \-(18 * ln (real_of_rat \))\ in 4 + 2 * log 2 (s\<^sub>1 + 1) + 2 * log 2 (s\<^sub>2 + 1) + 2 * log 2 (real k + 1) + 2 * log 2 (real m + 1) + s\<^sub>1 * s\<^sub>2 * (2 + 2 * log 2 (real n+1) + 2 * log 2 (real m+1)))" definition encode_fk_state :: "fk_state \ bool list option" where "encode_fk_state = N\<^sub>e \\<^sub>e (\s\<^sub>1. N\<^sub>e \\<^sub>e (\s\<^sub>2. N\<^sub>e \\<^sub>e N\<^sub>e \\<^sub>e (List.product [0..1] [0..2] \\<^sub>e (N\<^sub>e \\<^sub>e N\<^sub>e))))" lemma "inj_on encode_fk_state (dom encode_fk_state)" proof - have "is_encoding encode_fk_state" by (simp add:encode_fk_state_def) (intro dependent_encoding exp_golomb_encoding fun_encoding) thus ?thesis by (rule encoding_imp_inj) qed text \This is an intermediate non-parallel form @{term "fk_update"} used only in the correctness proof.\ fun fk_update_2 :: "'a \ (nat \ 'a \ nat) \ (nat \ 'a \ nat) pmf" where "fk_update_2 a (m,x,l) = do { coin \ bernoulli_pmf (1/(real m+1)); return_pmf (m+1,if coin then (a,0) else (x, l + of_bool (x=a))) }" definition sketch where "sketch as i = (as ! i, count_list (drop (i+1) as) (as ! i))" lemma fk_update_2_distr: assumes "as \ []" shows "fold (\x s. s \ fk_update_2 x) as (return_pmf (0,0,0)) = pmf_of_set {.. (\k. return_pmf (length as, sketch as k))" using assms proof (induction as rule:rev_nonempty_induct) case (single x) show ?case using single by (simp add:bind_return_pmf pmf_of_set_singleton bernoulli_pmf_1 lessThan_def sketch_def) next case (snoc x xs) let ?h = "(\xs k. count_list (drop (Suc k) xs) (xs ! k))" let ?q = "(\xs k. (length xs, sketch xs k))" have non_empty: " {.. {}" " {.. {}" using snoc by auto have fk_update_2_eta:"fk_update_2 x = (\a. fk_update_2 x (fst a, fst (snd a), snd (snd a)))" by auto have "pmf_of_set {.. (\k. bernoulli_pmf (1 / (real (length xs) + 1)) \ (\coin. return_pmf (if coin then length xs else k))) = bernoulli_pmf (1 / (real (length xs) + 1)) \ (\y. pmf_of_set {.. (\k. return_pmf (if y then length xs else k)))" by (subst bind_commute_pmf, simp) also have "... = pmf_of_set {.. (\k. bernoulli_pmf (1 / (real (length xs) + 1)) \ (\coin. return_pmf (if coin then length xs else k))) = pmf_of_set {..x s. (s \ fk_update_2 x)) (xs@[x]) (return_pmf (0,0,0)) = (pmf_of_set {.. (\k. return_pmf (length xs, sketch xs k))) \ fk_update_2 x" using snoc by (simp add:case_prod_beta') also have "... = (pmf_of_set {.. (\k. return_pmf (length xs, sketch xs k))) \ (\(m,a,l). bernoulli_pmf (1 / (real m + 1)) \ (\coin. return_pmf (m + 1, if coin then (x, 0) else (a, (l + of_bool (a = x))))))" by (subst fk_update_2_eta, subst fk_update_2.simps, simp add:case_prod_beta') also have "... = pmf_of_set {.. (\k. bernoulli_pmf (1 / (real (length xs) + 1)) \ (\coin. return_pmf (length xs + 1, if coin then (x, 0) else (xs ! k, ?h xs k + of_bool (xs ! k = x)))))" by (subst bind_assoc_pmf, simp add: bind_return_pmf sketch_def) also have "... = pmf_of_set {.. (\k. bernoulli_pmf (1 / (real (length xs) + 1)) \ (\coin. return_pmf (if coin then length xs else k) \ (\k'. return_pmf (?q (xs@[x]) k'))))" using non_empty by (intro bind_pmf_cong, auto simp add:bind_return_pmf nth_append count_list_append sketch_def) also have "... = pmf_of_set {.. (\k. bernoulli_pmf (1 / (real (length xs) + 1)) \ (\coin. return_pmf (if coin then length xs else k))) \ (\k'. return_pmf (?q (xs@[x]) k'))" by (subst bind_assoc_pmf, subst bind_assoc_pmf, simp) also have "... = pmf_of_set {.. (\k'. return_pmf (?q (xs@[x]) k'))" by (subst b, simp) finally show ?case by simp qed context fixes \ \ :: rat fixes n k :: nat fixes as assumes k_ge_1: "k \ 1" assumes \_range: "\ \ {0<..<1}" assumes \_range: "\ > 0" assumes as_range: "set as \ {..1 where "s\<^sub>1 = nat \3 * real k * (real n) powr (1-1/real k) / (real_of_rat \)\<^sup>2\" definition s\<^sub>2 where "s\<^sub>2 = nat \-(18 * ln (real_of_rat \))\" definition "M\<^sub>1 = {(u, v). v < count_list as u}" definition "\\<^sub>1 = measure_pmf (pmf_of_set M\<^sub>1)" definition "M\<^sub>2 = prod_pmf ({0..1} \ {0..2}) (\_. pmf_of_set M\<^sub>1)" definition "\\<^sub>2 = measure_pmf M\<^sub>2" interpretation prob_space "\\<^sub>1" unfolding \\<^sub>1_def by (simp add:prob_space_measure_pmf) interpretation \\<^sub>2:prob_space "\\<^sub>2" unfolding \\<^sub>2_def by (simp add:prob_space_measure_pmf) lemma split_space: "(\a\M\<^sub>1. f (snd a)) = (\u \ set as. (\v \{0..u. {u} \ {v. v < count_list as u})" have a: "inj_on snd (A x)" for x by (simp add:A_def inj_on_def) have "\u v. u < count_list as v \ v \ set as" by (subst count_list_gr_1, force) hence "M\<^sub>1 = \ (A ` set as)" by (auto simp add:set_eq_iff A_def M\<^sub>1_def) hence "(\a\M\<^sub>1. f (snd a)) = sum (f \ snd) (\ (A ` set as))" by (intro sum.cong, auto) also have "... = sum (\x. sum (f \ snd) (A x)) (set as)" by (rule sum.UNION_disjoint, simp, simp add:A_def, simp add:A_def, blast) also have "... = sum (\x. sum f (snd ` A x)) (set as)" by (intro sum.cong, auto simp add:sum.reindex[OF a]) also have "... = (\u \ set as. (\v \{0.. []" shows fin_space: "finite M\<^sub>1" and non_empty_space: "M\<^sub>1 \ {}" and card_space: "card M\<^sub>1 = length as" proof - have "M\<^sub>1 \ set as \ {k. k < length as}" proof (rule subsetI) fix x assume a:"x \ M\<^sub>1" have "fst x \ set as" using a by (simp add:case_prod_beta count_list_gr_1 M\<^sub>1_def) moreover have "snd x < length as" using a count_le_length order_less_le_trans by (simp add:case_prod_beta M\<^sub>1_def) fast ultimately show "x \ set as \ {k. k < length as}" by (simp add:mem_Times_iff) qed thus fin_space: "finite M\<^sub>1" using finite_subset by blast have "(as ! 0, 0) \ M\<^sub>1" using assms(1) unfolding M\<^sub>1_def by (simp, metis count_list_gr_1 gr0I length_greater_0_conv not_one_le_zero nth_mem) thus "M\<^sub>1 \ {}" by blast show "card M\<^sub>1 = length as" using fin_space split_space[where f="\_. (1::nat)"] by (simp add:sum_count_set[where X="set as" and xs="as", simplified]) qed lemma assumes "as \ []" shows integrable_1: "integrable \\<^sub>1 (f :: _ \ real)" and integrable_2: "integrable \\<^sub>2 (g :: _ \ real)" proof - have fin_omega: "finite (set_pmf (pmf_of_set M\<^sub>1))" using fin_space[OF assms] non_empty_space[OF assms] by auto thus "integrable \\<^sub>1 f" unfolding \\<^sub>1_def by (rule integrable_measure_pmf_finite) have "finite (set_pmf M\<^sub>2)" unfolding M\<^sub>2_def using fin_omega by (subst set_prod_pmf) (auto intro:finite_PiE) thus "integrable \\<^sub>2 g" unfolding \\<^sub>2_def by (intro integrable_measure_pmf_finite) qed lemma sketch_distr: assumes "as \ []" shows "pmf_of_set {.. (\k. return_pmf (sketch as k)) = pmf_of_set M\<^sub>1" proof - have "x < y \ y < length as \ count_list (drop (y+1) as) (as ! y) < count_list (drop (x+1) as) (as ! y)" for x y by (intro count_list_lt_suffix suffix_drop_drop, simp_all) (metis Suc_diff_Suc diff_Suc_Suc diff_add_inverse lessI less_natE) hence a1: "inj_on (sketch as) {k. k < length as}" unfolding sketch_def by (intro inj_onI) (metis Pair_inject mem_Collect_eq nat_neq_iff) have "x < length as \ count_list (drop (x+1) as) (as ! x) < count_list as (as ! x)" for x by (rule count_list_lt_suffix, auto simp add:suffix_drop) hence "sketch as ` {k. k < length as} \ M\<^sub>1" by (intro image_subsetI, simp add:sketch_def M\<^sub>1_def) moreover have "card M\<^sub>1 \ card (sketch as ` {k. k < length as})" by (simp add: card_space[OF assms(1)] card_image[OF a1]) ultimately have "sketch as ` {k. k < length as} = M\<^sub>1" using fin_space[OF assms(1)] by (intro card_seteq, simp_all) hence "bij_betw (sketch as) {k. k < length as} M\<^sub>1" using a1 by (simp add:bij_betw_def) hence "map_pmf (sketch as) (pmf_of_set {k. k < length as}) = pmf_of_set M\<^sub>1" using assms by (intro map_pmf_of_set_bij_betw, auto) thus ?thesis by (simp add: sketch_def map_pmf_def lessThan_def) qed lemma fk_update_distr: "fold (\x s. s \ fk_update x) as (fk_init k \ \ n) = prod_pmf ({0..1} \ {0..2}) (\_. fold (\x s. s \ fk_update_2 x) as (return_pmf (0,0,0))) \ (\x. return_pmf (s\<^sub>1,s\<^sub>2,k, length as, \i\{0..1}\{0..2}. snd (x i)))" proof (induction as rule:rev_induct) case Nil then show ?case by (auto simp:Let_def s\<^sub>1_def[symmetric] s\<^sub>2_def[symmetric] bind_return_pmf) next case (snoc x xs) have fk_update_2_eta:"fk_update_2 x = (\a. fk_update_2 x (fst a, fst (snd a), snd (snd a)))" by auto have a: "fk_update x (s\<^sub>1, s\<^sub>2, k, length xs, \i\{0..1} \ {0..2}. snd (f i)) = prod_pmf ({0..1} \ {0..2}) (\i. fk_update_2 x (f i)) \ (\a. return_pmf (s\<^sub>1,s\<^sub>2, k, Suc (length xs), \i\{0..1} \ {0..2}. snd (a i)))" if b: "f \ set_pmf (prod_pmf ({0..1} \ {0..2}) (\_. fold (\a s. s \ fk_update_2 a) xs (return_pmf (0, 0, 0))))" for f proof - have c:"fst (f i) = length xs" if d:"i \ {0..1} \{0..2}" for i proof (cases "xs = []") case True then show ?thesis using b d by (simp add: set_Pi_pmf) next case False hence "{.. {}" by force thus ?thesis using b d by (simp add:set_Pi_pmf fk_update_2_distr[OF False] PiE_dflt_def) force qed show ?thesis apply (subst fk_update_2_eta, subst fk_update_2.simps, simp) apply (simp add: Pi_pmf_bind_return[where d'="undefined"] bind_assoc_pmf) apply (rule bind_pmf_cong, simp add:c cong:Pi_pmf_cong) by (auto simp add:bind_return_pmf case_prod_beta) qed have "fold (\x s. s \ fk_update x) (xs @ [x]) (fk_init k \ \ n) = prod_pmf ({0..1} \ {0..2}) (\_. fold (\x s. s \ fk_update_2 x) xs (return_pmf (0,0,0))) \ (\\. return_pmf (s\<^sub>1,s\<^sub>2,k, length xs, \i\{0..1}\{0..2}. snd (\ i)) \ fk_update x)" using snoc by (simp add:restrict_def bind_assoc_pmf del:fk_init.simps) also have "... = prod_pmf ({0..1} \ {0..2}) (\_. fold (\a s. s \ fk_update_2 a) xs (return_pmf (0, 0, 0))) \ (\f. prod_pmf ({0..1} \ {0..2}) (\i. fk_update_2 x (f i)) \ (\a. return_pmf (s\<^sub>1, s\<^sub>2, k, Suc (length xs), \i\{0..1} \ {0..2}. snd (a i))))" using a by (intro bind_pmf_cong, simp_all add:bind_return_pmf del:fk_update.simps) also have "... = prod_pmf ({0..1} \ {0..2}) (\_. fold (\a s. s \ fk_update_2 a) xs (return_pmf (0, 0, 0))) \ (\f. prod_pmf ({0..1} \ {0..2}) (\i. fk_update_2 x (f i))) \ (\a. return_pmf (s\<^sub>1, s\<^sub>2, k, Suc (length xs), \i\{0..1} \ {0..2}. snd (a i)))" by (simp add:bind_assoc_pmf) also have "... = (prod_pmf ({0..1} \ {0..2}) (\_. fold (\a s. s \ fk_update_2 a) (xs@[x]) (return_pmf (0,0,0))) \ (\a. return_pmf (s\<^sub>1,s\<^sub>2,k, length (xs@[x]), \i\{0..1}\{0..2}. snd (a i))))" by (simp, subst Pi_pmf_bind, auto) finally show ?case by blast qed lemma power_diff_sum: fixes a b :: "'a :: {comm_ring_1,power}" assumes "k > 0" shows "a^k - b^k = (a-b) * (\i = 0.. insert m {Suc m..i. a * (a^i * b^(k-1-i))) {0..i. b * (a^i * b^(k-1-i))) {0..i. (a^i * b^(k-i))) \ (\i. i+1)) {0..i. (a^i * (b^(1+(k-1-i))))) {0..i. (a^i * b^(k-i))) \ (\i. i+1)) {0..i. (a^i * b^(k-i))) {0..i. (a^i * b^(k-i))) (insert k {1..i. (a^i * b^(k-i))) (insert 0 {Suc 0.. 0" assumes "(a :: real) \ b" assumes "b \ 0" shows "a^k -b^k \ (a-b) * k * a^(k-1)" proof - have " \i. i < k \ a ^ i * b ^ (k - 1 - i) \ a ^ i * a ^ (k-1-i)" using assms by (intro mult_left_mono power_mono) auto also have "\i. i < k \ a ^ i * a ^ (k - 1 - i) = a ^ (k - Suc 0)" using assms(1) by (subst power_add[symmetric], simp) finally have a: "\i. i < k \ a ^ i * b ^ (k - 1 - i) \ a ^ (k - Suc 0)" by blast have "a^k - b^k = (a-b) * (\i = 0.. (a-b) * (\i = 0..Specialization of the Hoelder inquality for sums.\ lemma Holder_inequality_sum: assumes "p > (0::real)" "q > 0" "1/p + 1/q = 1" assumes "finite A" shows "\\x\A. f x * g x\ \ (\x\A. \f x\ powr p) powr (1/p) * (\x\A. \g x\ powr q) powr (1/q)" proof - have "\LINT x|count_space A. f x * g x\ \ (LINT x|count_space A. \f x\ powr p) powr (1 / p) * (LINT x|count_space A. \g x\ powr q) powr (1 / q)" using assms integrable_count_space by (intro Lp.Holder_inequality, auto) thus ?thesis using assms by (simp add: lebesgue_integral_count_space_finite[symmetric]) qed lemma real_count_list_pos: assumes "x \ set as" shows "real (count_list as x) > 0" using count_list_gr_1 assms by force lemma fk_estimate: assumes "as \ []" shows "length as * of_rat (F (2*k-1) as) \ n powr (1 - 1 / real k) * (of_rat (F k as))^2" (is "?lhs \ ?rhs") proof (cases "k \ 2") case True define M where "M = Max (count_list as ` set as)" have "M \ count_list as ` set as" unfolding M_def using assms by (intro Max_in, auto) then obtain m where m_in: "m \ set as" and m_def: "M = count_list as m" by blast have a: "real M > 0" using m_in count_list_gr_1 by (simp add:m_def, force) have b: "2*k-1 = (k-1) + k" by simp have " 0 < real (count_list as m)" using m_in count_list_gr_1 by force hence "M powr k = real (count_list as m) ^ k" by (simp add: powr_realpow m_def) also have "... \ (\x\set as. real (count_list as x) ^ k)" using m_in by (intro member_le_sum, simp_all) also have "... \ real_of_rat (F k as)" by (simp add:F_def of_rat_sum of_rat_power) finally have d: "M powr k \ real_of_rat (F k as)" by simp have e: "0 \ real_of_rat (F k as)" using F_gr_0[OF assms(1)] by (simp add: order_le_less) have "real (k - 1) / real k + 1 = real (k - 1) / real k + real k / real k" using assms True by simp also have "... = real (2 * k - 1) / real k" using b by (subst add_divide_distrib[symmetric], force) finally have f: "real (k - 1) / real k + 1 = real (2 * k - 1) / real k" by blast have "real_of_rat (F (2*k-1) as) = (\x\set as. real (count_list as x) ^ (k - 1) * real (count_list as x) ^ k)" using b by (simp add:F_def of_rat_sum sum_distrib_left of_rat_mult power_add of_rat_power) also have "... \ (\x\set as. real M ^ (k - 1) * real (count_list as x) ^ k)" by (intro sum_mono mult_right_mono power_mono of_nat_mono) (auto simp:M_def) also have "... = M powr (k-1) * of_rat (F k as)" using a by (simp add:sum_distrib_left F_def of_rat_mult of_rat_sum of_rat_power powr_realpow) also have "... = (M powr k) powr (real (k - 1) / real k) * of_rat (F k as) powr 1" using e by (simp add:powr_powr) also have "... \ (real_of_rat (F k as)) powr ((k-1)/k) * (real_of_rat (F k as) powr 1)" using d by (intro mult_right_mono powr_mono2, auto) also have "... = (real_of_rat (F k as)) powr ((2*k-1) / k)" by (subst powr_add[symmetric], subst f, simp) finally have a: "real_of_rat (F (2*k-1) as) \ (real_of_rat (F k as)) powr ((2*k-1) / k)" by blast have g: "card (set as) \ n" using card_mono[OF _ as_range] by simp have "length as = abs (sum (\x. real (count_list as x)) (set as))" by (subst of_nat_sum[symmetric], simp add: sum_count_set) also have "... \ card (set as) powr ((k-Suc 0)/k) * (sum (\x. \real (count_list as x)\ powr k) (set as)) powr (1/k)" using assms True by (intro Holder_inequality_sum[where p="k/(k-1)" and q="k" and f="\_.1", simplified]) (auto simp add:algebra_simps add_divide_distrib[symmetric]) also have "... = (card (set as)) powr ((k-1) / real k) * of_rat (F k as) powr (1/ k)" using real_count_list_pos by (simp add:F_def of_rat_sum of_rat_power powr_realpow) also have "... = (card (set as)) powr (1 - 1 / real k) * of_rat (F k as) powr (1/ k)" using k_ge_1 by (subst of_nat_diff[OF k_ge_1], subst diff_divide_distrib, simp) also have "... \ n powr (1 - 1 / real k) * of_rat (F k as) powr (1/ k)" using k_ge_1 g by (intro mult_right_mono powr_mono2, auto) finally have h: "length as \ n powr (1 - 1 / real k) * of_rat (F k as) powr (1/real k)" by blast have i:"1 / real k + real (2 * k - 1) / real k = real 2" using True by (subst add_divide_distrib[symmetric], simp_all add:of_nat_diff) have "?lhs \ n powr (1 - 1/k) * of_rat (F k as) powr (1/k) * (of_rat (F k as)) powr ((2*k-1) / k)" using a h F_ge_0 by (intro mult_mono mult_nonneg_nonneg, auto) also have "... = ?rhs" using i F_gr_0[OF assms] by (simp add:powr_add[symmetric] powr_realpow[symmetric]) finally show ?thesis by blast next case False have "n = 0 \ False" using as_range assms by auto hence "n > 0" by auto moreover have "k = 1" using assms k_ge_1 False by linarith moreover have "length as = real_of_rat (F (Suc 0) as)" by (simp add:F_def sum_count_set of_nat_sum[symmetric] del:of_nat_sum) ultimately show ?thesis by (simp add:power2_eq_square) qed definition result where "result a = of_nat (length as) * of_nat (Suc (snd a) ^ k - snd a ^ k)" lemma result_exp_1: assumes "as \ []" shows "expectation result = real_of_rat (F k as)" proof - have "expectation result = (\a\M\<^sub>1. result a * pmf (pmf_of_set M\<^sub>1) a)" unfolding \\<^sub>1_def using non_empty_space assms fin_space by (subst integral_measure_pmf_real) auto also have "... = (\a\M\<^sub>1. result a / real (length as))" using non_empty_space assms fin_space card_space by simp also have "... = (\a\M\<^sub>1. real (Suc (snd a) ^ k - snd a ^ k))" using assms by (simp add:result_def) also have "... = (\u\set as. \v = 0..u\set as. real (count_list as u)^k)" using k_ge_1 by (subst sum_Suc_diff') (auto simp add:zero_power) also have "... = of_rat (F k as)" by (simp add:F_def of_rat_sum of_rat_power) finally show ?thesis by simp qed lemma result_var_1: assumes "as \ []" shows "variance result \ (of_rat (F k as))\<^sup>2 * k * n powr (1 - 1 / real k)" proof - have k_gt_0: "k > 0" using k_ge_1 by linarith have c:"real (Suc v ^ k) - real (v ^ k) \ k * real (count_list as a) ^ (k - Suc 0)" if c_1: "v < count_list as a" for a v proof - have "real (Suc v ^ k) - real (v ^ k) \ (real (v+1) - real v) * k * (1 + real v) ^ (k - Suc 0)" using k_gt_0 power_diff_est[where a="Suc v" and b="v"] by simp moreover have "(real (v+1) - real v) = 1" by auto ultimately have "real (Suc v ^ k) - real (v ^ k) \ k * (1 + real v) ^ (k - Suc 0)" by auto also have "... \ k * real (count_list as a) ^ (k- Suc 0)" using c_1 by (intro mult_left_mono power_mono, auto) finally show ?thesis by blast qed have "length as * (\a\ M\<^sub>1. (real (Suc (snd a) ^ k - (snd a) ^ k))\<^sup>2) = length as * (\a\ set as. (\v \ {0.. length as * (\a\ set as. (\v \ {0..a\ set as. real (count_list as a) ^ (k-1) * (\v \ {0..a\ set as. real (count_list as a ^ (2*k-1)))" using assms k_ge_1 by (subst sum_Suc_diff', auto simp: zero_power[OF k_gt_0] mult_2 power_add[symmetric]) also have "... = k * (length as * of_rat (F (2*k-1) as))" by (simp add:sum_distrib_left[symmetric] F_def of_rat_sum of_rat_power) also have "... \ k * (of_rat (F k as)^2 * n powr (1 - 1 / real k))" using fk_estimate[OF assms] by (intro mult_left_mono) (auto simp: mult.commute) finally have b: "real (length as) * (\a\ M\<^sub>1. (real (Suc (snd a) ^ k - (snd a) ^ k))\<^sup>2) \ k * ((of_rat (F k as))\<^sup>2 * n powr (1 - 1 / real k))" by blast have "expectation (\\. (result \ :: real)^2) - (expectation result)^2 \ expectation (\\. result \^2)" by simp also have "... = (\a\M\<^sub>1. (length as * real (Suc (snd a) ^ k - snd a ^ k))\<^sup>2 * pmf (pmf_of_set M\<^sub>1) a)" using fin_space non_empty_space assms unfolding \\<^sub>1_def result_def by (subst integral_measure_pmf_real[where A="M\<^sub>1"], auto) also have "... = (\a\M\<^sub>1. length as * (real (Suc (snd a) ^ k - snd a ^ k))\<^sup>2)" using assms non_empty_space fin_space by (subst pmf_of_set) (simp_all add:card_space power_mult_distrib power2_eq_square ac_simps) also have "... \ k * ((of_rat (F k as))\<^sup>2 * n powr (1 - 1 / real k))" using b by (simp add:sum_distrib_left[symmetric]) also have "... = of_rat (F k as)^2 * k * n powr (1 - 1 / real k)" by (simp add:ac_simps) finally have "expectation (\\. result \^2) - (expectation result)^2 \ of_rat (F k as)^2 * k * n powr (1 - 1 / real k)" by blast thus ?thesis using integrable_1[OF assms] by (simp add:variance_eq) qed theorem fk_alg_sketch: assumes "as \ []" shows "fold (\a state. state \ fk_update a) as (fk_init k \ \ n) = map_pmf (\x. (s\<^sub>1,s\<^sub>2,k,length as, x)) M\<^sub>2" (is "?lhs = ?rhs") proof - have "?lhs = prod_pmf ({0..1} \ {0..2}) (\_. fold (\x s. s \ fk_update_2 x) as (return_pmf (0, 0, 0))) \ (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, \i\{0..1} \ {0..2}. snd (x i)))" by (subst fk_update_distr, simp) also have "... = prod_pmf ({0..1} \ {0..2}) (\_. pmf_of_set {.. (\k. return_pmf (length as, sketch as k))) \ (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, \i\{0..1} \ {0..2}. snd (x i)))" by (subst fk_update_2_distr[OF assms], simp) also have "... = prod_pmf ({0..1} \ {0..2}) (\_. pmf_of_set {.. (\k. return_pmf (sketch as k)) \ (\s. return_pmf (length as, s))) \ (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, \i\{0..1} \ {0..2}. snd (x i)))" by (subst bind_assoc_pmf, subst bind_return_pmf, simp) also have "... = prod_pmf ({0..1} \ {0..2}) (\_. pmf_of_set {.. (\k. return_pmf (sketch as k))) \ (\x. return_pmf (\i \ {0..1} \ {0..2}. (length as, x i))) \ (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, \i\{0..1} \ {0..2}. snd (x i)))" by (subst Pi_pmf_bind_return[where d'="undefined"], simp, simp add:restrict_def) also have "... = prod_pmf ({0..1} \ {0..2}) (\_. pmf_of_set {.. (\k. return_pmf (sketch as k))) \ (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, restrict x ({0..1} \ {0..2})))" by (subst bind_assoc_pmf, simp add:bind_return_pmf cong:restrict_cong) also have "... = M\<^sub>2 \ (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, restrict x ({0..1} \ {0..2})))" by (subst sketch_distr[OF assms], simp add:M\<^sub>2_def) also have "... = M\<^sub>2 \ (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, x))" by (rule bind_pmf_cong, auto simp add:PiE_dflt_def M\<^sub>2_def set_Pi_pmf) also have "... = ?rhs" by (simp add:map_pmf_def) finally show ?thesis by simp qed definition mean_rv where "mean_rv \ i\<^sub>2 = (\i\<^sub>1 = 0..1. result (\ (i\<^sub>1, i\<^sub>2))) / of_nat s\<^sub>1" definition median_rv where "median_rv \ = median s\<^sub>2 (\i\<^sub>2. mean_rv \ i\<^sub>2)" lemma fk_alg_correct': defines "M \ fold (\a state. state \ fk_update a) as (fk_init k \ \ n) \ fk_result" shows "\

(\ in measure_pmf M. \\ - F k as\ \ \ * F k as) \ 1 - of_rat \" proof (cases "as = []") case True have a: "nat \- (18 * ln (real_of_rat \))\ > 0" using \_range by simp show ?thesis using True \_range by (simp add:F_def M_def bind_return_pmf median_const[OF a] Let_def) next case False have "set as \ {}" using assms False by blast hence n_nonzero: "n > 0" using as_range by fastforce have fk_nonzero: "F k as > 0" using F_gr_0[OF False] by simp have s1_nonzero: "s\<^sub>1 > 0" using \_range k_ge_1 n_nonzero by (simp add:s\<^sub>1_def) have s2_nonzero: "s\<^sub>2 > 0" using \_range by (simp add:s\<^sub>2_def) have real_of_rat_mean_rv: "\x i. mean_rv x = (\i. real_of_rat (mean_rv x i))" by (rule ext, simp add:of_rat_divide of_rat_sum of_rat_mult result_def mean_rv_def) have real_of_rat_median_rv: "\x. median_rv x = real_of_rat (median_rv x)" unfolding median_rv_def using s2_nonzero by (subst real_of_rat_mean_rv, simp add: median_rat median_restrict) have space_\\<^sub>2: "space \\<^sub>2 = UNIV" by (simp add:\\<^sub>2_def) have fk_result_eta: "fk_result = (\(x,y,z,u,v). fk_result (x,y,z,u,v))" by auto have a:"fold (\x state. state \ fk_update x) as (fk_init k \ \ n) = map_pmf (\x. (s\<^sub>1,s\<^sub>2,k,length as, x)) M\<^sub>2" by (subst fk_alg_sketch[OF False]) (simp add:s\<^sub>1_def[symmetric] s\<^sub>2_def[symmetric]) have "M = map_pmf (\x. (s\<^sub>1, s\<^sub>2, k, length as, x)) M\<^sub>2 \ fk_result" by (subst M_def, subst a, simp) also have "... = M\<^sub>2 \ return_pmf \ median_rv" by (subst fk_result_eta) (auto simp add:map_pmf_def bind_assoc_pmf bind_return_pmf median_rv_def mean_rv_def comp_def M\<^sub>1_def result_def median_restrict) finally have b: "M = M\<^sub>2 \ return_pmf \ median_rv" by simp have result_exp: "i\<^sub>1 < s\<^sub>1 \ i\<^sub>2 < s\<^sub>2 \ \\<^sub>2.expectation (\x. result (x (i\<^sub>1, i\<^sub>2))) = real_of_rat (F k as)" for i\<^sub>1 i\<^sub>2 unfolding \\<^sub>2_def M\<^sub>2_def using integrable_1[OF False] result_exp_1[OF False] by (subst expectation_Pi_pmf_slice, auto simp:\\<^sub>1_def) have result_var: "\\<^sub>2.variance (\\. result (\ (i\<^sub>1, i\<^sub>2))) \ of_rat (\ * F k as)^2 * real s\<^sub>1 / 3" if result_var_assms: "i\<^sub>1 < s\<^sub>1" "i\<^sub>2 < s\<^sub>2" for i\<^sub>1 i\<^sub>2 proof - have "3 * real k * n powr (1 - 1 / real k) = (of_rat \)\<^sup>2 * (3 * real k * n powr (1 - 1 / real k) / (of_rat \)\<^sup>2)" using \_range by simp also have "... \ (real_of_rat \)\<^sup>2 * (real s\<^sub>1)" unfolding s\<^sub>1_def by (intro mult_mono of_nat_ceiling, simp_all) finally have f2_var_2: "3 * real k * n powr (1 - 1 / real k) \ (of_rat \)\<^sup>2 * (real s\<^sub>1)" by blast have "\\<^sub>2.variance (\\. result (\ (i\<^sub>1, i\<^sub>2)) :: real) = variance result" using result_var_assms integrable_1[OF False] unfolding \\<^sub>2_def M\<^sub>2_def \\<^sub>1_def by (subst variance_prod_pmf_slice, auto) also have "... \ of_rat (F k as)^2 * real k * n powr (1 - 1 / real k)" using assms False result_var_1 \\<^sub>1_def by simp also have "... = of_rat (F k as)^2 * (real k * n powr (1 - 1 / real k))" by (simp add:ac_simps) also have "... \ of_rat (F k as)^2 * (of_rat \^2 * (real s\<^sub>1 / 3))" using f2_var_2 by (intro mult_left_mono, auto) also have "... = of_rat (F k as * \)^2 * (real s\<^sub>1 / 3)" by (simp add: of_rat_mult power_mult_distrib) also have "... = of_rat (\ * F k as)^2 * real s\<^sub>1 / 3" by (simp add:ac_simps) finally show ?thesis by simp qed have mean_rv_exp: "\\<^sub>2.expectation (\\. mean_rv \ i) = real_of_rat (F k as)" if mean_rv_exp_assms: "i < s\<^sub>2" for i proof - have "\\<^sub>2.expectation (\\. mean_rv \ i) = \\<^sub>2.expectation (\\. \n = 0..1. result (\ (n, i)) / real s\<^sub>1)" by (simp add:mean_rv_def sum_divide_distrib) also have "... = (\n = 0..1. \\<^sub>2.expectation (\\. result (\ (n, i))) / real s\<^sub>1)" using integrable_2[OF False] by (subst Bochner_Integration.integral_sum, auto) also have "... = of_rat (F k as)" using s1_nonzero mean_rv_exp_assms by (simp add:result_exp) finally show ?thesis by simp qed have mean_rv_var: "\\<^sub>2.variance (\\. mean_rv \ i) \ real_of_rat (\ * F k as)^2/3" if mean_rv_var_assms: "i < s\<^sub>2" for i proof - have a:"\\<^sub>2.indep_vars (\_. borel) (\n x. result (x (n, i)) / real s\<^sub>1) {0..1}" unfolding \\<^sub>2_def M\<^sub>2_def using mean_rv_var_assms by (intro indep_vars_restrict_intro'[where f="fst"], simp, simp add:restrict_dfl_def, simp, simp) have "\\<^sub>2.variance (\\. mean_rv \ i) = \\<^sub>2.variance (\\. \j = 0..1. result (\ (j, i)) / real s\<^sub>1)" by (simp add:mean_rv_def sum_divide_distrib) also have "... = (\j = 0..1. \\<^sub>2.variance (\\. result (\ (j, i)) / real s\<^sub>1))" using a integrable_2[OF False] - by (subst \\<^sub>2.var_sum_all_indep, auto simp add:\\<^sub>2_def) + by (subst \\<^sub>2.bienaymes_identity_full_indep, auto simp add:\\<^sub>2_def) also have "... = (\j = 0..1. \\<^sub>2.variance (\\. result (\ (j, i))) / real s\<^sub>1^2)" using integrable_2[OF False] by (subst \\<^sub>2.variance_divide, auto) also have "... \ (\j = 0..1. ((real_of_rat (\ * F k as))\<^sup>2 * real s\<^sub>1 / 3) / (real s\<^sub>1^2))" using result_var[OF _ mean_rv_var_assms] by (intro sum_mono divide_right_mono, auto) also have "... = real_of_rat (\ * F k as)^2/3" using s1_nonzero by (simp add:algebra_simps power2_eq_square) finally show ?thesis by simp qed have "\\<^sub>2.prob {y. of_rat (\ * F k as) < \mean_rv y i - real_of_rat (F k as)\} \ 1/3" (is "?lhs \ _") if c_assms: "i < s\<^sub>2" for i proof - define a where "a = real_of_rat (\ * F k as)" have c: "0 < a" unfolding a_def using assms \_range fk_nonzero by (metis zero_less_of_rat_iff mult_pos_pos) have "?lhs \ \\<^sub>2.prob {y \ space \\<^sub>2. a \ \mean_rv y i - \\<^sub>2.expectation (\\. mean_rv \ i)\}" by (intro \\<^sub>2.pmf_mono[OF \\<^sub>2_def], simp add:a_def mean_rv_exp[OF c_assms] space_\\<^sub>2) also have "... \ \\<^sub>2.variance (\\. mean_rv \ i)/a^2" by (intro \\<^sub>2.Chebyshev_inequality integrable_2 c False) (simp add:\\<^sub>2_def) also have "... \ 1/3" using c using mean_rv_var[OF c_assms] by (simp add:algebra_simps, simp add:a_def) finally show ?thesis by blast qed moreover have "\\<^sub>2.indep_vars (\_. borel) (\i \. mean_rv \ i) {0..2}" using s1_nonzero unfolding \\<^sub>2_def M\<^sub>2_def by (intro indep_vars_restrict_intro'[where f="snd"] finite_cartesian_product) (simp_all add:mean_rv_def restrict_dfl_def space_\\<^sub>2) moreover have " - (18 * ln (real_of_rat \)) \ real s\<^sub>2" by (simp add:s\<^sub>2_def, linarith) ultimately have "1 - of_rat \ \ \\<^sub>2.prob {y \ space \\<^sub>2. \median s\<^sub>2 (mean_rv y) - real_of_rat (F k as)\ \ of_rat (\ * F k as)}" using \_range by (intro \\<^sub>2.median_bound_2, simp_all add:space_\\<^sub>2) also have "... = \\<^sub>2.prob {y. \median_rv y - real_of_rat (F k as)\ \ real_of_rat (\ * F k as)}" by (simp add:median_rv_def space_\\<^sub>2) also have "... = \\<^sub>2.prob {y. \median_rv y - F k as\ \ \ * F k as}" by (simp add:real_of_rat_median_rv of_rat_less_eq flip: of_rat_diff) also have "... = \

(\ in measure_pmf M. \\ - F k as\ \ \ * F k as)" by (simp add: b comp_def map_pmf_def[symmetric] \\<^sub>2_def) finally show ?thesis by simp qed lemma fk_exact_space_usage': defines "M \ fold (\a state. state \ fk_update a) as (fk_init k \ \ n)" shows "AE \ in M. bit_count (encode_fk_state \) \ fk_space_usage (k, n, length as, \, \)" (is "AE \ in M. (_ \ ?rhs)") proof - define H where "H = (if as = [] then return_pmf (\i\ {0..1}\{0..2}. (0,0)) else M\<^sub>2)" have a:"M = map_pmf (\x.(s\<^sub>1,s\<^sub>2,k,length as, x)) H" proof (cases "as \ []") case True then show ?thesis unfolding M_def fk_alg_sketch[OF True] H_def by (simp add:M\<^sub>2_def) next case False then show ?thesis by (simp add:H_def M_def s\<^sub>1_def[symmetric] Let_def s\<^sub>2_def[symmetric] map_pmf_def bind_return_pmf) qed have "bit_count (encode_fk_state (s\<^sub>1, s\<^sub>2, k, length as, y)) \ ?rhs" if b:"y \ set_pmf H" for y proof - have b0:" as \ [] \ y \ {0..1} \ {0..2} \\<^sub>E M\<^sub>1" using b non_empty_space fin_space by (simp add:H_def M\<^sub>2_def set_prod_pmf) have "bit_count ((N\<^sub>e \\<^sub>e N\<^sub>e) (y x)) \ ereal (2 * log 2 (real n + 1) + 1) + ereal (2 * log 2 (real (length as) + 1) + 1)" (is "_ \ ?rhs1") if b1_assms: "x \ {0..1}\{0..2}" for x proof - have "fst (y x) \ n" proof (cases "as = []") case True then show ?thesis using b b1_assms by (simp add:H_def) next case False hence "1 \ count_list as (fst (y x))" using b0 b1_assms by (simp add:PiE_iff case_prod_beta M\<^sub>1_def, fastforce) hence "fst (y x) \ set as" using count_list_gr_1 by metis then show ?thesis by (meson lessThan_iff less_imp_le_nat subsetD as_range) qed moreover have "snd (y x) \ length as" proof (cases "as = []") case True then show ?thesis using b b1_assms by (simp add:H_def) next case False hence "(y x) \ M\<^sub>1" using b0 b1_assms by auto hence "snd (y x) \ count_list as (fst (y x))" by (simp add:M\<^sub>1_def case_prod_beta) then show ?thesis using count_le_length by (metis order_trans) qed ultimately have "bit_count (N\<^sub>e (fst (y x))) + bit_count (N\<^sub>e (snd (y x))) \ ?rhs1" using exp_golomb_bit_count_est by (intro add_mono, auto) thus ?thesis by (subst dependent_bit_count_2, simp) qed moreover have "y \ extensional ({0..1} \ {0..2})" using b0 b PiE_iff by (cases "as = []", auto simp:H_def PiE_iff) ultimately have "bit_count ((List.product [0..1] [0..2] \\<^sub>e N\<^sub>e \\<^sub>e N\<^sub>e) y) \ ereal (real s\<^sub>1 * real s\<^sub>2) * (ereal (2 * log 2 (real n + 1) + 1) + ereal (2 * log 2 (real (length as) + 1) + 1))" by (intro fun_bit_count_est[where xs="(List.product [0..1] [0..2])", simplified], auto) hence "bit_count (encode_fk_state (s\<^sub>1, s\<^sub>2, k, length as, y)) \ ereal (2 * log 2 (real s\<^sub>1 + 1) + 1) + (ereal (2 * log 2 (real s\<^sub>2 + 1) + 1) + (ereal (2 * log 2 (real k + 1) + 1) + (ereal (2 * log 2 (real (length as) + 1) + 1) + (ereal (real s\<^sub>1 * real s\<^sub>2) * (ereal (2 * log 2 (real n+1) + 1) + ereal (2 * log 2 (real (length as)+1) + 1))))))" unfolding encode_fk_state_def dependent_bit_count by (intro add_mono exp_golomb_bit_count, auto) also have "... \ ?rhs" by (simp add: s\<^sub>1_def[symmetric] s\<^sub>2_def[symmetric] Let_def) (simp add:ac_simps) finally show "bit_count (encode_fk_state (s\<^sub>1, s\<^sub>2, k, length as, y)) \ ?rhs" by blast qed thus ?thesis by (simp add: a AE_measure_pmf_iff del:fk_space_usage.simps) qed end text \Main results of this section:\ theorem fk_alg_correct: assumes "k \ 1" assumes "\ \ {0<..<1}" assumes "\ > 0" assumes "set as \ {.. fold (\a state. state \ fk_update a) as (fk_init k \ \ n) \ fk_result" shows "\

(\ in measure_pmf M. \\ - F k as\ \ \ * F k as) \ 1 - of_rat \" unfolding M_def using fk_alg_correct'[OF assms(1-4)] by blast theorem fk_exact_space_usage: assumes "k \ 1" assumes "\ \ {0<..<1}" assumes "\ > 0" assumes "set as \ {.. fold (\a state. state \ fk_update a) as (fk_init k \ \ n)" shows "AE \ in M. bit_count (encode_fk_state \) \ fk_space_usage (k, n, length as, \, \)" unfolding M_def using fk_exact_space_usage'[OF assms(1-4)] by blast theorem fk_asymptotic_space_complexity: "fk_space_usage \ O[at_top \\<^sub>F at_top \\<^sub>F at_top \\<^sub>F at_right (0::rat) \\<^sub>F at_right (0::rat)](\ (k, n, m, \, \). real k * real n powr (1-1/ real k) / (of_rat \)\<^sup>2 * (ln (1 / of_rat \)) * (ln (real n) + ln (real m)))" (is "_ \ O[?F](?rhs)") proof - define k_of :: "nat \ nat \ nat \ rat \ rat \ nat" where "k_of = (\(k, n, m, \, \). k)" define n_of :: "nat \ nat \ nat \ rat \ rat \ nat" where "n_of = (\(k, n, m, \, \). n)" define m_of :: "nat \ nat \ nat \ rat \ rat \ nat" where "m_of = (\(k, n, m, \, \). m)" define \_of :: "nat \ nat \ nat \ rat \ rat \ rat" where "\_of = (\(k, n, m, \, \). \)" define \_of :: "nat \ nat \ nat \ rat \ rat \ rat" where "\_of = (\(k, n, m, \, \). \)" define g1 where "g1 = (\x. real (k_of x)*(real (n_of x)) powr (1-1/ real (k_of x)) * (1 / of_rat (\_of x)^2))" define g where "g = (\x. g1 x * (ln (1 / of_rat (\_of x))) * (ln (real (n_of x)) + ln (real (m_of x))))" define s1_of where "s1_of = (\x. nat \3 * real (k_of x) * real (n_of x) powr (1 - 1 / real (k_of x)) / (real_of_rat (\_of x))\<^sup>2\)" define s2_of where "s2_of = (\x. nat \- (18 * ln (real_of_rat (\_of x)))\)" have evt: "(\x. 0 < real_of_rat (\_of x) \ 0 < real_of_rat (\_of x) \ 1/real_of_rat (\_of x) \ \ \ 1/real_of_rat (\_of x) \ \ \ real (n_of x) \ n \ real (k_of x) \ k \ real (m_of x) \ m\ P x) \ eventually P ?F" (is "(\x. ?prem x \ _) \ _") for \ \ n k m P apply (rule eventually_mono[where P="?prem" and Q="P"]) apply (simp add:\_of_def case_prod_beta' \_of_def n_of_def k_of_def m_of_def) apply (intro eventually_conj eventually_prod1' eventually_prod2' sequentially_inf eventually_at_right_less inv_at_right_0_inf) by (auto simp add:prod_filter_eq_bot) have 1: "(\_. 1) \ O[?F](\x. real (n_of x))" "(\_. 1) \ O[?F](\x. real (m_of x))" "(\_. 1) \ O[?F](\x. real (k_of x))" by (intro landau_o.big_mono eventually_mono[OF evt], auto)+ have "(\x. ln (real (m_of x) + 1)) \ O[?F](\x. ln (real (m_of x)))" by (intro landau_ln_2[where a="2"] evt[where m="2"] sum_in_bigo 1, auto) hence 2: " (\x. log 2 (real (m_of x) + 1)) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" by (intro landau_sum_2 eventually_mono[OF evt[where n="1" and m="1"]]) (auto simp add:log_def) have 3: "(\_. 1) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" using order_less_le_trans[OF exp_gt_zero] ln_ge_iff by (intro landau_o.big_mono evt[where \="exp 1"]) (simp add: abs_ge_iff, blast) have 4: "(\_. 1) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" using one_le_power by (intro landau_o.big_mono evt[where \="1"]) (simp add:power_one_over[symmetric], blast) have "(\x. 1) \ O[?F](\x. ln (real (n_of x)))" using order_less_le_trans[OF exp_gt_zero] ln_ge_iff by (intro landau_o.big_mono evt[where n="exp 1"]) (simp add: abs_ge_iff, blast) hence 5: "(\x. 1) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" by (intro landau_sum_1 evt[where n="1" and m="1"], auto) have "(\x. -ln(of_rat (\_of x))) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" by (intro landau_o.big_mono evt) (auto simp add:ln_div) hence 6: "(\x. real (s2_of x)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" unfolding s2_of_def by (intro landau_nat_ceil 3, simp) have 7: "(\_. 1) \ O[?F](\x. real (n_of x) powr (1 - 1 / real (k_of x)))" by (intro landau_o.big_mono evt[where n="1" and k="1"]) (auto simp add: ge_one_powr_ge_zero) have 8: "(\_. 1) \ O[?F](g1)" unfolding g1_def by (intro landau_o.big_mult_1 1 7 4) have "(\x. 3 * (real (k_of x) * (n_of x) powr (1 - 1 / real (k_of x)) / (of_rat (\_of x))\<^sup>2)) \ O[?F](g1)" by (subst landau_o.big.cmult_in_iff, simp, simp add:g1_def) hence 9: "(\x. real (s1_of x)) \ O[?F](g1)" unfolding s1_of_def by (intro landau_nat_ceil 8, auto simp:ac_simps) have 10: "(\_. 1) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1 8 3 5) have "(\x. real (s1_of x)) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1 5 3 9) hence "(\x. ln (real (s1_of x) + 1)) \ O[?F](g)" using 10 by (intro landau_ln_3 sum_in_bigo, auto) hence 11: "(\x. log 2 (real (s1_of x) + 1)) \ O[?F](g)" by (simp add:log_def) have 12: " (\x. ln (real (s2_of x) + 1)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" using evt[where \="2"] 6 3 by (intro landau_ln_3 sum_in_bigo, auto) have 13: "(\x. log 2 (real (s2_of x) + 1)) \ O[?F](g)" unfolding g_def by (rule landau_o.big_mult_1, rule landau_o.big_mult_1', auto simp add: 8 5 12 log_def) have "(\x. real (k_of x)) \ O[?F](g1)" unfolding g1_def using 7 4 by (intro landau_o.big_mult_1, simp_all) hence "(\x. log 2 (real (k_of x) + 1)) \ O[?F](g1)" by (simp add:log_def) (intro landau_ln_3 sum_in_bigo 8, auto) hence 14: "(\x. log 2 (real (k_of x) + 1)) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1 3 5) have 15: "(\x. log 2 (real (m_of x) + 1)) \ O[?F](g)" unfolding g_def using 2 8 3 by (intro landau_o.big_mult_1', simp_all) have "(\x. ln (real (n_of x) + 1)) \ O[?F](\x. ln (real (n_of x)))" by (intro landau_ln_2[where a="2"] eventually_mono[OF evt[where n="2"]] sum_in_bigo 1, auto) hence " (\x. log 2 (real (n_of x) + 1)) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" by (intro landau_sum_1 evt[where n="1" and m="1"]) (auto simp add:log_def) hence 16: "(\x. real (s1_of x) * real (s2_of x) * (2 + 2 * log 2 (real (n_of x) + 1) + 2 * log 2 (real (m_of x) + 1))) \ O[?F](g)" unfolding g_def using 9 6 5 2 by (intro landau_o.mult sum_in_bigo, auto) have "fk_space_usage = (\x. fk_space_usage (k_of x, n_of x, m_of x, \_of x, \_of x))" by (simp add:case_prod_beta' k_of_def n_of_def \_of_def \_of_def m_of_def) also have "... \ O[?F](g)" using 10 11 13 14 15 16 by (simp add:fun_cong[OF s1_of_def[symmetric]] fun_cong[OF s2_of_def[symmetric]] Let_def) (intro sum_in_bigo, auto) also have "... = O[?F](?rhs)" by (simp add:case_prod_beta' g1_def g_def n_of_def \_of_def \_of_def m_of_def k_of_def) finally show ?thesis by simp qed end diff --git a/thys/Frequency_Moments/Probability_Ext.thy b/thys/Frequency_Moments/Probability_Ext.thy --- a/thys/Frequency_Moments/Probability_Ext.thy +++ b/thys/Frequency_Moments/Probability_Ext.thy @@ -1,328 +1,82 @@ section \Probability Spaces\ text \Some additional results about probability spaces in addition to "HOL-Probability".\ theory Probability_Ext imports "HOL-Probability.Stream_Space" + Concentration_Inequalities.Bienaymes_Identity Universal_Hash_Families.Carter_Wegman_Hash_Family Frequency_Moments_Preliminary_Results begin -text \Random variables that depend on disjoint sets of the components of a product space are -independent.\ - -lemma make_ext: - assumes "\x. P x = P (restrict x I)" - shows "(\x \ Pi I A. P x) = (\x \ PiE I A. P x)" - using assms by (simp add:PiE_def Pi_def set_eq_iff, force) +text \The following aliases are here to prevent possible merge-conflicts. The lemmas have been +moved to @{theory Concentration_Inequalities.Bienaymes_Identity} and/or +@{theory Concentration_Inequalities.Concentration_Inequalities_Preliminary}.\ -lemma PiE_reindex: - assumes "inj_on f I" - shows "PiE I (A \ f) = (\a. restrict (a \ f) I) ` PiE (f ` I) A" (is "?lhs = ?g ` ?rhs") -proof - - have "?lhs \ ?g` ?rhs" - proof (rule subsetI) - fix x - assume a:"x \ Pi\<^sub>E I (A \ f)" - define y where y_def: "y = (\k. if k \ f ` I then x (the_inv_into I f k) else undefined)" - have b:"y \ PiE (f ` I) A" - using a assms the_inv_into_f_eq[OF assms] - by (simp add: y_def PiE_iff extensional_def) - have c: "x = (\a. restrict (a \ f) I) y" - using a assms the_inv_into_f_eq extensional_arb - by (intro ext, simp add:y_def PiE_iff, fastforce) - show "x \ ?g ` ?rhs" using b c by blast - qed - moreover have "?g ` ?rhs \ ?lhs" - by (rule image_subsetI, simp add:Pi_def PiE_def) - ultimately show ?thesis by blast -qed +lemmas make_ext = forall_Pi_to_PiE +lemmas PiE_reindex = PiE_reindex context prob_space begin -lemma indep_sets_reindex: - assumes "inj_on f I" - shows "indep_sets A (f ` I) = indep_sets (\i. A (f i)) I" -proof - - have a:"\J g. J \ I \ (\j \ f ` J. g j) = (\j \ J. g (f j))" - by (metis assms prod.reindex_cong subset_inj_on) - - have "J \ I \ (\\<^sub>E i \ J. A (f i)) = (\a. restrict (a \ f) J) ` PiE (f ` J) A" for J - using assms inj_on_subset - by (subst PiE_reindex[symmetric]) auto - - hence b:"\P J. J \ I \ (\x. P x = P (restrict x J)) \ (\A' \ \\<^sub>E i \ J. A (f i). P A') = (\A'\PiE (f ` J) A. P (A' \ f))" - by simp - - have c:"\J. J \ I \ finite (f ` J) = finite J" - by (meson assms finite_image_iff inj_on_subset) - - show ?thesis - by (simp add:indep_sets_def all_subset_image a c) - (simp add:make_ext b cong:restrict_cong)+ -qed - -lemma indep_vars_cong_AE: - assumes "AE x in M. (\i \ I. X' i x = Y' i x)" - assumes "indep_vars M' X' I" - assumes "\i. i \ I \ random_variable (M' i) (Y' i)" - shows "indep_vars M' Y' I" -proof (cases "I \ {}") - case True - - have a: "AE x in M. (\i\I. Y' i x) = (\i\I. X' i x)" - by (rule AE_mp[OF assms(1)], rule AE_I2, simp cong:restrict_cong) - have b: "\i. i \ I \ random_variable (M' i) (X' i)" - using assms(2) by (simp add:indep_vars_def2) - have c: "\x. x \ I \ AE xa in M. X' x xa = Y' x xa" - by (rule AE_mp[OF assms(1)], rule AE_I2, simp) - - have "distr M (Pi\<^sub>M I M') (\x. \i\I. Y' i x) = distr M (Pi\<^sub>M I M') (\x. \i\I. X' i x)" - by (intro distr_cong_AE measurable_restrict a b assms(3)) auto - also have "... = Pi\<^sub>M I (\i. distr M (M' i) (X' i))" - using assms True b by (subst indep_vars_iff_distr_eq_PiM'[symmetric]) auto - also have "... = Pi\<^sub>M I (\i. distr M (M' i) (Y' i))" - by (intro PiM_cong distr_cong_AE c assms(3) b) auto - finally have "distr M (Pi\<^sub>M I M') (\x. \i\I. Y' i x) = Pi\<^sub>M I (\i. distr M (M' i) (Y' i))" - by simp - - thus ?thesis - using True assms(3) - by (subst indep_vars_iff_distr_eq_PiM') auto -next - case False - then show ?thesis - by (simp add:indep_vars_def2 indep_sets_def) -qed - -lemma indep_vars_reindex: - assumes "inj_on f I" - assumes "indep_vars M' X' (f ` I)" - shows "indep_vars (M' \ f) (\k \. X' (f k) \) I" - using assms by (simp add:indep_vars_def2 indep_sets_reindex) - -lemma variance_divide: - fixes f :: "'a \ real" - assumes "integrable M f" - shows "variance (\\. f \ / r) = variance f / r^2" - using assms - by (subst Bochner_Integration.integral_divide[OF assms(1)]) - (simp add:diff_divide_distrib[symmetric] power2_eq_square algebra_simps) +lemmas indep_sets_reindex = indep_sets_reindex +lemmas indep_vars_cong_AE = indep_vars_cong_AE +lemmas indep_vars_reindex = indep_vars_reindex +lemmas variance_divide = variance_divide +lemmas covariance_def = covariance_def +lemmas real_prod_integrable = cauchy_schwartz(1) +lemmas covariance_eq = covariance_eq +lemmas covar_integrable = covar_integrable +lemmas sum_square_int = sum_square_int +lemmas var_sum_1 = bienaymes_identity +lemmas covar_self_eq = covar_self_eq +lemmas covar_indep_eq_zero = covar_indep_eq_zero +lemmas var_sum_2 = bienaymes_identity_2 +lemmas var_sum_pairwise_indep = bienaymes_identity_pairwise_indep +lemmas indep_var_from_indep_vars = indep_var_from_indep_vars +lemmas var_sum_pairwise_indep_2 = bienaymes_identity_pairwise_indep_2 +lemmas var_sum_all_indep = bienaymes_identity_full_indep lemma pmf_mono: assumes "M = measure_pmf p" assumes "\x. x \ P \ x \ set_pmf p \ x \ Q" shows "prob P \ prob Q" proof - have "prob P = prob (P \ (set_pmf p))" by (rule measure_pmf_eq[OF assms(1)], blast) also have "... \ prob Q" using assms by (intro finite_measure.finite_measure_mono, auto) finally show ?thesis by simp qed lemma pmf_add: assumes "M = measure_pmf p" assumes "\x. x \ P \ x \ set_pmf p \ x \ Q \ x \ R" shows "prob P \ prob Q + prob R" proof - have [simp]:"events = UNIV" by (subst assms(1), simp) have "prob P \ prob (Q \ R)" using assms by (intro pmf_mono[OF assms(1)], blast) also have "... \ prob Q + prob R" by (rule measure_subadditive, auto) finally show ?thesis by simp qed lemma pmf_add_2: assumes "M = measure_pmf p" assumes "prob {\. P \} \ r1" assumes "prob {\. Q \} \ r2" shows "prob {\. P \ \ Q \} \ r1 + r2" (is "?lhs \ ?rhs") proof - have "?lhs \ prob {\. P \} + prob {\. Q \}" by (intro pmf_add[OF assms(1)], auto) also have "... \ ?rhs" by (intro add_mono assms(2-3)) finally show ?thesis by simp qed -definition covariance where - "covariance f g = expectation (\\. (f \ - expectation f) * (g \ - expectation g))" - -lemma real_prod_integrable: - fixes f g :: "'a \ real" - assumes [measurable]: "f \ borel_measurable M" "g \ borel_measurable M" - assumes sq_int: "integrable M (\\. f \^2)" "integrable M (\\. g \^2)" - shows "integrable M (\\. f \ * g \)" - unfolding integrable_iff_bounded -proof - have "(\\<^sup>+ \. ennreal (norm (f \ * g \)) \M)\<^sup>2 = (\\<^sup>+ \. ennreal \f \\ * ennreal \g \\ \M)\<^sup>2" - by (simp add: abs_mult ennreal_mult) - also have "... \ (\\<^sup>+ \. ennreal \f \\^2 \M) * (\\<^sup>+ \. ennreal \g \\^2 \M)" - by (rule Cauchy_Schwarz_nn_integral, auto) - also have "... < \" - using sq_int by (auto simp: integrable_iff_bounded ennreal_power ennreal_mult_less_top) - finally have "(\\<^sup>+ x. ennreal (norm (f x * g x)) \M)\<^sup>2 < \" - by simp - thus "(\\<^sup>+ x. ennreal (norm (f x * g x)) \M) < \" - by (simp add: power_less_top_ennreal) -qed auto - -lemma covariance_eq: - fixes f :: "'a \ real" - assumes "f \ borel_measurable M" "g \ borel_measurable M" - assumes "integrable M (\\. f \^2)" "integrable M (\\. g \^2)" - shows "covariance f g = expectation (\\. f \ * g \) - expectation f * expectation g" -proof - - have "integrable M f" using square_integrable_imp_integrable assms by auto - moreover have "integrable M g" using square_integrable_imp_integrable assms by auto - ultimately show ?thesis - using assms real_prod_integrable - by (simp add:covariance_def algebra_simps prob_space) -qed - -lemma covar_integrable: - fixes f g :: "'a \ real" - assumes "f \ borel_measurable M" "g \ borel_measurable M" - assumes "integrable M (\\. f \^2)" "integrable M (\\. g \^2)" - shows "integrable M (\\. (f \ - expectation f) * (g \ - expectation g))" -proof - - have "integrable M f" using square_integrable_imp_integrable assms by auto - moreover have "integrable M g" using square_integrable_imp_integrable assms by auto - ultimately show ?thesis using assms real_prod_integrable by (simp add: algebra_simps) -qed - -lemma sum_square_int: - fixes f :: "'b \ 'a \ real" - assumes "finite I" - assumes "\i. i \ I \ f i \ borel_measurable M" - assumes "\i. i \ I \ integrable M (\\. f i \^2)" - shows "integrable M (\\. (\i \ I. f i \)\<^sup>2)" -proof - - have " integrable M (\\. \i\I. \j\I. f j \ * f i \)" - using assms - by (intro Bochner_Integration.integrable_sum real_prod_integrable, auto) - thus ?thesis - by (simp add:power2_eq_square sum_distrib_left sum_distrib_right) -qed - -lemma var_sum_1: - fixes f :: "'b \ 'a \ real" - assumes "finite I" - assumes "\i. i \ I \ f i \ borel_measurable M" - assumes "\i. i \ I \ integrable M (\\. f i \^2)" - shows - "variance (\\. (\i \ I. f i \)) = (\i \ I. (\j \ I. covariance (f i) (f j)))" -proof - - have a:"\i j. i \ I \ j \ I \ integrable M (\\. (f i \ - expectation (f i)) * (f j \ - expectation (f j)))" - using assms covar_integrable by simp - have "variance (\\. (\i \ I. f i \)) = expectation (\\. (\i\I. f i \ - expectation (f i))\<^sup>2)" - using square_integrable_imp_integrable[OF assms(2,3)] - by (simp add: Bochner_Integration.integral_sum sum_subtractf) - also have "... = expectation (\\. (\i \ I. (\j \ I. (f i \ - expectation (f i)) * (f j \ - expectation (f j)))))" - by (simp add: power2_eq_square sum_distrib_right sum_distrib_left mult.commute) - also have "... = (\i \ I. (\j \ I. covariance (f i) (f j)))" - using a by (simp add: Bochner_Integration.integral_sum covariance_def) - finally show ?thesis by simp -qed - -lemma covar_self_eq: - fixes f :: "'a \ real" - shows "covariance f f = variance f" - by (simp add:covariance_def power2_eq_square) - -lemma covar_indep_eq_zero: - fixes f g :: "'a \ real" - assumes "integrable M f" - assumes "integrable M g" - assumes "indep_var borel f borel g" - shows "covariance f g = 0" -proof - - have a:"indep_var borel ((\t. t - expectation f) \ f) borel ((\t. t - expectation g) \ g)" - by (rule indep_var_compose[OF assms(3)], auto) - - have b:"expectation (\\. (f \ - expectation f) * (g \ - expectation g)) = 0" - using a assms by (subst indep_var_lebesgue_integral, auto simp add:comp_def prob_space) - - thus ?thesis by (simp add:covariance_def) -qed - -lemma var_sum_2: - fixes f :: "'b \ 'a \ real" - assumes "finite I" - assumes "\i. i \ I \ f i \ borel_measurable M" - assumes "\i. i \ I \ integrable M (\\. f i \^2)" - shows "variance (\\. (\i \ I. f i \)) = - (\i \ I. variance (f i)) + (\i \ I. \j \ I - {i}. covariance (f i) (f j))" -proof - - have "variance (\\. (\i \ I. f i \)) = (\i\I. \j\I. covariance (f i) (f j))" - by (simp add: var_sum_1[OF assms(1,2,3)]) - also have "... = (\i\I. covariance (f i) (f i) + (\j\I-{i}. covariance (f i) (f j)))" - using assms by (subst sum.insert[symmetric], auto simp add:insert_absorb) - also have "... = (\i\I. variance (f i)) + (\i \ I. (\j\I-{i}. covariance (f i) (f j)))" - by (simp add: covar_self_eq[symmetric] sum.distrib) - finally show ?thesis by simp -qed - -lemma var_sum_pairwise_indep: - fixes f :: "'b \ 'a \ real" - assumes "finite I" - assumes "\i. i \ I \ f i \ borel_measurable M" - assumes "\i. i \ I \ integrable M (\\. f i \^2)" - assumes "\i j. i \ I \ j \ I \ i \ j \ indep_var borel (f i) borel (f j)" - shows "variance (\\. (\i \ I. f i \)) = (\i \ I. variance (f i))" -proof - - have "\i j. i \ I \ j \ I - {i} \ covariance (f i) (f j) = 0" - using covar_indep_eq_zero assms(4) square_integrable_imp_integrable[OF assms(2,3)] by auto - hence a:"(\i \ I. \j \ I - {i}. covariance (f i) (f j)) = 0" - by simp - thus ?thesis by (simp add: var_sum_2[OF assms(1,2,3)]) -qed - -lemma indep_var_from_indep_vars: - assumes "i \ j" - assumes "indep_vars (\_. M') f {i, j}" - shows "indep_var M' (f i) M' (f j)" -proof - - have a:"inj (case_bool i j)" using assms(1) - by (simp add: bool.case_eq_if inj_def) - have b:"range (case_bool i j) = {i,j}" - by (simp add: UNIV_bool insert_commute) - have c:"indep_vars (\_. M') f (range (case_bool i j))" using assms(2) b by simp - - have "True = indep_vars (\x. M') (\x. f (case_bool i j x)) UNIV" - using indep_vars_reindex[OF a c] - by (simp add:comp_def) - also have "... = indep_vars (\x. case_bool M' M' x) (\x. case_bool (f i) (f j) x) UNIV" - by (rule indep_vars_cong, auto simp:bool.case_distrib bool.case_eq_if) - also have "... = ?thesis" - by (simp add: indep_var_def) - finally show ?thesis by simp -qed - -lemma var_sum_pairwise_indep_2: - fixes f :: "'b \ 'a \ real" - assumes "finite I" - assumes "\i. i \ I \ f i \ borel_measurable M" - assumes "\i. i \ I \ integrable M (\\. f i \^2)" - assumes "\J. J \ I \ card J = 2 \ indep_vars (\ _. borel) f J" - shows "variance (\\. (\i \ I. f i \)) = (\i \ I. variance (f i))" - using assms(4) - by (intro var_sum_pairwise_indep[OF assms(1,2,3)] indep_var_from_indep_vars, auto) - -lemma var_sum_all_indep: - fixes f :: "'b \ 'a \ real" - assumes "finite I" - assumes "\i. i \ I \ f i \ borel_measurable M" - assumes "\i. i \ I \ integrable M (\\. f i \^2)" - assumes "indep_vars (\ _. borel) f I" - shows "variance (\\. (\i \ I. f i \)) = (\i \ I. variance (f i))" - by (intro var_sum_pairwise_indep_2[OF assms(1,2,3)] indep_vars_subset[OF assms(4)], auto) - end end diff --git a/thys/Frequency_Moments/ROOT b/thys/Frequency_Moments/ROOT --- a/thys/Frequency_Moments/ROOT +++ b/thys/Frequency_Moments/ROOT @@ -1,26 +1,27 @@ chapter AFP session Frequency_Moments = "HOL-Probability" + options [timeout = 1200] sessions Bertrands_Postulate + Concentration_Inequalities Equivalence_Relation_Enumeration "HOL-Algebra" Interpolation_Polynomials_HOL_Algebra Lp Prefix_Free_Code_Combinators Median_Method Universal_Hash_Families theories Frequency_Moments_Preliminary_Results Frequency_Moments Frequency_Moment_0 Frequency_Moment_2 Frequency_Moment_k Landau_Ext K_Smallest Probability_Ext Product_PMF_Ext document_files "root.tex" "root.bib"