diff --git a/thys/Frequency_Moments/Frequency_Moment_0.thy b/thys/Frequency_Moments/Frequency_Moment_0.thy --- a/thys/Frequency_Moments/Frequency_Moment_0.thy +++ b/thys/Frequency_Moments/Frequency_Moment_0.thy @@ -1,1314 +1,1314 @@ section \Frequency Moment $0$\label{sec:f0}\ theory Frequency_Moment_0 imports Frequency_Moments_Preliminary_Results Median_Method.Median K_Smallest Universal_Hash_Families.Carter_Wegman_Hash_Family Frequency_Moments Landau_Ext Product_PMF_Ext Universal_Hash_Families.Field begin text \This section contains a formalization of a new algorithm for the zero-th frequency moment inspired by ideas described in \cite{baryossed2002}. It is a KMV-type ($k$-minimum value) algorithm with a rounding method and matches the space complexity of the best algorithm described in \cite{baryossef2002}. In addition to the Isabelle proof here, there is also an informal hand-written proof in Appendix~\ref{sec:f0_proof}.\ type_synonym f0_state = "nat \ nat \ nat \ nat \ (nat \ nat list) \ (nat \ float set)" definition hash where "hash p = ring.hash (mod_ring p)" fun f0_init :: "rat \ rat \ nat \ f0_state pmf" where "f0_init \ \ n = do { let s = nat \-18 * ln (real_of_rat \)\; let t = nat \80 / (real_of_rat \)\<^sup>2\; let p = prime_above (max n 19); let r = nat (4 * \log 2 (1 / real_of_rat \)\ + 23); h \ prod_pmf {.._. pmf_of_set (bounded_degree_polynomials (mod_ring p) 2)); return_pmf (s, t, p, r, h, (\_ \ {0.. f0_state \ f0_state pmf" where "f0_update x (s, t, p, r, h, sketch) = return_pmf (s, t, p, r, h, \i \ {.. rat pmf" where "f0_result (s, t, p, r, h, sketch) = return_pmf (median s (\i \ {.. rat \ rat) \ real" where "f0_space_usage (n, \, \) = ( let s = nat \-18 * ln (real_of_rat \)\ in let r = nat (4 * \log 2 (1 / real_of_rat \)\ + 23) in let t = nat \80 / (real_of_rat \)\<^sup>2 \ in 6 + 2 * log 2 (real s + 1) + 2 * log 2 (real t + 1) + 2 * log 2 (real n + 21) + 2 * log 2 (real r + 1) + real s * (5 + 2 * log 2 (21 + real n) + real t * (13 + 4 * r + 2 * log 2 (log 2 (real n + 13)))))" definition encode_f0_state :: "f0_state \ bool list option" where "encode_f0_state = N\<^sub>e \\<^sub>e (\s. N\<^sub>e \\<^sub>e ( N\<^sub>e \\<^sub>e (\p. N\<^sub>e \\<^sub>e ( ([0..\<^sub>e (P\<^sub>e p 2)) \\<^sub>e ([0..\<^sub>e (S\<^sub>e F\<^sub>e))))))" lemma "inj_on encode_f0_state (dom encode_f0_state)" proof - have "is_encoding encode_f0_state" unfolding encode_f0_state_def by (intro dependent_encoding exp_golomb_encoding poly_encoding fun_encoding set_encoding float_encoding) thus ?thesis by (rule encoding_imp_inj) qed context fixes \ \ :: rat fixes n :: nat fixes as :: "nat list" fixes result assumes \_range: "\ \ {0<..<1}" assumes \_range: "\ \ {0<..<1}" assumes as_range: "set as \ {.. fold (\a state. state \ f0_update a) as (f0_init \ \ n) \ f0_result" begin private definition t where "t = nat \80 / (real_of_rat \)\<^sup>2\" private lemma t_gt_0: "t > 0" using \_range by (simp add:t_def) private definition s where "s = nat \-(18 * ln (real_of_rat \))\" private lemma s_gt_0: "s > 0" using \_range by (simp add:s_def) private definition p where "p = prime_above (max n 19)" private lemma p_prime:"Factorial_Ring.prime p" using p_def prime_above_prime by presburger private lemma p_ge_18: "p \ 18" proof - have "p \ 19" by (metis p_def prime_above_lower_bound max.bounded_iff) thus ?thesis by simp qed private lemma p_gt_0: "p > 0" using p_ge_18 by simp private lemma p_gt_1: "p > 1" using p_ge_18 by simp private lemma n_le_p: "n \ p" proof - have "n \ max n 19" by simp also have "... \ p" unfolding p_def by (rule prime_above_lower_bound) finally show ?thesis by simp qed private lemma p_le_n: "p \ 2*n + 40" proof - have "p \ 2 * (max n 19) + 2" by (subst p_def, rule prime_above_upper_bound) also have "... \ 2 * n + 40" by (cases "n \ 19", auto) finally show ?thesis by simp qed private lemma as_lt_p: "\x. x \ set as \ x < p" using as_range atLeastLessThan_iff by (intro order_less_le_trans[OF _ n_le_p]) blast private lemma as_subset_p: "set as \ {..log 2 (1 / real_of_rat \)\ + 23)" private lemma r_bound: "4 * log 2 (1 / real_of_rat \) + 23 \ r" proof - have "0 \ log 2 (1 / real_of_rat \)" using \_range by simp hence "0 \ \log 2 (1 / real_of_rat \)\" by simp hence "0 \ 4 * \log 2 (1 / real_of_rat \)\ + 23" by (intro add_nonneg_nonneg mult_nonneg_nonneg, auto) thus ?thesis by (simp add:r_def) qed private lemma r_ge_23: "r \ 23" proof - have "(23::real) = 0 + 23" by simp also have "... \ 4 * log 2 (1 / real_of_rat \) + 23" using \_range by (intro add_mono mult_nonneg_nonneg, auto) also have "... \ r" using r_bound by simp finally show "23 \ r" by simp qed private lemma two_pow_r_le_1: "0 < 1 - 2 powr - real r" proof - have a: "2 powr (0::real) = 1" by simp show ?thesis using r_ge_23 by (simp, subst a[symmetric], intro powr_less_mono, auto) qed interpretation carter_wegman_hash_family "mod_ring p" 2 rewrites "ring.hash (mod_ring p) = Frequency_Moment_0.hash p" using carter_wegman_hash_familyI[OF mod_ring_is_field mod_ring_finite] using hash_def p_prime by auto private definition tr_hash where "tr_hash x \ = truncate_down r (hash x \)" private definition sketch_rv where "sketch_rv \ = least t ((\x. float_of (tr_hash x \)) ` set as)" private definition estimate where "estimate S = (if card S < t then of_nat (card S) else of_nat t * of_nat p / rat_of_float (Max S))" private definition sketch_rv' where "sketch_rv' \ = least t ((\x. tr_hash x \) ` set as)" private definition estimate' where "estimate' S = (if card S < t then real (card S) else real t * real p / Max S)" private definition \\<^sub>0 where "\\<^sub>0 = prod_pmf {.._. pmf_of_set space)" private lemma f0_alg_sketch: defines "sketch \ fold (\a state. state \ f0_update a) as (f0_init \ \ n)" shows "sketch = map_pmf (\x. (s,t,p,r, x, \i \ {..\<^sub>0" unfolding sketch_rv_def proof (subst sketch_def, induction as rule:rev_induct) case Nil then show ?case by (simp add:s_def p_def[symmetric] map_pmf_def t_def r_def Let_def least_def restrict_def space_def \\<^sub>0_def) next case (snoc x xs) let ?sketch = "\\ xs. least t ((\a. float_of (tr_hash a \)) ` set xs)" have "fold (\a state. state \ f0_update a) (xs @ [x]) (f0_init \ \ n) = (map_pmf (\\. (s, t, p, r, \, \i \ {.. i) xs)) \\<^sub>0) \ f0_update x" by (simp add: restrict_def snoc del:f0_init.simps) also have "... = \\<^sub>0 \ (\\. f0_update x (s, t, p, r, \, \i\{.. i) xs)) " by (simp add:map_pmf_def bind_assoc_pmf bind_return_pmf del:f0_update.simps) also have "... = map_pmf (\\. (s, t, p, r, \, \i\{.. i) (xs@[x]))) \\<^sub>0" by (simp add:least_insert map_pmf_def tr_hash_def cong:restrict_cong) finally show ?case by blast qed private lemma card_nat_in_ball: fixes x :: nat fixes q :: real assumes "q \ 0" defines "A \ {k. abs (real x - real k) \ q \ k \ x}" shows "real (card A) \ 2 * q" and "finite A" proof - have a: "of_nat x \ {\real x-q\..\real x+q\}" using assms by (simp add: ceiling_le_iff) have "card A = card (int ` A)" by (rule card_image[symmetric], simp) also have "... \ card ({\real x-q\..\real x+q\} - {of_nat x})" by (intro card_mono image_subsetI, simp_all add:A_def abs_le_iff, linarith) also have "... = card {\real x-q\..\real x+q\} - 1" by (rule card_Diff_singleton, rule a) also have "... = int (card {\real x-q\..\real x+q\}) - int 1" by (intro of_nat_diff) (metis a card_0_eq empty_iff finite_atLeastAtMost_int less_one linorder_not_le) also have "... \ \q+real x\+1 -\real x-q\ - 1" using assms by (simp, linarith) also have "... \ 2*q" by linarith finally show "card A \ 2 * q" by simp have "A \ {..x + nat \q\}" by (rule subsetI, simp add:A_def abs_le_iff, linarith) thus "finite A" by (rule finite_subset, simp) qed private lemma prob_degree_lt_1: "prob {\. degree \ < 1} \ 1/real p" proof - have "space \ {\. length \ \ Suc 0} = bounded_degree_polynomials (mod_ring p) 1" by (auto simp:set_eq_iff bounded_degree_polynomials_def space_def) moreover have "field_size = p" by (simp add:mod_ring_def) hence "real (card (bounded_degree_polynomials (mod_ring p) (Suc 0))) / real (card space) = 1 / real p" by (simp add:space_def bounded_degree_polynomials_card power2_eq_square) ultimately show ?thesis by (simp add:M_def measure_pmf_of_set) qed private lemma collision_prob: assumes "c \ 1" shows "prob {\. \x \ set as. \y \ set as. x \ y \ tr_hash x \ \ c \ tr_hash x \ = tr_hash y \} \ (5/2) * (real (card (set as)))\<^sup>2 * c\<^sup>2 * 2 powr -(real r) / (real p)\<^sup>2 + 1/real p" (is "prob {\. ?l \} \ ?r1 + ?r2") proof - define \ :: real where "\ = 9/8" have rho_c_ge_0: "\ * c \ 0" unfolding \_def using assms by simp have c_ge_0: "c\0" using assms by simp have "degree \ \ 1 \ \ \ space \ degree \ = 1" for \ by (simp add:bounded_degree_polynomials_def space_def) (metis One_nat_def Suc_1 le_less_Suc_eq less_imp_diff_less list.size(3) pos2) hence a: "\\ x y. x < p \ y < p \ x \ y \ degree \ \ 1 \ \ \ space \ hash x \ \ hash y \" using inj_onD[OF inj_if_degree_1] mod_ring_carr by blast have b: "prob {\. degree \ \ 1 \ tr_hash x \ \ c \ tr_hash x \ = tr_hash y \} \ 5 * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2" if b_assms: "x \ set as" "y \ set as" "x < y" for x y proof - have c: "real u \ \ * c \ \real u - real v\ \ \ * c * 2 powr (-real r)" if c_assms:"truncate_down r (real u) \ c" "truncate_down r (real u) = truncate_down r (real v)" for u v proof - have "9 * 2 powr - real r \ 9 * 2 powr (- real 23)" using r_ge_23 by (intro mult_left_mono powr_mono, auto) also have "... \ 1" by simp finally have "9 * 2 powr - real r \ 1" by simp hence "1 \ \ * (1 - 2 powr (- real r))" by (simp add:\_def) hence d: "(c*1) / (1 - 2 powr (-real r)) \ c * \" using assms two_pow_r_le_1 by (simp add: pos_divide_le_eq) have "\x. truncate_down r (real x) \ c \ real x * (1 - 2 powr - real r) \ c * 1" using truncate_down_pos[OF of_nat_0_le_iff] order_trans by (simp, blast) hence "\x. truncate_down r (real x) \ c \ real x \ c * \" using two_pow_r_le_1 by (intro order_trans[OF _ d], simp add: pos_le_divide_eq) hence e: "real u \ c * \" "real v \ c * \" using c_assms by auto have " \real u - real v\ \ (max \real u\ \real v\) * 2 powr (-real r)" using c_assms by (intro truncate_down_eq, simp) also have "... \ (c * \) * 2 powr (-real r)" using e by (intro mult_right_mono, auto) finally have "\real u - real v\ \ \ * c * 2 powr (-real r)" by (simp add:algebra_simps) thus ?thesis using e by (simp add:algebra_simps) qed have "prob {\. degree \ \ 1 \ tr_hash x \ \ c \ tr_hash x \ = tr_hash y \} \ prob (\ i \ {(u,v) \ {.. {.. v \ truncate_down r u \ c \ truncate_down r u = truncate_down r v}. {\. hash x \ = fst i \ hash y \ = snd i})" using a by (intro pmf_mono[OF M_def], simp add:tr_hash_def) (metis hash_range mod_ring_carr b_assms as_subset_p lessThan_iff nat_neq_iff subset_eq) also have "... \ (\ i\ {(u,v) \ {.. {.. v \ truncate_down r u \ c \ truncate_down r u = truncate_down r v}. prob {\. hash x \ = fst i \ hash y \ = snd i})" by (intro measure_UNION_le finite_cartesian_product finite_subset[where B="{0.. {0.. (\ i\ {(u,v) \ {.. {.. v \ truncate_down r u \ c \ truncate_down r u = truncate_down r v}. prob {\. (\u \ {x,y}. hash u \ = (if u = x then (fst i) else (snd i)))})" by (intro sum_mono pmf_mono[OF M_def]) force also have "... \ (\ i\ {(u,v) \ {.. {.. v \ truncate_down r u \ c \ truncate_down r u = truncate_down r v}. 1/(real p)\<^sup>2)" using assms as_subset_p b_assms by (intro sum_mono, subst hash_prob) (auto simp add: mod_ring_def power2_eq_square) also have "... = 1/(real p)\<^sup>2 * card {(u,v) \ {0.. {0.. v \ truncate_down r u \ c \ truncate_down r u = truncate_down r v}" by simp also have "... \ 1/(real p)\<^sup>2 * card {(u,v) \ {.. {.. v \ real u \ \ * c \ abs (real u - real v) \ \ * c * 2 powr (-real r)}" using c by (intro mult_mono of_nat_mono card_mono finite_cartesian_product finite_subset[where B="{..{.. 1/(real p)\<^sup>2 * card (\u' \ {u. u < p \ real u \ \ * c}. {(u::nat,v::nat). u = u' \ abs (real u - real v) \ \ * c * 2 powr (-real r) \ v < p \ v \ u'})" by (intro mult_left_mono of_nat_mono card_mono finite_cartesian_product finite_subset[where B="{..{.. 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. card {(u,v). u = u' \ abs (real u - real v) \ \ * c * 2 powr (-real r) \ v < p \ v \ u'})" by (intro mult_left_mono of_nat_mono card_UN_le, auto) also have "... = 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. card ((\x. (u' ,x)) ` {v. abs (real u' - real v) \ \ * c * 2 powr (-real r) \ v < p \ v \ u'}))" by (intro arg_cong2[where f="(*)"] arg_cong[where f="real"] sum.cong arg_cong[where f="card"]) (auto simp add:set_eq_iff) also have "... \ 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. card {v. abs (real u' - real v) \ \ * c * 2 powr (-real r) \ v < p \ v \ u'})" by (intro mult_left_mono of_nat_mono sum_mono card_image_le, auto) also have "... \ 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. card {v. abs (real u' - real v) \ \ * c * 2 powr (-real r) \ v \ u'})" by (intro mult_left_mono sum_mono of_nat_mono card_mono card_nat_in_ball subsetI) auto also have "... \ 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. real (card {v. abs (real u' - real v) \ \ * c * 2 powr (-real r) \ v \ u'}))" by simp also have "... \ 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. 2 * (\ * c * 2 powr (-real r)))" by (intro mult_left_mono sum_mono card_nat_in_ball(1), auto) also have "... = 1/(real p)\<^sup>2 * (real (card {u. u < p \ real u \ \ * c}) * (2 * (\ * c * 2 powr (-real r))))" by simp also have "... \ 1/(real p)\<^sup>2 * (real (card {u. u \ nat (\\ * c \)}) * (2 * (\ * c * 2 powr (-real r))))" using rho_c_ge_0 le_nat_floor by (intro mult_left_mono mult_right_mono of_nat_mono card_mono subsetI) auto also have "... \ 1/(real p)\<^sup>2 * ((1+\ * c) * (2 * (\ * c * 2 powr (-real r))))" using rho_c_ge_0 by (intro mult_left_mono mult_right_mono, auto) also have "... \ 1/(real p)\<^sup>2 * (((1+\) * c) * (2 * (\ * c * 2 powr (-real r))))" using assms by (intro mult_mono, auto simp add:distrib_left distrib_right \_def) also have "... = (\ * (2 + \ * 2)) * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2" by (simp add:ac_simps power2_eq_square) also have "... \ 5 * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2" by (intro divide_right_mono mult_right_mono) (auto simp add:\_def) finally show ?thesis by simp qed have "prob {\. ?l \ \ degree \ \ 1} \ prob (\ i \ {(x,y) \ (set as) \ (set as). x < y}. {\. degree \ \ 1 \ tr_hash (fst i) \ \ c \ tr_hash (fst i) \ = tr_hash (snd i) \})" by (rule pmf_mono[OF M_def], simp, metis linorder_neqE_nat) also have "... \ (\ i \ {(x,y) \ (set as) \ (set as). x < y}. prob {\. degree \ \ 1 \ tr_hash (fst i) \ \ c \ tr_hash (fst i) \ = tr_hash (snd i) \})" unfolding M_def by (intro measure_UNION_le finite_cartesian_product finite_subset[where B="(set as) \ (set as)"]) auto also have "... \ (\ i \ {(x,y) \ (set as) \ (set as). x < y}. 5 * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2)" using b by (intro sum_mono, simp add:case_prod_beta) also have "... = ((5/2) * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2) * (2 * card {(x,y) \ (set as) \ (set as). x < y})" by simp also have "... = ((5/2) * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2) * (card (set as) * (card (set as) - 1))" by (subst card_ordered_pairs, auto) also have "... \ ((5/2) * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2) * (real (card (set as)))\<^sup>2" by (intro mult_left_mono) (auto simp add:power2_eq_square mult_left_mono) also have "... = (5/2) * (real (card (set as)))\<^sup>2 * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2" by (simp add:algebra_simps) finally have f:"prob {\. ?l \ \ degree \ \ 1} \ ?r1" by simp have "prob {\. ?l \} \ prob {\. ?l \ \ degree \ \ 1} + prob {\. degree \ < 1}" by (rule pmf_add[OF M_def], auto) also have "... \ ?r1 + ?r2" by (intro add_mono f prob_degree_lt_1) finally show ?thesis by simp qed private lemma of_bool_square: "(of_bool x)\<^sup>2 = ((of_bool x)::real)" by (cases x, auto) private definition Q where "Q y \ = card {x \ set as. int (hash x \) < y}" private definition m where "m = card (set as)" private lemma assumes "a \ 0" assumes "a \ int p" shows exp_Q: "expectation (\\. real (Q a \)) = real m * (of_int a) / p" and var_Q: "variance (\\. real (Q a \)) \ real m * (of_int a) / p" proof - have exp_single: "expectation (\\. of_bool (int (hash x \) < a)) = real_of_int a /real p" if a:"x \ set as" for x proof - have x_le_p: "x < p" using a as_lt_p by simp have "expectation (\\. of_bool (int (hash x \) < a)) = expectation (indicat_real {\. int (Frequency_Moment_0.hash p x \) < a})" by (intro arg_cong2[where f="integral\<^sup>L"] ext, simp_all) also have "... = prob {\. hash x \ \ {k. int k < a}}" by (simp add:M_def) also have "... = card ({k. int k < a} \ {..\. of_bool (int (hash x \) < a)) = real_of_int a /real p" by simp qed have "expectation(\\. real (Q a \)) = expectation (\\. (\x \ set as. of_bool (int (hash x \) < a)))" by (simp add:Q_def Int_def) also have "... = (\x \ set as. expectation (\\. of_bool (int (hash x \) < a)))" by (rule Bochner_Integration.integral_sum, simp) also have "... = (\ x \ set as. a /real p)" by (rule sum.cong, simp, subst exp_single, simp, simp) also have "... = real m * real_of_int a / real p" by (simp add:m_def) finally show "expectation (\\. real (Q a \)) = real m * real_of_int a / p" by simp have indep: "J \ set as \ card J = 2 \ indep_vars (\_. borel) (\i x. of_bool (int (hash i x) < a)) J" for J using as_subset_p mod_ring_carr by (intro indep_vars_compose2[where Y="\i x. of_bool (int x < a)" and M'="\_. discrete"] k_wise_indep_vars_subset[OF k_wise_indep] finite_subset[OF _ finite_set]) auto have rv: "\x. x \ set as \ random_variable borel (\\. of_bool (int (hash x \) < a))" by (simp add:M_def) have "variance (\\. real (Q a \)) = variance (\\. (\x \ set as. of_bool (int (hash x \) < a)))" by (simp add:Q_def Int_def) also have "... = (\x \ set as. variance (\\. of_bool (int (hash x \) < a)))" by (intro var_sum_pairwise_indep_2 indep rv) auto also have "... \ (\ x \ set as. a / real p)" by (rule sum_mono, simp add: variance_eq of_bool_square, simp add: exp_single) also have "... = real m * real_of_int a /real p" by (simp add:m_def) finally show "variance (\\. real (Q a \)) \ real m * real_of_int a / p" by simp qed private lemma t_bound: "t \ 81 / (real_of_rat \)\<^sup>2" proof - have "t \ 80 / (real_of_rat \)\<^sup>2 + 1" using t_def t_gt_0 by linarith also have "... \ 80 / (real_of_rat \)\<^sup>2 + 1 / (real_of_rat \)\<^sup>2" using \_range by (intro add_mono, simp, simp add:power_le_one) also have "... = 81 / (real_of_rat \)\<^sup>2" by simp finally show ?thesis by simp qed private lemma t_r_bound: "18 * 40 * (real t)\<^sup>2 * 2 powr (-real r) \ 1" proof - have "720 * (real t)\<^sup>2 * 2 powr (-real r) \ 720 * (81 / (real_of_rat \)\<^sup>2)\<^sup>2 * 2 powr (-4 * log 2 (1 / real_of_rat \) - 23)" using r_bound t_bound by (intro mult_left_mono mult_mono power_mono powr_mono, auto) also have "... \ 720 * (81 / (real_of_rat \)\<^sup>2)\<^sup>2 * (2 powr (-4 * log 2 (1 / real_of_rat \)) * 2 powr (-23))" using \_range by (intro mult_left_mono mult_mono power_mono add_mono) (simp_all add:power_le_one powr_diff) also have "... = 720 * (81\<^sup>2 / (real_of_rat \)^4) * (2 powr (log 2 ((real_of_rat \)^4)) * 2 powr (-23))" using \_range by (intro arg_cong2[where f="(*)"]) (simp_all add:power2_eq_square power4_eq_xxxx log_divide log_powr[symmetric]) also have "... = 720 * 81\<^sup>2 * 2 powr (-23)" using \_range by simp also have "... \ 1" by simp finally show ?thesis by simp qed private lemma m_eq_F_0: "real m = of_rat (F 0 as)" by (simp add:m_def F_def) private lemma estimate'_bounds: "prob {\. of_rat \ * real_of_rat (F 0 as) < \estimate' (sketch_rv' \) - of_rat (F 0 as)\} \ 1/3" proof (cases "card (set as) \ t") case True define \' where "\' = 3 * real_of_rat \ / 4" define u where "u = \real t * p / (m * (1+\'))\" define v where "v = \real t * p / (m * (1-\'))\" define has_no_collision where "has_no_collision = (\\. \x\ set as. \y \ set as. (tr_hash x \ = tr_hash y \ \ x = y) \ tr_hash x \ > v)" have "2 powr (-real r) \ 2 powr (-(4 * log 2 (1 / real_of_rat \) + 23))" using r_bound by (intro powr_mono, linarith, simp) also have "... = 2 powr (-4 * log 2 (1 /real_of_rat \) -23)" by (rule arg_cong2[where f="(powr)"], auto simp add:algebra_simps) also have "... \ 2 powr ( -1 * log 2 (1 /real_of_rat \) -4)" using \_range by (intro powr_mono diff_mono, auto) also have "... = 2 powr ( -1 * log 2 (1 /real_of_rat \)) / 16" by (simp add: powr_diff) also have "... = real_of_rat \ / 16" using \_range by (simp add:log_divide) also have "... < real_of_rat \ / 8" using \_range by (subst pos_divide_less_eq, auto) finally have r_le_\: "2 powr (-real r) < real_of_rat \ / 8" by simp have \'_gt_0: "\' > 0" using \_range by (simp add:\'_def) have "\' < 3/4" using \_range by (simp add:\'_def)+ also have "... < 1" by simp finally have \'_lt_1: "\' < 1" by simp have "t \ 81 / (real_of_rat \)\<^sup>2" using t_bound by simp also have "... = (81*9/16) / (\')\<^sup>2" by (simp add:\'_def power2_eq_square) also have "... \ 46 / \'\<^sup>2" by (intro divide_right_mono, simp, simp) finally have t_le_\': "t \ 46/ \'\<^sup>2" by simp have "80 \ (real_of_rat \)\<^sup>2 * (80 / (real_of_rat \)\<^sup>2)" using \_range by simp also have "... \ (real_of_rat \)\<^sup>2 * t" by (intro mult_left_mono, simp add:t_def of_nat_ceiling, simp) finally have "80 \ (real_of_rat \)\<^sup>2 * t" by simp hence t_ge_\': "45 \ t * \' * \'" by (simp add:\'_def power2_eq_square) have "m \ card {.. p" using n_le_p by simp finally have m_le_p: "m \ p" by simp hence t_le_m: "t \ card (set as)" using True by simp have m_ge_0: "real m > 0" using m_def True t_gt_0 by simp have "v \ real t * real p / (real m * (1 - \'))" by (simp add:v_def) also have "... \ real t * real p / (real m * (1/4))" using \'_lt_1 m_ge_0 \_range by (intro divide_left_mono mult_left_mono mult_nonneg_nonneg mult_pos_pos, simp_all add:\'_def) finally have v_ubound: "v \ 4 * real t * real p / real m" by (simp add:algebra_simps) have a_ge_1: "u \ 1" using \'_gt_0 p_gt_0 m_ge_0 t_gt_0 by (auto intro!:mult_pos_pos divide_pos_pos simp add:u_def) hence a_ge_0: "u \ 0" by simp have "real m * (1 - \') < real m" using \'_gt_0 m_ge_0 by simp also have "... \ 1 * real p" using m_le_p by simp also have "... \ real t * real p" using t_gt_0 by (intro mult_right_mono, auto) finally have " real m * (1 - \') < real t * real p" by simp hence v_gt_0: "v > 0" using mult_pos_pos m_ge_0 \'_lt_1 by (simp add:v_def) hence v_ge_1: "real_of_int v \ 1" by linarith have "real t \ real m" using True m_def by linarith also have "... < (1 + \') * real m" using \'_gt_0 m_ge_0 by force finally have a_le_p_aux: "real t < (1 + \') * real m" by simp have "u \ real t * real p / (real m * (1 + \'))+1" by (simp add:u_def) also have "... < real p + 1" using m_ge_0 \'_gt_0 a_le_p_aux a_le_p_aux p_gt_0 by (simp add: pos_divide_less_eq ac_simps) finally have "u \ real p" by (metis int_less_real_le not_less of_int_le_iff of_int_of_nat_eq) hence u_le_p: "u \ int p" by linarith have "prob {\. Q u \ \ t} \ prob {\ \ Sigma_Algebra.space M. abs (real (Q u \) - expectation (\\. real (Q u \))) \ 3 * sqrt (m * real_of_int u / p)}" proof (rule pmf_mono[OF M_def]) fix \ assume "\ \ {\. t \ Q u \}" hence t_le: "t \ Q u \" by simp have "real m * real_of_int u / real p \ real m * (real t * real p / (real m * (1 + \'))+1) / real p" using m_ge_0 p_gt_0 by (intro divide_right_mono mult_left_mono, simp_all add: u_def) also have "... = real m * real t * real p / (real m * (1+\') * real p) + real m / real p" by (simp add:distrib_left add_divide_distrib) also have "... = real t / (1+\') + real m / real p" using p_gt_0 m_ge_0 by simp also have "... \ real t / (1+\') + 1" using m_le_p p_gt_0 by (intro add_mono, auto) finally have "real m * real_of_int u / real p \ real t / (1 + \') + 1" by simp hence "3 * sqrt (real m * of_int u / real p) + real m * of_int u / real p \ 3 * sqrt (t / (1+\')+1)+(t/(1+\')+1)" by (intro add_mono mult_left_mono real_sqrt_le_mono, auto) also have "... \ 3 * sqrt (real t+1) + ((t * (1 - \' / (1+\'))) + 1)" using \'_gt_0 t_gt_0 by (intro add_mono mult_left_mono real_sqrt_le_mono) (simp_all add: pos_divide_le_eq left_diff_distrib) also have "... = 3 * sqrt (real t+1) + (t - \' * t / (1+\')) + 1" by (simp add:algebra_simps) also have "... \ 3 * sqrt (46 / \'\<^sup>2 + 1 / \'\<^sup>2) + (t - \' * t/2) + 1 / \'" using \'_gt_0 t_gt_0 \'_lt_1 add_pos_pos t_le_\' by (intro add_mono mult_left_mono real_sqrt_le_mono add_mono) (simp_all add: power_le_one pos_le_divide_eq) also have "... \ (21 / \' + (t - 45 / (2*\'))) + 1 / \'" using \'_gt_0 t_ge_\' by (intro add_mono) (simp_all add:real_sqrt_divide divide_le_cancel real_le_lsqrt pos_divide_le_eq ac_simps) also have "... \ t" using \'_gt_0 by simp also have "... \ Q u \" using t_le by simp finally have "3 * sqrt (real m * of_int u / real p) + real m * of_int u / real p \ Q u \" by simp hence " 3 * sqrt (real m * real_of_int u / real p) \ \real (Q u \) - expectation (\\. real (Q u \))\" using a_ge_0 u_le_p True by (simp add:exp_Q abs_ge_iff) thus "\ \ {\ \ Sigma_Algebra.space M. 3 * sqrt (real m * real_of_int u / real p) \ \real (Q u \) - expectation (\\. real (Q u \))\}" by (simp add: M_def) qed also have "... \ variance (\\. real (Q u \)) / (3 * sqrt (real m * of_int u / real p))\<^sup>2" using a_ge_1 p_gt_0 m_ge_0 by (intro Chebyshev_inequality, simp add:M_def, auto) also have "... \ (real m * real_of_int u / real p) / (3 * sqrt (real m * of_int u / real p))\<^sup>2" using a_ge_0 u_le_p by (intro divide_right_mono var_Q, auto) also have "... \ 1/9" using a_ge_0 by simp finally have case_1: "prob {\. Q u \ \ t} \ 1/9" by simp have case_2: "prob {\. Q v \ < t} \ 1/9" proof (cases "v \ p") case True have "prob {\. Q v \ < t} \ prob {\ \ Sigma_Algebra.space M. abs (real (Q v \) - expectation (\\. real (Q v \))) \ 3 * sqrt (m * real_of_int v / p)}" proof (rule pmf_mono[OF M_def]) fix \ assume "\ \ set_pmf (pmf_of_set space)" have "(real t + 3 * sqrt (real t / (1 - \') )) * (1 - \') = real t - \' * t + 3 * ((1-\') * sqrt( real t / (1-\') ))" by (simp add:algebra_simps) also have "... = real t - \' * t + 3 * sqrt ( (1-\')\<^sup>2 * (real t / (1-\')))" using \'_lt_1 by (subst real_sqrt_mult, simp) also have "... = real t - \' * t + 3 * sqrt ( real t * (1- \'))" by (simp add:power2_eq_square distrib_left) also have "... \ real t - 45/ \' + 3 * sqrt ( real t )" using \'_gt_0 t_ge_\' \'_lt_1 by (intro add_mono mult_left_mono real_sqrt_le_mono) (simp_all add:pos_divide_le_eq ac_simps left_diff_distrib power_le_one) also have "... \ real t - 45/ \' + 3 * sqrt ( 46 / \'\<^sup>2)" using t_le_\' \'_lt_1 \'_gt_0 by (intro add_mono mult_left_mono real_sqrt_le_mono, simp_all add:pos_divide_le_eq power_le_one) also have "... = real t + (3 * sqrt(46) - 45)/ \'" using \'_gt_0 by (simp add:real_sqrt_divide diff_divide_distrib) also have "... \ t" using \'_gt_0 by (simp add:pos_divide_le_eq real_le_lsqrt) finally have aux: "(real t + 3 * sqrt (real t / (1 - \'))) * (1 - \') \ real t " by simp assume "\ \ {\. Q v \ < t}" hence "Q v \ < t" by simp hence "real (Q v \) + 3 * sqrt (real m * real_of_int v / real p) \ real t - 1 + 3 * sqrt (real m * real_of_int v / real p)" using m_le_p p_gt_0 by (intro add_mono, auto simp add: algebra_simps add_divide_distrib) also have "... \ (real t-1) + 3 * sqrt (real m * (real t * real p / (real m * (1- \'))) / real p)" by (intro add_mono mult_left_mono real_sqrt_le_mono divide_right_mono) (auto simp add:v_def) also have "... \ real t + 3 * sqrt(real t / (1-\')) - 1" using m_ge_0 p_gt_0 by simp also have "... \ real t / (1-\')-1" using \'_lt_1 aux by (simp add: pos_le_divide_eq) also have "... \ real m * (real t * real p / (real m * (1-\'))) / real p - 1" using p_gt_0 m_ge_0 by simp also have "... \ real m * (real t * real p / (real m * (1-\'))) / real p - real m / real p" using m_le_p p_gt_0 by (intro diff_mono, auto) also have "... = real m * (real t * real p / (real m * (1-\'))-1) / real p" by (simp add: left_diff_distrib right_diff_distrib diff_divide_distrib) also have "... \ real m * real_of_int v / real p" by (intro divide_right_mono mult_left_mono, simp_all add:v_def) finally have "real (Q v \) + 3 * sqrt (real m * real_of_int v / real p) \ real m * real_of_int v / real p" by simp hence " 3 * sqrt (real m * real_of_int v / real p) \ \real (Q v \) -expectation (\\. real (Q v \))\" using v_gt_0 True by (simp add: exp_Q abs_ge_iff) thus "\ \ {\\ Sigma_Algebra.space M. 3 * sqrt (real m * real_of_int v / real p) \ \real (Q v \) - expectation (\\. real (Q v \))\}" by (simp add:M_def) qed also have "... \ variance (\\. real (Q v \)) / (3 * sqrt (real m * real_of_int v / real p))\<^sup>2" using v_gt_0 p_gt_0 m_ge_0 by (intro Chebyshev_inequality, simp add:M_def, auto) also have "... \ (real m * real_of_int v / real p) / (3 * sqrt (real m * real_of_int v / real p))\<^sup>2" using v_gt_0 True by (intro divide_right_mono var_Q, auto) also have "... = 1/9" using p_gt_0 v_gt_0 m_ge_0 by (simp add:power2_eq_square) finally show ?thesis by simp next case False have "prob {\. Q v \ < t} \ prob {\. False}" proof (rule pmf_mono[OF M_def]) fix \ assume a:"\ \ {\. Q v \ < t}" assume "\ \ set_pmf (pmf_of_set space)" hence b:"\x. x < p \ hash x \ < p" using hash_range mod_ring_carr by (simp add:M_def measure_pmf_inverse) have "t \ card (set as)" using True by simp also have "... \ Q v \" unfolding Q_def using b False as_lt_p by (intro card_mono subsetI, simp, force) also have "... < t" using a by simp finally have "False" by auto thus "\ \ {\. False}" by simp qed also have "... = 0" by auto finally show ?thesis by simp qed have "prob {\. \has_no_collision \} \ prob {\. \x \ set as. \y \ set as. x \ y \ tr_hash x \ \ real_of_int v \ tr_hash x \ = tr_hash y \}" by (rule pmf_mono[OF M_def]) (simp add:has_no_collision_def M_def, force) also have "... \ (5/2) * (real (card (set as)))\<^sup>2 * (real_of_int v)\<^sup>2 * 2 powr - real r / (real p)\<^sup>2 + 1 / real p" using collision_prob v_ge_1 by blast also have "... \ (5/2) * (real m)\<^sup>2 * (real_of_int v)\<^sup>2 * 2 powr - real r / (real p)\<^sup>2 + 1 / real p" by (intro divide_right_mono add_mono mult_right_mono mult_mono power_mono, simp_all add:m_def) also have "... \ (5/2) * (real m)\<^sup>2 * (4 * real t * real p / real m)\<^sup>2 * (2 powr - real r) / (real p)\<^sup>2 + 1 / real p" using v_def v_ge_1 v_ubound by (intro add_mono divide_right_mono mult_right_mono mult_left_mono, auto) also have "... = 40 * (real t)\<^sup>2 * (2 powr -real r) + 1 / real p" using p_gt_0 m_ge_0 t_gt_0 by (simp add:algebra_simps power2_eq_square) also have "... \ 1/18 + 1/18" using t_r_bound p_ge_18 by (intro add_mono, simp_all add: pos_le_divide_eq) also have "... = 1/9" by simp finally have case_3: "prob {\. \has_no_collision \} \ 1/9" by simp have "prob {\. real_of_rat \ * of_rat (F 0 as) < \estimate' (sketch_rv' \) - of_rat (F 0 as)\} \ prob {\. Q u \ \ t \ Q v \ < t \ \(has_no_collision \)}" proof (rule pmf_mono[OF M_def], rule ccontr) fix \ assume "\ \ set_pmf (pmf_of_set space)" assume "\ \ {\. real_of_rat \ * real_of_rat (F 0 as) < \estimate' (sketch_rv' \) - real_of_rat (F 0 as)\}" hence est: "real_of_rat \ * real_of_rat (F 0 as) < \estimate' (sketch_rv' \) - real_of_rat (F 0 as)\" by simp assume "\ \ {\. t \ Q u \ \ Q v \ < t \ \ has_no_collision \}" hence "\( t \ Q u \ \ Q v \ < t \ \ has_no_collision \)" by simp hence lb: "Q u \ < t" and ub: "Q v \ \ t" and no_col: "has_no_collision \" by simp+ define y where "y = nth_mset (t-1) {#int (hash x \). x \# mset_set (set as)#}" define y' where "y' = nth_mset (t-1) {#tr_hash x \. x \# mset_set (set as)#}" have rank_t_lb: "u \ y" unfolding y_def using True t_gt_0 lb by (intro nth_mset_bound_left, simp_all add:count_less_def swap_filter_image Q_def) have rank_t_ub: "y \ v - 1" unfolding y_def using True t_gt_0 ub by (intro nth_mset_bound_right, simp_all add:Q_def swap_filter_image count_le_def) have y_ge_0: "real_of_int y \ 0" using rank_t_lb a_ge_0 by linarith have "mono (\x. truncate_down r (real_of_int x))" by (metis truncate_down_mono mono_def of_int_le_iff) hence y'_eq: "y' = truncate_down r y" unfolding y_def y'_def using True t_gt_0 by (subst nth_mset_commute_mono[where f="(\x. truncate_down r (of_int x))"]) (simp_all add: multiset.map_comp comp_def tr_hash_def) have "real_of_int u * (1 - 2 powr -real r) \ real_of_int y * (1 - 2 powr (-real r))" using rank_t_lb of_int_le_iff two_pow_r_le_1 by (intro mult_right_mono, auto) also have "... \ y'" using y'_eq truncate_down_pos[OF y_ge_0] by simp finally have rank_t_lb': "u * (1 - 2 powr -real r) \ y'" by simp have "y' \ real_of_int y" by (subst y'_eq, rule truncate_down_le, simp) also have "... \ real_of_int (v-1)" using rank_t_ub of_int_le_iff by blast finally have rank_t_ub': "y' \ v-1" by simp have "0 < u * (1-2 powr -real r)" using a_ge_1 two_pow_r_le_1 by (intro mult_pos_pos, auto) hence y'_pos: "y' > 0" using rank_t_lb' by linarith have no_col': "\x. x \ y' \ count {#tr_hash x \. x \# mset_set (set as)#} x \ 1" using rank_t_ub' no_col by (simp add:vimage_def card_le_Suc0_iff_eq count_image_mset has_no_collision_def) force have h_1: "Max (sketch_rv' \) = y'" using True t_gt_0 no_col' by (simp add:sketch_rv'_def y'_def nth_mset_max) have "card (sketch_rv' \) = card (least ((t-1)+1) (set_mset {#tr_hash x \. x \# mset_set (set as)#}))" using t_gt_0 by (simp add:sketch_rv'_def) also have "... = (t-1) +1" using True t_gt_0 no_col' by (intro nth_mset_max(2), simp_all add:y'_def) also have "... = t" using t_gt_0 by simp finally have "card (sketch_rv' \) = t" by simp hence h_3: "estimate' (sketch_rv' \) = real t * real p / y'" using h_1 by (simp add:estimate'_def) have "(real t) * real p \ (1 + \') * real m * ((real t) * real p / (real m * (1 + \')))" using \'_lt_1 m_def True t_gt_0 \'_gt_0 by auto also have "... \ (1+\') * m * u" using \'_gt_0 by (intro mult_left_mono, simp_all add:u_def) also have "... < ((1 + real_of_rat \)*(1-real_of_rat \/8)) * m * u" using True m_def t_gt_0 a_ge_1 \_range by (intro mult_strict_right_mono, auto simp add:\'_def right_diff_distrib) also have "... \ ((1 + real_of_rat \)*(1-2 powr (-r))) * m * u" using r_le_\ \_range a_ge_0 by (intro mult_right_mono mult_left_mono, auto) also have "... = (1 + real_of_rat \) * m * (u * (1-2 powr -real r))" by simp also have "... \ (1 + real_of_rat \) * m * y'" using \_range by (intro mult_left_mono rank_t_lb', simp) finally have "real t * real p < (1 + real_of_rat \) * m * y'" by simp hence f_1: "estimate' (sketch_rv' \) < (1 + real_of_rat \) * m" using y'_pos by (simp add: h_3 pos_divide_less_eq) have "(1 - real_of_rat \) * m * y' \ (1 - real_of_rat \) * m * v" using \_range rank_t_ub' y'_pos by (intro mult_mono rank_t_ub', simp_all) also have "... = (1-real_of_rat \) * (real m * v)" by simp also have "... < (1-\') * (real m * v)" using \_range m_ge_0 v_ge_1 by (intro mult_strict_right_mono mult_pos_pos, simp_all add:\'_def) also have "... \ (1-\') * (real m * (real t * real p / (real m * (1-\'))))" using \'_gt_0 \'_lt_1 by (intro mult_left_mono, auto simp add:v_def) also have "... = real t * real p" using \'_gt_0 \'_lt_1 t_gt_0 p_gt_0 m_ge_0 by auto finally have "(1 - real_of_rat \) * m * y' < real t * real p" by simp hence f_2: "estimate' (sketch_rv' \) > (1 - real_of_rat \) * m" using y'_pos by (simp add: h_3 pos_less_divide_eq) have "abs (estimate' (sketch_rv' \) - real_of_rat (F 0 as)) < real_of_rat \ * (real_of_rat (F 0 as))" using f_1 f_2 by (simp add:abs_less_iff algebra_simps m_eq_F_0) thus "False" using est by linarith qed also have "... \ 1/9 + (1/9 + 1/9)" by (intro pmf_add_2[OF M_def] case_1 case_2 case_3) also have "... = 1/3" by simp finally show ?thesis by simp next case False have "prob {\. real_of_rat \ * of_rat (F 0 as) < \estimate' (sketch_rv' \) - of_rat (F 0 as)\} \ prob {\. \x \ set as. \y \ set as. x \ y \ tr_hash x \ \ real p \ tr_hash x \ = tr_hash y \}" proof (rule pmf_mono[OF M_def]) fix \ assume a:"\ \ {\. real_of_rat \ * real_of_rat (F 0 as) < \estimate' (sketch_rv' \) - real_of_rat (F 0 as)\}" assume b:"\ \ set_pmf (pmf_of_set space)" have c: "card (set as) < t" using False by auto hence "card ((\x. tr_hash x \) ` set as) < t" using card_image_le order_le_less_trans by blast hence d:"card (sketch_rv' \) = card ((\x. tr_hash x \) ` (set as))" by (simp add:sketch_rv'_def card_least) have "card (sketch_rv' \) < t" by (metis List.finite_set c d card_image_le order_le_less_trans) hence "estimate' (sketch_rv' \) = card (sketch_rv' \)" by (simp add:estimate'_def) hence "card (sketch_rv' \) \ real_of_rat (F 0 as)" using a \_range by simp (metis abs_zero cancel_comm_monoid_add_class.diff_cancel of_nat_less_0_iff pos_prod_lt zero_less_of_rat_iff) hence "card (sketch_rv' \) \ card (set as)" using m_def m_eq_F_0 by linarith hence "\inj_on (\x. tr_hash x \) (set as)" using card_image d by auto moreover have "tr_hash x \ \ real p" if a:"x \ set as" for x proof - have "hash x \ < p" using hash_range as_lt_p a b by (simp add:mod_ring_carr M_def) thus "tr_hash x \ \ real p" using truncate_down_le by (simp add:tr_hash_def) qed ultimately show "\ \ {\. \x \ set as. \y \ set as. x \ y \ tr_hash x \ \ real p \ tr_hash x \ = tr_hash y \}" by (simp add:inj_on_def, blast) qed also have "... \ (5/2) * (real (card (set as)))\<^sup>2 * (real p)\<^sup>2 * 2 powr - real r / (real p)\<^sup>2 + 1 / real p" using p_gt_0 by (intro collision_prob, auto) also have "... = (5/2) * (real (card (set as)))\<^sup>2 * 2 powr (- real r) + 1 / real p" using p_gt_0 by (simp add:ac_simps power2_eq_square) also have "... \ (5/2) * (real t)\<^sup>2 * 2 powr (-real r) + 1 / real p" using False by (intro add_mono mult_right_mono mult_left_mono power_mono, auto) also have "... \ 1/6 + 1/6" using t_r_bound p_ge_18 by (intro add_mono, simp_all) also have "... \ 1/3" by simp finally show ?thesis by simp qed private lemma median_bounds: "\

(\ in measure_pmf \\<^sub>0. \median s (\i. estimate (sketch_rv (\ i))) - F 0 as\ \ \ * F 0 as) \ 1 - real_of_rat \" proof - have "strict_mono_on real_of_float A" for A by (meson less_float.rep_eq strict_mono_onI) hence real_g_2: "\\. sketch_rv' \ = real_of_float ` sketch_rv \" by (simp add: sketch_rv'_def sketch_rv_def tr_hash_def least_mono_commute image_comp) moreover have "inj_on real_of_float A" for A using real_of_float_inject by (simp add:inj_on_def) ultimately have card_eq: "\\. card (sketch_rv \) = card (sketch_rv' \)" using real_g_2 by (auto intro!: card_image[symmetric]) have "Max (sketch_rv' \) = real_of_float (Max (sketch_rv \))" if a:"card (sketch_rv' \) \ t" for \ proof - have "mono real_of_float" using less_eq_float.rep_eq mono_def by blast moreover have "finite (sketch_rv \)" by (simp add:sketch_rv_def least_def) moreover have " sketch_rv \ \ {}" using card_eq[symmetric] card_gt_0_iff t_gt_0 a by (simp, force) ultimately show ?thesis by (subst mono_Max_commute[where f="real_of_float"], simp_all add:real_g_2) qed hence real_g: "\\. estimate' (sketch_rv' \) = real_of_rat (estimate (sketch_rv \))" by (simp add:estimate_def estimate'_def card_eq of_rat_divide of_rat_mult of_rat_add real_of_rat_of_float) have indep: "prob_space.indep_vars (measure_pmf \\<^sub>0) (\_. borel) (\i \. estimate' (sketch_rv' (\ i))) {0..\<^sub>0_def by (rule indep_vars_restrict_intro', auto simp add:restrict_dfl_def lessThan_atLeast0) moreover have "- (18 * ln (real_of_rat \)) \ real s" using of_nat_ceiling by (simp add:s_def) blast moreover have "i < s \ measure \\<^sub>0 {\. of_rat \ * of_rat (F 0 as) < \estimate' (sketch_rv' (\ i)) - of_rat (F 0 as)\} \ 1/3" for i using estimate'_bounds unfolding \\<^sub>0_def M_def by (subst prob_prod_pmf_slice, simp_all) ultimately have "1-real_of_rat \ \ \

(\ in measure_pmf \\<^sub>0. \median s (\i. estimate' (sketch_rv' (\ i))) - real_of_rat (F 0 as)\ \ real_of_rat \ * real_of_rat (F 0 as))" using \_range prob_space_measure_pmf by (intro prob_space.median_bound_2) auto also have "... = \

(\ in measure_pmf \\<^sub>0. \median s (\i. estimate (sketch_rv (\ i))) - F 0 as\ \ \ * F 0 as)" using s_gt_0 median_rat[symmetric] real_g by (intro arg_cong2[where f="measure"]) (simp_all add:of_rat_diff[symmetric] of_rat_mult[symmetric] of_rat_less_eq) finally show "\

(\ in measure_pmf \\<^sub>0. \median s (\i. estimate (sketch_rv (\ i))) - F 0 as\ \ \ * F 0 as) \ 1-real_of_rat \" by blast qed lemma f0_alg_correct': "\

(\ in measure_pmf result. \\ - F 0 as\ \ \ * F 0 as) \ 1 - of_rat \" proof - have f0_result_elim: "\x. f0_result (s, t, p, r, x, \i\{..i. estimate (sketch_rv (x i))))" by (simp add:estimate_def, rule median_cong, simp) have "result = map_pmf (\x. (s, t, p, r, x, \i\{..\<^sub>0 \ f0_result" by (subst result_def, subst f0_alg_sketch, simp) also have "... = \\<^sub>0 \ (\x. return_pmf (s, t, p, r, x, \i\{.. f0_result" by (simp add:t_def p_def r_def s_def map_pmf_def) also have "... = \\<^sub>0 \ (\x. return_pmf (median s (\i. estimate (sketch_rv (x i)))))" by (subst bind_assoc_pmf, subst bind_return_pmf, subst f0_result_elim) simp finally have a:"result = \\<^sub>0 \ (\x. return_pmf (median s (\i. estimate (sketch_rv (x i)))))" by simp show ?thesis using median_bounds by (simp add: a map_pmf_def[symmetric]) qed private lemma f_subset: assumes "g ` A \ h ` B" shows "(\x. f (g x)) ` A \ (\x. f (h x)) ` B" using assms by auto lemma f0_exact_space_usage': defines "\ \ fold (\a state. state \ f0_update a) as (f0_init \ \ n)" shows "AE \ in \. bit_count (encode_f0_state \) \ f0_space_usage (n, \, \)" proof - have log_2_4: "log 2 4 = 2" by (metis log2_of_power_eq mult_2 numeral_Bit0 of_nat_numeral power2_eq_square) have a: "bit_count (F\<^sub>e (float_of (truncate_down r y))) \ ereal (12 + 4 * real r + 2 * log 2 (log 2 (n+13)))" if a_1:"y \ {.. 1") case True have aux_1: " 0 < 2 + log 2 (real y)" using True by (intro add_pos_nonneg, auto) have aux_2: "0 < 2 + log 2 (real p)" using p_gt_1 by (intro add_pos_nonneg, auto) have "bit_count (F\<^sub>e (float_of (truncate_down r y))) \ ereal (10 + 4 * real r + 2 * log 2 (2 + \log 2 \real y\\))" by (rule truncate_float_bit_count) also have "... = ereal (10 + 4 * real r + 2 * log 2 (2 + (log 2 (real y))))" using True by simp also have "... \ ereal (10 + 4 * real r + 2 * log 2 (2 + log 2 p))" using aux_1 aux_2 True p_gt_0 a_1 by simp also have "... \ ereal (10 + 4 * real r + 2 * log 2 (log 2 4 + log 2 (2 * n + 40)))" using log_2_4 p_le_n p_gt_0 by (intro ereal_mono add_mono mult_left_mono log_mono of_nat_mono add_pos_nonneg, auto) also have "... = ereal (10 + 4 * real r + 2 * log 2 (log 2 (8 * n + 160)))" by (simp add:log_mult[symmetric]) also have "... \ ereal (10 + 4 * real r + 2 * log 2 (log 2 ((n+13) powr 2)))" by (intro ereal_mono add_mono mult_left_mono log_mono of_nat_mono add_pos_nonneg) (auto simp add:power2_eq_square algebra_simps) also have "... = ereal (10 + 4 * real r + 2 * log 2 (log 2 4 * log 2 (n + 13)))" by (subst log_powr, simp_all add:log_2_4) also have "... = ereal (12 + 4 * real r + 2 * log 2 (log 2 (n + 13)))" by (subst log_mult, simp_all add:log_2_4) finally show ?thesis by simp next case False hence "y = 0" using a_1 by simp then show ?thesis by (simp add:float_bit_count_zero) qed have "bit_count (encode_f0_state (s, t, p, r, x, \i\{.. f0_space_usage (n, \, \)" if b: "x \ {..\<^sub>E space" for x proof - have c: "x \ extensional {.. (\k. float_of (truncate_down r k)) ` {.. (\xa. float_of (truncate_down r (hash xa (x y)))) ` set as" using least_subset by (auto simp add:sketch_rv_def tr_hash_def) also have "... \ (\k. float_of (truncate_down r (real k))) ` {..y. y < s \ finite (sketch_rv (x y))" unfolding sketch_rv_def by (rule finite_subset[OF least_subset], simp) moreover have card_sketch: "\y. y < s \ card (sketch_rv (x y)) \ t " by (simp add:sketch_rv_def card_least) moreover have "\y z. y < s \ z \ sketch_rv (x y) \ bit_count (F\<^sub>e z) \ ereal (12 + 4 * real r + 2 * log 2 (log 2 (real n + 13)))" using a d by auto ultimately have e: "\y. y < s \ bit_count (S\<^sub>e F\<^sub>e (sketch_rv (x y))) \ ereal (real t) * (ereal (12 + 4 * real r + 2 * log 2 (log 2 (real (n + 13)))) + 1) + 1" using float_encoding by (intro set_bit_count_est, auto) have f: "\y. y < s \ bit_count (P\<^sub>e p 2 (x y)) \ ereal (real 2 * (log 2 (real p) + 1))" using p_gt_1 b by (intro bounded_degree_polynomial_bit_count) (simp_all add:space_def PiE_def Pi_def) have "bit_count (encode_f0_state (s, t, p, r, x, \i\{..e s) + bit_count (N\<^sub>e t) + bit_count (N\<^sub>e p) + bit_count (N\<^sub>e r) + bit_count (([0..\<^sub>e P\<^sub>e p 2) x) + bit_count (([0..\<^sub>e S\<^sub>e F\<^sub>e) (\i\{.. ereal (2* log 2 (real s + 1) + 1) + ereal (2* log 2 (real t + 1) + 1) + ereal (2* log 2 (real p + 1) + 1) + ereal (2 * log 2 (real r + 1) + 1) + (ereal (real s) * (ereal (real 2 * (log 2 (real p) + 1)))) + (ereal (real s) * ((ereal (real t) * (ereal (12 + 4 * real r + 2 * log 2 (log 2 (real (n + 13)))) + 1) + 1)))" using c e f by (intro add_mono exp_golomb_bit_count fun_bit_count_est[where xs="[0.. ereal ( 4 + 2 * log 2 (real s + 1) + 2 * log 2 (real t + 1) + 2 * log 2 (2 * (21 + real n)) + 2 * log 2 (real r + 1) + real s * (3 + 2 * log 2 (2 * (21 + real n)) + real t * (13 + (4 * real r + 2 * log 2 (log 2 (real n + 13))))))" using p_le_n p_gt_0 by (intro ereal_mono add_mono mult_left_mono, auto) also have "... = ereal (6 + 2 * log 2 (real s + 1) + 2 * log 2 (real t + 1) + 2 * log 2 (21 + real n) + 2 * log 2 (real r + 1) + real s * (5 + 2 * log 2 (21 + real n) + real t * (13 + (4 * real r + 2 * log 2 (log 2 (real n + 13))))))" by (subst (1 2) log_mult, auto) also have "... \ f0_space_usage (n, \, \)" by (simp add:s_def[symmetric] r_def[symmetric] t_def[symmetric] Let_def) (simp add:algebra_simps) finally show "bit_count (encode_f0_state (s, t, p, r, x, \i\{.. f0_space_usage (n, \, \)" by simp qed hence "\x. x \ set_pmf \\<^sub>0 \ bit_count (encode_f0_state (s, t, p, r, x, \i\{.. ereal (f0_space_usage (n, \, \))" by (simp add:\\<^sub>0_def set_prod_pmf del:f0_space_usage.simps) hence "\y. y \ set_pmf \ \ bit_count (encode_f0_state y) \ ereal (f0_space_usage (n, \, \))" by (simp add: \_def f0_alg_sketch del:f0_space_usage.simps f0_init.simps) (metis (no_types, lifting) image_iff pmf.set_map) thus ?thesis by (simp add: AE_measure_pmf_iff del:f0_space_usage.simps) qed end text \Main results of this section:\ theorem f0_alg_correct: assumes "\ \ {0<..<1}" assumes "\ \ {0<..<1}" assumes "set as \ {.. \ fold (\a state. state \ f0_update a) as (f0_init \ \ n) \ f0_result" shows "\

(\ in measure_pmf \. \\ - F 0 as\ \ \ * F 0 as) \ 1 - of_rat \" using f0_alg_correct'[OF assms(1-3)] unfolding \_def by blast theorem f0_exact_space_usage: assumes "\ \ {0<..<1}" assumes "\ \ {0<..<1}" assumes "set as \ {.. \ fold (\a state. state \ f0_update a) as (f0_init \ \ n)" shows "AE \ in \. bit_count (encode_f0_state \) \ f0_space_usage (n, \, \)" using f0_exact_space_usage'[OF assms(1-3)] unfolding \_def by blast -theorem f0_asympotic_space_complexity: +theorem f0_asymptotic_space_complexity: "f0_space_usage \ O[at_top \\<^sub>F at_right 0 \\<^sub>F at_right 0](\(n, \, \). ln (1 / of_rat \) * (ln (real n) + 1 / (of_rat \)\<^sup>2 * (ln (ln (real n)) + ln (1 / of_rat \))))" (is "_ \ O[?F](?rhs)") proof - define n_of :: "nat \ rat \ rat \ nat" where "n_of = (\(n, \, \). n)" define \_of :: "nat \ rat \ rat \ rat" where "\_of = (\(n, \, \). \)" define \_of :: "nat \ rat \ rat \ rat" where "\_of = (\(n, \, \). \)" define t_of where "t_of = (\x. nat \80 / (real_of_rat (\_of x))\<^sup>2\)" define s_of where "s_of = (\x. nat \-(18 * ln (real_of_rat (\_of x)))\)" define r_of where "r_of = (\x. nat (4 * \log 2 (1 / real_of_rat (\_of x))\ + 23))" define g where "g = (\x. ln (1 / of_rat (\_of x)) * (ln (real (n_of x)) + 1 / (of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / of_rat (\_of x)))))" have evt: "(\x. 0 < real_of_rat (\_of x) \ 0 < real_of_rat (\_of x) \ 1/real_of_rat (\_of x) \ \ \ 1/real_of_rat (\_of x) \ \ \ real (n_of x) \ n \ P x) \ eventually P ?F" (is "(\x. ?prem x \ _) \ _") for \ \ n P apply (rule eventually_mono[where P="?prem" and Q="P"]) apply (simp add:\_of_def case_prod_beta' \_of_def n_of_def) apply (intro eventually_conj eventually_prod1' eventually_prod2' sequentially_inf eventually_at_right_less inv_at_right_0_inf) by (auto simp add:prod_filter_eq_bot) have exp_pos: "exp k \ real x \ x > 0" for k x using exp_gt_zero gr0I by force have exp_gt_1: "exp 1 \ (1::real)" by simp have 1: "(\_. 1) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" by (auto intro!:landau_o.big_mono evt[where \="exp 1"] iffD2[OF ln_ge_iff] simp add:abs_ge_iff) have 2: "(\_. 1) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" by (auto intro!:landau_o.big_mono evt[where \="exp 1"] iffD2[OF ln_ge_iff] simp add:abs_ge_iff) have 3: " (\x. 1) \ O[?F](\x. ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x)))" using exp_pos by (intro landau_sum_2 2 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff], auto) have 4: "(\_. 1) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" using one_le_power by (intro landau_o.big_mono evt[where \="1"], auto simp add:power_one_over[symmetric]) have "(\x. 80 * (1 / (real_of_rat (\_of x))\<^sup>2)) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" by (subst landau_o.big.cmult_in_iff, auto) hence 5: "(\x. real (t_of x)) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" unfolding t_of_def by (intro landau_real_nat landau_ceil 4, auto) have "(\x. ln (real_of_rat (\_of x))) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" by (intro landau_o.big_mono evt[where \="1"], auto simp add:ln_div) hence 6: "(\x. real (s_of x)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" unfolding s_of_def by (intro landau_nat_ceil 1, simp) have 7: " (\x. 1) \ O[?F](\x. ln (real (n_of x)))" using exp_pos by (auto intro!: landau_o.big_mono evt[where n="exp 1"] iffD2[OF ln_ge_iff] simp: abs_ge_iff) have 8:" (\_. 1) \ O[?F](\x. ln (real (n_of x)) + 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" using order_trans[OF exp_gt_1] exp_pos by (intro landau_sum_1 7 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff] mult_nonneg_nonneg add_nonneg_nonneg) auto have "(\x. ln (real (s_of x) + 1)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" by (intro landau_ln_3 sum_in_bigo 6 1, simp) hence 9: "(\x. log 2 (real (s_of x) + 1)) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1 8, auto simp:log_def) have 10: "(\x. 1) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1 8 1) have "(\x. ln (real (t_of x) + 1)) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" using 5 by (intro landau_o.big_mult_1 3 landau_ln_3 sum_in_bigo 4, simp_all) hence " (\x. log 2 (real (t_of x) + 1)) \ O[?F](\x. ln (real (n_of x)) + 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" using order_trans[OF exp_gt_1] exp_pos by (intro landau_sum_2 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff] mult_nonneg_nonneg add_nonneg_nonneg) (auto simp add:log_def) hence 11: "(\x. log 2 (real (t_of x) + 1)) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1' 1, auto) have " (\x. 1) \ O[?F](\x. real (n_of x))" by (intro landau_o.big_mono evt[where n="1"], auto) hence "(\x. ln (real (n_of x) + 21)) \ O[?F](\x. ln (real (n_of x)))" by (intro landau_ln_2[where a="2"] evt[where n="2"] sum_in_bigo, auto) hence 12: "(\x. log 2 (real (n_of x) + 21)) \ O[?F](g)" unfolding g_def using exp_pos order_trans[OF exp_gt_1] by (intro landau_o.big_mult_1' 1 landau_sum_1 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff] mult_nonneg_nonneg add_nonneg_nonneg) (auto simp add:log_def) have "(\x. ln (1 / real_of_rat (\_of x))) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" by (intro landau_ln_3 evt[where \="1"] landau_o.big_mono) (auto simp add:power_one_over[symmetric] self_le_power) hence " (\x. real (nat (4*\log 2 (1 / real_of_rat (\_of x))\+23))) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" using 4 by (auto intro!: landau_real_nat sum_in_bigo landau_ceil simp:log_def) hence " (\x. ln (real (r_of x) + 1)) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" unfolding r_of_def by (intro landau_ln_3 sum_in_bigo 4, auto) hence " (\x. log 2 (real (r_of x) + 1)) \ O[?F](\x. (1 / (real_of_rat (\_of x))\<^sup>2) * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" by (intro landau_o.big_mult_1 3, simp add:log_def) hence " (\x. log 2 (real (r_of x) + 1)) \ O[?F](\x. ln (real (n_of x)) + 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" using exp_pos order_trans[OF exp_gt_1] by (intro landau_sum_2 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff] add_nonneg_nonneg mult_nonneg_nonneg) (auto) hence 13: "(\x. log 2 (real (r_of x) + 1)) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1' 1, auto) have 14: "(\x. 1) \ O[?F](\x. real (n_of x))" by (intro landau_o.big_mono evt[where n="1"], auto) have "(\x. ln (real (n_of x) + 13)) \ O[?F](\x. ln (real (n_of x)))" using 14 by (intro landau_ln_2[where a="2"] evt[where n="2"] sum_in_bigo, auto) hence "(\x. ln (log 2 (real (n_of x) + 13))) \ O[?F](\x. ln (ln (real (n_of x))))" using exp_pos by (intro landau_ln_2[where a="2"] iffD2[OF ln_ge_iff] evt[where n="exp 2"]) (auto simp add:log_def) hence "(\x. log 2 (log 2 (real (n_of x) + 13))) \ O[?F](\x. ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x)))" using exp_pos by (intro landau_sum_1 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff]) (auto simp add:log_def) moreover have "(\x. real (r_of x)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" unfolding r_of_def using 2 by (auto intro!: landau_real_nat sum_in_bigo landau_ceil simp:log_def) hence "(\x. real (r_of x)) \ O[?F](\x. ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x)))" using exp_pos by (intro landau_sum_2 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff], auto) ultimately have 15:" (\x. real (t_of x) * (13 + 4 * real (r_of x) + 2 * log 2 (log 2 (real (n_of x) + 13)))) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" using 5 3 by (intro landau_o.mult sum_in_bigo, auto) have "(\x. 5 + 2 * log 2 (21 + real (n_of x)) + real (t_of x) * (13 + 4 * real (r_of x) + 2 * log 2 (log 2 (real (n_of x) + 13)))) \ O[?F](\x. ln (real (n_of x)) + 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" proof - have "\\<^sub>F x in ?F. 0 \ ln (real (n_of x))" by (intro evt[where n="1"] ln_ge_zero, auto) moreover have "\\<^sub>F x in ?F. 0 \ 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x)))" using exp_pos by (intro evt[where n="exp 1" and \="1"] mult_nonneg_nonneg add_nonneg_nonneg ln_ge_zero iffD2[OF ln_ge_iff]) auto moreover have " (\x. ln (21 + real (n_of x))) \ O[?F](\x. ln (real (n_of x)))" using 14 by (intro landau_ln_2[where a="2"] sum_in_bigo evt[where n="2"], auto) hence "(\x. 5 + 2 * log 2 (21 + real (n_of x))) \ O[?F](\x. ln (real (n_of x)))" using 7 by (intro sum_in_bigo, auto simp add:log_def) ultimately show ?thesis using 15 by (rule landau_sum) qed hence 16: "(\x. real (s_of x) * (5 + 2 * log 2 (21 + real (n_of x)) + real (t_of x) * (13 + 4 * real (r_of x) + 2 * log 2 (log 2 (real (n_of x) + 13))))) \ O[?F](g)" unfolding g_def by (intro landau_o.mult 6, auto) have "f0_space_usage = (\x. f0_space_usage (n_of x, \_of x, \_of x))" by (simp add:case_prod_beta' n_of_def \_of_def \_of_def) also have "... \ O[?F](g)" using 9 10 11 12 13 16 by (simp add:fun_cong[OF s_of_def[symmetric]] fun_cong[OF t_of_def[symmetric]] fun_cong[OF r_of_def[symmetric]] Let_def) (intro sum_in_bigo, auto) also have "... = O[?F](?rhs)" by (simp add:case_prod_beta' g_def n_of_def \_of_def \_of_def) finally show ?thesis by simp qed end diff --git a/thys/Frequency_Moments/Frequency_Moment_2.thy b/thys/Frequency_Moments/Frequency_Moment_2.thy --- a/thys/Frequency_Moments/Frequency_Moment_2.thy +++ b/thys/Frequency_Moments/Frequency_Moment_2.thy @@ -1,720 +1,720 @@ section \Frequency Moment $2$\ theory Frequency_Moment_2 imports Universal_Hash_Families.Carter_Wegman_Hash_Family Equivalence_Relation_Enumeration.Equivalence_Relation_Enumeration Landau_Ext Median_Method.Median Product_PMF_Ext Universal_Hash_Families.Field Frequency_Moments begin text \This section contains a formalization of the algorithm for the second frequency moment. It is based on the algorithm described in \cite[\textsection 2.2]{alon1999}. The only difference is that the algorithm is adapted to work with prime field of odd order, which greatly reduces the implementation complexity.\ fun f2_hash where "f2_hash p h k = (if even (ring.hash (mod_ring p) k h) then int p - 1 else - int p - 1)" type_synonym f2_state = "nat \ nat \ nat \ (nat \ nat \ nat list) \ (nat \ nat \ int)" fun f2_init :: "rat \ rat \ nat \ f2_state pmf" where "f2_init \ \ n = do { let s\<^sub>1 = nat \6 / \\<^sup>2\; let s\<^sub>2 = nat \-(18 * ln (real_of_rat \))\; let p = prime_above (max n 3); h \ prod_pmf ({..1} \ {..2}) (\_. pmf_of_set (bounded_degree_polynomials (mod_ring p) 4)); return_pmf (s\<^sub>1, s\<^sub>2, p, h, (\_ \ {..1} \ {..2}. (0 :: int))) }" fun f2_update :: "nat \ f2_state \ f2_state pmf" where "f2_update x (s\<^sub>1, s\<^sub>2, p, h, sketch) = return_pmf (s\<^sub>1, s\<^sub>2, p, h, \i \ {..1} \ {..2}. f2_hash p (h i) x + sketch i)" fun f2_result :: "f2_state \ rat pmf" where "f2_result (s\<^sub>1, s\<^sub>2, p, h, sketch) = return_pmf (median s\<^sub>2 (\i\<^sub>2 \ {..2}. (\i\<^sub>1\{..1} . (rat_of_int (sketch (i\<^sub>1, i\<^sub>2)))\<^sup>2) / (((rat_of_nat p)\<^sup>2-1) * rat_of_nat s\<^sub>1)))" fun f2_space_usage :: "(nat \ nat \ rat \ rat) \ real" where "f2_space_usage (n, m, \, \) = ( let s\<^sub>1 = nat \6 / \\<^sup>2 \ in let s\<^sub>2 = nat \-(18 * ln (real_of_rat \))\ in 3 + 2 * log 2 (s\<^sub>1 + 1) + 2 * log 2 (s\<^sub>2 + 1) + 2 * log 2 (9 + 2 * real n) + s\<^sub>1 * s\<^sub>2 * (5 + 4*log 2 (8 + 2 * real n) + 2 * log 2 (real m * (18 + 4 * real n) + 1 )))" definition encode_f2_state :: "f2_state \ bool list option" where "encode_f2_state = N\<^sub>e \\<^sub>e (\s\<^sub>1. N\<^sub>e \\<^sub>e (\s\<^sub>2. N\<^sub>e \\<^sub>e (\p. (List.product [0..1] [0..2] \\<^sub>e P\<^sub>e p 4) \\<^sub>e (List.product [0..1] [0..2] \\<^sub>e I\<^sub>e))))" lemma "inj_on encode_f2_state (dom encode_f2_state)" proof - have " is_encoding encode_f2_state" unfolding encode_f2_state_def by (intro dependent_encoding exp_golomb_encoding fun_encoding list_encoding int_encoding poly_encoding) thus ?thesis by (rule encoding_imp_inj) qed context fixes \ \ :: rat fixes n :: nat fixes as :: "nat list" fixes result assumes \_range: "\ \ {0<..<1}" assumes \_range: "\ > 0" assumes as_range: "set as \ {.. fold (\a state. state \ f2_update a) as (f2_init \ \ n) \ f2_result" begin private definition s\<^sub>1 where "s\<^sub>1 = nat \6 / \\<^sup>2\" lemma s1_gt_0: "s\<^sub>1 > 0" using \_range by (simp add:s\<^sub>1_def) private definition s\<^sub>2 where "s\<^sub>2 = nat \-(18* ln (real_of_rat \))\" lemma s2_gt_0: "s\<^sub>2 > 0" using \_range by (simp add:s\<^sub>2_def) private definition p where "p = prime_above (max n 3)" lemma p_prime: "Factorial_Ring.prime p" unfolding p_def using prime_above_prime by blast lemma p_ge_3: "p \ 3" unfolding p_def by (meson max.boundedE prime_above_lower_bound) lemma p_gt_0: "p > 0" using p_ge_3 by linarith lemma p_gt_1: "p > 1" using p_ge_3 by simp lemma p_ge_n: "p \ n" unfolding p_def by (meson max.boundedE prime_above_lower_bound ) interpretation carter_wegman_hash_family "mod_ring p" 4 using carter_wegman_hash_familyI[OF mod_ring_is_field mod_ring_finite] using p_prime by auto definition sketch where "sketch = fold (\a state. state \ f2_update a) as (f2_init \ \ n)" private definition \ where"\ = prod_pmf ({..1} \ {..2}) (\_. pmf_of_set space)" private definition \\<^sub>p where"\\<^sub>p = measure_pmf \" private definition sketch_rv where "sketch_rv \ = of_int (sum_list (map (f2_hash p \) as))^2" private definition mean_rv where "mean_rv \ = (\i\<^sub>2. (\i\<^sub>1 = 0..1. sketch_rv (\ (i\<^sub>1, i\<^sub>2))) / (((of_nat p)\<^sup>2 - 1) * of_nat s\<^sub>1))" private definition result_rv where "result_rv \ = median s\<^sub>2 (\i\<^sub>2\{..2}. mean_rv \ i\<^sub>2)" lemma mean_rv_alg_sketch: "sketch = \ \ (\\. return_pmf (s\<^sub>1, s\<^sub>2, p, \, \i \ {..1} \ {..2}. sum_list (map (f2_hash p (\ i)) as)))" proof - have "sketch = fold (\a state. state \ f2_update a) as (f2_init \ \ n)" by (simp add:sketch_def) also have "... = \ \ (\\. return_pmf (s\<^sub>1, s\<^sub>2, p, \, \i \ {..1} \ {..2}. sum_list (map (f2_hash p (\ i)) as)))" proof (induction as rule:rev_induct) case Nil then show ?case by (simp add:s\<^sub>1_def s\<^sub>2_def space_def p_def[symmetric] \_def restrict_def Let_def) next case (snoc a as) have "fold (\a state. state \ f2_update a) (as @ [a]) (f2_init \ \ n) = \ \ (\\. return_pmf (s\<^sub>1, s\<^sub>2, p, \, \s \ {..1} \ {..2}. (\x \ as. f2_hash p (\ s) x)) \ f2_update a)" using snoc by (simp add: bind_assoc_pmf restrict_def del:f2_hash.simps f2_init.simps) also have "... = \ \ (\\. return_pmf (s\<^sub>1, s\<^sub>2, p, \, \i \ {..1} \ {..2}. (\x \ as@[a]. f2_hash p (\ i) x)))" by (subst bind_return_pmf) (simp add: add.commute del:f2_hash.simps cong:restrict_cong) finally show ?case by blast qed finally show ?thesis by auto qed lemma distr: "result = map_pmf result_rv \" proof - have "result = sketch \ f2_result" by (simp add:result_def sketch_def) also have "... = \ \ (\x. f2_result (s\<^sub>1, s\<^sub>2, p, x, \i\{..1} \ {..2}. sum_list (map (f2_hash p (x i)) as)))" by (simp add: mean_rv_alg_sketch bind_assoc_pmf bind_return_pmf) also have "... = map_pmf result_rv \" by (simp add:map_pmf_def result_rv_def mean_rv_def sketch_rv_def lessThan_atLeast0 cong:restrict_cong) finally show ?thesis by simp qed private lemma f2_hash_pow_exp: assumes "k < p" shows "expectation (\\. real_of_int (f2_hash p \ k) ^m) = ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)" proof - have "odd p" using p_prime p_ge_3 prime_odd_nat assms by simp then obtain t where t_def: "p=2*t+1" using oddE by blast have "Collect even \ {..<2 * t + 1} \ (*) 2 ` {.. Collect even \ {..<2 * t + 1}" by (rule image_subsetI, simp) ultimately have "card ({k. even k} \ {..x. 2*x) ` {.. {.. {... hash k \ \ Collect even} = (real p + 1)/(2*real p)" using assms by (subst prob_range, auto simp:frac_eq_eq p_gt_0 mod_ring_def) have "p = card {.. {.. ({k. even k} \ {.. {.. {.. {.. {.. {... hash k \ \ Collect odd} = (real p - 1)/(2*real p)" using assms by (subst prob_range, auto simp add: frac_eq_eq mod_ring_def) have "expectation (\x. real_of_int (f2_hash p x k) ^ m) = expectation (\\. indicator {\. even (hash k \)} \ * (real p - 1)^m + indicator {\. odd (hash k \)} \ * (-real p - 1)^m)" by (rule Bochner_Integration.integral_cong, simp, simp) also have "... = prob {\. hash k \ \ Collect even} * (real p - 1) ^ m + prob {\. hash k \ \ Collect odd} * (-real p - 1) ^ m " by (simp, simp add:M_def) also have "... = (real p + 1) * (real p - 1) ^ m / (2 * real p) + (real p - 1) * (- real p - 1) ^ m / (2 * real p)" by (subst prob_even, subst prob_odd, simp) also have "... = ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)" by (simp add:add_divide_distrib ac_simps) finally show "expectation (\x. real_of_int (f2_hash p x k) ^ m) = ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)" by simp qed lemma shows var_sketch_rv:"variance sketch_rv \ 2*(real_of_rat (F 2 as)^2) * ((real p)\<^sup>2-1)\<^sup>2" (is "?A") and exp_sketch_rv:"expectation sketch_rv = real_of_rat (F 2 as) * ((real p)\<^sup>2-1)" (is "?B") proof - define h where "h = (\\ x. real_of_int (f2_hash p \ x))" define c where "c = (\x. real (count_list as x))" define r where "r = (\(m::nat). ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p))" define h_prod where "h_prod = (\as \. prod_list (map (h \) as))" define exp_h_prod :: "nat list \ real" where "exp_h_prod = (\as. (\i \ set as. r (count_list as i)))" have f_eq: "sketch_rv = (\\. (\x \ set as. c x * h \ x)^2)" by (rule ext, simp add:sketch_rv_def c_def h_def sum_list_eval del:f2_hash.simps) have r_one: "r (Suc 0) = 0" by (simp add:r_def algebra_simps) have r_two: "r 2 = (real p^2-1)" using p_gt_0 unfolding r_def power2_eq_square by (simp add:nonzero_divide_eq_eq, simp add:algebra_simps) have"(real p)^2 \ 2^2" by (rule power_mono, use p_gt_1 in linarith, simp) hence p_square_ge_4: "(real p)\<^sup>2 \ 4" by simp have "r 4 = (real p)^4+2*(real p)\<^sup>2 - 3" using p_gt_0 unfolding r_def by (subst nonzero_divide_eq_eq, auto simp:power4_eq_xxxx power2_eq_square algebra_simps) also have "... \ (real p)^4+2*(real p)\<^sup>2 + 3" by simp also have "... \ 3 * r 2 * r 2" using p_square_ge_4 by (simp add:r_two power4_eq_xxxx power2_eq_square algebra_simps mult_left_mono) finally have r_four_est: "r 4 \ 3 * r 2 * r 2" by simp have exp_h_prod_elim: "exp_h_prod = (\as. prod_list (map (r \ count_list as) (remdups as)))" by (simp add:exp_h_prod_def prod.set_conv_list[symmetric]) have exp_h_prod: "\x. set x \ set as \ length x \ 4 \ expectation (h_prod x) = exp_h_prod x" proof - fix x assume "set x \ set as" hence x_sub_p: "set x \ {..k. k \ set x \ k < p" by auto assume "length x \ 4" hence card_x: "card (set x) \ 4" using card_length dual_order.trans by blast have "set x \ carrier (mod_ring p) " using x_sub_p by (simp add:mod_ring_def) hence h_indep: "indep_vars (\_. borel) (\i \. h \ i ^ count_list x i) (set x)" using k_wise_indep_vars_subset[OF k_wise_indep] card_x as_range h_def by (auto intro:indep_vars_compose2[where X="hash" and M'=" (\_. discrete)"]) have "expectation (h_prod x) = expectation (\\. \ i \ set x. h \ i^(count_list x i))" by (simp add:h_prod_def prod_list_eval) also have "... = (\i \ set x. expectation (\\. h \ i^(count_list x i)))" by (simp add: indep_vars_lebesgue_integral[OF _ h_indep]) also have "... = (\i \ set x. r (count_list x i))" using f2_hash_pow_exp x_le_p by (simp add:h_def r_def M_def[symmetric] del:f2_hash.simps) also have "... = exp_h_prod x" by (simp add:exp_h_prod_def) finally show "expectation (h_prod x) = exp_h_prod x" by simp qed have "\x y. kernel_of x = kernel_of y \ exp_h_prod x = exp_h_prod y" proof - fix x y :: "nat list" assume a:"kernel_of x = kernel_of y" then obtain f where b:"bij_betw f (set x) (set y)" and c:"\z. z \ set x \ count_list x z = count_list y (f z)" using kernel_of_eq_imp_bij by blast have "exp_h_prod x = prod ( (\i. r(count_list y i)) \ f) (set x)" by (simp add:exp_h_prod_def c) also have "... = (\i \ f ` (set x). r(count_list y i))" by (metis b bij_betw_def prod.reindex) also have "... = exp_h_prod y" unfolding exp_h_prod_def by (rule prod.cong, metis b bij_betw_def) simp finally show "exp_h_prod x = exp_h_prod y" by simp qed hence exp_h_prod_cong: "\p x. of_bool (kernel_of x = kernel_of p) * exp_h_prod p = of_bool (kernel_of x = kernel_of p) * exp_h_prod x" by (metis (full_types) of_bool_eq_0_iff vector_space_over_itself.scale_zero_left) have c:"(\p\enum_rgfs n. of_bool (kernel_of xs = kernel_of p) * r) = r" if a:"length xs = n" for xs :: "nat list" and n and r :: real proof - have "(\p\enum_rgfs n. of_bool (kernel_of xs = kernel_of p) * 1) = (1::real)" using equiv_rels_2[OF a[symmetric]] by (simp add:equiv_rels_def comp_def) thus "(\p\enum_rgfs n. of_bool (kernel_of xs = kernel_of p) * r) = (r::real)" by (simp add:sum_list_mult_const) qed have "expectation sketch_rv = (\i\set as. (\j\set as. c i * c j * expectation (h_prod [i,j])))" by (simp add:f_eq h_prod_def power2_eq_square sum_distrib_left sum_distrib_right Bochner_Integration.integral_sum algebra_simps) also have "... = (\i\set as. (\j\set as. c i * c j * exp_h_prod [i,j]))" by (simp add:exp_h_prod) also have "... = (\i \ set as. (\j \ set as. c i * c j * (sum_list (map (\p. of_bool (kernel_of [i,j] = kernel_of p) * exp_h_prod p) (enum_rgfs 2)))))" by (subst exp_h_prod_cong, simp add:c) also have "... = (\i\set as. c i * c i * r 2)" by (simp add: numeral_eq_Suc kernel_of_eq All_less_Suc exp_h_prod_elim r_one distrib_left sum.distrib sum_collapse) also have "... = real_of_rat (F 2 as) * ((real p)^2-1)" by (simp add: sum_distrib_right[symmetric] c_def F_def power2_eq_square of_rat_sum of_rat_mult r_two) finally show b:?B by simp have "expectation (\x. (sketch_rv x)\<^sup>2) = (\i1 \ set as. (\i2 \ set as. (\i3 \ set as. (\i4 \ set as. c i1 * c i2 * c i3 * c i4 * expectation (h_prod [i1, i2, i3, i4])))))" by (simp add:f_eq h_prod_def power4_eq_xxxx sum_distrib_left sum_distrib_right Bochner_Integration.integral_sum algebra_simps) also have "... = (\i1 \ set as. (\i2 \ set as. (\i3 \ set as. (\i4 \ set as. c i1 * c i2 * c i3 * c i4 * exp_h_prod [i1,i2,i3,i4]))))" by (simp add:exp_h_prod) also have "... = (\i1 \ set as. (\i2 \ set as. (\i3 \ set as. (\i4 \ set as. c i1 * c i2 * c i3 * c i4 * (sum_list (map (\p. of_bool (kernel_of [i1,i2,i3,i4] = kernel_of p) * exp_h_prod p) (enum_rgfs 4)))))))" by (subst exp_h_prod_cong, simp add:c) also have "... = 3 * (\i \ set as. (\j \ set as. c i^2 * c j^2 * r 2 * r 2)) + ((\ i \ set as. c i^4 * r 4) - 3 * (\ i \ set as. c i ^ 4 * r 2 * r 2))" apply (simp add: numeral_eq_Suc exp_h_prod_elim r_one) (* large intermediate terms *) apply (simp add: kernel_of_eq All_less_Suc numeral_eq_Suc distrib_left sum.distrib sum_collapse neq_commute) apply (simp add: algebra_simps sum_subtractf sum_collapse) by (simp add: sum_distrib_left algebra_simps) also have "... = 3 * (\i \ set as. c i^2 * r 2)^2 + (\ i \ set as. c i ^ 4 * (r 4 - 3 * r 2 * r 2))" by (simp add:power2_eq_square sum_distrib_left algebra_simps sum_subtractf) also have "... = 3 * (\i \ set as. c i^2)^2 * (r 2)^2 + (\i \ set as. c i ^ 4 * (r 4 - 3 * r 2 * r 2))" by (simp add:power_mult_distrib sum_distrib_right[symmetric]) also have "... \ 3 * (\i \ set as. c i^2)^2 * (r 2)^2 + (\i \ set as. c i ^ 4 * 0)" using r_four_est by (auto intro!: sum_nonpos simp add:mult_nonneg_nonpos) also have "... = 3 * (real_of_rat (F 2 as)^2) * ((real p)\<^sup>2-1)\<^sup>2" by (simp add:c_def r_two F_def of_rat_sum of_rat_power) finally have "expectation (\x. (sketch_rv x)\<^sup>2) \ 3 * (real_of_rat (F 2 as)^2) * ((real p)\<^sup>2-1)\<^sup>2" by simp thus "variance sketch_rv \ 2*(real_of_rat (F 2 as)^2) * ((real p)\<^sup>2-1)\<^sup>2" by (simp add: variance_eq, simp add:power_mult_distrib b) qed lemma space_omega_1 [simp]: "Sigma_Algebra.space \\<^sub>p = UNIV" by (simp add:\\<^sub>p_def) interpretation \: prob_space "\\<^sub>p" by (simp add:\\<^sub>p_def prob_space_measure_pmf) lemma integrable_\: fixes f :: "((nat \ nat) \ (nat list)) \ real" shows "integrable \\<^sub>p f" unfolding \\<^sub>p_def \_def by (rule integrable_measure_pmf_finite, auto intro:finite_PiE simp:set_prod_pmf) lemma sketch_rv_exp: assumes "i\<^sub>2 < s\<^sub>2" assumes "i\<^sub>1 \ {0..1}" shows "\.expectation (\\. sketch_rv (\ (i\<^sub>1, i\<^sub>2))) = real_of_rat (F 2 as) * ((real p)\<^sup>2 - 1)" proof - have "\.expectation (\\. (sketch_rv (\ (i\<^sub>1, i\<^sub>2))) :: real) = expectation sketch_rv" using integrable_\ integrable_M assms unfolding \_def \\<^sub>p_def M_def by (subst expectation_Pi_pmf_slice, auto) also have "... = (real_of_rat (F 2 as)) * ((real p)\<^sup>2 - 1)" using exp_sketch_rv by simp finally show ?thesis by simp qed lemma sketch_rv_var: assumes "i\<^sub>2 < s\<^sub>2" assumes "i\<^sub>1 \ {0..1}" shows "\.variance (\\. sketch_rv (\ (i\<^sub>1, i\<^sub>2))) \ 2 * (real_of_rat (F 2 as))\<^sup>2 * ((real p)\<^sup>2 - 1)\<^sup>2" proof - have "\.variance (\\. (sketch_rv (\ (i\<^sub>1, i\<^sub>2)) :: real)) = variance sketch_rv" using integrable_\ integrable_M assms unfolding \_def \\<^sub>p_def M_def by (subst variance_prod_pmf_slice, auto) also have "... \ 2 * (real_of_rat (F 2 as))\<^sup>2 * ((real p)\<^sup>2 - 1)\<^sup>2" using var_sketch_rv by simp finally show ?thesis by simp qed lemma mean_rv_exp: assumes "i < s\<^sub>2" shows "\.expectation (\\. mean_rv \ i) = real_of_rat (F 2 as)" proof - have a:"(real p)\<^sup>2 > 1" using p_gt_1 by simp have "\.expectation (\\. mean_rv \ i) = (\i\<^sub>1 = 0..1. \.expectation (\\. sketch_rv (\ (i\<^sub>1, i)))) / (((real p)\<^sup>2 - 1) * real s\<^sub>1)" using assms integrable_\ by (simp add:mean_rv_def) also have "... = (\i\<^sub>1 = 0..1. real_of_rat (F 2 as) * ((real p)\<^sup>2 - 1)) / (((real p)\<^sup>2 - 1) * real s\<^sub>1)" using sketch_rv_exp[OF assms] by simp also have "... = real_of_rat (F 2 as)" using s1_gt_0 a by simp finally show ?thesis by simp qed lemma mean_rv_var: assumes "i < s\<^sub>2" shows "\.variance (\\. mean_rv \ i) \ (real_of_rat (\ * F 2 as))\<^sup>2 / 3" proof - have a: "\.indep_vars (\_. borel) (\i\<^sub>1 x. sketch_rv (x (i\<^sub>1, i))) {0..1}" using assms unfolding \\<^sub>p_def \_def by (intro indep_vars_restrict_intro'[where f="fst"]) (auto simp add: restrict_dfl_def case_prod_beta lessThan_atLeast0) have p_sq_ne_1: "(real p)^2 \ 1" by (metis p_gt_1 less_numeral_extra(4) of_nat_power one_less_power pos2 semiring_char_0_class.of_nat_eq_1_iff) have s1_bound: " 6 / (real_of_rat \)\<^sup>2 \ real s\<^sub>1" unfolding s\<^sub>1_def by (metis (mono_tags, opaque_lifting) of_rat_ceiling of_rat_divide of_rat_numeral_eq of_rat_power real_nat_ceiling_ge) have "\.variance (\\. mean_rv \ i) = \.variance (\\. \i\<^sub>1 = 0..1. sketch_rv (\ (i\<^sub>1, i))) / (((real p)\<^sup>2 - 1) * real s\<^sub>1)\<^sup>2" unfolding mean_rv_def by (subst \.variance_divide[OF integrable_\], simp) also have "... = (\i\<^sub>1 = 0..1. \.variance (\\. sketch_rv (\ (i\<^sub>1, i)))) / (((real p)\<^sup>2 - 1) * real s\<^sub>1)\<^sup>2" by (subst \.var_sum_all_indep[OF _ _ integrable_\ a]) (auto simp: \_def \\<^sub>p_def) also have "... \ (\i\<^sub>1 = 0..1. 2*(real_of_rat (F 2 as)^2) * ((real p)\<^sup>2-1)\<^sup>2) / (((real p)\<^sup>2 - 1) * real s\<^sub>1)\<^sup>2" by (rule divide_right_mono, rule sum_mono[OF sketch_rv_var[OF assms]], auto) also have "... = 2 * (real_of_rat (F 2 as)^2) / real s\<^sub>1" using p_sq_ne_1 s1_gt_0 by (subst frac_eq_eq, auto simp:power2_eq_square) also have "... \ 2 * (real_of_rat (F 2 as)^2) / (6 / (real_of_rat \)\<^sup>2)" using s1_gt_0 \_range by (intro divide_left_mono mult_pos_pos s1_bound) auto also have "... = (real_of_rat (\ * F 2 as))\<^sup>2 / 3" by (simp add:of_rat_mult algebra_simps) finally show ?thesis by simp qed lemma mean_rv_bounds: assumes "i < s\<^sub>2" shows "\.prob {\. real_of_rat \ * real_of_rat (F 2 as) < \mean_rv \ i - real_of_rat (F 2 as)\} \ 1/3" proof (cases "as = []") case True then show ?thesis using assms by (subst mean_rv_def, subst sketch_rv_def, simp add:F_def) next case False hence "F 2 as > 0" using F_gr_0 by auto hence a: "0 < real_of_rat (\ * F 2 as)" using \_range by simp have [simp]: "(\\. mean_rv \ i) \ borel_measurable \\<^sub>p" by (simp add:\_def \\<^sub>p_def) have "\.prob {\. real_of_rat \ * real_of_rat (F 2 as) < \mean_rv \ i - real_of_rat (F 2 as)\} \ \.prob {\. real_of_rat (\ * F 2 as) \ \mean_rv \ i - real_of_rat (F 2 as)\}" by (rule \.pmf_mono[OF \\<^sub>p_def], simp add:of_rat_mult) also have "... \ \.variance (\\. mean_rv \ i) / (real_of_rat (\ * F 2 as))\<^sup>2" using \.Chebyshev_inequality[where a="real_of_rat (\ * F 2 as)" and f="\\. mean_rv \ i",simplified] a prob_space_measure_pmf[where p="\"] mean_rv_exp[OF assms] integrable_\ by simp also have "... \ ((real_of_rat (\ * F 2 as))\<^sup>2/3) / (real_of_rat (\ * F 2 as))\<^sup>2" by (rule divide_right_mono, rule mean_rv_var[OF assms], simp) also have "... = 1/3" using a by force finally show ?thesis by blast qed lemma f2_alg_correct': "\

(\ in measure_pmf result. \\ - F 2 as\ \ \ * F 2 as) \ 1-of_rat \" proof - have a: "\.indep_vars (\_. borel) (\i \. mean_rv \ i) {0..2}" using s1_gt_0 unfolding \\<^sub>p_def \_def by (intro indep_vars_restrict_intro'[where f="snd"]) (auto simp: \\<^sub>p_def \_def mean_rv_def restrict_dfl_def) have b: "- 18 * ln (real_of_rat \) \ real s\<^sub>2" unfolding s\<^sub>2_def using of_nat_ceiling by auto have "1 - of_rat \ \ \.prob {\. \median s\<^sub>2 (mean_rv \) - real_of_rat (F 2 as) \ \ of_rat \ * of_rat (F 2 as)}" using \_range \.median_bound_2[OF _ a b, where \="real_of_rat \ * real_of_rat (F 2 as)" and \="real_of_rat (F 2 as)"] mean_rv_bounds by simp also have "... = \.prob {\. \real_of_rat (result_rv \) - of_rat (F 2 as) \ \ of_rat \ * of_rat (F 2 as)}" by (simp add:result_rv_def median_restrict lessThan_atLeast0 median_rat[OF s2_gt_0] mean_rv_def sketch_rv_def of_rat_divide of_rat_sum of_rat_mult of_rat_diff of_rat_power) also have "... = \.prob {\. \result_rv \ - F 2 as\ \ \ * F 2 as} " by (simp add:of_rat_less_eq of_rat_mult[symmetric] of_rat_diff[symmetric] set_eq_iff) finally have "\.prob {y. \result_rv y - F 2 as\ \ \ * F 2 as} \ 1-of_rat \ " by simp thus ?thesis by (simp add: distr \\<^sub>p_def) qed lemma f2_exact_space_usage': "AE \ in sketch . bit_count (encode_f2_state \) \ f2_space_usage (n, length as, \, \)" proof - have "p \ 2 * max n 3 + 2" by (subst p_def, rule prime_above_upper_bound) also have "... \ 2 * n + 8" by (cases "n \ 2", simp_all) finally have p_bound: "p \ 2 * n + 8" by simp have "bit_count (N\<^sub>e p) \ ereal (2 * log 2 (real p + 1) + 1)" by (rule exp_golomb_bit_count) also have "... \ ereal (2 * log 2 (2 * real n + 9) + 1)" using p_bound by simp finally have p_bit_count: "bit_count (N\<^sub>e p) \ ereal (2 * log 2 (2 * real n + 9) + 1)" by simp have a: "bit_count (encode_f2_state (s\<^sub>1, s\<^sub>2, p, y, \i\{..1} \ {..2}. sum_list (map (f2_hash p (y i)) as))) \ ereal (f2_space_usage (n, length as, \, \))" if a:"y\{..1} \ {..2} \\<^sub>E bounded_degree_polynomials (mod_ring p) 4" for y proof - have "y \ extensional ({..1} \ {..2})" using a PiE_iff by blast hence y_ext: "y \ extensional (set (List.product [0..1] [0..2]))" by (simp add:lessThan_atLeast0) have h_bit_count_aux: "bit_count (P\<^sub>e p 4 (y x)) \ ereal (4 + 4 * log 2 (8 + 2 * real n))" if b:"x \ set (List.product [0..1] [0..2])" for x proof - have "y x \ bounded_degree_polynomials (Field.mod_ring p) 4" using b a by force hence "bit_count (P\<^sub>e p 4 (y x)) \ ereal ( real 4 * (log 2 (real p) + 1))" by (rule bounded_degree_polynomial_bit_count[OF p_gt_1] ) also have "... \ ereal (real 4 * (log 2 (8 + 2 * real n) + 1) )" using p_gt_0 p_bound by simp also have "... \ ereal (4 + 4 * log 2 (8 + 2 * real n))" by simp finally show ?thesis by blast qed have h_bit_count: "bit_count ((List.product [0..1] [0..2] \\<^sub>e P\<^sub>e p 4) y) \ ereal (real s\<^sub>1 * real s\<^sub>2 * (4 + 4 * log 2 (8 + 2 * real n)))" using fun_bit_count_est[where e="P\<^sub>e p 4", OF y_ext h_bit_count_aux] by simp have sketch_bit_count_aux: "bit_count (I\<^sub>e (sum_list (map (f2_hash p (y x)) as))) \ ereal (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1))" (is "?lhs \ ?rhs") if " x \ {0..1} \ {0..2}" for x proof - have "\sum_list (map (f2_hash p (y x)) as)\ \ sum_list (map (abs \ (f2_hash p (y x))) as)" by (subst map_map[symmetric]) (rule sum_list_abs) also have "... \ sum_list (map (\_. (int p+1)) as)" by (rule sum_list_mono) (simp add:p_gt_0) also have "... = int (length as) * (int p+1)" by (simp add: sum_list_triv) also have "... \ int (length as) * (9+2*(int n))" using p_bound by (intro mult_mono, auto) finally have "\sum_list (map (f2_hash p (y x)) as)\ \ int (length as) * (9 + 2 * int n)" by simp hence "?lhs \ ereal (2 * log 2 (real_of_int (2* (int (length as) * (9 + 2 * int n)) + 1)) + 1)" by (rule int_bit_count_est) also have "... = ?rhs" by (simp add:algebra_simps) finally show "?thesis" by simp qed have "bit_count ((List.product [0..1] [0..2] \\<^sub>e I\<^sub>e) (\i\{..1} \ {..2}. sum_list (map (f2_hash p (y i)) as))) \ ereal (real (length (List.product [0..1] [0..2]))) * (ereal (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))" by (intro fun_bit_count_est) (simp_all add:extensional_def lessThan_atLeast0 sketch_bit_count_aux del:f2_hash.simps) also have "... = ereal (real s\<^sub>1 * real s\<^sub>2 * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))" by simp finally have sketch_bit_count: "bit_count ((List.product [0..1] [0..2] \\<^sub>e I\<^sub>e) (\i\{..1} \ {..2}. sum_list (map (f2_hash p (y i)) as))) \ ereal (real s\<^sub>1 * real s\<^sub>2 * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))" by simp have "bit_count (encode_f2_state (s\<^sub>1, s\<^sub>2, p, y, \i\{..1} \ {..2}. sum_list (map (f2_hash p (y i)) as))) \ bit_count (N\<^sub>e s\<^sub>1) + bit_count (N\<^sub>e s\<^sub>2) +bit_count (N\<^sub>e p) + bit_count ((List.product [0..1] [0..2] \\<^sub>e P\<^sub>e p 4) y) + bit_count ((List.product [0..1] [0..2] \\<^sub>e I\<^sub>e) (\i\{..1} \ {..2}. sum_list (map (f2_hash p (y i)) as)))" by (simp add:Let_def s\<^sub>1_def s\<^sub>2_def encode_f2_state_def dependent_bit_count add.assoc) also have "... \ ereal (2 * log 2 (real s\<^sub>1 + 1) + 1) + ereal (2 * log 2 (real s\<^sub>2 + 1) + 1) + ereal (2 * log 2 (2 * real n + 9) + 1) + (ereal (real s\<^sub>1 * real s\<^sub>2) * (4 + 4 * log 2 (8 + 2 * real n))) + (ereal (real s\<^sub>1 * real s\<^sub>2) * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1) ))" by (intro add_mono exp_golomb_bit_count p_bit_count, auto intro: h_bit_count sketch_bit_count) also have "... = ereal (f2_space_usage (n, length as, \, \))" by (simp add:distrib_left add.commute s\<^sub>1_def[symmetric] s\<^sub>2_def[symmetric] Let_def) finally show "bit_count (encode_f2_state (s\<^sub>1, s\<^sub>2, p, y, \i\{..1} \ {..2}. sum_list (map (f2_hash p (y i)) as))) \ ereal (f2_space_usage (n, length as, \, \))" by simp qed have "set_pmf \ = {..1} \ {..2} \\<^sub>E bounded_degree_polynomials (Field.mod_ring p) 4" by (simp add: \_def set_prod_pmf) (simp add: space_def) thus ?thesis by (simp add:mean_rv_alg_sketch AE_measure_pmf_iff del:f2_space_usage.simps, metis a) qed end text \Main results of this section:\ theorem f2_alg_correct: assumes "\ \ {0<..<1}" assumes "\ > 0" assumes "set as \ {.. \ fold (\a state. state \ f2_update a) as (f2_init \ \ n) \ f2_result" shows "\

(\ in measure_pmf \. \\ - F 2 as\ \ \ * F 2 as) \ 1-of_rat \" using f2_alg_correct'[OF assms(1,2,3)] \_def by auto theorem f2_exact_space_usage: assumes "\ \ {0<..<1}" assumes "\ > 0" assumes "set as \ {.. fold (\a state. state \ f2_update a) as (f2_init \ \ n)" shows "AE \ in M. bit_count (encode_f2_state \) \ f2_space_usage (n, length as, \, \)" using f2_exact_space_usage'[OF assms(1,2,3)] by (subst (asm) sketch_def[OF assms(1,2,3)], subst M_def, simp) -theorem f2_asympotic_space_complexity: +theorem f2_asymptotic_space_complexity: "f2_space_usage \ O[at_top \\<^sub>F at_top \\<^sub>F at_right 0 \\<^sub>F at_right 0](\ (n, m, \, \). (ln (1 / of_rat \)) / (of_rat \)\<^sup>2 * (ln (real n) + ln (real m)))" (is "_ \ O[?F](?rhs)") proof - define n_of :: "nat \ nat \ rat \ rat \ nat" where "n_of = (\(n, m, \, \). n)" define m_of :: "nat \ nat \ rat \ rat \ nat" where "m_of = (\(n, m, \, \). m)" define \_of :: "nat \ nat \ rat \ rat \ rat" where "\_of = (\(n, m, \, \). \)" define \_of :: "nat \ nat \ rat \ rat \ rat" where "\_of = (\(n, m, \, \). \)" define g where "g = (\x. (1/ (of_rat (\_of x))\<^sup>2) * (ln (1 / of_rat (\_of x))) * (ln (real (n_of x)) + ln (real (m_of x))))" have evt: "(\x. 0 < real_of_rat (\_of x) \ 0 < real_of_rat (\_of x) \ 1/real_of_rat (\_of x) \ \ \ 1/real_of_rat (\_of x) \ \ \ real (n_of x) \ n \ real (m_of x) \ m\ P x) \ eventually P ?F" (is "(\x. ?prem x \ _) \ _") for \ \ n m P apply (rule eventually_mono[where P="?prem" and Q="P"]) apply (simp add:\_of_def case_prod_beta' \_of_def n_of_def m_of_def) apply (intro eventually_conj eventually_prod1' eventually_prod2' sequentially_inf eventually_at_right_less inv_at_right_0_inf) by (auto simp add:prod_filter_eq_bot) have unit_1: "(\_. 1) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" using one_le_power by (intro landau_o.big_mono evt[where \="1"], auto simp add:power_one_over[symmetric]) have unit_2: "(\_. 1) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" by (intro landau_o.big_mono evt[where \="exp 1"]) (auto intro!:iffD2[OF ln_ge_iff] simp add:abs_ge_iff) have unit_3: "(\_. 1) \ O[?F](\x. real (n_of x))" by (intro landau_o.big_mono evt, auto) have unit_4: "(\_. 1) \ O[?F](\x. real (m_of x))" by (intro landau_o.big_mono evt, auto) have unit_5: "(\_. 1) \ O[?F](\x. ln (real (n_of x)))" by (auto intro!: landau_o.big_mono evt[where n="exp 1"]) (metis abs_ge_self linorder_not_le ln_ge_iff not_exp_le_zero order.trans) have unit_6: "(\_. 1) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" by (intro landau_sum_1 evt unit_5 iffD2[OF ln_ge_iff], auto) have unit_7: "(\_. 1) \ O[?F](\x. 1 / real_of_rat (\_of x))" by (intro landau_o.big_mono evt[where \="1"], auto) have unit_8: "(\_. 1) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1 unit_1 unit_2 unit_6) have unit_9: "(\_. 1) \ O[?F](\x. real (n_of x) * real (m_of x))" by (intro landau_o.big_mult_1 unit_3 unit_4) have " (\x. 6 * (1 / (real_of_rat (\_of x))\<^sup>2)) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" by (subst landau_o.big.cmult_in_iff, simp_all) hence l1: "(\x. real (nat \6 / (\_of x)\<^sup>2\)) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" by (intro landau_real_nat landau_rat_ceil[OF unit_1]) (simp_all add:of_rat_divide of_rat_power) have "(\x. - ( ln (real_of_rat (\_of x)))) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" by (intro landau_o.big_mono evt) (subst ln_div, auto) hence l2: "(\x. real (nat \- (18 * ln (real_of_rat (\_of x)))\)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" by (intro landau_real_nat landau_ceil[OF unit_2], simp) have l3_aux: " (\x. real (m_of x) * (18 + 4 * real (n_of x)) + 1) \ O[?F](\x. real (n_of x) * real (m_of x))" by (rule sum_in_bigo[OF _unit_9], subst mult.commute) (intro landau_o.mult sum_in_bigo, auto simp:unit_3) have "(\x. ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1)) \ O[?F](\x. ln (real (n_of x) * real (m_of x)))" apply (rule landau_ln_2[where a="2"], simp, simp) apply (rule evt[where m="2" and n="1"]) apply (metis dual_order.trans mult_left_mono mult_of_nat_commute of_nat_0_le_iff verit_prod_simplify(1)) using l3_aux by simp also have "(\x. ln (real (n_of x) * real (m_of x))) \ O[?F](\x. ln (real (n_of x)) + ln(real (m_of x)))" by (intro landau_o.big_mono evt[where m="1" and n="1"], auto simp add:ln_mult) finally have l3: "(\x. ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1)) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" using landau_o.big_trans by simp have l4: "(\x. ln (8 + 2 * real (n_of x))) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" by (intro landau_sum_1 evt[where n="2"] landau_ln_2[where a="2"] iffD2[OF ln_ge_iff]) (auto intro!: sum_in_bigo simp add:unit_3) have l5: "(\x. ln (9 + 2 * real (n_of x))) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" by (intro landau_sum_1 evt[where n="2"] landau_ln_2[where a="2"] iffD2[OF ln_ge_iff]) (auto intro!: sum_in_bigo simp add:unit_3) have l6: "(\x. ln (real (nat \6 / (\_of x)\<^sup>2\) + 1)) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1 landau_ln_3 sum_in_bigo unit_6 unit_2 l1 unit_1, simp) have l7: "(\x. ln (9 + 2 * real (n_of x))) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1' unit_1 unit_2 l5) have l8: "(\x. ln (real (nat \- (18 * ln (real_of_rat (\_of x)))\) + 1) ) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1 unit_6 landau_o.big_mult_1' unit_1 landau_ln_3 sum_in_bigo l2 unit_2) simp have l9: "(\x. 5 + 4 * ln (8 + 2 * real (n_of x)) / ln 2 + 2 * ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1) / ln 2) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" by (intro sum_in_bigo, auto simp: l3 l4 unit_6) have l10: "(\x. real (nat \6 / (\_of x)\<^sup>2\) * real (nat \- (18 * ln (real_of_rat (\_of x)))\) * (5 + 4 * ln (8 + 2 * real (n_of x)) / ln 2 + 2 * ln(real (m_of x) * (18 + 4 * real (n_of x)) + 1) / ln 2)) \ O[?F](g)" unfolding g_def by (intro landau_o.mult, auto simp: l1 l2 l9) have "f2_space_usage = (\x. f2_space_usage (n_of x, m_of x, \_of x, \_of x))" by (simp add:case_prod_beta' n_of_def \_of_def \_of_def m_of_def) also have "... \ O[?F](g)" by (auto intro!:sum_in_bigo simp:Let_def log_def l6 l7 l8 l10 unit_8) also have "... = O[?F](?rhs)" by (simp add:case_prod_beta' g_def n_of_def \_of_def \_of_def m_of_def) finally show ?thesis by simp qed end diff --git a/thys/Frequency_Moments/Frequency_Moment_k.thy b/thys/Frequency_Moments/Frequency_Moment_k.thy --- a/thys/Frequency_Moments/Frequency_Moment_k.thy +++ b/thys/Frequency_Moments/Frequency_Moment_k.thy @@ -1,1015 +1,1015 @@ section \Frequency Moment $k$\ theory Frequency_Moment_k imports Frequency_Moments Landau_Ext Lp.Lp Median_Method.Median Product_PMF_Ext begin text \This section contains a formalization of the algorithm for the $k$-th frequency moment. It is based on the algorithm described in \cite[\textsection 2.1]{alon1999}.\ type_synonym fk_state = "nat \ nat \ nat \ nat \ (nat \ nat \ (nat \ nat))" fun fk_init :: "nat \ rat \ rat \ nat \ fk_state pmf" where "fk_init k \ \ n = do { let s\<^sub>1 = nat \3 * real k * n powr (1-1/real k) / (real_of_rat \)\<^sup>2\; let s\<^sub>2 = nat \-18 * ln (real_of_rat \)\; return_pmf (s\<^sub>1, s\<^sub>2, k, 0, (\_ \ {0..1} \ {0..2}. (0,0))) }" fun fk_update :: "nat \ fk_state \ fk_state pmf" where "fk_update a (s\<^sub>1, s\<^sub>2, k, m, r) = do { coins \ prod_pmf ({0..1} \ {0..2}) (\_. bernoulli_pmf (1/(real m+1))); return_pmf (s\<^sub>1, s\<^sub>2, k, m+1, \i \ {0..1} \ {0..2}. if coins i then (a,0) else ( let (x,l) = r i in (x, l + of_bool (x=a)) ) ) }" fun fk_result :: "fk_state \ rat pmf" where "fk_result (s\<^sub>1, s\<^sub>2, k, m, r) = return_pmf (median s\<^sub>2 (\i\<^sub>2 \ {0..2}. (\i\<^sub>1\{0..1}. rat_of_nat (let t = snd (r (i\<^sub>1, i\<^sub>2)) + 1 in m * (t^k - (t - 1)^k))) / (rat_of_nat s\<^sub>1)) )" lemma bernoulli_pmf_1: "bernoulli_pmf 1 = return_pmf True" by (rule pmf_eqI, simp add:indicator_def) fun fk_space_usage :: "(nat \ nat \ nat \ rat \ rat) \ real" where "fk_space_usage (k, n, m, \, \) = ( let s\<^sub>1 = nat \3*real k* (real n) powr (1-1/ real k) / (real_of_rat \)\<^sup>2 \ in let s\<^sub>2 = nat \-(18 * ln (real_of_rat \))\ in 4 + 2 * log 2 (s\<^sub>1 + 1) + 2 * log 2 (s\<^sub>2 + 1) + 2 * log 2 (real k + 1) + 2 * log 2 (real m + 1) + s\<^sub>1 * s\<^sub>2 * (2 + 2 * log 2 (real n+1) + 2 * log 2 (real m+1)))" definition encode_fk_state :: "fk_state \ bool list option" where "encode_fk_state = N\<^sub>e \\<^sub>e (\s\<^sub>1. N\<^sub>e \\<^sub>e (\s\<^sub>2. N\<^sub>e \\<^sub>e N\<^sub>e \\<^sub>e (List.product [0..1] [0..2] \\<^sub>e (N\<^sub>e \\<^sub>e N\<^sub>e))))" lemma "inj_on encode_fk_state (dom encode_fk_state)" proof - have "is_encoding encode_fk_state" by (simp add:encode_fk_state_def) (intro dependent_encoding exp_golomb_encoding fun_encoding) thus ?thesis by (rule encoding_imp_inj) qed text \This is an intermediate non-parallel form @{term "fk_update"} used only in the correctness proof.\ fun fk_update_2 :: "'a \ (nat \ 'a \ nat) \ (nat \ 'a \ nat) pmf" where "fk_update_2 a (m,x,l) = do { coin \ bernoulli_pmf (1/(real m+1)); return_pmf (m+1,if coin then (a,0) else (x, l + of_bool (x=a))) }" definition sketch where "sketch as i = (as ! i, count_list (drop (i+1) as) (as ! i))" lemma fk_update_2_distr: assumes "as \ []" shows "fold (\x s. s \ fk_update_2 x) as (return_pmf (0,0,0)) = pmf_of_set {.. (\k. return_pmf (length as, sketch as k))" using assms proof (induction as rule:rev_nonempty_induct) case (single x) show ?case using single by (simp add:bind_return_pmf pmf_of_set_singleton bernoulli_pmf_1 lessThan_def sketch_def) next case (snoc x xs) let ?h = "(\xs k. count_list (drop (Suc k) xs) (xs ! k))" let ?q = "(\xs k. (length xs, sketch xs k))" have non_empty: " {.. {}" " {.. {}" using snoc by auto have fk_update_2_eta:"fk_update_2 x = (\a. fk_update_2 x (fst a, fst (snd a), snd (snd a)))" by auto have "pmf_of_set {.. (\k. bernoulli_pmf (1 / (real (length xs) + 1)) \ (\coin. return_pmf (if coin then length xs else k))) = bernoulli_pmf (1 / (real (length xs) + 1)) \ (\y. pmf_of_set {.. (\k. return_pmf (if y then length xs else k)))" by (subst bind_commute_pmf, simp) also have "... = pmf_of_set {.. (\k. bernoulli_pmf (1 / (real (length xs) + 1)) \ (\coin. return_pmf (if coin then length xs else k))) = pmf_of_set {..x s. (s \ fk_update_2 x)) (xs@[x]) (return_pmf (0,0,0)) = (pmf_of_set {.. (\k. return_pmf (length xs, sketch xs k))) \ fk_update_2 x" using snoc by (simp add:case_prod_beta') also have "... = (pmf_of_set {.. (\k. return_pmf (length xs, sketch xs k))) \ (\(m,a,l). bernoulli_pmf (1 / (real m + 1)) \ (\coin. return_pmf (m + 1, if coin then (x, 0) else (a, (l + of_bool (a = x))))))" by (subst fk_update_2_eta, subst fk_update_2.simps, simp add:case_prod_beta') also have "... = pmf_of_set {.. (\k. bernoulli_pmf (1 / (real (length xs) + 1)) \ (\coin. return_pmf (length xs + 1, if coin then (x, 0) else (xs ! k, ?h xs k + of_bool (xs ! k = x)))))" by (subst bind_assoc_pmf, simp add: bind_return_pmf sketch_def) also have "... = pmf_of_set {.. (\k. bernoulli_pmf (1 / (real (length xs) + 1)) \ (\coin. return_pmf (if coin then length xs else k) \ (\k'. return_pmf (?q (xs@[x]) k'))))" using non_empty by (intro bind_pmf_cong, auto simp add:bind_return_pmf nth_append count_list_append sketch_def) also have "... = pmf_of_set {.. (\k. bernoulli_pmf (1 / (real (length xs) + 1)) \ (\coin. return_pmf (if coin then length xs else k))) \ (\k'. return_pmf (?q (xs@[x]) k'))" by (subst bind_assoc_pmf, subst bind_assoc_pmf, simp) also have "... = pmf_of_set {.. (\k'. return_pmf (?q (xs@[x]) k'))" by (subst b, simp) finally show ?case by simp qed context fixes \ \ :: rat fixes n k :: nat fixes as assumes k_ge_1: "k \ 1" assumes \_range: "\ \ {0<..<1}" assumes \_range: "\ > 0" assumes as_range: "set as \ {..1 where "s\<^sub>1 = nat \3 * real k * (real n) powr (1-1/real k) / (real_of_rat \)\<^sup>2\" definition s\<^sub>2 where "s\<^sub>2 = nat \-(18 * ln (real_of_rat \))\" definition "M\<^sub>1 = {(u, v). v < count_list as u}" definition "\\<^sub>1 = measure_pmf (pmf_of_set M\<^sub>1)" definition "M\<^sub>2 = prod_pmf ({0..1} \ {0..2}) (\_. pmf_of_set M\<^sub>1)" definition "\\<^sub>2 = measure_pmf M\<^sub>2" interpretation prob_space "\\<^sub>1" unfolding \\<^sub>1_def by (simp add:prob_space_measure_pmf) interpretation \\<^sub>2:prob_space "\\<^sub>2" unfolding \\<^sub>2_def by (simp add:prob_space_measure_pmf) lemma split_space: "(\a\M\<^sub>1. f (snd a)) = (\u \ set as. (\v \{0..u. {u} \ {v. v < count_list as u})" have a: "inj_on snd (A x)" for x by (simp add:A_def inj_on_def) have "\u v. u < count_list as v \ v \ set as" by (subst count_list_gr_1, force) hence "M\<^sub>1 = \ (A ` set as)" by (auto simp add:set_eq_iff A_def M\<^sub>1_def) hence "(\a\M\<^sub>1. f (snd a)) = sum (f \ snd) (\ (A ` set as))" by (intro sum.cong, auto) also have "... = sum (\x. sum (f \ snd) (A x)) (set as)" by (rule sum.UNION_disjoint, simp, simp add:A_def, simp add:A_def, blast) also have "... = sum (\x. sum f (snd ` A x)) (set as)" by (intro sum.cong, auto simp add:sum.reindex[OF a]) also have "... = (\u \ set as. (\v \{0.. []" shows fin_space: "finite M\<^sub>1" and non_empty_space: "M\<^sub>1 \ {}" and card_space: "card M\<^sub>1 = length as" proof - have "M\<^sub>1 \ set as \ {k. k < length as}" proof (rule subsetI) fix x assume a:"x \ M\<^sub>1" have "fst x \ set as" using a by (simp add:case_prod_beta count_list_gr_1 M\<^sub>1_def) moreover have "snd x < length as" using a count_le_length order_less_le_trans by (simp add:case_prod_beta M\<^sub>1_def) fast ultimately show "x \ set as \ {k. k < length as}" by (simp add:mem_Times_iff) qed thus fin_space: "finite M\<^sub>1" using finite_subset by blast have "(as ! 0, 0) \ M\<^sub>1" using assms(1) unfolding M\<^sub>1_def by (simp, metis count_list_gr_1 gr0I length_greater_0_conv not_one_le_zero nth_mem) thus "M\<^sub>1 \ {}" by blast show "card M\<^sub>1 = length as" using fin_space split_space[where f="\_. (1::nat)"] by (simp add:sum_count_set[where X="set as" and xs="as", simplified]) qed lemma assumes "as \ []" shows integrable_1: "integrable \\<^sub>1 (f :: _ \ real)" and integrable_2: "integrable \\<^sub>2 (g :: _ \ real)" proof - have fin_omega: "finite (set_pmf (pmf_of_set M\<^sub>1))" using fin_space[OF assms] non_empty_space[OF assms] by auto thus "integrable \\<^sub>1 f" unfolding \\<^sub>1_def by (rule integrable_measure_pmf_finite) have "finite (set_pmf M\<^sub>2)" unfolding M\<^sub>2_def using fin_omega by (subst set_prod_pmf) (auto intro:finite_PiE) thus "integrable \\<^sub>2 g" unfolding \\<^sub>2_def by (intro integrable_measure_pmf_finite) qed lemma sketch_distr: assumes "as \ []" shows "pmf_of_set {.. (\k. return_pmf (sketch as k)) = pmf_of_set M\<^sub>1" proof - have "x < y \ y < length as \ count_list (drop (y+1) as) (as ! y) < count_list (drop (x+1) as) (as ! y)" for x y by (intro count_list_lt_suffix suffix_drop_drop, simp_all) (metis Suc_diff_Suc diff_Suc_Suc diff_add_inverse lessI less_natE) hence a1: "inj_on (sketch as) {k. k < length as}" unfolding sketch_def by (intro inj_onI) (metis Pair_inject mem_Collect_eq nat_neq_iff) have "x < length as \ count_list (drop (x+1) as) (as ! x) < count_list as (as ! x)" for x by (rule count_list_lt_suffix, auto simp add:suffix_drop) hence "sketch as ` {k. k < length as} \ M\<^sub>1" by (intro image_subsetI, simp add:sketch_def M\<^sub>1_def) moreover have "card M\<^sub>1 \ card (sketch as ` {k. k < length as})" by (simp add: card_space[OF assms(1)] card_image[OF a1]) ultimately have "sketch as ` {k. k < length as} = M\<^sub>1" using fin_space[OF assms(1)] by (intro card_seteq, simp_all) hence "bij_betw (sketch as) {k. k < length as} M\<^sub>1" using a1 by (simp add:bij_betw_def) hence "map_pmf (sketch as) (pmf_of_set {k. k < length as}) = pmf_of_set M\<^sub>1" using assms by (intro map_pmf_of_set_bij_betw, auto) thus ?thesis by (simp add: sketch_def map_pmf_def lessThan_def) qed lemma fk_update_distr: "fold (\x s. s \ fk_update x) as (fk_init k \ \ n) = prod_pmf ({0..1} \ {0..2}) (\_. fold (\x s. s \ fk_update_2 x) as (return_pmf (0,0,0))) \ (\x. return_pmf (s\<^sub>1,s\<^sub>2,k, length as, \i\{0..1}\{0..2}. snd (x i)))" proof (induction as rule:rev_induct) case Nil then show ?case by (auto simp:Let_def s\<^sub>1_def[symmetric] s\<^sub>2_def[symmetric] bind_return_pmf) next case (snoc x xs) have fk_update_2_eta:"fk_update_2 x = (\a. fk_update_2 x (fst a, fst (snd a), snd (snd a)))" by auto have a: "fk_update x (s\<^sub>1, s\<^sub>2, k, length xs, \i\{0..1} \ {0..2}. snd (f i)) = prod_pmf ({0..1} \ {0..2}) (\i. fk_update_2 x (f i)) \ (\a. return_pmf (s\<^sub>1,s\<^sub>2, k, Suc (length xs), \i\{0..1} \ {0..2}. snd (a i)))" if b: "f \ set_pmf (prod_pmf ({0..1} \ {0..2}) (\_. fold (\a s. s \ fk_update_2 a) xs (return_pmf (0, 0, 0))))" for f proof - have c:"fst (f i) = length xs" if d:"i \ {0..1} \{0..2}" for i proof (cases "xs = []") case True then show ?thesis using b d by (simp add: set_Pi_pmf) next case False hence "{.. {}" by force thus ?thesis using b d by (simp add:set_Pi_pmf fk_update_2_distr[OF False] PiE_dflt_def) force qed show ?thesis apply (subst fk_update_2_eta, subst fk_update_2.simps, simp) apply (simp add: Pi_pmf_bind_return[where d'="undefined"] bind_assoc_pmf) apply (rule bind_pmf_cong, simp add:c cong:Pi_pmf_cong) by (auto simp add:bind_return_pmf case_prod_beta) qed have "fold (\x s. s \ fk_update x) (xs @ [x]) (fk_init k \ \ n) = prod_pmf ({0..1} \ {0..2}) (\_. fold (\x s. s \ fk_update_2 x) xs (return_pmf (0,0,0))) \ (\\. return_pmf (s\<^sub>1,s\<^sub>2,k, length xs, \i\{0..1}\{0..2}. snd (\ i)) \ fk_update x)" using snoc by (simp add:restrict_def bind_assoc_pmf del:fk_init.simps) also have "... = prod_pmf ({0..1} \ {0..2}) (\_. fold (\a s. s \ fk_update_2 a) xs (return_pmf (0, 0, 0))) \ (\f. prod_pmf ({0..1} \ {0..2}) (\i. fk_update_2 x (f i)) \ (\a. return_pmf (s\<^sub>1, s\<^sub>2, k, Suc (length xs), \i\{0..1} \ {0..2}. snd (a i))))" using a by (intro bind_pmf_cong, simp_all add:bind_return_pmf del:fk_update.simps) also have "... = prod_pmf ({0..1} \ {0..2}) (\_. fold (\a s. s \ fk_update_2 a) xs (return_pmf (0, 0, 0))) \ (\f. prod_pmf ({0..1} \ {0..2}) (\i. fk_update_2 x (f i))) \ (\a. return_pmf (s\<^sub>1, s\<^sub>2, k, Suc (length xs), \i\{0..1} \ {0..2}. snd (a i)))" by (simp add:bind_assoc_pmf) also have "... = (prod_pmf ({0..1} \ {0..2}) (\_. fold (\a s. s \ fk_update_2 a) (xs@[x]) (return_pmf (0,0,0))) \ (\a. return_pmf (s\<^sub>1,s\<^sub>2,k, length (xs@[x]), \i\{0..1}\{0..2}. snd (a i))))" by (simp, subst Pi_pmf_bind, auto) finally show ?case by blast qed lemma power_diff_sum: fixes a b :: "'a :: {comm_ring_1,power}" assumes "k > 0" shows "a^k - b^k = (a-b) * (\i = 0.. insert m {Suc m..i. a * (a^i * b^(k-1-i))) {0..i. b * (a^i * b^(k-1-i))) {0..i. (a^i * b^(k-i))) \ (\i. i+1)) {0..i. (a^i * (b^(1+(k-1-i))))) {0..i. (a^i * b^(k-i))) \ (\i. i+1)) {0..i. (a^i * b^(k-i))) {0..i. (a^i * b^(k-i))) (insert k {1..i. (a^i * b^(k-i))) (insert 0 {Suc 0.. 0" assumes "(a :: real) \ b" assumes "b \ 0" shows "a^k -b^k \ (a-b) * k * a^(k-1)" proof - have " \i. i < k \ a ^ i * b ^ (k - 1 - i) \ a ^ i * a ^ (k-1-i)" using assms by (intro mult_left_mono power_mono) auto also have "\i. i < k \ a ^ i * a ^ (k - 1 - i) = a ^ (k - Suc 0)" using assms(1) by (subst power_add[symmetric], simp) finally have a: "\i. i < k \ a ^ i * b ^ (k - 1 - i) \ a ^ (k - Suc 0)" by blast have "a^k - b^k = (a-b) * (\i = 0.. (a-b) * (\i = 0..Specialization of the Hoelder inquality for sums.\ lemma Holder_inequality_sum: assumes "p > (0::real)" "q > 0" "1/p + 1/q = 1" assumes "finite A" shows "\\x\A. f x * g x\ \ (\x\A. \f x\ powr p) powr (1/p) * (\x\A. \g x\ powr q) powr (1/q)" proof - have "\LINT x|count_space A. f x * g x\ \ (LINT x|count_space A. \f x\ powr p) powr (1 / p) * (LINT x|count_space A. \g x\ powr q) powr (1 / q)" using assms integrable_count_space by (intro Lp.Holder_inequality, auto) thus ?thesis using assms by (simp add: lebesgue_integral_count_space_finite[symmetric]) qed lemma real_count_list_pos: assumes "x \ set as" shows "real (count_list as x) > 0" using count_list_gr_1 assms by force lemma fk_estimate: assumes "as \ []" shows "length as * of_rat (F (2*k-1) as) \ n powr (1 - 1 / real k) * (of_rat (F k as))^2" (is "?lhs \ ?rhs") proof (cases "k \ 2") case True define M where "M = Max (count_list as ` set as)" have "M \ count_list as ` set as" unfolding M_def using assms by (intro Max_in, auto) then obtain m where m_in: "m \ set as" and m_def: "M = count_list as m" by blast have a: "real M > 0" using m_in count_list_gr_1 by (simp add:m_def, force) have b: "2*k-1 = (k-1) + k" by simp have " 0 < real (count_list as m)" using m_in count_list_gr_1 by force hence "M powr k = real (count_list as m) ^ k" by (simp add: powr_realpow m_def) also have "... \ (\x\set as. real (count_list as x) ^ k)" using m_in by (intro member_le_sum, simp_all) also have "... \ real_of_rat (F k as)" by (simp add:F_def of_rat_sum of_rat_power) finally have d: "M powr k \ real_of_rat (F k as)" by simp have e: "0 \ real_of_rat (F k as)" using F_gr_0[OF assms(1)] by (simp add: order_le_less) have "real (k - 1) / real k + 1 = real (k - 1) / real k + real k / real k" using assms True by simp also have "... = real (2 * k - 1) / real k" using b by (subst add_divide_distrib[symmetric], force) finally have f: "real (k - 1) / real k + 1 = real (2 * k - 1) / real k" by blast have "real_of_rat (F (2*k-1) as) = (\x\set as. real (count_list as x) ^ (k - 1) * real (count_list as x) ^ k)" using b by (simp add:F_def of_rat_sum sum_distrib_left of_rat_mult power_add of_rat_power) also have "... \ (\x\set as. real M ^ (k - 1) * real (count_list as x) ^ k)" by (intro sum_mono mult_right_mono power_mono of_nat_mono) (auto simp:M_def) also have "... = M powr (k-1) * of_rat (F k as)" using a by (simp add:sum_distrib_left F_def of_rat_mult of_rat_sum of_rat_power powr_realpow) also have "... = (M powr k) powr (real (k - 1) / real k) * of_rat (F k as) powr 1" using e by (simp add:powr_powr) also have "... \ (real_of_rat (F k as)) powr ((k-1)/k) * (real_of_rat (F k as) powr 1)" using d by (intro mult_right_mono powr_mono2, auto) also have "... = (real_of_rat (F k as)) powr ((2*k-1) / k)" by (subst powr_add[symmetric], subst f, simp) finally have a: "real_of_rat (F (2*k-1) as) \ (real_of_rat (F k as)) powr ((2*k-1) / k)" by blast have g: "card (set as) \ n" using card_mono[OF _ as_range] by simp have "length as = abs (sum (\x. real (count_list as x)) (set as))" by (subst of_nat_sum[symmetric], simp add: sum_count_set) also have "... \ card (set as) powr ((k-Suc 0)/k) * (sum (\x. \real (count_list as x)\ powr k) (set as)) powr (1/k)" using assms True by (intro Holder_inequality_sum[where p="k/(k-1)" and q="k" and f="\_.1", simplified]) (auto simp add:algebra_simps add_divide_distrib[symmetric]) also have "... = (card (set as)) powr ((k-1) / real k) * of_rat (F k as) powr (1/ k)" using real_count_list_pos by (simp add:F_def of_rat_sum of_rat_power powr_realpow) also have "... = (card (set as)) powr (1 - 1 / real k) * of_rat (F k as) powr (1/ k)" using k_ge_1 by (subst of_nat_diff[OF k_ge_1], subst diff_divide_distrib, simp) also have "... \ n powr (1 - 1 / real k) * of_rat (F k as) powr (1/ k)" using k_ge_1 g by (intro mult_right_mono powr_mono2, auto) finally have h: "length as \ n powr (1 - 1 / real k) * of_rat (F k as) powr (1/real k)" by blast have i:"1 / real k + real (2 * k - 1) / real k = real 2" using True by (subst add_divide_distrib[symmetric], simp_all add:of_nat_diff) have "?lhs \ n powr (1 - 1/k) * of_rat (F k as) powr (1/k) * (of_rat (F k as)) powr ((2*k-1) / k)" using a h F_ge_0 by (intro mult_mono mult_nonneg_nonneg, auto) also have "... = ?rhs" using i F_gr_0[OF assms] by (simp add:powr_add[symmetric] powr_realpow[symmetric]) finally show ?thesis by blast next case False have "n = 0 \ False" using as_range assms by auto hence "n > 0" by auto moreover have "k = 1" using assms k_ge_1 False by linarith moreover have "length as = real_of_rat (F (Suc 0) as)" by (simp add:F_def sum_count_set of_nat_sum[symmetric] del:of_nat_sum) ultimately show ?thesis by (simp add:power2_eq_square) qed definition result where "result a = of_nat (length as) * of_nat (Suc (snd a) ^ k - snd a ^ k)" lemma result_exp_1: assumes "as \ []" shows "expectation result = real_of_rat (F k as)" proof - have "expectation result = (\a\M\<^sub>1. result a * pmf (pmf_of_set M\<^sub>1) a)" unfolding \\<^sub>1_def using non_empty_space assms fin_space by (subst integral_measure_pmf_real) auto also have "... = (\a\M\<^sub>1. result a / real (length as))" using non_empty_space assms fin_space card_space by simp also have "... = (\a\M\<^sub>1. real (Suc (snd a) ^ k - snd a ^ k))" using assms by (simp add:result_def) also have "... = (\u\set as. \v = 0..u\set as. real (count_list as u)^k)" using k_ge_1 by (subst sum_Suc_diff') (auto simp add:zero_power) also have "... = of_rat (F k as)" by (simp add:F_def of_rat_sum of_rat_power) finally show ?thesis by simp qed lemma result_var_1: assumes "as \ []" shows "variance result \ (of_rat (F k as))\<^sup>2 * k * n powr (1 - 1 / real k)" proof - have k_gt_0: "k > 0" using k_ge_1 by linarith have c:"real (Suc v ^ k) - real (v ^ k) \ k * real (count_list as a) ^ (k - Suc 0)" if c_1: "v < count_list as a" for a v proof - have "real (Suc v ^ k) - real (v ^ k) \ (real (v+1) - real v) * k * (1 + real v) ^ (k - Suc 0)" using k_gt_0 power_diff_est[where a="Suc v" and b="v"] by simp moreover have "(real (v+1) - real v) = 1" by auto ultimately have "real (Suc v ^ k) - real (v ^ k) \ k * (1 + real v) ^ (k - Suc 0)" by auto also have "... \ k * real (count_list as a) ^ (k- Suc 0)" using c_1 by (intro mult_left_mono power_mono, auto) finally show ?thesis by blast qed have "length as * (\a\ M\<^sub>1. (real (Suc (snd a) ^ k - (snd a) ^ k))\<^sup>2) = length as * (\a\ set as. (\v \ {0.. length as * (\a\ set as. (\v \ {0..a\ set as. real (count_list as a) ^ (k-1) * (\v \ {0..a\ set as. real (count_list as a ^ (2*k-1)))" using assms k_ge_1 by (subst sum_Suc_diff', auto simp: zero_power[OF k_gt_0] mult_2 power_add[symmetric]) also have "... = k * (length as * of_rat (F (2*k-1) as))" by (simp add:sum_distrib_left[symmetric] F_def of_rat_sum of_rat_power) also have "... \ k * (of_rat (F k as)^2 * n powr (1 - 1 / real k))" using fk_estimate[OF assms] by (intro mult_left_mono) (auto simp: mult.commute) finally have b: "real (length as) * (\a\ M\<^sub>1. (real (Suc (snd a) ^ k - (snd a) ^ k))\<^sup>2) \ k * ((of_rat (F k as))\<^sup>2 * n powr (1 - 1 / real k))" by blast have "expectation (\\. (result \ :: real)^2) - (expectation result)^2 \ expectation (\\. result \^2)" by simp also have "... = (\a\M\<^sub>1. (length as * real (Suc (snd a) ^ k - snd a ^ k))\<^sup>2 * pmf (pmf_of_set M\<^sub>1) a)" using fin_space non_empty_space assms unfolding \\<^sub>1_def result_def by (subst integral_measure_pmf_real[where A="M\<^sub>1"], auto) also have "... = (\a\M\<^sub>1. length as * (real (Suc (snd a) ^ k - snd a ^ k))\<^sup>2)" using assms non_empty_space fin_space by (subst pmf_of_set) (simp_all add:card_space power_mult_distrib power2_eq_square ac_simps) also have "... \ k * ((of_rat (F k as))\<^sup>2 * n powr (1 - 1 / real k))" using b by (simp add:sum_distrib_left[symmetric]) also have "... = of_rat (F k as)^2 * k * n powr (1 - 1 / real k)" by (simp add:ac_simps) finally have "expectation (\\. result \^2) - (expectation result)^2 \ of_rat (F k as)^2 * k * n powr (1 - 1 / real k)" by blast thus ?thesis using integrable_1[OF assms] by (simp add:variance_eq) qed theorem fk_alg_sketch: assumes "as \ []" shows "fold (\a state. state \ fk_update a) as (fk_init k \ \ n) = map_pmf (\x. (s\<^sub>1,s\<^sub>2,k,length as, x)) M\<^sub>2" (is "?lhs = ?rhs") proof - have "?lhs = prod_pmf ({0..1} \ {0..2}) (\_. fold (\x s. s \ fk_update_2 x) as (return_pmf (0, 0, 0))) \ (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, \i\{0..1} \ {0..2}. snd (x i)))" by (subst fk_update_distr, simp) also have "... = prod_pmf ({0..1} \ {0..2}) (\_. pmf_of_set {.. (\k. return_pmf (length as, sketch as k))) \ (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, \i\{0..1} \ {0..2}. snd (x i)))" by (subst fk_update_2_distr[OF assms], simp) also have "... = prod_pmf ({0..1} \ {0..2}) (\_. pmf_of_set {.. (\k. return_pmf (sketch as k)) \ (\s. return_pmf (length as, s))) \ (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, \i\{0..1} \ {0..2}. snd (x i)))" by (subst bind_assoc_pmf, subst bind_return_pmf, simp) also have "... = prod_pmf ({0..1} \ {0..2}) (\_. pmf_of_set {.. (\k. return_pmf (sketch as k))) \ (\x. return_pmf (\i \ {0..1} \ {0..2}. (length as, x i))) \ (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, \i\{0..1} \ {0..2}. snd (x i)))" by (subst Pi_pmf_bind_return[where d'="undefined"], simp, simp add:restrict_def) also have "... = prod_pmf ({0..1} \ {0..2}) (\_. pmf_of_set {.. (\k. return_pmf (sketch as k))) \ (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, restrict x ({0..1} \ {0..2})))" by (subst bind_assoc_pmf, simp add:bind_return_pmf cong:restrict_cong) also have "... = M\<^sub>2 \ (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, restrict x ({0..1} \ {0..2})))" by (subst sketch_distr[OF assms], simp add:M\<^sub>2_def) also have "... = M\<^sub>2 \ (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, x))" by (rule bind_pmf_cong, auto simp add:PiE_dflt_def M\<^sub>2_def set_Pi_pmf) also have "... = ?rhs" by (simp add:map_pmf_def) finally show ?thesis by simp qed definition mean_rv where "mean_rv \ i\<^sub>2 = (\i\<^sub>1 = 0..1. result (\ (i\<^sub>1, i\<^sub>2))) / of_nat s\<^sub>1" definition median_rv where "median_rv \ = median s\<^sub>2 (\i\<^sub>2. mean_rv \ i\<^sub>2)" lemma fk_alg_correct': defines "M \ fold (\a state. state \ fk_update a) as (fk_init k \ \ n) \ fk_result" shows "\

(\ in measure_pmf M. \\ - F k as\ \ \ * F k as) \ 1 - of_rat \" proof (cases "as = []") case True have a: "nat \- (18 * ln (real_of_rat \))\ > 0" using \_range by simp show ?thesis using True \_range by (simp add:F_def M_def bind_return_pmf median_const[OF a] Let_def) next case False have "set as \ {}" using assms False by blast hence n_nonzero: "n > 0" using as_range by fastforce have fk_nonzero: "F k as > 0" using F_gr_0[OF False] by simp have s1_nonzero: "s\<^sub>1 > 0" using \_range k_ge_1 n_nonzero by (simp add:s\<^sub>1_def) have s2_nonzero: "s\<^sub>2 > 0" using \_range by (simp add:s\<^sub>2_def) have real_of_rat_mean_rv: "\x i. mean_rv x = (\i. real_of_rat (mean_rv x i))" by (rule ext, simp add:of_rat_divide of_rat_sum of_rat_mult result_def mean_rv_def) have real_of_rat_median_rv: "\x. median_rv x = real_of_rat (median_rv x)" unfolding median_rv_def using s2_nonzero by (subst real_of_rat_mean_rv, simp add: median_rat median_restrict) have space_\\<^sub>2: "space \\<^sub>2 = UNIV" by (simp add:\\<^sub>2_def) have fk_result_eta: "fk_result = (\(x,y,z,u,v). fk_result (x,y,z,u,v))" by auto have a:"fold (\x state. state \ fk_update x) as (fk_init k \ \ n) = map_pmf (\x. (s\<^sub>1,s\<^sub>2,k,length as, x)) M\<^sub>2" by (subst fk_alg_sketch[OF False]) (simp add:s\<^sub>1_def[symmetric] s\<^sub>2_def[symmetric]) have "M = map_pmf (\x. (s\<^sub>1, s\<^sub>2, k, length as, x)) M\<^sub>2 \ fk_result" by (subst M_def, subst a, simp) also have "... = M\<^sub>2 \ return_pmf \ median_rv" by (subst fk_result_eta) (auto simp add:map_pmf_def bind_assoc_pmf bind_return_pmf median_rv_def mean_rv_def comp_def M\<^sub>1_def result_def median_restrict) finally have b: "M = M\<^sub>2 \ return_pmf \ median_rv" by simp have result_exp: "i\<^sub>1 < s\<^sub>1 \ i\<^sub>2 < s\<^sub>2 \ \\<^sub>2.expectation (\x. result (x (i\<^sub>1, i\<^sub>2))) = real_of_rat (F k as)" for i\<^sub>1 i\<^sub>2 unfolding \\<^sub>2_def M\<^sub>2_def using integrable_1[OF False] result_exp_1[OF False] by (subst expectation_Pi_pmf_slice, auto simp:\\<^sub>1_def) have result_var: "\\<^sub>2.variance (\\. result (\ (i\<^sub>1, i\<^sub>2))) \ of_rat (\ * F k as)^2 * real s\<^sub>1 / 3" if result_var_assms: "i\<^sub>1 < s\<^sub>1" "i\<^sub>2 < s\<^sub>2" for i\<^sub>1 i\<^sub>2 proof - have "3 * real k * n powr (1 - 1 / real k) = (of_rat \)\<^sup>2 * (3 * real k * n powr (1 - 1 / real k) / (of_rat \)\<^sup>2)" using \_range by simp also have "... \ (real_of_rat \)\<^sup>2 * (real s\<^sub>1)" unfolding s\<^sub>1_def by (intro mult_mono of_nat_ceiling, simp_all) finally have f2_var_2: "3 * real k * n powr (1 - 1 / real k) \ (of_rat \)\<^sup>2 * (real s\<^sub>1)" by blast have "\\<^sub>2.variance (\\. result (\ (i\<^sub>1, i\<^sub>2)) :: real) = variance result" using result_var_assms integrable_1[OF False] unfolding \\<^sub>2_def M\<^sub>2_def \\<^sub>1_def by (subst variance_prod_pmf_slice, auto) also have "... \ of_rat (F k as)^2 * real k * n powr (1 - 1 / real k)" using assms False result_var_1 \\<^sub>1_def by simp also have "... = of_rat (F k as)^2 * (real k * n powr (1 - 1 / real k))" by (simp add:ac_simps) also have "... \ of_rat (F k as)^2 * (of_rat \^2 * (real s\<^sub>1 / 3))" using f2_var_2 by (intro mult_left_mono, auto) also have "... = of_rat (F k as * \)^2 * (real s\<^sub>1 / 3)" by (simp add: of_rat_mult power_mult_distrib) also have "... = of_rat (\ * F k as)^2 * real s\<^sub>1 / 3" by (simp add:ac_simps) finally show ?thesis by simp qed have mean_rv_exp: "\\<^sub>2.expectation (\\. mean_rv \ i) = real_of_rat (F k as)" if mean_rv_exp_assms: "i < s\<^sub>2" for i proof - have "\\<^sub>2.expectation (\\. mean_rv \ i) = \\<^sub>2.expectation (\\. \n = 0..1. result (\ (n, i)) / real s\<^sub>1)" by (simp add:mean_rv_def sum_divide_distrib) also have "... = (\n = 0..1. \\<^sub>2.expectation (\\. result (\ (n, i))) / real s\<^sub>1)" using integrable_2[OF False] by (subst Bochner_Integration.integral_sum, auto) also have "... = of_rat (F k as)" using s1_nonzero mean_rv_exp_assms by (simp add:result_exp) finally show ?thesis by simp qed have mean_rv_var: "\\<^sub>2.variance (\\. mean_rv \ i) \ real_of_rat (\ * F k as)^2/3" if mean_rv_var_assms: "i < s\<^sub>2" for i proof - have a:"\\<^sub>2.indep_vars (\_. borel) (\n x. result (x (n, i)) / real s\<^sub>1) {0..1}" unfolding \\<^sub>2_def M\<^sub>2_def using mean_rv_var_assms by (intro indep_vars_restrict_intro'[where f="fst"], simp, simp add:restrict_dfl_def, simp, simp) have "\\<^sub>2.variance (\\. mean_rv \ i) = \\<^sub>2.variance (\\. \j = 0..1. result (\ (j, i)) / real s\<^sub>1)" by (simp add:mean_rv_def sum_divide_distrib) also have "... = (\j = 0..1. \\<^sub>2.variance (\\. result (\ (j, i)) / real s\<^sub>1))" using a integrable_2[OF False] by (subst \\<^sub>2.var_sum_all_indep, auto simp add:\\<^sub>2_def) also have "... = (\j = 0..1. \\<^sub>2.variance (\\. result (\ (j, i))) / real s\<^sub>1^2)" using integrable_2[OF False] by (subst \\<^sub>2.variance_divide, auto) also have "... \ (\j = 0..1. ((real_of_rat (\ * F k as))\<^sup>2 * real s\<^sub>1 / 3) / (real s\<^sub>1^2))" using result_var[OF _ mean_rv_var_assms] by (intro sum_mono divide_right_mono, auto) also have "... = real_of_rat (\ * F k as)^2/3" using s1_nonzero by (simp add:algebra_simps power2_eq_square) finally show ?thesis by simp qed have "\\<^sub>2.prob {y. of_rat (\ * F k as) < \mean_rv y i - real_of_rat (F k as)\} \ 1/3" (is "?lhs \ _") if c_assms: "i < s\<^sub>2" for i proof - define a where "a = real_of_rat (\ * F k as)" have c: "0 < a" unfolding a_def using assms \_range fk_nonzero by (metis zero_less_of_rat_iff mult_pos_pos) have "?lhs \ \\<^sub>2.prob {y \ space \\<^sub>2. a \ \mean_rv y i - \\<^sub>2.expectation (\\. mean_rv \ i)\}" by (intro \\<^sub>2.pmf_mono[OF \\<^sub>2_def], simp add:a_def mean_rv_exp[OF c_assms] space_\\<^sub>2) also have "... \ \\<^sub>2.variance (\\. mean_rv \ i)/a^2" by (intro \\<^sub>2.Chebyshev_inequality integrable_2 c False) (simp add:\\<^sub>2_def) also have "... \ 1/3" using c using mean_rv_var[OF c_assms] by (simp add:algebra_simps, simp add:a_def) finally show ?thesis by blast qed moreover have "\\<^sub>2.indep_vars (\_. borel) (\i \. mean_rv \ i) {0..2}" using s1_nonzero unfolding \\<^sub>2_def M\<^sub>2_def by (intro indep_vars_restrict_intro'[where f="snd"] finite_cartesian_product) (simp_all add:mean_rv_def restrict_dfl_def space_\\<^sub>2) moreover have " - (18 * ln (real_of_rat \)) \ real s\<^sub>2" by (simp add:s\<^sub>2_def, linarith) ultimately have "1 - of_rat \ \ \\<^sub>2.prob {y \ space \\<^sub>2. \median s\<^sub>2 (mean_rv y) - real_of_rat (F k as)\ \ of_rat (\ * F k as)}" using \_range by (intro \\<^sub>2.median_bound_2, simp_all add:space_\\<^sub>2) also have "... = \\<^sub>2.prob {y. \median_rv y - real_of_rat (F k as)\ \ real_of_rat (\ * F k as)}" by (simp add:median_rv_def space_\\<^sub>2) also have "... = \\<^sub>2.prob {y. \median_rv y - F k as\ \ \ * F k as}" by (simp add:real_of_rat_median_rv of_rat_less_eq flip: of_rat_diff) also have "... = \

(\ in measure_pmf M. \\ - F k as\ \ \ * F k as)" by (simp add: b comp_def map_pmf_def[symmetric] \\<^sub>2_def) finally show ?thesis by simp qed lemma fk_exact_space_usage': defines "M \ fold (\a state. state \ fk_update a) as (fk_init k \ \ n)" shows "AE \ in M. bit_count (encode_fk_state \) \ fk_space_usage (k, n, length as, \, \)" (is "AE \ in M. (_ \ ?rhs)") proof - define H where "H = (if as = [] then return_pmf (\i\ {0..1}\{0..2}. (0,0)) else M\<^sub>2)" have a:"M = map_pmf (\x.(s\<^sub>1,s\<^sub>2,k,length as, x)) H" proof (cases "as \ []") case True then show ?thesis unfolding M_def fk_alg_sketch[OF True] H_def by (simp add:M\<^sub>2_def) next case False then show ?thesis by (simp add:H_def M_def s\<^sub>1_def[symmetric] Let_def s\<^sub>2_def[symmetric] map_pmf_def bind_return_pmf) qed have "bit_count (encode_fk_state (s\<^sub>1, s\<^sub>2, k, length as, y)) \ ?rhs" if b:"y \ set_pmf H" for y proof - have b0:" as \ [] \ y \ {0..1} \ {0..2} \\<^sub>E M\<^sub>1" using b non_empty_space fin_space by (simp add:H_def M\<^sub>2_def set_prod_pmf) have "bit_count ((N\<^sub>e \\<^sub>e N\<^sub>e) (y x)) \ ereal (2 * log 2 (real n + 1) + 1) + ereal (2 * log 2 (real (length as) + 1) + 1)" (is "_ \ ?rhs1") if b1_assms: "x \ {0..1}\{0..2}" for x proof - have "fst (y x) \ n" proof (cases "as = []") case True then show ?thesis using b b1_assms by (simp add:H_def) next case False hence "1 \ count_list as (fst (y x))" using b0 b1_assms by (simp add:PiE_iff case_prod_beta M\<^sub>1_def, fastforce) hence "fst (y x) \ set as" using count_list_gr_1 by metis then show ?thesis by (meson lessThan_iff less_imp_le_nat subsetD as_range) qed moreover have "snd (y x) \ length as" proof (cases "as = []") case True then show ?thesis using b b1_assms by (simp add:H_def) next case False hence "(y x) \ M\<^sub>1" using b0 b1_assms by auto hence "snd (y x) \ count_list as (fst (y x))" by (simp add:M\<^sub>1_def case_prod_beta) then show ?thesis using count_le_length by (metis order_trans) qed ultimately have "bit_count (N\<^sub>e (fst (y x))) + bit_count (N\<^sub>e (snd (y x))) \ ?rhs1" using exp_golomb_bit_count_est by (intro add_mono, auto) thus ?thesis by (subst dependent_bit_count_2, simp) qed moreover have "y \ extensional ({0..1} \ {0..2})" using b0 b PiE_iff by (cases "as = []", auto simp:H_def PiE_iff) ultimately have "bit_count ((List.product [0..1] [0..2] \\<^sub>e N\<^sub>e \\<^sub>e N\<^sub>e) y) \ ereal (real s\<^sub>1 * real s\<^sub>2) * (ereal (2 * log 2 (real n + 1) + 1) + ereal (2 * log 2 (real (length as) + 1) + 1))" by (intro fun_bit_count_est[where xs="(List.product [0..1] [0..2])", simplified], auto) hence "bit_count (encode_fk_state (s\<^sub>1, s\<^sub>2, k, length as, y)) \ ereal (2 * log 2 (real s\<^sub>1 + 1) + 1) + (ereal (2 * log 2 (real s\<^sub>2 + 1) + 1) + (ereal (2 * log 2 (real k + 1) + 1) + (ereal (2 * log 2 (real (length as) + 1) + 1) + (ereal (real s\<^sub>1 * real s\<^sub>2) * (ereal (2 * log 2 (real n+1) + 1) + ereal (2 * log 2 (real (length as)+1) + 1))))))" unfolding encode_fk_state_def dependent_bit_count by (intro add_mono exp_golomb_bit_count, auto) also have "... \ ?rhs" by (simp add: s\<^sub>1_def[symmetric] s\<^sub>2_def[symmetric] Let_def) (simp add:ac_simps) finally show "bit_count (encode_fk_state (s\<^sub>1, s\<^sub>2, k, length as, y)) \ ?rhs" by blast qed thus ?thesis by (simp add: a AE_measure_pmf_iff del:fk_space_usage.simps) qed end text \Main results of this section:\ theorem fk_alg_correct: assumes "k \ 1" assumes "\ \ {0<..<1}" assumes "\ > 0" assumes "set as \ {.. fold (\a state. state \ fk_update a) as (fk_init k \ \ n) \ fk_result" shows "\

(\ in measure_pmf M. \\ - F k as\ \ \ * F k as) \ 1 - of_rat \" unfolding M_def using fk_alg_correct'[OF assms(1-4)] by blast theorem fk_exact_space_usage: assumes "k \ 1" assumes "\ \ {0<..<1}" assumes "\ > 0" assumes "set as \ {.. fold (\a state. state \ fk_update a) as (fk_init k \ \ n)" shows "AE \ in M. bit_count (encode_fk_state \) \ fk_space_usage (k, n, length as, \, \)" unfolding M_def using fk_exact_space_usage'[OF assms(1-4)] by blast -theorem fk_asympotic_space_complexity: +theorem fk_asymptotic_space_complexity: "fk_space_usage \ O[at_top \\<^sub>F at_top \\<^sub>F at_top \\<^sub>F at_right (0::rat) \\<^sub>F at_right (0::rat)](\ (k, n, m, \, \). real k * real n powr (1-1/ real k) / (of_rat \)\<^sup>2 * (ln (1 / of_rat \)) * (ln (real n) + ln (real m)))" (is "_ \ O[?F](?rhs)") proof - define k_of :: "nat \ nat \ nat \ rat \ rat \ nat" where "k_of = (\(k, n, m, \, \). k)" define n_of :: "nat \ nat \ nat \ rat \ rat \ nat" where "n_of = (\(k, n, m, \, \). n)" define m_of :: "nat \ nat \ nat \ rat \ rat \ nat" where "m_of = (\(k, n, m, \, \). m)" define \_of :: "nat \ nat \ nat \ rat \ rat \ rat" where "\_of = (\(k, n, m, \, \). \)" define \_of :: "nat \ nat \ nat \ rat \ rat \ rat" where "\_of = (\(k, n, m, \, \). \)" define g1 where "g1 = (\x. real (k_of x)*(real (n_of x)) powr (1-1/ real (k_of x)) * (1 / of_rat (\_of x)^2))" define g where "g = (\x. g1 x * (ln (1 / of_rat (\_of x))) * (ln (real (n_of x)) + ln (real (m_of x))))" define s1_of where "s1_of = (\x. nat \3 * real (k_of x) * real (n_of x) powr (1 - 1 / real (k_of x)) / (real_of_rat (\_of x))\<^sup>2\)" define s2_of where "s2_of = (\x. nat \- (18 * ln (real_of_rat (\_of x)))\)" have evt: "(\x. 0 < real_of_rat (\_of x) \ 0 < real_of_rat (\_of x) \ 1/real_of_rat (\_of x) \ \ \ 1/real_of_rat (\_of x) \ \ \ real (n_of x) \ n \ real (k_of x) \ k \ real (m_of x) \ m\ P x) \ eventually P ?F" (is "(\x. ?prem x \ _) \ _") for \ \ n k m P apply (rule eventually_mono[where P="?prem" and Q="P"]) apply (simp add:\_of_def case_prod_beta' \_of_def n_of_def k_of_def m_of_def) apply (intro eventually_conj eventually_prod1' eventually_prod2' sequentially_inf eventually_at_right_less inv_at_right_0_inf) by (auto simp add:prod_filter_eq_bot) have 1: "(\_. 1) \ O[?F](\x. real (n_of x))" "(\_. 1) \ O[?F](\x. real (m_of x))" "(\_. 1) \ O[?F](\x. real (k_of x))" by (intro landau_o.big_mono eventually_mono[OF evt], auto)+ have "(\x. ln (real (m_of x) + 1)) \ O[?F](\x. ln (real (m_of x)))" by (intro landau_ln_2[where a="2"] evt[where m="2"] sum_in_bigo 1, auto) hence 2: " (\x. log 2 (real (m_of x) + 1)) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" by (intro landau_sum_2 eventually_mono[OF evt[where n="1" and m="1"]]) (auto simp add:log_def) have 3: "(\_. 1) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" using order_less_le_trans[OF exp_gt_zero] ln_ge_iff by (intro landau_o.big_mono evt[where \="exp 1"]) (simp add: abs_ge_iff, blast) have 4: "(\_. 1) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" using one_le_power by (intro landau_o.big_mono evt[where \="1"]) (simp add:power_one_over[symmetric], blast) have "(\x. 1) \ O[?F](\x. ln (real (n_of x)))" using order_less_le_trans[OF exp_gt_zero] ln_ge_iff by (intro landau_o.big_mono evt[where n="exp 1"]) (simp add: abs_ge_iff, blast) hence 5: "(\x. 1) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" by (intro landau_sum_1 evt[where n="1" and m="1"], auto) have "(\x. -ln(of_rat (\_of x))) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" by (intro landau_o.big_mono evt) (auto simp add:ln_div) hence 6: "(\x. real (s2_of x)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" unfolding s2_of_def by (intro landau_nat_ceil 3, simp) have 7: "(\_. 1) \ O[?F](\x. real (n_of x) powr (1 - 1 / real (k_of x)))" by (intro landau_o.big_mono evt[where n="1" and k="1"]) (auto simp add: ge_one_powr_ge_zero) have 8: "(\_. 1) \ O[?F](g1)" unfolding g1_def by (intro landau_o.big_mult_1 1 7 4) have "(\x. 3 * (real (k_of x) * (n_of x) powr (1 - 1 / real (k_of x)) / (of_rat (\_of x))\<^sup>2)) \ O[?F](g1)" by (subst landau_o.big.cmult_in_iff, simp, simp add:g1_def) hence 9: "(\x. real (s1_of x)) \ O[?F](g1)" unfolding s1_of_def by (intro landau_nat_ceil 8, auto simp:ac_simps) have 10: "(\_. 1) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1 8 3 5) have "(\x. real (s1_of x)) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1 5 3 9) hence "(\x. ln (real (s1_of x) + 1)) \ O[?F](g)" using 10 by (intro landau_ln_3 sum_in_bigo, auto) hence 11: "(\x. log 2 (real (s1_of x) + 1)) \ O[?F](g)" by (simp add:log_def) have 12: " (\x. ln (real (s2_of x) + 1)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" using evt[where \="2"] 6 3 by (intro landau_ln_3 sum_in_bigo, auto) have 13: "(\x. log 2 (real (s2_of x) + 1)) \ O[?F](g)" unfolding g_def by (rule landau_o.big_mult_1, rule landau_o.big_mult_1', auto simp add: 8 5 12 log_def) have "(\x. real (k_of x)) \ O[?F](g1)" unfolding g1_def using 7 4 by (intro landau_o.big_mult_1, simp_all) hence "(\x. log 2 (real (k_of x) + 1)) \ O[?F](g1)" by (simp add:log_def) (intro landau_ln_3 sum_in_bigo 8, auto) hence 14: "(\x. log 2 (real (k_of x) + 1)) \ O[?F](g)" unfolding g_def by (intro landau_o.big_mult_1 3 5) have 15: "(\x. log 2 (real (m_of x) + 1)) \ O[?F](g)" unfolding g_def using 2 8 3 by (intro landau_o.big_mult_1', simp_all) have "(\x. ln (real (n_of x) + 1)) \ O[?F](\x. ln (real (n_of x)))" by (intro landau_ln_2[where a="2"] eventually_mono[OF evt[where n="2"]] sum_in_bigo 1, auto) hence " (\x. log 2 (real (n_of x) + 1)) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" by (intro landau_sum_1 evt[where n="1" and m="1"]) (auto simp add:log_def) hence 16: "(\x. real (s1_of x) * real (s2_of x) * (2 + 2 * log 2 (real (n_of x) + 1) + 2 * log 2 (real (m_of x) + 1))) \ O[?F](g)" unfolding g_def using 9 6 5 2 by (intro landau_o.mult sum_in_bigo, auto) have "fk_space_usage = (\x. fk_space_usage (k_of x, n_of x, m_of x, \_of x, \_of x))" by (simp add:case_prod_beta' k_of_def n_of_def \_of_def \_of_def m_of_def) also have "... \ O[?F](g)" using 10 11 13 14 15 16 by (simp add:fun_cong[OF s1_of_def[symmetric]] fun_cong[OF s2_of_def[symmetric]] Let_def) (intro sum_in_bigo, auto) also have "... = O[?F](?rhs)" by (simp add:case_prod_beta' g1_def g_def n_of_def \_of_def \_of_def m_of_def k_of_def) finally show ?thesis by simp qed end diff --git a/thys/Frequency_Moments/Frequency_Moments_Preliminary_Results.thy b/thys/Frequency_Moments/Frequency_Moments_Preliminary_Results.thy --- a/thys/Frequency_Moments/Frequency_Moments_Preliminary_Results.thy +++ b/thys/Frequency_Moments/Frequency_Moments_Preliminary_Results.thy @@ -1,500 +1,491 @@ section \Preliminary Results\ theory Frequency_Moments_Preliminary_Results imports "HOL.Transcendental" "HOL-Computational_Algebra.Primes" "HOL-Library.Extended_Real" "HOL-Library.Multiset" "HOL-Library.Sublist" Prefix_Free_Code_Combinators.Prefix_Free_Code_Combinators Bertrands_Postulate.Bertrand begin text \This section contains various preliminary results.\ lemma card_ordered_pairs: fixes M :: "('a ::linorder) set" assumes "finite M" shows "2 * card {(x,y) \ M \ M. x < y} = card M * (card M - 1)" proof - have a: "finite (M \ M)" using assms by simp have inj_swap: "inj (\x. (snd x, fst x))" by (rule inj_onI, simp add: prod_eq_iff) have "2 * card {(x,y) \ M \ M. x < y} = card {(x,y) \ M \ M. x < y} + card ((\x. (snd x, fst x))`{(x,y) \ M \ M. x < y})" by (simp add: card_image[OF inj_on_subset[OF inj_swap]]) also have "... = card {(x,y) \ M \ M. x < y} + card {(x,y) \ M \ M. y < x}" by (auto intro: arg_cong[where f="card"] simp add:set_eq_iff image_iff) also have "... = card ({(x,y) \ M \ M. x < y} \ {(x,y) \ M \ M. y < x})" by (intro card_Un_disjoint[symmetric] a finite_subset[where B="M \ M"] subsetI) auto also have "... = card ((M \ M) - {(x,y) \ M \ M. x = y})" by (auto intro: arg_cong[where f="card"] simp add:set_eq_iff) also have "... = card (M \ M) - card {(x,y) \ M \ M. x = y}" by (intro card_Diff_subset a finite_subset[where B="M \ M"] subsetI) auto also have "... = card M ^ 2 - card ((\x. (x,x)) ` M)" using assms by (intro arg_cong2[where f="(-)"] arg_cong[where f="card"]) (auto simp:power2_eq_square set_eq_iff image_iff) also have "... = card M ^ 2 - card M" by (intro arg_cong2[where f="(-)"] card_image inj_onI, auto) also have "... = card M * (card M - 1)" by (cases "card M \ 0", auto simp:power2_eq_square algebra_simps) finally show ?thesis by simp qed lemma ereal_mono: "x \ y \ ereal x \ ereal y" by simp lemma log_mono: "a > 1 \ x \ y \ 0 < x \ log a x \ log a y" by (subst log_le_cancel_iff, auto) lemma abs_ge_iff: "((x::real) \ abs y) = (x \ y \ x \ -y)" by linarith lemma count_list_gr_1: "(x \ set xs) = (count_list xs x \ 1)" by (induction xs, simp, simp) lemma count_list_append: "count_list (xs@ys) v = count_list xs v + count_list ys v" by (induction xs, simp, simp) lemma count_list_lt_suffix: assumes "suffix a b" assumes "x \ {b ! i| i. i < length b - length a}" shows "count_list a x < count_list b x" proof - have "length a \ length b" using assms(1) by (simp add: suffix_length_le) hence "x \ set (nths b {i. i < length b - length a})" using assms diff_commute by (auto simp add:set_nths) hence a:"x \ set (take (length b - length a) b)" by (subst (asm) lessThan_def[symmetric], simp) have "b = (take (length b - length a) b)@drop (length b - length a) b" by simp also have "... = (take (length b - length a) b)@a" using assms(1) suffix_take by auto finally have b:"b = (take (length b - length a) b)@a" by simp have "count_list a x < 1 + count_list a x" by simp also have "... \ count_list (take (length b - length a) b) x + count_list a x" using a count_list_gr_1 by (intro add_mono, fast, simp) also have "... = count_list b x" using b count_list_append by metis finally show ?thesis by simp qed lemma suffix_drop_drop: assumes "x \ y" shows "suffix (drop x a) (drop y a)" proof - have "drop y a = take (x - y) (drop y a)@drop (x- y) (drop y a)" by (subst append_take_drop_id, simp) also have " ... = take (x-y) (drop y a)@drop x a" using assms by simp finally have "drop y a = take (x-y) (drop y a)@drop x a" by simp thus ?thesis by (auto simp add:suffix_def) qed lemma count_list_card: "count_list xs x = card {k. k < length xs \ xs ! k = x}" proof - have "count_list xs x = length (filter ((=) x) xs)" by (induction xs, simp, simp) also have "... = card {k. k < length xs \ xs ! k = x}" by (subst length_filter_conv_card, metis) finally show ?thesis by simp qed lemma card_gr_1_iff: assumes "finite S" "x \ S" "y \ S" "x \ y" shows "card S > 1" using assms card_le_Suc0_iff_eq leI by auto lemma count_list_ge_2_iff: assumes "y < z" assumes "z < length xs" assumes "xs ! y = xs ! z" shows "count_list xs (xs ! y) > 1" proof - have " 1 < card {k. k < length xs \ xs ! k = xs ! y}" using assms by (intro card_gr_1_iff[where x="y" and y="z"], auto) thus ?thesis by (simp add: count_list_card) qed text \Results about multisets and sorting\ text \This is a induction scheme over the distinct elements of a multisets: We can represent each multiset as a sum like: @{text "replicate_mset n\<^sub>1 x\<^sub>1 + replicate_mset n\<^sub>2 x\<^sub>2 + ... + replicate_mset n\<^sub>k x\<^sub>k"} where the @{term "x\<^sub>i"} are distinct.\ lemma disj_induct_mset: assumes "P {#}" assumes "\n M x. P M \ \(x \# M) \ n > 0 \ P (M + replicate_mset n x)" shows "P M" proof (induction "size M" arbitrary: M rule:nat_less_induct) case 1 show ?case proof (cases "M = {#}") case True then show ?thesis using assms by simp next case False then obtain x where x_def: "x \# M" using multiset_nonemptyE by auto define M1 where "M1 = M - replicate_mset (count M x) x" then have M_def: "M = M1 + replicate_mset (count M x) x" by (metis count_le_replicate_mset_subset_eq dual_order.refl subset_mset.diff_add) have "size M1 < size M" by (metis M_def x_def count_greater_zero_iff less_add_same_cancel1 size_replicate_mset size_union) hence "P M1" using 1 by blast then show "P M" apply (subst M_def, rule assms(2), simp) by (simp add:M1_def x_def count_eq_zero_iff[symmetric])+ qed qed lemma prod_mset_conv: fixes f :: "'a \ 'b::{comm_monoid_mult}" shows "prod_mset (image_mset f A) = prod (\x. f x^(count A x)) (set_mset A)" proof (induction A rule: disj_induct_mset) case 1 then show ?case by simp next case (2 n M x) moreover have "count M x = 0" using 2 by (simp add: count_eq_zero_iff) moreover have "\y. y \ set_mset M \ y \ x" using 2 by blast ultimately show ?case by (simp add:algebra_simps) qed -lemma sum_collapse: - fixes f :: "'a \ 'b::{comm_monoid_add}" - assumes "finite A" - assumes "z \ A" - assumes "\y. y \ A \ y \ z \ f y = 0" - shows "sum f A = f z" - using sum.union_disjoint[where A="A-{z}" and B="{z}" and g="f"] - by (simp add: assms sum.insert_if) - text \There is a version @{thm [source] sum_list_map_eq_sum_count} but it doesn't work if the function maps into the reals.\ lemma sum_list_eval: fixes f :: "'a \ 'b::{ring,semiring_1}" shows "sum_list (map f xs) = (\x \ set xs. of_nat (count_list xs x) * f x)" proof - define M where "M = mset xs" have "sum_mset (image_mset f M) = (\x \ set_mset M. of_nat (count M x) * f x)" proof (induction "M" rule:disj_induct_mset) case 1 then show ?case by simp next case (2 n M x) have a:"\y. y \ set_mset M \ y \ x" using 2(2) by blast show ?case using 2 by (simp add:a count_eq_zero_iff[symmetric]) qed moreover have "\x. count_list xs x = count (mset xs) x" by (induction xs, simp, simp) ultimately show ?thesis by (simp add:M_def sum_mset_sum_list[symmetric]) qed lemma prod_list_eval: fixes f :: "'a \ 'b::{ring,semiring_1,comm_monoid_mult}" shows "prod_list (map f xs) = (\x \ set xs. (f x)^(count_list xs x))" proof - define M where "M = mset xs" have "prod_mset (image_mset f M) = (\x \ set_mset M. f x ^ (count M x))" proof (induction "M" rule:disj_induct_mset) case 1 then show ?case by simp next case (2 n M x) have a:"\y. y \ set_mset M \ y \ x" using 2(2) by blast have b:"count M x = 0" using 2 by (subst count_eq_zero_iff) blast show ?case using 2 by (simp add:a b mult.commute) qed moreover have "\x. count_list xs x = count (mset xs) x" by (induction xs, simp, simp) ultimately show ?thesis by (simp add:M_def prod_mset_prod_list[symmetric]) qed lemma sorted_sorted_list_of_multiset: "sorted (sorted_list_of_multiset M)" by (induction M, auto simp:sorted_insort) lemma count_mset: "count (mset xs) a = count_list xs a" by (induction xs, auto) lemma swap_filter_image: "filter_mset g (image_mset f A) = image_mset f (filter_mset (g \ f) A)" by (induction A, auto) lemma list_eq_iff: assumes "mset xs = mset ys" assumes "sorted xs" assumes "sorted ys" shows "xs = ys" using assms properties_for_sort by blast lemma sorted_list_of_multiset_image_commute: assumes "mono f" shows "sorted_list_of_multiset (image_mset f M) = map f (sorted_list_of_multiset M)" proof - have "sorted (sorted_list_of_multiset (image_mset f M))" by (simp add:sorted_sorted_list_of_multiset) moreover have " sorted_wrt (\x y. f x \ f y) (sorted_list_of_multiset M)" by (rule sorted_wrt_mono_rel[where P="\x y. x \ y"]) (auto intro: monoD[OF assms] sorted_sorted_list_of_multiset) hence "sorted (map f (sorted_list_of_multiset M))" by (subst sorted_wrt_map) ultimately show ?thesis by (intro list_eq_iff, auto) qed text \Results about rounding and floating point numbers\ lemma round_down_ge: "x \ round_down prec x + 2 powr (-prec)" using round_down_correct by (simp, meson diff_diff_eq diff_eq_diff_less_eq) lemma truncate_down_ge: "x \ truncate_down prec x + abs x * 2 powr (-prec)" proof (cases "abs x > 0") case True have "x \ round_down (int prec - \log 2 \x\\) x + 2 powr (-real_of_int(int prec - \log 2 \x\\))" by (rule round_down_ge) also have "... \ truncate_down prec x + 2 powr ( \log 2 \x\\) * 2 powr (-real prec)" by (rule add_mono, simp_all add:powr_add[symmetric] truncate_down_def) also have "... \ truncate_down prec x + \x\ * 2 powr (-real prec)" using True by (intro add_mono mult_right_mono, simp_all add:le_log_iff[symmetric]) finally show ?thesis by simp next case False then show ?thesis by simp qed lemma truncate_down_pos: assumes "x \ 0" shows "x * (1 - 2 powr (-prec)) \ truncate_down prec x" by (simp add:right_diff_distrib diff_le_eq) (metis truncate_down_ge assms abs_of_nonneg) lemma truncate_down_eq: assumes "truncate_down r x = truncate_down r y" shows "abs (x-y) \ max (abs x) (abs y) * 2 powr (-real r)" proof - have "x - y \ truncate_down r x + abs x * 2 powr (-real r) - y" by (rule diff_right_mono, rule truncate_down_ge) also have "... \ y + abs x * 2 powr (-real r) - y" using truncate_down_le by (intro diff_right_mono add_mono, subst assms(1), simp_all) also have "... \ abs x * 2 powr (-real r)" by simp also have "... \ max (abs x) (abs y) * 2 powr (-real r)" by simp finally have a:"x - y \ max (abs x) (abs y) * 2 powr (-real r)" by simp have "y - x \ truncate_down r y + abs y * 2 powr (-real r) - x" by (rule diff_right_mono, rule truncate_down_ge) also have "... \ x + abs y * 2 powr (-real r) - x" using truncate_down_le by (intro diff_right_mono add_mono, subst assms(1)[symmetric], auto) also have "... \ abs y * 2 powr (-real r)" by simp also have "... \ max (abs x) (abs y) * 2 powr (-real r)" by simp finally have b:"y - x \ max (abs x) (abs y) * 2 powr (-real r)" by simp show ?thesis using abs_le_iff a b by linarith qed definition rat_of_float :: "float \ rat" where "rat_of_float f = of_int (mantissa f) * (if exponent f \ 0 then 2 ^ (nat (exponent f)) else 1 / 2 ^ (nat (-exponent f)))" lemma real_of_rat_of_float: "real_of_rat (rat_of_float x) = real_of_float x" proof - have "real_of_rat (rat_of_float x) = mantissa x * (2 powr (exponent x))" by (simp add:rat_of_float_def of_rat_mult of_rat_divide of_rat_power powr_realpow[symmetric] powr_minus_divide) also have "... = real_of_float x" using mantissa_exponent by simp finally show ?thesis by simp qed lemma log_est: "log 2 (real n + 1) \ n" proof - have "1 + real n = real (n + 1)" by simp also have "... \ real (2 ^ n)" by (intro of_nat_mono suc_n_le_2_pow_n) also have "... = 2 powr (real n)" by (simp add:powr_realpow) finally have "1 + real n \ 2 powr (real n)" by simp thus ?thesis by (simp add: Transcendental.log_le_iff) qed lemma truncate_mantissa_bound: "abs (\x * 2 powr (real r - real_of_int \log 2 \x\\)\) \ 2 ^ (r+1)" (is "?lhs \ _") proof - define q where "q = \x * 2 powr (real r - real_of_int (\log 2 \x\\))\" have "abs q \ 2 ^ (r + 1)" if a:"x > 0" proof - have "abs q = q" using a by (intro abs_of_nonneg, simp add:q_def) also have "... \ x * 2 powr (real r - real_of_int \log 2 \x\\)" unfolding q_def using of_int_floor_le by blast also have "... = x * 2 powr real_of_int (int r - \log 2 \x\\)" by auto also have "... = 2 powr (log 2 x + real_of_int (int r - \log 2 \x\\))" using a by (simp add:powr_add) also have "... \ 2 powr (real r + 1)" using a by (intro powr_mono, linarith+) also have "... = 2 ^ (r+1)" by (subst powr_realpow[symmetric], simp_all add:add.commute) finally show "abs q \ 2 ^ (r+1)" by (metis of_int_le_iff of_int_numeral of_int_power) qed moreover have "abs q \ (2 ^ (r + 1))" if a: "x < 0" proof - have "-(2 ^ (r+1) + 1) = -(2 powr (real r + 1)+1)" by (subst powr_realpow[symmetric], simp_all add: add.commute) also have "... < -(2 powr (log 2 (- x) + (r - \log 2 \x\\)) + 1)" using a by (simp, linarith) also have "... = x * 2 powr (r - \log 2 \x\\) - 1" using a by (simp add:powr_add) also have "... \ q" by (simp add:q_def) also have "... = - abs q" using a by (subst abs_of_neg, simp_all add: mult_pos_neg2 q_def) finally have "-(2 ^ (r+1)+1) < - abs q" using of_int_less_iff by fastforce hence "-(2 ^ (r+1)) \ - abs q" by linarith thus "abs q \ 2^(r+1)" by linarith qed moreover have "x = 0 \ abs q \ 2^(r+1)" by (simp add:q_def) ultimately have "abs q \ 2^(r+1)" by fastforce thus ?thesis using q_def by blast qed lemma truncate_float_bit_count: "bit_count (F\<^sub>e (float_of (truncate_down r x))) \ 10 + 4 * real r + 2*log 2 (2 + \log 2 \x\\)" (is "?lhs \ ?rhs") proof - define m where "m = \x * 2 powr (real r - real_of_int \log 2 \x\\)\" define e where "e = \log 2 \x\\ - int r" have a: "(real_of_int \log 2 \x\\ - real r) = e" by (simp add:e_def) have "abs m + 2 \ 2 ^ (r + 1) + 2^1" using truncate_mantissa_bound by (intro add_mono, simp_all add:m_def) also have "... \ 2 ^ (r+2)" by simp finally have b:"abs m + 2 \ 2 ^ (r+2)" by simp hence "real_of_int (\m\ + 2) \ real_of_int (4 * 2 ^ r)" by (subst of_int_le_iff, simp) hence "\real_of_int m\ + 2 \ 4 * 2 ^ r" by simp hence c:"log 2 (real_of_int (\m\ + 2)) \ r+2" by (simp add: Transcendental.log_le_iff powr_add powr_realpow) have "real_of_int (abs e + 1) \ real_of_int \\log 2 \x\\\ + real_of_int r + 1" by (simp add:e_def) also have "... \ 1 + abs (log 2 (abs x)) + real_of_int r + 1" by (simp add:abs_le_iff, linarith) also have "... \ (real_of_int r+ 1) * (2 + abs (log 2 (abs x)))" by (simp add:distrib_left distrib_right) finally have d:"real_of_int (abs e + 1) \ (real_of_int r+ 1) * (2 + abs (log 2 (abs x)))" by simp have "log 2 (real_of_int (abs e + 1)) \ log 2 (real_of_int r + 1) + log 2 (2 + abs (log 2 (abs x)))" using d by (simp add: log_mult[symmetric]) also have "... \ r + log 2 (2 + abs (log 2 (abs x)))" using log_est by (intro add_mono, simp_all add:add.commute) finally have e: "log 2 (real_of_int (abs e + 1)) \ r + log 2 (2 + abs (log 2 (abs x)))" by simp have "?lhs = bit_count (F\<^sub>e (float_of (real_of_int m * 2 powr real_of_int e)))" by (simp add:truncate_down_def round_down_def m_def[symmetric] a) also have "... \ ereal (6 + (2 * log 2 (real_of_int (\m\ + 2)) + 2 * log 2 (real_of_int (\e\ + 1))))" using float_bit_count_2 by simp also have "... \ ereal (6 + (2 * real (r+2) + 2 * (r + log 2 (2 + abs (log 2 (abs x))))))" using c e by (subst ereal_less_eq, intro add_mono mult_left_mono, linarith+) also have "... = ?rhs" by simp finally show ?thesis by simp qed definition prime_above :: "nat \ nat" where "prime_above n = (SOME x. x \ {n..(2*n+2)} \ prime x)" text \The term @{term"prime_above n"} returns a prime between @{term "n::nat"} and @{term "2*(n::nat)+2"}. Because of Bertrand's postulate there always is such a value. In a refinement of the algorithms, it may make sense to replace this with an algorithm, that finds such a prime exactly or approximately. The definition is intentionally inexact, to allow refinement with various algorithms, without modifying the high-level mathematical correctness proof.\ lemma ex_subset: assumes "\x \ A. P x" assumes "A \ B" shows "\x \ B. P x" using assms by auto lemma shows prime_above_prime: "prime (prime_above n)" and prime_above_range: "prime_above n \ {n..(2*n+2)}" proof - define r where "r = (\x. x \ {n..(2*n+2)} \ prime x)" have "\x. r x" proof (cases "n>2") case True hence "n-1 > 1" by simp hence "\x \ {(n-1)<..<(2*(n-1))}. prime x" using bertrand by simp moreover have "{n - 1<..<2 * (n - 1)} \ {n..2 * n + 2}" by (intro subsetI, auto) ultimately have "\x \ {n..(2*n+2)}. prime x" by (rule ex_subset) then show ?thesis by (simp add:r_def Bex_def) next case False hence "2 \ {n..(2*n+2)}" by simp moreover have "prime (2::nat)" using two_is_prime_nat by blast ultimately have "r 2" using r_def by simp then show ?thesis by (rule exI) qed moreover have "prime_above n = (SOME x. r x)" by (simp add:prime_above_def r_def) ultimately have a:"r (prime_above n)" using someI_ex by metis show "prime (prime_above n)" using a unfolding r_def by blast show "prime_above n \ {n..(2*n+2)}" using a unfolding r_def by blast qed lemma prime_above_min: "prime_above n \ 2" using prime_above_prime by (simp add: prime_ge_2_nat) lemma prime_above_lower_bound: "prime_above n \ n" using prime_above_range by simp lemma prime_above_upper_bound: "prime_above n \ 2*n+2" using prime_above_range by simp end