diff --git a/thys/Frequency_Moments/Frequency_Moment_0.thy b/thys/Frequency_Moments/Frequency_Moment_0.thy new file mode 100644 --- /dev/null +++ b/thys/Frequency_Moments/Frequency_Moment_0.thy @@ -0,0 +1,1314 @@ +section \Frequency Moment $0$\label{sec:f0}\ + +theory Frequency_Moment_0 + imports + Frequency_Moments_Preliminary_Results + Median_Method.Median + K_Smallest + Universal_Hash_Families.Carter_Wegman_Hash_Family + Frequency_Moments + Landau_Ext + Product_PMF_Ext + Universal_Hash_Families.Field +begin + +text \This section contains a formalization of a new algorithm for the zero-th frequency moment +inspired by ideas described in \cite{baryossed2002}. +It is a KMV-type ($k$-minimum value) algorithm with a rounding method and matches the space complexity +of the best algorithm described in \cite{baryossef2002}. + +In addition to the Isabelle proof here, there is also an informal hand-written proof in +Appendix~\ref{sec:f0_proof}.\ + +type_synonym f0_state = "nat \ nat \ nat \ nat \ (nat \ nat list) \ (nat \ float set)" + +definition hash where "hash p = ring.hash (mod_ring p)" + +fun f0_init :: "rat \ rat \ nat \ f0_state pmf" where + "f0_init \ \ n = + do { + let s = nat \-18 * ln (real_of_rat \)\; + let t = nat \80 / (real_of_rat \)\<^sup>2\; + let p = prime_above (max n 19); + let r = nat (4 * \log 2 (1 / real_of_rat \)\ + 23); + h \ prod_pmf {.._. pmf_of_set (bounded_degree_polynomials (mod_ring p) 2)); + return_pmf (s, t, p, r, h, (\_ \ {0.. f0_state \ f0_state pmf" where + "f0_update x (s, t, p, r, h, sketch) = + return_pmf (s, t, p, r, h, \i \ {.. rat pmf" where + "f0_result (s, t, p, r, h, sketch) = return_pmf (median s (\i \ {.. rat \ rat) \ real" where + "f0_space_usage (n, \, \) = ( + let s = nat \-18 * ln (real_of_rat \)\ in + let r = nat (4 * \log 2 (1 / real_of_rat \)\ + 23) in + let t = nat \80 / (real_of_rat \)\<^sup>2 \ in + 6 + + 2 * log 2 (real s + 1) + + 2 * log 2 (real t + 1) + + 2 * log 2 (real n + 21) + + 2 * log 2 (real r + 1) + + real s * (5 + 2 * log 2 (21 + real n) + + real t * (13 + 4 * r + 2 * log 2 (log 2 (real n + 13)))))" + +definition encode_f0_state :: "f0_state \ bool list option" where + "encode_f0_state = + N\<^sub>e \\<^sub>e (\s. + N\<^sub>e \\<^sub>e ( + N\<^sub>e \\<^sub>e (\p. + N\<^sub>e \\<^sub>e ( + ([0..\<^sub>e (P\<^sub>e p 2)) \\<^sub>e + ([0..\<^sub>e (S\<^sub>e F\<^sub>e))))))" + +lemma "inj_on encode_f0_state (dom encode_f0_state)" +proof - + have "is_encoding encode_f0_state" + unfolding encode_f0_state_def + by (intro dependent_encoding exp_golomb_encoding poly_encoding fun_encoding set_encoding float_encoding) + thus ?thesis by (rule encoding_imp_inj) +qed + +context + fixes \ \ :: rat + fixes n :: nat + fixes as :: "nat list" + fixes result + assumes \_range: "\ \ {0<..<1}" + assumes \_range: "\ \ {0<..<1}" + assumes as_range: "set as \ {.. fold (\a state. state \ f0_update a) as (f0_init \ \ n) \ f0_result" +begin + +private definition t where "t = nat \80 / (real_of_rat \)\<^sup>2\" +private lemma t_gt_0: "t > 0" using \_range by (simp add:t_def) + +private definition s where "s = nat \-(18 * ln (real_of_rat \))\" +private lemma s_gt_0: "s > 0" using \_range by (simp add:s_def) + +private definition p where "p = prime_above (max n 19)" + +private lemma p_prime:"Factorial_Ring.prime p" + using p_def prime_above_prime by presburger + +private lemma p_ge_18: "p \ 18" +proof - + have "p \ 19" + by (metis p_def prime_above_lower_bound max.bounded_iff) + thus ?thesis by simp +qed + +private lemma p_gt_0: "p > 0" using p_ge_18 by simp +private lemma p_gt_1: "p > 1" using p_ge_18 by simp + +private lemma n_le_p: "n \ p" +proof - + have "n \ max n 19" by simp + also have "... \ p" + unfolding p_def by (rule prime_above_lower_bound) + finally show ?thesis by simp +qed + +private lemma p_le_n: "p \ 2*n + 40" +proof - + have "p \ 2 * (max n 19) + 2" + by (subst p_def, rule prime_above_upper_bound) + also have "... \ 2 * n + 40" + by (cases "n \ 19", auto) + finally show ?thesis by simp +qed + +private lemma as_lt_p: "\x. x \ set as \ x < p" + using as_range atLeastLessThan_iff + by (intro order_less_le_trans[OF _ n_le_p]) blast + +private lemma as_subset_p: "set as \ {..log 2 (1 / real_of_rat \)\ + 23)" + +private lemma r_bound: "4 * log 2 (1 / real_of_rat \) + 23 \ r" +proof - + have "0 \ log 2 (1 / real_of_rat \)" using \_range by simp + hence "0 \ \log 2 (1 / real_of_rat \)\" by simp + hence "0 \ 4 * \log 2 (1 / real_of_rat \)\ + 23" + by (intro add_nonneg_nonneg mult_nonneg_nonneg, auto) + thus ?thesis by (simp add:r_def) +qed + +private lemma r_ge_23: "r \ 23" +proof - + have "(23::real) = 0 + 23" by simp + also have "... \ 4 * log 2 (1 / real_of_rat \) + 23" + using \_range by (intro add_mono mult_nonneg_nonneg, auto) + also have "... \ r" using r_bound by simp + finally show "23 \ r" by simp +qed + +private lemma two_pow_r_le_1: "0 < 1 - 2 powr - real r" +proof - + have a: "2 powr (0::real) = 1" + by simp + show ?thesis using r_ge_23 + by (simp, subst a[symmetric], intro powr_less_mono, auto) +qed + +interpretation carter_wegman_hash_family "mod_ring p" 2 + rewrites "ring.hash (mod_ring p) = Frequency_Moment_0.hash p" + using carter_wegman_hash_familyI[OF mod_ring_is_field mod_ring_finite] + using hash_def p_prime by auto + +private definition tr_hash where "tr_hash x \ = truncate_down r (hash x \)" + +private definition sketch_rv where + "sketch_rv \ = least t ((\x. float_of (tr_hash x \)) ` set as)" + +private definition estimate + where "estimate S = (if card S < t then of_nat (card S) else of_nat t * of_nat p / rat_of_float (Max S))" + +private definition sketch_rv' where "sketch_rv' \ = least t ((\x. tr_hash x \) ` set as)" +private definition estimate' where "estimate' S = (if card S < t then real (card S) else real t * real p / Max S)" + +private definition \\<^sub>0 where "\\<^sub>0 = prod_pmf {.._. pmf_of_set space)" + +private lemma f0_alg_sketch: + defines "sketch \ fold (\a state. state \ f0_update a) as (f0_init \ \ n)" + shows "sketch = map_pmf (\x. (s,t,p,r, x, \i \ {..\<^sub>0" + unfolding sketch_rv_def +proof (subst sketch_def, induction as rule:rev_induct) + case Nil + then show ?case + by (simp add:s_def p_def[symmetric] map_pmf_def t_def r_def Let_def least_def restrict_def space_def \\<^sub>0_def) +next + case (snoc x xs) + let ?sketch = "\\ xs. least t ((\a. float_of (tr_hash a \)) ` set xs)" + have "fold (\a state. state \ f0_update a) (xs @ [x]) (f0_init \ \ n) = + (map_pmf (\\. (s, t, p, r, \, \i \ {.. i) xs)) \\<^sub>0) \ f0_update x" + by (simp add: restrict_def snoc del:f0_init.simps) + also have "... = \\<^sub>0 \ (\\. f0_update x (s, t, p, r, \, \i\{.. i) xs)) " + by (simp add:map_pmf_def bind_assoc_pmf bind_return_pmf del:f0_update.simps) + also have "... = map_pmf (\\. (s, t, p, r, \, \i\{.. i) (xs@[x]))) \\<^sub>0" + by (simp add:least_insert map_pmf_def tr_hash_def cong:restrict_cong) + finally show ?case by blast +qed + +private lemma card_nat_in_ball: + fixes x :: nat + fixes q :: real + assumes "q \ 0" + defines "A \ {k. abs (real x - real k) \ q \ k \ x}" + shows "real (card A) \ 2 * q" and "finite A" +proof - + have a: "of_nat x \ {\real x-q\..\real x+q\}" + using assms + by (simp add: ceiling_le_iff) + + have "card A = card (int ` A)" + by (rule card_image[symmetric], simp) + also have "... \ card ({\real x-q\..\real x+q\} - {of_nat x})" + by (intro card_mono image_subsetI, simp_all add:A_def abs_le_iff, linarith) + also have "... = card {\real x-q\..\real x+q\} - 1" + by (rule card_Diff_singleton, rule a) + also have "... = int (card {\real x-q\..\real x+q\}) - int 1" + by (intro of_nat_diff) + (metis a card_0_eq empty_iff finite_atLeastAtMost_int less_one linorder_not_le) + also have "... \ \q+real x\+1 -\real x-q\ - 1" + using assms by (simp, linarith) + also have "... \ 2*q" + by linarith + finally show "card A \ 2 * q" + by simp + + have "A \ {..x + nat \q\}" + by (rule subsetI, simp add:A_def abs_le_iff, linarith) + thus "finite A" + by (rule finite_subset, simp) +qed + +private lemma prob_degree_lt_1: + "prob {\. degree \ < 1} \ 1/real p" +proof - + have "space \ {\. length \ \ Suc 0} = bounded_degree_polynomials (mod_ring p) 1" + by (auto simp:set_eq_iff bounded_degree_polynomials_def space_def) + moreover have "field_size = p" by (simp add:mod_ring_def) + hence "real (card (bounded_degree_polynomials (mod_ring p) (Suc 0))) / real (card space) = 1 / real p" + by (simp add:space_def bounded_degree_polynomials_card power2_eq_square) + ultimately show ?thesis + by (simp add:M_def measure_pmf_of_set) +qed + +private lemma collision_prob: + assumes "c \ 1" + shows "prob {\. \x \ set as. \y \ set as. x \ y \ tr_hash x \ \ c \ tr_hash x \ = tr_hash y \} \ + (5/2) * (real (card (set as)))\<^sup>2 * c\<^sup>2 * 2 powr -(real r) / (real p)\<^sup>2 + 1/real p" (is "prob {\. ?l \} \ ?r1 + ?r2") +proof - + define \ :: real where "\ = 9/8" + + have rho_c_ge_0: "\ * c \ 0" unfolding \_def using assms by simp + + have c_ge_0: "c\0" using assms by simp + + have "degree \ \ 1 \ \ \ space \ degree \ = 1" for \ + by (simp add:bounded_degree_polynomials_def space_def) + (metis One_nat_def Suc_1 le_less_Suc_eq less_imp_diff_less list.size(3) pos2) + + hence a: "\\ x y. x < p \ y < p \ x \ y \ degree \ \ 1 \ \ \ space \ hash x \ \ hash y \" + using inj_onD[OF inj_if_degree_1] mod_ring_carr by blast + + have b: "prob {\. degree \ \ 1 \ tr_hash x \ \ c \ tr_hash x \ = tr_hash y \} \ 5 * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2" + if b_assms: "x \ set as" "y \ set as" "x < y" for x y + proof - + have c: "real u \ \ * c \ \real u - real v\ \ \ * c * 2 powr (-real r)" + if c_assms:"truncate_down r (real u) \ c" "truncate_down r (real u) = truncate_down r (real v)" for u v + proof - + have "9 * 2 powr - real r \ 9 * 2 powr (- real 23)" + using r_ge_23 by (intro mult_left_mono powr_mono, auto) + + also have "... \ 1" by simp + + finally have "9 * 2 powr - real r \ 1" by simp + + hence "1 \ \ * (1 - 2 powr (- real r))" + by (simp add:\_def) + + hence d: "(c*1) / (1 - 2 powr (-real r)) \ c * \" + using assms two_pow_r_le_1 by (simp add: pos_divide_le_eq) + + have "\x. truncate_down r (real x) \ c \ real x * (1 - 2 powr - real r) \ c * 1" + using truncate_down_pos[OF of_nat_0_le_iff] order_trans by (simp, blast) + + hence "\x. truncate_down r (real x) \ c \ real x \ c * \" + using two_pow_r_le_1 by (intro order_trans[OF _ d], simp add: pos_le_divide_eq) + + hence e: "real u \ c * \" "real v \ c * \" + using c_assms by auto + + have " \real u - real v\ \ (max \real u\ \real v\) * 2 powr (-real r)" + using c_assms by (intro truncate_down_eq, simp) + + also have "... \ (c * \) * 2 powr (-real r)" + using e by (intro mult_right_mono, auto) + + finally have "\real u - real v\ \ \ * c * 2 powr (-real r)" + by (simp add:algebra_simps) + + thus ?thesis using e by (simp add:algebra_simps) + qed + + have "prob {\. degree \ \ 1 \ tr_hash x \ \ c \ tr_hash x \ = tr_hash y \} \ + prob (\ i \ {(u,v) \ {.. {.. v \ truncate_down r u \ c \ truncate_down r u = truncate_down r v}. + {\. hash x \ = fst i \ hash y \ = snd i})" + using a by (intro pmf_mono[OF M_def], simp add:tr_hash_def) + (metis hash_range mod_ring_carr b_assms as_subset_p lessThan_iff nat_neq_iff subset_eq) + + also have "... \ (\ i\ {(u,v) \ {.. {.. v \ + truncate_down r u \ c \ truncate_down r u = truncate_down r v}. + prob {\. hash x \ = fst i \ hash y \ = snd i})" + by (intro measure_UNION_le finite_cartesian_product finite_subset[where B="{0.. {0.. (\ i\ {(u,v) \ {.. {.. v \ + truncate_down r u \ c \ truncate_down r u = truncate_down r v}. + prob {\. (\u \ {x,y}. hash u \ = (if u = x then (fst i) else (snd i)))})" + by (intro sum_mono pmf_mono[OF M_def]) force + + also have "... \ (\ i\ {(u,v) \ {.. {.. v \ + truncate_down r u \ c \ truncate_down r u = truncate_down r v}. 1/(real p)\<^sup>2)" + using assms as_subset_p b_assms + by (intro sum_mono, subst hash_prob) (auto simp add: mod_ring_def power2_eq_square) + + also have "... = 1/(real p)\<^sup>2 * + card {(u,v) \ {0.. {0.. v \ truncate_down r u \ c \ truncate_down r u = truncate_down r v}" + by simp + + also have "... \ 1/(real p)\<^sup>2 * + card {(u,v) \ {.. {.. v \ real u \ \ * c \ abs (real u - real v) \ \ * c * 2 powr (-real r)}" + using c + by (intro mult_mono of_nat_mono card_mono finite_cartesian_product finite_subset[where B="{..{.. 1/(real p)\<^sup>2 * card (\u' \ {u. u < p \ real u \ \ * c}. + {(u::nat,v::nat). u = u' \ abs (real u - real v) \ \ * c * 2 powr (-real r) \ v < p \ v \ u'})" + by (intro mult_left_mono of_nat_mono card_mono finite_cartesian_product finite_subset[where B="{..{.. 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. + card {(u,v). u = u' \ abs (real u - real v) \ \ * c * 2 powr (-real r) \ v < p \ v \ u'})" + by (intro mult_left_mono of_nat_mono card_UN_le, auto) + + also have "... = 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. + card ((\x. (u' ,x)) ` {v. abs (real u' - real v) \ \ * c * 2 powr (-real r) \ v < p \ v \ u'}))" + by (intro arg_cong2[where f="(*)"] arg_cong[where f="real"] sum.cong arg_cong[where f="card"]) + (auto simp add:set_eq_iff) + + also have "... \ 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. + card {v. abs (real u' - real v) \ \ * c * 2 powr (-real r) \ v < p \ v \ u'})" + by (intro mult_left_mono of_nat_mono sum_mono card_image_le, auto) + + also have "... \ 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. + card {v. abs (real u' - real v) \ \ * c * 2 powr (-real r) \ v \ u'})" + by (intro mult_left_mono sum_mono of_nat_mono card_mono card_nat_in_ball subsetI) auto + + also have "... \ 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. + real (card {v. abs (real u' - real v) \ \ * c * 2 powr (-real r) \ v \ u'}))" + by simp + + also have "... \ 1/(real p)\<^sup>2 * (\ u' \ {u. u < p \ real u \ \ * c}. 2 * (\ * c * 2 powr (-real r)))" + by (intro mult_left_mono sum_mono card_nat_in_ball(1), auto) + + also have "... = 1/(real p)\<^sup>2 * (real (card {u. u < p \ real u \ \ * c}) * (2 * (\ * c * 2 powr (-real r))))" + by simp + + also have "... \ 1/(real p)\<^sup>2 * (real (card {u. u \ nat (\\ * c \)}) * (2 * (\ * c * 2 powr (-real r))))" + using rho_c_ge_0 le_nat_floor + by (intro mult_left_mono mult_right_mono of_nat_mono card_mono subsetI) auto + + also have "... \ 1/(real p)\<^sup>2 * ((1+\ * c) * (2 * (\ * c * 2 powr (-real r))))" + using rho_c_ge_0 by (intro mult_left_mono mult_right_mono, auto) + + also have "... \ 1/(real p)\<^sup>2 * (((1+\) * c) * (2 * (\ * c * 2 powr (-real r))))" + using assms by (intro mult_mono, auto simp add:distrib_left distrib_right \_def) + + also have "... = (\ * (2 + \ * 2)) * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2" + by (simp add:ac_simps power2_eq_square) + + also have "... \ 5 * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2" + by (intro divide_right_mono mult_right_mono) (auto simp add:\_def) + + finally show ?thesis by simp + qed + + have "prob {\. ?l \ \ degree \ \ 1} \ + prob (\ i \ {(x,y) \ (set as) \ (set as). x < y}. {\. degree \ \ 1 \ tr_hash (fst i) \ \ c \ + tr_hash (fst i) \ = tr_hash (snd i) \})" + by (rule pmf_mono[OF M_def], simp, metis linorder_neqE_nat) + + also have "... \ (\ i \ {(x,y) \ (set as) \ (set as). x < y}. prob + {\. degree \ \ 1 \ tr_hash (fst i) \ \ c \ tr_hash (fst i) \ = tr_hash (snd i) \})" + unfolding M_def + by (intro measure_UNION_le finite_cartesian_product finite_subset[where B="(set as) \ (set as)"]) + auto + + also have "... \ (\ i \ {(x,y) \ (set as) \ (set as). x < y}. 5 * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2)" + using b by (intro sum_mono, simp add:case_prod_beta) + + also have "... = ((5/2) * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2) * (2 * card {(x,y) \ (set as) \ (set as). x < y})" + by simp + + also have "... = ((5/2) * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2) * (card (set as) * (card (set as) - 1))" + by (subst card_ordered_pairs, auto) + + also have "... \ ((5/2) * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2) * (real (card (set as)))\<^sup>2" + by (intro mult_left_mono) (auto simp add:power2_eq_square mult_left_mono) + + also have "... = (5/2) * (real (card (set as)))\<^sup>2 * c\<^sup>2 * 2 powr (-real r) /(real p)\<^sup>2" + by (simp add:algebra_simps) + + finally have f:"prob {\. ?l \ \ degree \ \ 1} \ ?r1" by simp + + have "prob {\. ?l \} \ prob {\. ?l \ \ degree \ \ 1} + prob {\. degree \ < 1}" + by (rule pmf_add[OF M_def], auto) + also have "... \ ?r1 + ?r2" + by (intro add_mono f prob_degree_lt_1) + finally show ?thesis by simp +qed + +private lemma of_bool_square: "(of_bool x)\<^sup>2 = ((of_bool x)::real)" + by (cases x, auto) + +private definition Q where "Q y \ = card {x \ set as. int (hash x \) < y}" + +private definition m where "m = card (set as)" + +private lemma + assumes "a \ 0" + assumes "a \ int p" + shows exp_Q: "expectation (\\. real (Q a \)) = real m * (of_int a) / p" + and var_Q: "variance (\\. real (Q a \)) \ real m * (of_int a) / p" +proof - + have exp_single: "expectation (\\. of_bool (int (hash x \) < a)) = real_of_int a /real p" + if a:"x \ set as" for x + proof - + have x_le_p: "x < p" using a as_lt_p by simp + have "expectation (\\. of_bool (int (hash x \) < a)) = expectation (indicat_real {\. int (Frequency_Moment_0.hash p x \) < a})" + by (intro arg_cong2[where f="integral\<^sup>L"] ext, simp_all) + also have "... = prob {\. hash x \ \ {k. int k < a}}" + by (simp add:M_def) + also have "... = card ({k. int k < a} \ {..\. of_bool (int (hash x \) < a)) = real_of_int a /real p" + by simp + qed + + have "expectation(\\. real (Q a \)) = expectation (\\. (\x \ set as. of_bool (int (hash x \) < a)))" + by (simp add:Q_def Int_def) + also have "... = (\x \ set as. expectation (\\. of_bool (int (hash x \) < a)))" + by (rule Bochner_Integration.integral_sum, simp) + also have "... = (\ x \ set as. a /real p)" + by (rule sum.cong, simp, subst exp_single, simp, simp) + also have "... = real m * real_of_int a / real p" + by (simp add:m_def) + finally show "expectation (\\. real (Q a \)) = real m * real_of_int a / p" by simp + + have indep: "J \ set as \ card J = 2 \ indep_vars (\_. borel) (\i x. of_bool (int (hash i x) < a)) J" for J + using as_subset_p mod_ring_carr + by (intro indep_vars_compose2[where Y="\i x. of_bool (int x < a)" and M'="\_. discrete"] + k_wise_indep_vars_subset[OF k_wise_indep] finite_subset[OF _ finite_set]) auto + + have rv: "\x. x \ set as \ random_variable borel (\\. of_bool (int (hash x \) < a))" + by (simp add:M_def) + + have "variance (\\. real (Q a \)) = variance (\\. (\x \ set as. of_bool (int (hash x \) < a)))" + by (simp add:Q_def Int_def) + also have "... = (\x \ set as. variance (\\. of_bool (int (hash x \) < a)))" + by (intro var_sum_pairwise_indep_2 indep rv) auto + also have "... \ (\ x \ set as. a / real p)" + by (rule sum_mono, simp add: variance_eq of_bool_square, simp add: exp_single) + also have "... = real m * real_of_int a /real p" + by (simp add:m_def) + finally show "variance (\\. real (Q a \)) \ real m * real_of_int a / p" + by simp +qed + +private lemma t_bound: "t \ 81 / (real_of_rat \)\<^sup>2" +proof - + have "t \ 80 / (real_of_rat \)\<^sup>2 + 1" using t_def t_gt_0 by linarith + also have "... \ 80 / (real_of_rat \)\<^sup>2 + 1 / (real_of_rat \)\<^sup>2" + using \_range by (intro add_mono, simp, simp add:power_le_one) + also have "... = 81 / (real_of_rat \)\<^sup>2" by simp + finally show ?thesis by simp +qed + +private lemma t_r_bound: + "18 * 40 * (real t)\<^sup>2 * 2 powr (-real r) \ 1" +proof - + have "720 * (real t)\<^sup>2 * 2 powr (-real r) \ 720 * (81 / (real_of_rat \)\<^sup>2)\<^sup>2 * 2 powr (-4 * log 2 (1 / real_of_rat \) - 23)" + using r_bound t_bound by (intro mult_left_mono mult_mono power_mono powr_mono, auto) + + also have "... \ 720 * (81 / (real_of_rat \)\<^sup>2)\<^sup>2 * (2 powr (-4 * log 2 (1 / real_of_rat \)) * 2 powr (-23))" + using \_range by (intro mult_left_mono mult_mono power_mono add_mono) + (simp_all add:power_le_one powr_diff) + + also have "... = 720 * (81\<^sup>2 / (real_of_rat \)^4) * (2 powr (log 2 ((real_of_rat \)^4)) * 2 powr (-23))" + using \_range by (intro arg_cong2[where f="(*)"]) + (simp_all add:power2_eq_square power4_eq_xxxx log_divide log_powr[symmetric]) + + also have "... = 720 * 81\<^sup>2 * 2 powr (-23)" using \_range by simp + + also have "... \ 1" by simp + + finally show ?thesis by simp +qed + +private lemma m_eq_F_0: "real m = of_rat (F 0 as)" + by (simp add:m_def F_def) + +private lemma estimate'_bounds: + "prob {\. of_rat \ * real_of_rat (F 0 as) < \estimate' (sketch_rv' \) - of_rat (F 0 as)\} \ 1/3" +proof (cases "card (set as) \ t") + case True + define \' where "\' = 3 * real_of_rat \ / 4" + define u where "u = \real t * p / (m * (1+\'))\" + define v where "v = \real t * p / (m * (1-\'))\" + + define has_no_collision where + "has_no_collision = (\\. \x\ set as. \y \ set as. (tr_hash x \ = tr_hash y \ \ x = y) \ tr_hash x \ > v)" + + have "2 powr (-real r) \ 2 powr (-(4 * log 2 (1 / real_of_rat \) + 23))" + using r_bound by (intro powr_mono, linarith, simp) + also have "... = 2 powr (-4 * log 2 (1 /real_of_rat \) -23)" + by (rule arg_cong2[where f="(powr)"], auto simp add:algebra_simps) + also have "... \ 2 powr ( -1 * log 2 (1 /real_of_rat \) -4)" + using \_range by (intro powr_mono diff_mono, auto) + also have "... = 2 powr ( -1 * log 2 (1 /real_of_rat \)) / 16" + by (simp add: powr_diff) + also have "... = real_of_rat \ / 16" + using \_range by (simp add:log_divide) + also have "... < real_of_rat \ / 8" + using \_range by (subst pos_divide_less_eq, auto) + finally have r_le_\: "2 powr (-real r) < real_of_rat \ / 8" + by simp + + have \'_gt_0: "\' > 0" using \_range by (simp add:\'_def) + have "\' < 3/4" using \_range by (simp add:\'_def)+ + also have "... < 1" by simp + finally have \'_lt_1: "\' < 1" by simp + + have "t \ 81 / (real_of_rat \)\<^sup>2" + using t_bound by simp + also have "... = (81*9/16) / (\')\<^sup>2" + by (simp add:\'_def power2_eq_square) + also have "... \ 46 / \'\<^sup>2" + by (intro divide_right_mono, simp, simp) + finally have t_le_\': "t \ 46/ \'\<^sup>2" by simp + + have "80 \ (real_of_rat \)\<^sup>2 * (80 / (real_of_rat \)\<^sup>2)" using \_range by simp + also have "... \ (real_of_rat \)\<^sup>2 * t" + by (intro mult_left_mono, simp add:t_def of_nat_ceiling, simp) + finally have "80 \ (real_of_rat \)\<^sup>2 * t" by simp + hence t_ge_\': "45 \ t * \' * \'" by (simp add:\'_def power2_eq_square) + + have "m \ card {.. p" using n_le_p by simp + finally have m_le_p: "m \ p" by simp + + hence t_le_m: "t \ card (set as)" using True by simp + have m_ge_0: "real m > 0" using m_def True t_gt_0 by simp + + have "v \ real t * real p / (real m * (1 - \'))" by (simp add:v_def) + + also have "... \ real t * real p / (real m * (1/4))" + using \'_lt_1 m_ge_0 \_range + by (intro divide_left_mono mult_left_mono mult_nonneg_nonneg mult_pos_pos, simp_all add:\'_def) + + finally have v_ubound: "v \ 4 * real t * real p / real m" by (simp add:algebra_simps) + + have a_ge_1: "u \ 1" using \'_gt_0 p_gt_0 m_ge_0 t_gt_0 + by (auto intro!:mult_pos_pos divide_pos_pos simp add:u_def) + hence a_ge_0: "u \ 0" by simp + have "real m * (1 - \') < real m" using \'_gt_0 m_ge_0 by simp + also have "... \ 1 * real p" using m_le_p by simp + also have "... \ real t * real p" using t_gt_0 by (intro mult_right_mono, auto) + finally have " real m * (1 - \') < real t * real p" by simp + hence v_gt_0: "v > 0" using mult_pos_pos m_ge_0 \'_lt_1 by (simp add:v_def) + hence v_ge_1: "real_of_int v \ 1" by linarith + + have "real t \ real m" using True m_def by linarith + also have "... < (1 + \') * real m" using \'_gt_0 m_ge_0 by force + finally have a_le_p_aux: "real t < (1 + \') * real m" by simp + + have "u \ real t * real p / (real m * (1 + \'))+1" by (simp add:u_def) + also have "... < real p + 1" + using m_ge_0 \'_gt_0 a_le_p_aux a_le_p_aux p_gt_0 + by (simp add: pos_divide_less_eq ac_simps) + finally have "u \ real p" + by (metis int_less_real_le not_less of_int_le_iff of_int_of_nat_eq) + hence u_le_p: "u \ int p" by linarith + + have "prob {\. Q u \ \ t} \ prob {\ \ Sigma_Algebra.space M. abs (real (Q u \) - + expectation (\\. real (Q u \))) \ 3 * sqrt (m * real_of_int u / p)}" + proof (rule pmf_mono[OF M_def]) + fix \ + assume "\ \ {\. t \ Q u \}" + hence t_le: "t \ Q u \" by simp + have "real m * real_of_int u / real p \ real m * (real t * real p / (real m * (1 + \'))+1) / real p" + using m_ge_0 p_gt_0 by (intro divide_right_mono mult_left_mono, simp_all add: u_def) + also have "... = real m * real t * real p / (real m * (1+\') * real p) + real m / real p" + by (simp add:distrib_left add_divide_distrib) + also have "... = real t / (1+\') + real m / real p" + using p_gt_0 m_ge_0 by simp + also have "... \ real t / (1+\') + 1" + using m_le_p p_gt_0 by (intro add_mono, auto) + finally have "real m * real_of_int u / real p \ real t / (1 + \') + 1" + by simp + + hence "3 * sqrt (real m * of_int u / real p) + real m * of_int u / real p \ + 3 * sqrt (t / (1+\')+1)+(t/(1+\')+1)" + by (intro add_mono mult_left_mono real_sqrt_le_mono, auto) + also have "... \ 3 * sqrt (real t+1) + ((t * (1 - \' / (1+\'))) + 1)" + using \'_gt_0 t_gt_0 by (intro add_mono mult_left_mono real_sqrt_le_mono) + (simp_all add: pos_divide_le_eq left_diff_distrib) + also have "... = 3 * sqrt (real t+1) + (t - \' * t / (1+\')) + 1" by (simp add:algebra_simps) + also have "... \ 3 * sqrt (46 / \'\<^sup>2 + 1 / \'\<^sup>2) + (t - \' * t/2) + 1 / \'" + using \'_gt_0 t_gt_0 \'_lt_1 add_pos_pos t_le_\' + by (intro add_mono mult_left_mono real_sqrt_le_mono add_mono) + (simp_all add: power_le_one pos_le_divide_eq) + also have "... \ (21 / \' + (t - 45 / (2*\'))) + 1 / \'" + using \'_gt_0 t_ge_\' by (intro add_mono) + (simp_all add:real_sqrt_divide divide_le_cancel real_le_lsqrt pos_divide_le_eq ac_simps) + also have "... \ t" using \'_gt_0 by simp + also have "... \ Q u \" using t_le by simp + finally have "3 * sqrt (real m * of_int u / real p) + real m * of_int u / real p \ Q u \" + by simp + hence " 3 * sqrt (real m * real_of_int u / real p) \ \real (Q u \) - expectation (\\. real (Q u \))\" + using a_ge_0 u_le_p True by (simp add:exp_Q abs_ge_iff) + + thus "\ \ {\ \ Sigma_Algebra.space M. 3 * sqrt (real m * real_of_int u / real p) \ + \real (Q u \) - expectation (\\. real (Q u \))\}" + by (simp add: M_def) + qed + also have "... \ variance (\\. real (Q u \)) / (3 * sqrt (real m * of_int u / real p))\<^sup>2" + using a_ge_1 p_gt_0 m_ge_0 + by (intro Chebyshev_inequality, simp add:M_def, auto) + + also have "... \ (real m * real_of_int u / real p) / (3 * sqrt (real m * of_int u / real p))\<^sup>2" + using a_ge_0 u_le_p by (intro divide_right_mono var_Q, auto) + + also have "... \ 1/9" using a_ge_0 by simp + + finally have case_1: "prob {\. Q u \ \ t} \ 1/9" by simp + + have case_2: "prob {\. Q v \ < t} \ 1/9" + proof (cases "v \ p") + case True + have "prob {\. Q v \ < t} \ prob {\ \ Sigma_Algebra.space M. abs (real (Q v \) - expectation (\\. real (Q v \))) + \ 3 * sqrt (m * real_of_int v / p)}" + proof (rule pmf_mono[OF M_def]) + fix \ + assume "\ \ set_pmf (pmf_of_set space)" + have "(real t + 3 * sqrt (real t / (1 - \') )) * (1 - \') = real t - \' * t + 3 * ((1-\') * sqrt( real t / (1-\') ))" + by (simp add:algebra_simps) + + also have "... = real t - \' * t + 3 * sqrt ( (1-\')\<^sup>2 * (real t / (1-\')))" + using \'_lt_1 by (subst real_sqrt_mult, simp) + + also have "... = real t - \' * t + 3 * sqrt ( real t * (1- \'))" + by (simp add:power2_eq_square distrib_left) + + also have "... \ real t - 45/ \' + 3 * sqrt ( real t )" + using \'_gt_0 t_ge_\' \'_lt_1 by (intro add_mono mult_left_mono real_sqrt_le_mono) + (simp_all add:pos_divide_le_eq ac_simps left_diff_distrib power_le_one) + + also have "... \ real t - 45/ \' + 3 * sqrt ( 46 / \'\<^sup>2)" + using t_le_\' \'_lt_1 \'_gt_0 + by (intro add_mono mult_left_mono real_sqrt_le_mono, simp_all add:pos_divide_le_eq power_le_one) + + also have "... = real t + (3 * sqrt(46) - 45)/ \'" + using \'_gt_0 by (simp add:real_sqrt_divide diff_divide_distrib) + + also have "... \ t" + using \'_gt_0 by (simp add:pos_divide_le_eq real_le_lsqrt) + + finally have aux: "(real t + 3 * sqrt (real t / (1 - \'))) * (1 - \') \ real t " + by simp + + assume "\ \ {\. Q v \ < t}" + hence "Q v \ < t" by simp + + hence "real (Q v \) + 3 * sqrt (real m * real_of_int v / real p) + \ real t - 1 + 3 * sqrt (real m * real_of_int v / real p)" + using m_le_p p_gt_0 by (intro add_mono, auto simp add: algebra_simps add_divide_distrib) + + also have "... \ (real t-1) + 3 * sqrt (real m * (real t * real p / (real m * (1- \'))) / real p)" + by (intro add_mono mult_left_mono real_sqrt_le_mono divide_right_mono) + (auto simp add:v_def) + + also have "... \ real t + 3 * sqrt(real t / (1-\')) - 1" + using m_ge_0 p_gt_0 by simp + + also have "... \ real t / (1-\')-1" + using \'_lt_1 aux by (simp add: pos_le_divide_eq) + also have "... \ real m * (real t * real p / (real m * (1-\'))) / real p - 1" + using p_gt_0 m_ge_0 by simp + also have "... \ real m * (real t * real p / (real m * (1-\'))) / real p - real m / real p" + using m_le_p p_gt_0 + by (intro diff_mono, auto) + also have "... = real m * (real t * real p / (real m * (1-\'))-1) / real p" + by (simp add: left_diff_distrib right_diff_distrib diff_divide_distrib) + also have "... \ real m * real_of_int v / real p" + by (intro divide_right_mono mult_left_mono, simp_all add:v_def) + + finally have "real (Q v \) + 3 * sqrt (real m * real_of_int v / real p) + \ real m * real_of_int v / real p" by simp + + hence " 3 * sqrt (real m * real_of_int v / real p) \ \real (Q v \) -expectation (\\. real (Q v \))\" + using v_gt_0 True by (simp add: exp_Q abs_ge_iff) + + thus "\ \ {\\ Sigma_Algebra.space M. 3 * sqrt (real m * real_of_int v / real p) \ + \real (Q v \) - expectation (\\. real (Q v \))\}" + by (simp add:M_def) + qed + also have "... \ variance (\\. real (Q v \)) / (3 * sqrt (real m * real_of_int v / real p))\<^sup>2" + using v_gt_0 p_gt_0 m_ge_0 + by (intro Chebyshev_inequality, simp add:M_def, auto) + + also have "... \ (real m * real_of_int v / real p) / (3 * sqrt (real m * real_of_int v / real p))\<^sup>2" + using v_gt_0 True by (intro divide_right_mono var_Q, auto) + + also have "... = 1/9" + using p_gt_0 v_gt_0 m_ge_0 by (simp add:power2_eq_square) + + finally show ?thesis by simp + next + case False + have "prob {\. Q v \ < t} \ prob {\. False}" + proof (rule pmf_mono[OF M_def]) + fix \ + assume a:"\ \ {\. Q v \ < t}" + assume "\ \ set_pmf (pmf_of_set space)" + hence b:"\x. x < p \ hash x \ < p" + using hash_range mod_ring_carr by (simp add:M_def measure_pmf_inverse) + have "t \ card (set as)" using True by simp + also have "... \ Q v \" + unfolding Q_def using b False as_lt_p by (intro card_mono subsetI, simp, force) + also have "... < t" using a by simp + finally have "False" by auto + thus "\ \ {\. False}" by simp + qed + also have "... = 0" by auto + finally show ?thesis by simp + qed + + have "prob {\. \has_no_collision \} \ + prob {\. \x \ set as. \y \ set as. x \ y \ tr_hash x \ \ real_of_int v \ tr_hash x \ = tr_hash y \}" + by (rule pmf_mono[OF M_def]) (simp add:has_no_collision_def M_def, force) + + also have "... \ (5/2) * (real (card (set as)))\<^sup>2 * (real_of_int v)\<^sup>2 * 2 powr - real r / (real p)\<^sup>2 + 1 / real p" + using collision_prob v_ge_1 by blast + + also have "... \ (5/2) * (real m)\<^sup>2 * (real_of_int v)\<^sup>2 * 2 powr - real r / (real p)\<^sup>2 + 1 / real p" + by (intro divide_right_mono add_mono mult_right_mono mult_mono power_mono, simp_all add:m_def) + + also have "... \ (5/2) * (real m)\<^sup>2 * (4 * real t * real p / real m)\<^sup>2 * (2 powr - real r) / (real p)\<^sup>2 + 1 / real p" + using v_def v_ge_1 v_ubound + by (intro add_mono divide_right_mono mult_right_mono mult_left_mono, auto) + + also have "... = 40 * (real t)\<^sup>2 * (2 powr -real r) + 1 / real p" + using p_gt_0 m_ge_0 t_gt_0 by (simp add:algebra_simps power2_eq_square) + + also have "... \ 1/18 + 1/18" + using t_r_bound p_ge_18 by (intro add_mono, simp_all add: pos_le_divide_eq) + + also have "... = 1/9" by simp + + finally have case_3: "prob {\. \has_no_collision \} \ 1/9" by simp + + have "prob {\. real_of_rat \ * of_rat (F 0 as) < \estimate' (sketch_rv' \) - of_rat (F 0 as)\} \ + prob {\. Q u \ \ t \ Q v \ < t \ \(has_no_collision \)}" + proof (rule pmf_mono[OF M_def], rule ccontr) + fix \ + assume "\ \ set_pmf (pmf_of_set space)" + assume "\ \ {\. real_of_rat \ * real_of_rat (F 0 as) < \estimate' (sketch_rv' \) - real_of_rat (F 0 as)\}" + hence est: "real_of_rat \ * real_of_rat (F 0 as) < \estimate' (sketch_rv' \) - real_of_rat (F 0 as)\" by simp + assume "\ \ {\. t \ Q u \ \ Q v \ < t \ \ has_no_collision \}" + hence "\( t \ Q u \ \ Q v \ < t \ \ has_no_collision \)" by simp + hence lb: "Q u \ < t" and ub: "Q v \ \ t" and no_col: "has_no_collision \" by simp+ + + define y where "y = nth_mset (t-1) {#int (hash x \). x \# mset_set (set as)#}" + define y' where "y' = nth_mset (t-1) {#tr_hash x \. x \# mset_set (set as)#}" + + have rank_t_lb: "u \ y" + unfolding y_def using True t_gt_0 lb + by (intro nth_mset_bound_left, simp_all add:count_less_def swap_filter_image Q_def) + + have rank_t_ub: "y \ v - 1" + unfolding y_def using True t_gt_0 ub + by (intro nth_mset_bound_right, simp_all add:Q_def swap_filter_image count_le_def) + + have y_ge_0: "real_of_int y \ 0" using rank_t_lb a_ge_0 by linarith + + have "mono (\x. truncate_down r (real_of_int x))" + by (metis truncate_down_mono mono_def of_int_le_iff) + hence y'_eq: "y' = truncate_down r y" + unfolding y_def y'_def using True t_gt_0 + by (subst nth_mset_commute_mono[where f="(\x. truncate_down r (of_int x))"]) + (simp_all add: multiset.map_comp comp_def tr_hash_def) + + have "real_of_int u * (1 - 2 powr -real r) \ real_of_int y * (1 - 2 powr (-real r))" + using rank_t_lb of_int_le_iff two_pow_r_le_1 + by (intro mult_right_mono, auto) + also have "... \ y'" + using y'_eq truncate_down_pos[OF y_ge_0] by simp + finally have rank_t_lb': "u * (1 - 2 powr -real r) \ y'" by simp + + have "y' \ real_of_int y" + by (subst y'_eq, rule truncate_down_le, simp) + also have "... \ real_of_int (v-1)" + using rank_t_ub of_int_le_iff by blast + finally have rank_t_ub': "y' \ v-1" + by simp + + have "0 < u * (1-2 powr -real r)" + using a_ge_1 two_pow_r_le_1 by (intro mult_pos_pos, auto) + hence y'_pos: "y' > 0" using rank_t_lb' by linarith + + have no_col': "\x. x \ y' \ count {#tr_hash x \. x \# mset_set (set as)#} x \ 1" + using rank_t_ub' no_col + by (simp add:vimage_def card_le_Suc0_iff_eq count_image_mset has_no_collision_def) force + + have h_1: "Max (sketch_rv' \) = y'" + using True t_gt_0 no_col' + by (simp add:sketch_rv'_def y'_def nth_mset_max) + + have "card (sketch_rv' \) = card (least ((t-1)+1) (set_mset {#tr_hash x \. x \# mset_set (set as)#}))" + using t_gt_0 by (simp add:sketch_rv'_def) + also have "... = (t-1) +1" + using True t_gt_0 no_col' by (intro nth_mset_max(2), simp_all add:y'_def) + also have "... = t" using t_gt_0 by simp + finally have "card (sketch_rv' \) = t" by simp + hence h_3: "estimate' (sketch_rv' \) = real t * real p / y'" + using h_1 by (simp add:estimate'_def) + + have "(real t) * real p \ (1 + \') * real m * ((real t) * real p / (real m * (1 + \')))" + using \'_lt_1 m_def True t_gt_0 \'_gt_0 by auto + also have "... \ (1+\') * m * u" + using \'_gt_0 by (intro mult_left_mono, simp_all add:u_def) + also have "... < ((1 + real_of_rat \)*(1-real_of_rat \/8)) * m * u" + using True m_def t_gt_0 a_ge_1 \_range + by (intro mult_strict_right_mono, auto simp add:\'_def right_diff_distrib) + also have "... \ ((1 + real_of_rat \)*(1-2 powr (-r))) * m * u" + using r_le_\ \_range a_ge_0 by (intro mult_right_mono mult_left_mono, auto) + also have "... = (1 + real_of_rat \) * m * (u * (1-2 powr -real r))" + by simp + also have "... \ (1 + real_of_rat \) * m * y'" + using \_range by (intro mult_left_mono rank_t_lb', simp) + finally have "real t * real p < (1 + real_of_rat \) * m * y'" by simp + hence f_1: "estimate' (sketch_rv' \) < (1 + real_of_rat \) * m" + using y'_pos by (simp add: h_3 pos_divide_less_eq) + + have "(1 - real_of_rat \) * m * y' \ (1 - real_of_rat \) * m * v" + using \_range rank_t_ub' y'_pos by (intro mult_mono rank_t_ub', simp_all) + also have "... = (1-real_of_rat \) * (real m * v)" + by simp + also have "... < (1-\') * (real m * v)" + using \_range m_ge_0 v_ge_1 + by (intro mult_strict_right_mono mult_pos_pos, simp_all add:\'_def) + also have "... \ (1-\') * (real m * (real t * real p / (real m * (1-\'))))" + using \'_gt_0 \'_lt_1 by (intro mult_left_mono, auto simp add:v_def) + also have "... = real t * real p" + using \'_gt_0 \'_lt_1 t_gt_0 p_gt_0 m_ge_0 by auto + finally have "(1 - real_of_rat \) * m * y' < real t * real p" by simp + hence f_2: "estimate' (sketch_rv' \) > (1 - real_of_rat \) * m" + using y'_pos by (simp add: h_3 pos_less_divide_eq) + + have "abs (estimate' (sketch_rv' \) - real_of_rat (F 0 as)) < real_of_rat \ * (real_of_rat (F 0 as))" + using f_1 f_2 by (simp add:abs_less_iff algebra_simps m_eq_F_0) + thus "False" using est by linarith + qed + also have "... \ 1/9 + (1/9 + 1/9)" + by (intro pmf_add_2[OF M_def] case_1 case_2 case_3) + also have "... = 1/3" by simp + finally show ?thesis by simp +next + case False + have "prob {\. real_of_rat \ * of_rat (F 0 as) < \estimate' (sketch_rv' \) - of_rat (F 0 as)\} \ + prob {\. \x \ set as. \y \ set as. x \ y \ tr_hash x \ \ real p \ tr_hash x \ = tr_hash y \}" + proof (rule pmf_mono[OF M_def]) + fix \ + assume a:"\ \ {\. real_of_rat \ * real_of_rat (F 0 as) < \estimate' (sketch_rv' \) - real_of_rat (F 0 as)\}" + assume b:"\ \ set_pmf (pmf_of_set space)" + have c: "card (set as) < t" using False by auto + hence "card ((\x. tr_hash x \) ` set as) < t" + using card_image_le order_le_less_trans by blast + hence d:"card (sketch_rv' \) = card ((\x. tr_hash x \) ` (set as))" + by (simp add:sketch_rv'_def card_least) + have "card (sketch_rv' \) < t" + by (metis List.finite_set c d card_image_le order_le_less_trans) + hence "estimate' (sketch_rv' \) = card (sketch_rv' \)" by (simp add:estimate'_def) + hence "card (sketch_rv' \) \ real_of_rat (F 0 as)" + using a \_range by simp + (metis abs_zero cancel_comm_monoid_add_class.diff_cancel of_nat_less_0_iff pos_prod_lt zero_less_of_rat_iff) + hence "card (sketch_rv' \) \ card (set as)" + using m_def m_eq_F_0 by linarith + hence "\inj_on (\x. tr_hash x \) (set as)" + using card_image d by auto + moreover have "tr_hash x \ \ real p" if a:"x \ set as" for x + proof - + have "hash x \ < p" + using hash_range as_lt_p a b by (simp add:mod_ring_carr M_def) + thus "tr_hash x \ \ real p" using truncate_down_le by (simp add:tr_hash_def) + qed + ultimately show "\ \ {\. \x \ set as. \y \ set as. x \ y \ tr_hash x \ \ real p \ tr_hash x \ = tr_hash y \}" + by (simp add:inj_on_def, blast) + qed + also have "... \ (5/2) * (real (card (set as)))\<^sup>2 * (real p)\<^sup>2 * 2 powr - real r / (real p)\<^sup>2 + 1 / real p" + using p_gt_0 by (intro collision_prob, auto) + also have "... = (5/2) * (real (card (set as)))\<^sup>2 * 2 powr (- real r) + 1 / real p" + using p_gt_0 by (simp add:ac_simps power2_eq_square) + also have "... \ (5/2) * (real t)\<^sup>2 * 2 powr (-real r) + 1 / real p" + using False by (intro add_mono mult_right_mono mult_left_mono power_mono, auto) + also have "... \ 1/6 + 1/6" + using t_r_bound p_ge_18 by (intro add_mono, simp_all) + also have "... \ 1/3" by simp + finally show ?thesis by simp +qed + +private lemma median_bounds: + "\

(\ in measure_pmf \\<^sub>0. \median s (\i. estimate (sketch_rv (\ i))) - F 0 as\ \ \ * F 0 as) \ 1 - real_of_rat \" +proof - + have "strict_mono_on real_of_float A" for A by (meson less_float.rep_eq strict_mono_onI) + hence real_g_2: "\\. sketch_rv' \ = real_of_float ` sketch_rv \" + by (simp add: sketch_rv'_def sketch_rv_def tr_hash_def least_mono_commute image_comp) + + moreover have "inj_on real_of_float A" for A + using real_of_float_inject by (simp add:inj_on_def) + ultimately have card_eq: "\\. card (sketch_rv \) = card (sketch_rv' \)" + using real_g_2 by (auto intro!: card_image[symmetric]) + + have "Max (sketch_rv' \) = real_of_float (Max (sketch_rv \))" if a:"card (sketch_rv' \) \ t" for \ + proof - + have "mono real_of_float" + using less_eq_float.rep_eq mono_def by blast + moreover have "finite (sketch_rv \)" + by (simp add:sketch_rv_def least_def) + moreover have " sketch_rv \ \ {}" + using card_eq[symmetric] card_gt_0_iff t_gt_0 a by (simp, force) + ultimately show ?thesis + by (subst mono_Max_commute[where f="real_of_float"], simp_all add:real_g_2) + qed + hence real_g: "\\. estimate' (sketch_rv' \) = real_of_rat (estimate (sketch_rv \))" + by (simp add:estimate_def estimate'_def card_eq of_rat_divide of_rat_mult of_rat_add real_of_rat_of_float) + + have indep: "prob_space.indep_vars (measure_pmf \\<^sub>0) (\_. borel) (\i \. estimate' (sketch_rv' (\ i))) {0..\<^sub>0_def + by (rule indep_vars_restrict_intro', auto simp add:restrict_dfl_def lessThan_atLeast0) + + moreover have "- (18 * ln (real_of_rat \)) \ real s" + using of_nat_ceiling by (simp add:s_def) blast + + moreover have "i < s \ measure \\<^sub>0 {\. of_rat \ * of_rat (F 0 as) < \estimate' (sketch_rv' (\ i)) - of_rat (F 0 as)\} \ 1/3" + for i + using estimate'_bounds unfolding \\<^sub>0_def M_def + by (subst prob_prod_pmf_slice, simp_all) + + ultimately have "1-real_of_rat \ \ \

(\ in measure_pmf \\<^sub>0. + \median s (\i. estimate' (sketch_rv' (\ i))) - real_of_rat (F 0 as)\ \ real_of_rat \ * real_of_rat (F 0 as))" + using \_range prob_space_measure_pmf + by (intro prob_space.median_bound_2) auto + also have "... = \

(\ in measure_pmf \\<^sub>0. + \median s (\i. estimate (sketch_rv (\ i))) - F 0 as\ \ \ * F 0 as)" + using s_gt_0 median_rat[symmetric] real_g by (intro arg_cong2[where f="measure"]) + (simp_all add:of_rat_diff[symmetric] of_rat_mult[symmetric] of_rat_less_eq) + finally show "\

(\ in measure_pmf \\<^sub>0. \median s (\i. estimate (sketch_rv (\ i))) - F 0 as\ \ \ * F 0 as) \ 1-real_of_rat \" + by blast +qed + +lemma f0_alg_correct': + "\

(\ in measure_pmf result. \\ - F 0 as\ \ \ * F 0 as) \ 1 - of_rat \" +proof - + have f0_result_elim: "\x. f0_result (s, t, p, r, x, \i\{..i. estimate (sketch_rv (x i))))" + by (simp add:estimate_def, rule median_cong, simp) + + have "result = map_pmf (\x. (s, t, p, r, x, \i\{..\<^sub>0 \ f0_result" + by (subst result_def, subst f0_alg_sketch, simp) + also have "... = \\<^sub>0 \ (\x. return_pmf (s, t, p, r, x, \i\{.. f0_result" + by (simp add:t_def p_def r_def s_def map_pmf_def) + also have "... = \\<^sub>0 \ (\x. return_pmf (median s (\i. estimate (sketch_rv (x i)))))" + by (subst bind_assoc_pmf, subst bind_return_pmf, subst f0_result_elim) simp + finally have a:"result = \\<^sub>0 \ (\x. return_pmf (median s (\i. estimate (sketch_rv (x i)))))" + by simp + + show ?thesis + using median_bounds by (simp add: a map_pmf_def[symmetric]) +qed + +private lemma f_subset: + assumes "g ` A \ h ` B" + shows "(\x. f (g x)) ` A \ (\x. f (h x)) ` B" + using assms by auto + +lemma f0_exact_space_usage': + defines "\ \ fold (\a state. state \ f0_update a) as (f0_init \ \ n)" + shows "AE \ in \. bit_count (encode_f0_state \) \ f0_space_usage (n, \, \)" +proof - + + have log_2_4: "log 2 4 = 2" + by (metis log2_of_power_eq mult_2 numeral_Bit0 of_nat_numeral power2_eq_square) + + have a: "bit_count (F\<^sub>e (float_of (truncate_down r y))) \ + ereal (12 + 4 * real r + 2 * log 2 (log 2 (n+13)))" if a_1:"y \ {.. 1") + case True + + have aux_1: " 0 < 2 + log 2 (real y)" + using True by (intro add_pos_nonneg, auto) + have aux_2: "0 < 2 + log 2 (real p)" + using p_gt_1 by (intro add_pos_nonneg, auto) + + have "bit_count (F\<^sub>e (float_of (truncate_down r y))) \ + ereal (10 + 4 * real r + 2 * log 2 (2 + \log 2 \real y\\))" + by (rule truncate_float_bit_count) + also have "... = ereal (10 + 4 * real r + 2 * log 2 (2 + (log 2 (real y))))" + using True by simp + also have "... \ ereal (10 + 4 * real r + 2 * log 2 (2 + log 2 p))" + using aux_1 aux_2 True p_gt_0 a_1 by simp + also have "... \ ereal (10 + 4 * real r + 2 * log 2 (log 2 4 + log 2 (2 * n + 40)))" + using log_2_4 p_le_n p_gt_0 + by (intro ereal_mono add_mono mult_left_mono log_mono of_nat_mono add_pos_nonneg, auto) + also have "... = ereal (10 + 4 * real r + 2 * log 2 (log 2 (8 * n + 160)))" + by (simp add:log_mult[symmetric]) + also have "... \ ereal (10 + 4 * real r + 2 * log 2 (log 2 ((n+13) powr 2)))" + by (intro ereal_mono add_mono mult_left_mono log_mono of_nat_mono add_pos_nonneg) + (auto simp add:power2_eq_square algebra_simps) + also have "... = ereal (10 + 4 * real r + 2 * log 2 (log 2 4 * log 2 (n + 13)))" + by (subst log_powr, simp_all add:log_2_4) + also have "... = ereal (12 + 4 * real r + 2 * log 2 (log 2 (n + 13)))" + by (subst log_mult, simp_all add:log_2_4) + finally show ?thesis by simp + next + case False + hence "y = 0" using a_1 by simp + then show ?thesis by (simp add:float_bit_count_zero) + qed + + have "bit_count (encode_f0_state (s, t, p, r, x, \i\{.. + f0_space_usage (n, \, \)" if b: "x \ {..\<^sub>E space" for x + proof - + have c: "x \ extensional {.. (\k. float_of (truncate_down r k)) ` {.. (\xa. float_of (truncate_down r (hash xa (x y)))) ` set as" + using least_subset by (auto simp add:sketch_rv_def tr_hash_def) + also have "... \ (\k. float_of (truncate_down r (real k))) ` {..y. y < s \ finite (sketch_rv (x y))" + unfolding sketch_rv_def by (rule finite_subset[OF least_subset], simp) + moreover have card_sketch: "\y. y < s \ card (sketch_rv (x y)) \ t " + by (simp add:sketch_rv_def card_least) + moreover have "\y z. y < s \ z \ sketch_rv (x y) \ + bit_count (F\<^sub>e z) \ ereal (12 + 4 * real r + 2 * log 2 (log 2 (real n + 13)))" + using a d by auto + ultimately have e: "\y. y < s \ bit_count (S\<^sub>e F\<^sub>e (sketch_rv (x y))) + \ ereal (real t) * (ereal (12 + 4 * real r + 2 * log 2 (log 2 (real (n + 13)))) + 1) + 1" + using float_encoding by (intro set_bit_count_est, auto) + + have f: "\y. y < s \ bit_count (P\<^sub>e p 2 (x y)) \ ereal (real 2 * (log 2 (real p) + 1))" + using p_gt_1 b + by (intro bounded_degree_polynomial_bit_count) (simp_all add:space_def PiE_def Pi_def) + + have "bit_count (encode_f0_state (s, t, p, r, x, \i\{..e s) + bit_count (N\<^sub>e t) + bit_count (N\<^sub>e p) + bit_count (N\<^sub>e r) + + bit_count (([0..\<^sub>e P\<^sub>e p 2) x) + + bit_count (([0..\<^sub>e S\<^sub>e F\<^sub>e) (\i\{.. ereal (2* log 2 (real s + 1) + 1) + ereal (2* log 2 (real t + 1) + 1) + + ereal (2* log 2 (real p + 1) + 1) + ereal (2 * log 2 (real r + 1) + 1) + + (ereal (real s) * (ereal (real 2 * (log 2 (real p) + 1)))) + + (ereal (real s) * ((ereal (real t) * + (ereal (12 + 4 * real r + 2 * log 2 (log 2 (real (n + 13)))) + 1) + 1)))" + using c e f + by (intro add_mono exp_golomb_bit_count fun_bit_count_est[where xs="[0.. ereal ( 4 + 2 * log 2 (real s + 1) + 2 * log 2 (real t + 1) + + 2 * log 2 (2 * (21 + real n)) + 2 * log 2 (real r + 1) + real s * (3 + 2 * log 2 (2 * (21 + real n)) + + real t * (13 + (4 * real r + 2 * log 2 (log 2 (real n + 13))))))" + using p_le_n p_gt_0 + by (intro ereal_mono add_mono mult_left_mono, auto) + also have "... = ereal (6 + 2 * log 2 (real s + 1) + 2 * log 2 (real t + 1) + + 2 * log 2 (21 + real n) + 2 * log 2 (real r + 1) + real s * (5 + 2 * log 2 (21 + real n) + + real t * (13 + (4 * real r + 2 * log 2 (log 2 (real n + 13))))))" + by (subst (1 2) log_mult, auto) + also have "... \ f0_space_usage (n, \, \)" + by (simp add:s_def[symmetric] r_def[symmetric] t_def[symmetric] Let_def) + (simp add:algebra_simps) + finally show "bit_count (encode_f0_state (s, t, p, r, x, \i\{.. + f0_space_usage (n, \, \)" by simp + qed + hence "\x. x \ set_pmf \\<^sub>0 \ + bit_count (encode_f0_state (s, t, p, r, x, \i\{.. ereal (f0_space_usage (n, \, \))" + by (simp add:\\<^sub>0_def set_prod_pmf del:f0_space_usage.simps) + hence "\y. y \ set_pmf \ \ bit_count (encode_f0_state y) \ ereal (f0_space_usage (n, \, \))" + by (simp add: \_def f0_alg_sketch del:f0_space_usage.simps f0_init.simps) + (metis (no_types, lifting) image_iff pmf.set_map) + thus ?thesis + by (simp add: AE_measure_pmf_iff del:f0_space_usage.simps) +qed + +end + +text \Main results of this section:\ + +theorem f0_alg_correct: + assumes "\ \ {0<..<1}" + assumes "\ \ {0<..<1}" + assumes "set as \ {.. \ fold (\a state. state \ f0_update a) as (f0_init \ \ n) \ f0_result" + shows "\

(\ in measure_pmf \. \\ - F 0 as\ \ \ * F 0 as) \ 1 - of_rat \" + using f0_alg_correct'[OF assms(1-3)] unfolding \_def by blast + +theorem f0_exact_space_usage: + assumes "\ \ {0<..<1}" + assumes "\ \ {0<..<1}" + assumes "set as \ {.. \ fold (\a state. state \ f0_update a) as (f0_init \ \ n)" + shows "AE \ in \. bit_count (encode_f0_state \) \ f0_space_usage (n, \, \)" + using f0_exact_space_usage'[OF assms(1-3)] unfolding \_def by blast + +theorem f0_asympotic_space_complexity: + "f0_space_usage \ O[at_top \\<^sub>F at_right 0 \\<^sub>F at_right 0](\(n, \, \). ln (1 / of_rat \) * + (ln (real n) + 1 / (of_rat \)\<^sup>2 * (ln (ln (real n)) + ln (1 / of_rat \))))" + (is "_ \ O[?F](?rhs)") +proof - + define n_of :: "nat \ rat \ rat \ nat" where "n_of = (\(n, \, \). n)" + define \_of :: "nat \ rat \ rat \ rat" where "\_of = (\(n, \, \). \)" + define \_of :: "nat \ rat \ rat \ rat" where "\_of = (\(n, \, \). \)" + define t_of where "t_of = (\x. nat \80 / (real_of_rat (\_of x))\<^sup>2\)" + define s_of where "s_of = (\x. nat \-(18 * ln (real_of_rat (\_of x)))\)" + define r_of where "r_of = (\x. nat (4 * \log 2 (1 / real_of_rat (\_of x))\ + 23))" + + define g where "g = (\x. ln (1 / of_rat (\_of x)) * (ln (real (n_of x)) + + 1 / (of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / of_rat (\_of x)))))" + + have evt: "(\x. + 0 < real_of_rat (\_of x) \ 0 < real_of_rat (\_of x) \ + 1/real_of_rat (\_of x) \ \ \ 1/real_of_rat (\_of x) \ \ \ + real (n_of x) \ n \ P x) \ eventually P ?F" (is "(\x. ?prem x \ _) \ _") + for \ \ n P + apply (rule eventually_mono[where P="?prem" and Q="P"]) + apply (simp add:\_of_def case_prod_beta' \_of_def n_of_def) + apply (intro eventually_conj eventually_prod1' eventually_prod2' + sequentially_inf eventually_at_right_less inv_at_right_0_inf) + by (auto simp add:prod_filter_eq_bot) + + have exp_pos: "exp k \ real x \ x > 0" for k x + using exp_gt_zero gr0I by force + + have exp_gt_1: "exp 1 \ (1::real)" + by simp + + have 1: "(\_. 1) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" + by (auto intro!:landau_o.big_mono evt[where \="exp 1"] iffD2[OF ln_ge_iff] simp add:abs_ge_iff) + + have 2: "(\_. 1) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" + by (auto intro!:landau_o.big_mono evt[where \="exp 1"] iffD2[OF ln_ge_iff] simp add:abs_ge_iff) + + have 3: " (\x. 1) \ O[?F](\x. ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x)))" + using exp_pos + by (intro landau_sum_2 2 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff], auto) + have 4: "(\_. 1) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" + using one_le_power + by (intro landau_o.big_mono evt[where \="1"], auto simp add:power_one_over[symmetric]) + + have "(\x. 80 * (1 / (real_of_rat (\_of x))\<^sup>2)) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" + by (subst landau_o.big.cmult_in_iff, auto) + hence 5: "(\x. real (t_of x)) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" + unfolding t_of_def + by (intro landau_real_nat landau_ceil 4, auto) + + have "(\x. ln (real_of_rat (\_of x))) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" + by (intro landau_o.big_mono evt[where \="1"], auto simp add:ln_div) + hence 6: "(\x. real (s_of x)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" + unfolding s_of_def by (intro landau_nat_ceil 1, simp) + + have 7: " (\x. 1) \ O[?F](\x. ln (real (n_of x)))" + using exp_pos by (auto intro!: landau_o.big_mono evt[where n="exp 1"] iffD2[OF ln_ge_iff] simp: abs_ge_iff) + + have 8:" (\_. 1) \ + O[?F](\x. ln (real (n_of x)) + 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" + using order_trans[OF exp_gt_1] exp_pos + by (intro landau_sum_1 7 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff] + mult_nonneg_nonneg add_nonneg_nonneg) auto + + have "(\x. ln (real (s_of x) + 1)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" + by (intro landau_ln_3 sum_in_bigo 6 1, simp) + + hence 9: "(\x. log 2 (real (s_of x) + 1)) \ O[?F](g)" + unfolding g_def by (intro landau_o.big_mult_1 8, auto simp:log_def) + have 10: "(\x. 1) \ O[?F](g)" + unfolding g_def by (intro landau_o.big_mult_1 8 1) + + have "(\x. ln (real (t_of x) + 1)) \ + O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" + using 5 by (intro landau_o.big_mult_1 3 landau_ln_3 sum_in_bigo 4, simp_all) + hence " (\x. log 2 (real (t_of x) + 1)) \ + O[?F](\x. ln (real (n_of x)) + 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" + using order_trans[OF exp_gt_1] exp_pos + by (intro landau_sum_2 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff] + mult_nonneg_nonneg add_nonneg_nonneg) (auto simp add:log_def) + hence 11: "(\x. log 2 (real (t_of x) + 1)) \ O[?F](g)" + unfolding g_def by (intro landau_o.big_mult_1' 1, auto) + have " (\x. 1) \ O[?F](\x. real (n_of x))" + by (intro landau_o.big_mono evt[where n="1"], auto) + hence "(\x. ln (real (n_of x) + 21)) \ O[?F](\x. ln (real (n_of x)))" + by (intro landau_ln_2[where a="2"] evt[where n="2"] sum_in_bigo, auto) + + hence 12: "(\x. log 2 (real (n_of x) + 21)) \ O[?F](g)" + unfolding g_def using exp_pos order_trans[OF exp_gt_1] + by (intro landau_o.big_mult_1' 1 landau_sum_1 evt[where n="exp 1" and \="1"] + ln_ge_zero iffD2[OF ln_ge_iff] mult_nonneg_nonneg add_nonneg_nonneg) (auto simp add:log_def) + + have "(\x. ln (1 / real_of_rat (\_of x))) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" + by (intro landau_ln_3 evt[where \="1"] landau_o.big_mono) + (auto simp add:power_one_over[symmetric] self_le_power) + hence " (\x. real (nat (4*\log 2 (1 / real_of_rat (\_of x))\+23))) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" + using 4 by (auto intro!: landau_real_nat sum_in_bigo landau_ceil simp:log_def) + hence " (\x. ln (real (r_of x) + 1)) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" + unfolding r_of_def + by (intro landau_ln_3 sum_in_bigo 4, auto) + hence " (\x. log 2 (real (r_of x) + 1)) \ + O[?F](\x. (1 / (real_of_rat (\_of x))\<^sup>2) * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" + by (intro landau_o.big_mult_1 3, simp add:log_def) + hence " (\x. log 2 (real (r_of x) + 1)) \ + O[?F](\x. ln (real (n_of x)) + 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" + using exp_pos order_trans[OF exp_gt_1] + by (intro landau_sum_2 evt[where n="exp 1" and \="1"] ln_ge_zero + iffD2[OF ln_ge_iff] add_nonneg_nonneg mult_nonneg_nonneg) (auto) + hence 13: "(\x. log 2 (real (r_of x) + 1)) \ O[?F](g)" + unfolding g_def by (intro landau_o.big_mult_1' 1, auto) + have 14: "(\x. 1) \ O[?F](\x. real (n_of x))" + by (intro landau_o.big_mono evt[where n="1"], auto) + + have "(\x. ln (real (n_of x) + 13)) \ O[?F](\x. ln (real (n_of x)))" + using 14 by (intro landau_ln_2[where a="2"] evt[where n="2"] sum_in_bigo, auto) + + hence "(\x. ln (log 2 (real (n_of x) + 13))) \ O[?F](\x. ln (ln (real (n_of x))))" + using exp_pos by (intro landau_ln_2[where a="2"] iffD2[OF ln_ge_iff] evt[where n="exp 2"]) + (auto simp add:log_def) + + hence "(\x. log 2 (log 2 (real (n_of x) + 13))) \ O[?F](\x. ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x)))" + using exp_pos by (intro landau_sum_1 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff]) + (auto simp add:log_def) + + moreover have "(\x. real (r_of x)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" + unfolding r_of_def using 2 + by (auto intro!: landau_real_nat sum_in_bigo landau_ceil simp:log_def) + hence "(\x. real (r_of x)) \ O[?F](\x. ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x)))" + using exp_pos + by (intro landau_sum_2 evt[where n="exp 1" and \="1"] ln_ge_zero iffD2[OF ln_ge_iff], auto) + + ultimately have 15:" (\x. real (t_of x) * (13 + 4 * real (r_of x) + 2 * log 2 (log 2 (real (n_of x) + 13)))) + \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" + using 5 3 + by (intro landau_o.mult sum_in_bigo, auto) + + have "(\x. 5 + 2 * log 2 (21 + real (n_of x)) + real (t_of x) * (13 + 4 * real (r_of x) + 2 * log 2 (log 2 (real (n_of x) + 13)))) + \ O[?F](\x. ln (real (n_of x)) + 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x))))" + proof - + have "\\<^sub>F x in ?F. 0 \ ln (real (n_of x))" + by (intro evt[where n="1"] ln_ge_zero, auto) + moreover have "\\<^sub>F x in ?F. 0 \ 1 / (real_of_rat (\_of x))\<^sup>2 * (ln (ln (real (n_of x))) + ln (1 / real_of_rat (\_of x)))" + using exp_pos + by (intro evt[where n="exp 1" and \="1"] mult_nonneg_nonneg add_nonneg_nonneg + ln_ge_zero iffD2[OF ln_ge_iff]) auto + moreover have " (\x. ln (21 + real (n_of x))) \ O[?F](\x. ln (real (n_of x)))" + using 14 by (intro landau_ln_2[where a="2"] sum_in_bigo evt[where n="2"], auto) + hence "(\x. 5 + 2 * log 2 (21 + real (n_of x))) \ O[?F](\x. ln (real (n_of x)))" + using 7 by (intro sum_in_bigo, auto simp add:log_def) + ultimately show ?thesis + using 15 by (rule landau_sum) + qed + + hence 16: "(\x. real (s_of x) * (5 + 2 * log 2 (21 + real (n_of x)) + real (t_of x) * + (13 + 4 * real (r_of x) + 2 * log 2 (log 2 (real (n_of x) + 13))))) \ O[?F](g)" + unfolding g_def + by (intro landau_o.mult 6, auto) + + have "f0_space_usage = (\x. f0_space_usage (n_of x, \_of x, \_of x))" + by (simp add:case_prod_beta' n_of_def \_of_def \_of_def) + also have "... \ O[?F](g)" + using 9 10 11 12 13 16 + by (simp add:fun_cong[OF s_of_def[symmetric]] fun_cong[OF t_of_def[symmetric]] + fun_cong[OF r_of_def[symmetric]] Let_def) (intro sum_in_bigo, auto) + also have "... = O[?F](?rhs)" + by (simp add:case_prod_beta' g_def n_of_def \_of_def \_of_def) + finally show ?thesis + by simp +qed + +end diff --git a/thys/Frequency_Moments/Frequency_Moment_2.thy b/thys/Frequency_Moments/Frequency_Moment_2.thy new file mode 100644 --- /dev/null +++ b/thys/Frequency_Moments/Frequency_Moment_2.thy @@ -0,0 +1,720 @@ +section \Frequency Moment $2$\ + +theory Frequency_Moment_2 + imports + Universal_Hash_Families.Carter_Wegman_Hash_Family + Equivalence_Relation_Enumeration.Equivalence_Relation_Enumeration + Landau_Ext + Median_Method.Median + Product_PMF_Ext + Universal_Hash_Families.Field + Frequency_Moments +begin + +text \This section contains a formalization of the algorithm for the second frequency moment. +It is based on the algorithm described in \cite[\textsection 2.2]{alon1999}. +The only difference is that the algorithm is adapted to work with prime field of odd order, which +greatly reduces the implementation complexity.\ + +fun f2_hash where + "f2_hash p h k = (if even (ring.hash (mod_ring p) k h) then int p - 1 else - int p - 1)" + +type_synonym f2_state = "nat \ nat \ nat \ (nat \ nat \ nat list) \ (nat \ nat \ int)" + +fun f2_init :: "rat \ rat \ nat \ f2_state pmf" where + "f2_init \ \ n = + do { + let s\<^sub>1 = nat \6 / \\<^sup>2\; + let s\<^sub>2 = nat \-(18 * ln (real_of_rat \))\; + let p = prime_above (max n 3); + h \ prod_pmf ({..1} \ {..2}) (\_. pmf_of_set (bounded_degree_polynomials (mod_ring p) 4)); + return_pmf (s\<^sub>1, s\<^sub>2, p, h, (\_ \ {..1} \ {..2}. (0 :: int))) + }" + +fun f2_update :: "nat \ f2_state \ f2_state pmf" where + "f2_update x (s\<^sub>1, s\<^sub>2, p, h, sketch) = + return_pmf (s\<^sub>1, s\<^sub>2, p, h, \i \ {..1} \ {..2}. f2_hash p (h i) x + sketch i)" + +fun f2_result :: "f2_state \ rat pmf" where + "f2_result (s\<^sub>1, s\<^sub>2, p, h, sketch) = + return_pmf (median s\<^sub>2 (\i\<^sub>2 \ {..2}. + (\i\<^sub>1\{..1} . (rat_of_int (sketch (i\<^sub>1, i\<^sub>2)))\<^sup>2) / (((rat_of_nat p)\<^sup>2-1) * rat_of_nat s\<^sub>1)))" + +fun f2_space_usage :: "(nat \ nat \ rat \ rat) \ real" where + "f2_space_usage (n, m, \, \) = ( + let s\<^sub>1 = nat \6 / \\<^sup>2 \ in + let s\<^sub>2 = nat \-(18 * ln (real_of_rat \))\ in + 3 + + 2 * log 2 (s\<^sub>1 + 1) + + 2 * log 2 (s\<^sub>2 + 1) + + 2 * log 2 (9 + 2 * real n) + + s\<^sub>1 * s\<^sub>2 * (5 + 4*log 2 (8 + 2 * real n) + 2 * log 2 (real m * (18 + 4 * real n) + 1 )))" + +definition encode_f2_state :: "f2_state \ bool list option" where + "encode_f2_state = + N\<^sub>e \\<^sub>e (\s\<^sub>1. + N\<^sub>e \\<^sub>e (\s\<^sub>2. + N\<^sub>e \\<^sub>e (\p. + (List.product [0..1] [0..2] \\<^sub>e P\<^sub>e p 4) \\<^sub>e + (List.product [0..1] [0..2] \\<^sub>e I\<^sub>e))))" + +lemma "inj_on encode_f2_state (dom encode_f2_state)" +proof - + have " is_encoding encode_f2_state" + unfolding encode_f2_state_def + by (intro dependent_encoding exp_golomb_encoding fun_encoding list_encoding int_encoding poly_encoding) + + thus ?thesis + by (rule encoding_imp_inj) +qed + +context + fixes \ \ :: rat + fixes n :: nat + fixes as :: "nat list" + fixes result + assumes \_range: "\ \ {0<..<1}" + assumes \_range: "\ > 0" + assumes as_range: "set as \ {.. fold (\a state. state \ f2_update a) as (f2_init \ \ n) \ f2_result" +begin + +private definition s\<^sub>1 where "s\<^sub>1 = nat \6 / \\<^sup>2\" + +lemma s1_gt_0: "s\<^sub>1 > 0" + using \_range by (simp add:s\<^sub>1_def) + +private definition s\<^sub>2 where "s\<^sub>2 = nat \-(18* ln (real_of_rat \))\" + +lemma s2_gt_0: "s\<^sub>2 > 0" + using \_range by (simp add:s\<^sub>2_def) + +private definition p where "p = prime_above (max n 3)" + +lemma p_prime: "Factorial_Ring.prime p" + unfolding p_def using prime_above_prime by blast + +lemma p_ge_3: "p \ 3" + unfolding p_def by (meson max.boundedE prime_above_lower_bound) + +lemma p_gt_0: "p > 0" using p_ge_3 by linarith + +lemma p_gt_1: "p > 1" using p_ge_3 by simp + +lemma p_ge_n: "p \ n" unfolding p_def + by (meson max.boundedE prime_above_lower_bound ) + +interpretation carter_wegman_hash_family "mod_ring p" 4 + using carter_wegman_hash_familyI[OF mod_ring_is_field mod_ring_finite] + using p_prime by auto + +definition sketch where "sketch = fold (\a state. state \ f2_update a) as (f2_init \ \ n)" +private definition \ where"\ = prod_pmf ({..1} \ {..2}) (\_. pmf_of_set space)" +private definition \\<^sub>p where"\\<^sub>p = measure_pmf \" +private definition sketch_rv where "sketch_rv \ = of_int (sum_list (map (f2_hash p \) as))^2" +private definition mean_rv where "mean_rv \ = (\i\<^sub>2. (\i\<^sub>1 = 0..1. sketch_rv (\ (i\<^sub>1, i\<^sub>2))) / (((of_nat p)\<^sup>2 - 1) * of_nat s\<^sub>1))" +private definition result_rv where "result_rv \ = median s\<^sub>2 (\i\<^sub>2\{..2}. mean_rv \ i\<^sub>2)" + +lemma mean_rv_alg_sketch: + "sketch = \ \ (\\. return_pmf (s\<^sub>1, s\<^sub>2, p, \, \i \ {..1} \ {..2}. sum_list (map (f2_hash p (\ i)) as)))" +proof - + have "sketch = fold (\a state. state \ f2_update a) as (f2_init \ \ n)" + by (simp add:sketch_def) + also have "... = \ \ (\\. return_pmf (s\<^sub>1, s\<^sub>2, p, \, + \i \ {..1} \ {..2}. sum_list (map (f2_hash p (\ i)) as)))" + proof (induction as rule:rev_induct) + case Nil + then show ?case + by (simp add:s\<^sub>1_def s\<^sub>2_def space_def p_def[symmetric] \_def restrict_def Let_def) + next + case (snoc a as) + have "fold (\a state. state \ f2_update a) (as @ [a]) (f2_init \ \ n) = \ \ + (\\. return_pmf (s\<^sub>1, s\<^sub>2, p, \, \s \ {..1} \ {..2}. (\x \ as. f2_hash p (\ s) x)) \ f2_update a)" + using snoc by (simp add: bind_assoc_pmf restrict_def del:f2_hash.simps f2_init.simps) + also have "... = \ \ (\\. return_pmf (s\<^sub>1, s\<^sub>2, p, \, \i \ {..1} \ {..2}. (\x \ as@[a]. f2_hash p (\ i) x)))" + by (subst bind_return_pmf) (simp add: add.commute del:f2_hash.simps cong:restrict_cong) + finally show ?case by blast + qed + finally show ?thesis by auto +qed + +lemma distr: "result = map_pmf result_rv \" +proof - + have "result = sketch \ f2_result" + by (simp add:result_def sketch_def) + also have "... = \ \ (\x. f2_result (s\<^sub>1, s\<^sub>2, p, x, \i\{..1} \ {..2}. sum_list (map (f2_hash p (x i)) as)))" + by (simp add: mean_rv_alg_sketch bind_assoc_pmf bind_return_pmf) + also have "... = map_pmf result_rv \" + by (simp add:map_pmf_def result_rv_def mean_rv_def sketch_rv_def lessThan_atLeast0 cong:restrict_cong) + finally show ?thesis by simp +qed + +private lemma f2_hash_pow_exp: + assumes "k < p" + shows + "expectation (\\. real_of_int (f2_hash p \ k) ^m) = + ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)" +proof - + + have "odd p" using p_prime p_ge_3 prime_odd_nat assms by simp + then obtain t where t_def: "p=2*t+1" + using oddE by blast + + have "Collect even \ {..<2 * t + 1} \ (*) 2 ` {.. Collect even \ {..<2 * t + 1}" + by (rule image_subsetI, simp) + ultimately have "card ({k. even k} \ {..x. 2*x) ` {.. {.. {... hash k \ \ Collect even} = (real p + 1)/(2*real p)" + using assms by (subst prob_range, auto simp:frac_eq_eq p_gt_0 mod_ring_def) + + have "p = card {.. {.. ({k. even k} \ {.. {.. {.. {.. {.. {... hash k \ \ Collect odd} = (real p - 1)/(2*real p)" + using assms by (subst prob_range, auto simp add: frac_eq_eq mod_ring_def) + + have "expectation (\x. real_of_int (f2_hash p x k) ^ m) = + expectation (\\. indicator {\. even (hash k \)} \ * (real p - 1)^m + + indicator {\. odd (hash k \)} \ * (-real p - 1)^m)" + by (rule Bochner_Integration.integral_cong, simp, simp) + also have "... = + prob {\. hash k \ \ Collect even} * (real p - 1) ^ m + + prob {\. hash k \ \ Collect odd} * (-real p - 1) ^ m " + by (simp, simp add:M_def) + also have "... = (real p + 1) * (real p - 1) ^ m / (2 * real p) + (real p - 1) * (- real p - 1) ^ m / (2 * real p)" + by (subst prob_even, subst prob_odd, simp) + also have "... = + ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)" + by (simp add:add_divide_distrib ac_simps) + finally show "expectation (\x. real_of_int (f2_hash p x k) ^ m) = + ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)" by simp +qed + +lemma + shows var_sketch_rv:"variance sketch_rv \ 2*(real_of_rat (F 2 as)^2) * ((real p)\<^sup>2-1)\<^sup>2" (is "?A") + and exp_sketch_rv:"expectation sketch_rv = real_of_rat (F 2 as) * ((real p)\<^sup>2-1)" (is "?B") +proof - + define h where "h = (\\ x. real_of_int (f2_hash p \ x))" + define c where "c = (\x. real (count_list as x))" + define r where "r = (\(m::nat). ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p))" + define h_prod where "h_prod = (\as \. prod_list (map (h \) as))" + + define exp_h_prod :: "nat list \ real" where "exp_h_prod = (\as. (\i \ set as. r (count_list as i)))" + + have f_eq: "sketch_rv = (\\. (\x \ set as. c x * h \ x)^2)" + by (rule ext, simp add:sketch_rv_def c_def h_def sum_list_eval del:f2_hash.simps) + + have r_one: "r (Suc 0) = 0" + by (simp add:r_def algebra_simps) + + have r_two: "r 2 = (real p^2-1)" + using p_gt_0 unfolding r_def power2_eq_square + by (simp add:nonzero_divide_eq_eq, simp add:algebra_simps) + + have"(real p)^2 \ 2^2" + by (rule power_mono, use p_gt_1 in linarith, simp) + hence p_square_ge_4: "(real p)\<^sup>2 \ 4" by simp + + have "r 4 = (real p)^4+2*(real p)\<^sup>2 - 3" + using p_gt_0 unfolding r_def + by (subst nonzero_divide_eq_eq, auto simp:power4_eq_xxxx power2_eq_square algebra_simps) + also have "... \ (real p)^4+2*(real p)\<^sup>2 + 3" + by simp + also have "... \ 3 * r 2 * r 2" + using p_square_ge_4 + by (simp add:r_two power4_eq_xxxx power2_eq_square algebra_simps mult_left_mono) + finally have r_four_est: "r 4 \ 3 * r 2 * r 2" by simp + + have exp_h_prod_elim: "exp_h_prod = (\as. prod_list (map (r \ count_list as) (remdups as)))" + by (simp add:exp_h_prod_def prod.set_conv_list[symmetric]) + + have exp_h_prod: "\x. set x \ set as \ length x \ 4 \ expectation (h_prod x) = exp_h_prod x" + proof - + fix x + assume "set x \ set as" + hence x_sub_p: "set x \ {..k. k \ set x \ k < p" by auto + assume "length x \ 4" + hence card_x: "card (set x) \ 4" using card_length dual_order.trans by blast + + have "set x \ carrier (mod_ring p) " + using x_sub_p by (simp add:mod_ring_def) + + hence h_indep: "indep_vars (\_. borel) (\i \. h \ i ^ count_list x i) (set x)" + using k_wise_indep_vars_subset[OF k_wise_indep] card_x as_range h_def + by (auto intro:indep_vars_compose2[where X="hash" and M'=" (\_. discrete)"]) + + have "expectation (h_prod x) = expectation (\\. \ i \ set x. h \ i^(count_list x i))" + by (simp add:h_prod_def prod_list_eval) + also have "... = (\i \ set x. expectation (\\. h \ i^(count_list x i)))" + by (simp add: indep_vars_lebesgue_integral[OF _ h_indep]) + also have "... = (\i \ set x. r (count_list x i))" + using f2_hash_pow_exp x_le_p + by (simp add:h_def r_def M_def[symmetric] del:f2_hash.simps) + also have "... = exp_h_prod x" + by (simp add:exp_h_prod_def) + finally show "expectation (h_prod x) = exp_h_prod x" by simp + qed + + have "\x y. kernel_of x = kernel_of y \ exp_h_prod x = exp_h_prod y" + proof - + fix x y :: "nat list" + assume a:"kernel_of x = kernel_of y" + then obtain f where b:"bij_betw f (set x) (set y)" and c:"\z. z \ set x \ count_list x z = count_list y (f z)" + using kernel_of_eq_imp_bij by blast + have "exp_h_prod x = prod ( (\i. r(count_list y i)) \ f) (set x)" + by (simp add:exp_h_prod_def c) + also have "... = (\i \ f ` (set x). r(count_list y i))" + by (metis b bij_betw_def prod.reindex) + also have "... = exp_h_prod y" + unfolding exp_h_prod_def + by (rule prod.cong, metis b bij_betw_def) simp + finally show "exp_h_prod x = exp_h_prod y" by simp + qed + + hence exp_h_prod_cong: "\p x. of_bool (kernel_of x = kernel_of p) * exp_h_prod p = + of_bool (kernel_of x = kernel_of p) * exp_h_prod x" + by (metis (full_types) of_bool_eq_0_iff vector_space_over_itself.scale_zero_left) + + have c:"(\p\enum_rgfs n. of_bool (kernel_of xs = kernel_of p) * r) = r" + if a:"length xs = n" for xs :: "nat list" and n and r :: real + proof - + have "(\p\enum_rgfs n. of_bool (kernel_of xs = kernel_of p) * 1) = (1::real)" + using equiv_rels_2[OF a[symmetric]] by (simp add:equiv_rels_def comp_def) + thus "(\p\enum_rgfs n. of_bool (kernel_of xs = kernel_of p) * r) = (r::real)" + by (simp add:sum_list_mult_const) + qed + + have "expectation sketch_rv = (\i\set as. (\j\set as. c i * c j * expectation (h_prod [i,j])))" + by (simp add:f_eq h_prod_def power2_eq_square sum_distrib_left sum_distrib_right Bochner_Integration.integral_sum algebra_simps) + also have "... = (\i\set as. (\j\set as. c i * c j * exp_h_prod [i,j]))" + by (simp add:exp_h_prod) + also have "... = (\i \ set as. (\j \ set as. + c i * c j * (sum_list (map (\p. of_bool (kernel_of [i,j] = kernel_of p) * exp_h_prod p) (enum_rgfs 2)))))" + by (subst exp_h_prod_cong, simp add:c) + also have "... = (\i\set as. c i * c i * r 2)" + by (simp add: numeral_eq_Suc kernel_of_eq All_less_Suc exp_h_prod_elim r_one distrib_left sum.distrib sum_collapse) + also have "... = real_of_rat (F 2 as) * ((real p)^2-1)" + by (simp add: sum_distrib_right[symmetric] c_def F_def power2_eq_square of_rat_sum of_rat_mult r_two) + finally show b:?B by simp + + have "expectation (\x. (sketch_rv x)\<^sup>2) = (\i1 \ set as. (\i2 \ set as. (\i3 \ set as. (\i4 \ set as. + c i1 * c i2 * c i3 * c i4 * expectation (h_prod [i1, i2, i3, i4])))))" + by (simp add:f_eq h_prod_def power4_eq_xxxx sum_distrib_left sum_distrib_right Bochner_Integration.integral_sum algebra_simps) + also have "... = (\i1 \ set as. (\i2 \ set as. (\i3 \ set as. (\i4 \ set as. + c i1 * c i2 * c i3 * c i4 * exp_h_prod [i1,i2,i3,i4]))))" + by (simp add:exp_h_prod) + also have "... = (\i1 \ set as. (\i2 \ set as. (\i3 \ set as. (\i4 \ set as. + c i1 * c i2 * c i3 * c i4 * + (sum_list (map (\p. of_bool (kernel_of [i1,i2,i3,i4] = kernel_of p) * exp_h_prod p) (enum_rgfs 4)))))))" + by (subst exp_h_prod_cong, simp add:c) + also have "... = + 3 * (\i \ set as. (\j \ set as. c i^2 * c j^2 * r 2 * r 2)) + ((\ i \ set as. c i^4 * r 4) - 3 * (\ i \ set as. c i ^ 4 * r 2 * r 2))" + apply (simp add: numeral_eq_Suc exp_h_prod_elim r_one) (* large intermediate terms *) + apply (simp add: kernel_of_eq All_less_Suc numeral_eq_Suc distrib_left sum.distrib sum_collapse neq_commute) + apply (simp add: algebra_simps sum_subtractf sum_collapse) + by (simp add: sum_distrib_left algebra_simps) + also have "... = 3 * (\i \ set as. c i^2 * r 2)^2 + (\ i \ set as. c i ^ 4 * (r 4 - 3 * r 2 * r 2))" + by (simp add:power2_eq_square sum_distrib_left algebra_simps sum_subtractf) + also have "... = 3 * (\i \ set as. c i^2)^2 * (r 2)^2 + (\i \ set as. c i ^ 4 * (r 4 - 3 * r 2 * r 2))" + by (simp add:power_mult_distrib sum_distrib_right[symmetric]) + also have "... \ 3 * (\i \ set as. c i^2)^2 * (r 2)^2 + (\i \ set as. c i ^ 4 * 0)" + using r_four_est + by (auto intro!: sum_nonpos simp add:mult_nonneg_nonpos) + also have "... = 3 * (real_of_rat (F 2 as)^2) * ((real p)\<^sup>2-1)\<^sup>2" + by (simp add:c_def r_two F_def of_rat_sum of_rat_power) + finally have "expectation (\x. (sketch_rv x)\<^sup>2) \ 3 * (real_of_rat (F 2 as)^2) * ((real p)\<^sup>2-1)\<^sup>2" + by simp + + thus "variance sketch_rv \ 2*(real_of_rat (F 2 as)^2) * ((real p)\<^sup>2-1)\<^sup>2" + by (simp add: variance_eq, simp add:power_mult_distrib b) +qed + +lemma space_omega_1 [simp]: "Sigma_Algebra.space \\<^sub>p = UNIV" + by (simp add:\\<^sub>p_def) + +interpretation \: prob_space "\\<^sub>p" + by (simp add:\\<^sub>p_def prob_space_measure_pmf) + +lemma integrable_\: + fixes f :: "((nat \ nat) \ (nat list)) \ real" + shows "integrable \\<^sub>p f" + unfolding \\<^sub>p_def \_def + by (rule integrable_measure_pmf_finite, auto intro:finite_PiE simp:set_prod_pmf) + +lemma sketch_rv_exp: + assumes "i\<^sub>2 < s\<^sub>2" + assumes "i\<^sub>1 \ {0..1}" + shows "\.expectation (\\. sketch_rv (\ (i\<^sub>1, i\<^sub>2))) = real_of_rat (F 2 as) * ((real p)\<^sup>2 - 1)" +proof - + have "\.expectation (\\. (sketch_rv (\ (i\<^sub>1, i\<^sub>2))) :: real) = expectation sketch_rv" + using integrable_\ integrable_M assms + unfolding \_def \\<^sub>p_def M_def + by (subst expectation_Pi_pmf_slice, auto) + also have "... = (real_of_rat (F 2 as)) * ((real p)\<^sup>2 - 1)" + using exp_sketch_rv by simp + finally show ?thesis by simp +qed + +lemma sketch_rv_var: + assumes "i\<^sub>2 < s\<^sub>2" + assumes "i\<^sub>1 \ {0..1}" + shows "\.variance (\\. sketch_rv (\ (i\<^sub>1, i\<^sub>2))) \ 2 * (real_of_rat (F 2 as))\<^sup>2 * ((real p)\<^sup>2 - 1)\<^sup>2" +proof - + have "\.variance (\\. (sketch_rv (\ (i\<^sub>1, i\<^sub>2)) :: real)) = variance sketch_rv" + using integrable_\ integrable_M assms + unfolding \_def \\<^sub>p_def M_def + by (subst variance_prod_pmf_slice, auto) + also have "... \ 2 * (real_of_rat (F 2 as))\<^sup>2 * ((real p)\<^sup>2 - 1)\<^sup>2" + using var_sketch_rv by simp + finally show ?thesis by simp +qed + +lemma mean_rv_exp: + assumes "i < s\<^sub>2" + shows "\.expectation (\\. mean_rv \ i) = real_of_rat (F 2 as)" +proof - + have a:"(real p)\<^sup>2 > 1" using p_gt_1 by simp + + have "\.expectation (\\. mean_rv \ i) = (\i\<^sub>1 = 0..1. \.expectation (\\. sketch_rv (\ (i\<^sub>1, i)))) / (((real p)\<^sup>2 - 1) * real s\<^sub>1)" + using assms integrable_\ by (simp add:mean_rv_def) + also have "... = (\i\<^sub>1 = 0..1. real_of_rat (F 2 as) * ((real p)\<^sup>2 - 1)) / (((real p)\<^sup>2 - 1) * real s\<^sub>1)" + using sketch_rv_exp[OF assms] by simp + also have "... = real_of_rat (F 2 as)" + using s1_gt_0 a by simp + finally show ?thesis by simp +qed + +lemma mean_rv_var: + assumes "i < s\<^sub>2" + shows "\.variance (\\. mean_rv \ i) \ (real_of_rat (\ * F 2 as))\<^sup>2 / 3" +proof - + have a: "\.indep_vars (\_. borel) (\i\<^sub>1 x. sketch_rv (x (i\<^sub>1, i))) {0..1}" + using assms + unfolding \\<^sub>p_def \_def + by (intro indep_vars_restrict_intro'[where f="fst"]) + (auto simp add: restrict_dfl_def case_prod_beta lessThan_atLeast0) + + have p_sq_ne_1: "(real p)^2 \ 1" + by (metis p_gt_1 less_numeral_extra(4) of_nat_power one_less_power pos2 semiring_char_0_class.of_nat_eq_1_iff) + + have s1_bound: " 6 / (real_of_rat \)\<^sup>2 \ real s\<^sub>1" + unfolding s\<^sub>1_def + by (metis (mono_tags, opaque_lifting) of_rat_ceiling of_rat_divide of_rat_numeral_eq of_rat_power real_nat_ceiling_ge) + + have "\.variance (\\. mean_rv \ i) = \.variance (\\. \i\<^sub>1 = 0..1. sketch_rv (\ (i\<^sub>1, i))) / (((real p)\<^sup>2 - 1) * real s\<^sub>1)\<^sup>2" + unfolding mean_rv_def by (subst \.variance_divide[OF integrable_\], simp) + also have "... = (\i\<^sub>1 = 0..1. \.variance (\\. sketch_rv (\ (i\<^sub>1, i)))) / (((real p)\<^sup>2 - 1) * real s\<^sub>1)\<^sup>2" + by (subst \.var_sum_all_indep[OF _ _ integrable_\ a]) (auto simp: \_def \\<^sub>p_def) + also have "... \ (\i\<^sub>1 = 0..1. 2*(real_of_rat (F 2 as)^2) * ((real p)\<^sup>2-1)\<^sup>2) / (((real p)\<^sup>2 - 1) * real s\<^sub>1)\<^sup>2" + by (rule divide_right_mono, rule sum_mono[OF sketch_rv_var[OF assms]], auto) + also have "... = 2 * (real_of_rat (F 2 as)^2) / real s\<^sub>1" + using p_sq_ne_1 s1_gt_0 by (subst frac_eq_eq, auto simp:power2_eq_square) + also have "... \ 2 * (real_of_rat (F 2 as)^2) / (6 / (real_of_rat \)\<^sup>2)" + using s1_gt_0 \_range by (intro divide_left_mono mult_pos_pos s1_bound) auto + also have "... = (real_of_rat (\ * F 2 as))\<^sup>2 / 3" + by (simp add:of_rat_mult algebra_simps) + finally show ?thesis by simp +qed + +lemma mean_rv_bounds: + assumes "i < s\<^sub>2" + shows "\.prob {\. real_of_rat \ * real_of_rat (F 2 as) < \mean_rv \ i - real_of_rat (F 2 as)\} \ 1/3" +proof (cases "as = []") + case True + then show ?thesis + using assms by (subst mean_rv_def, subst sketch_rv_def, simp add:F_def) +next + case False + hence "F 2 as > 0" using F_gr_0 by auto + + hence a: "0 < real_of_rat (\ * F 2 as)" + using \_range by simp + have [simp]: "(\\. mean_rv \ i) \ borel_measurable \\<^sub>p" + by (simp add:\_def \\<^sub>p_def) + have "\.prob {\. real_of_rat \ * real_of_rat (F 2 as) < \mean_rv \ i - real_of_rat (F 2 as)\} \ + \.prob {\. real_of_rat (\ * F 2 as) \ \mean_rv \ i - real_of_rat (F 2 as)\}" + by (rule \.pmf_mono[OF \\<^sub>p_def], simp add:of_rat_mult) + also have "... \ \.variance (\\. mean_rv \ i) / (real_of_rat (\ * F 2 as))\<^sup>2" + using \.Chebyshev_inequality[where a="real_of_rat (\ * F 2 as)" and f="\\. mean_rv \ i",simplified] + a prob_space_measure_pmf[where p="\"] mean_rv_exp[OF assms] integrable_\ by simp + also have "... \ ((real_of_rat (\ * F 2 as))\<^sup>2/3) / (real_of_rat (\ * F 2 as))\<^sup>2" + by (rule divide_right_mono, rule mean_rv_var[OF assms], simp) + also have "... = 1/3" using a by force + finally show ?thesis by blast +qed + +lemma f2_alg_correct': + "\

(\ in measure_pmf result. \\ - F 2 as\ \ \ * F 2 as) \ 1-of_rat \" +proof - + have a: "\.indep_vars (\_. borel) (\i \. mean_rv \ i) {0..2}" + using s1_gt_0 unfolding \\<^sub>p_def \_def + by (intro indep_vars_restrict_intro'[where f="snd"]) + (auto simp: \\<^sub>p_def \_def mean_rv_def restrict_dfl_def) + + have b: "- 18 * ln (real_of_rat \) \ real s\<^sub>2" + unfolding s\<^sub>2_def using of_nat_ceiling by auto + + have "1 - of_rat \ \ \.prob {\. \median s\<^sub>2 (mean_rv \) - real_of_rat (F 2 as) \ \ of_rat \ * of_rat (F 2 as)}" + using \_range \.median_bound_2[OF _ a b, where \="real_of_rat \ * real_of_rat (F 2 as)" + and \="real_of_rat (F 2 as)"] mean_rv_bounds + by simp + also have "... = \.prob {\. \real_of_rat (result_rv \) - of_rat (F 2 as) \ \ of_rat \ * of_rat (F 2 as)}" + by (simp add:result_rv_def median_restrict lessThan_atLeast0 median_rat[OF s2_gt_0] + mean_rv_def sketch_rv_def of_rat_divide of_rat_sum of_rat_mult of_rat_diff of_rat_power) + also have "... = \.prob {\. \result_rv \ - F 2 as\ \ \ * F 2 as} " + by (simp add:of_rat_less_eq of_rat_mult[symmetric] of_rat_diff[symmetric] set_eq_iff) + finally have "\.prob {y. \result_rv y - F 2 as\ \ \ * F 2 as} \ 1-of_rat \ " by simp + thus ?thesis by (simp add: distr \\<^sub>p_def) +qed + +lemma f2_exact_space_usage': + "AE \ in sketch . bit_count (encode_f2_state \) \ f2_space_usage (n, length as, \, \)" +proof - + have "p \ 2 * max n 3 + 2" + by (subst p_def, rule prime_above_upper_bound) + also have "... \ 2 * n + 8" + by (cases "n \ 2", simp_all) + finally have p_bound: "p \ 2 * n + 8" + by simp + have "bit_count (N\<^sub>e p) \ ereal (2 * log 2 (real p + 1) + 1)" + by (rule exp_golomb_bit_count) + also have "... \ ereal (2 * log 2 (2 * real n + 9) + 1)" + using p_bound by simp + finally have p_bit_count: "bit_count (N\<^sub>e p) \ ereal (2 * log 2 (2 * real n + 9) + 1)" + by simp + + have a: "bit_count (encode_f2_state (s\<^sub>1, s\<^sub>2, p, y, \i\{..1} \ {..2}. + sum_list (map (f2_hash p (y i)) as))) \ ereal (f2_space_usage (n, length as, \, \))" + if a:"y\{..1} \ {..2} \\<^sub>E bounded_degree_polynomials (mod_ring p) 4" for y + proof - + have "y \ extensional ({..1} \ {..2})" using a PiE_iff by blast + hence y_ext: "y \ extensional (set (List.product [0..1] [0..2]))" + by (simp add:lessThan_atLeast0) + + have h_bit_count_aux: "bit_count (P\<^sub>e p 4 (y x)) \ ereal (4 + 4 * log 2 (8 + 2 * real n))" + if b:"x \ set (List.product [0..1] [0..2])" for x + proof - + have "y x \ bounded_degree_polynomials (Field.mod_ring p) 4" + using b a by force + hence "bit_count (P\<^sub>e p 4 (y x)) \ ereal ( real 4 * (log 2 (real p) + 1))" + by (rule bounded_degree_polynomial_bit_count[OF p_gt_1] ) + also have "... \ ereal (real 4 * (log 2 (8 + 2 * real n) + 1) )" + using p_gt_0 p_bound by simp + also have "... \ ereal (4 + 4 * log 2 (8 + 2 * real n))" + by simp + finally show ?thesis + by blast + qed + + have h_bit_count: + "bit_count ((List.product [0..1] [0..2] \\<^sub>e P\<^sub>e p 4) y) \ ereal (real s\<^sub>1 * real s\<^sub>2 * (4 + 4 * log 2 (8 + 2 * real n)))" + using fun_bit_count_est[where e="P\<^sub>e p 4", OF y_ext h_bit_count_aux] + by simp + + have sketch_bit_count_aux: + "bit_count (I\<^sub>e (sum_list (map (f2_hash p (y x)) as))) \ ereal (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1))" (is "?lhs \ ?rhs") + if " x \ {0..1} \ {0..2}" for x + proof - + have "\sum_list (map (f2_hash p (y x)) as)\ \ sum_list (map (abs \ (f2_hash p (y x))) as)" + by (subst map_map[symmetric]) (rule sum_list_abs) + also have "... \ sum_list (map (\_. (int p+1)) as)" + by (rule sum_list_mono) (simp add:p_gt_0) + also have "... = int (length as) * (int p+1)" + by (simp add: sum_list_triv) + also have "... \ int (length as) * (9+2*(int n))" + using p_bound by (intro mult_mono, auto) + finally have "\sum_list (map (f2_hash p (y x)) as)\ \ int (length as) * (9 + 2 * int n)" by simp + hence "?lhs \ ereal (2 * log 2 (real_of_int (2* (int (length as) * (9 + 2 * int n)) + 1)) + 1)" + by (rule int_bit_count_est) + also have "... = ?rhs" by (simp add:algebra_simps) + finally show "?thesis" by simp + qed + + have + "bit_count ((List.product [0..1] [0..2] \\<^sub>e I\<^sub>e) (\i\{..1} \ {..2}. sum_list (map (f2_hash p (y i)) as))) + \ ereal (real (length (List.product [0..1] [0..2]))) * (ereal (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))" + by (intro fun_bit_count_est) + (simp_all add:extensional_def lessThan_atLeast0 sketch_bit_count_aux del:f2_hash.simps) + also have "... = ereal (real s\<^sub>1 * real s\<^sub>2 * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))" + by simp + finally have sketch_bit_count: + "bit_count ((List.product [0..1] [0..2] \\<^sub>e I\<^sub>e) (\i\{..1} \ {..2}. sum_list (map (f2_hash p (y i)) as))) \ + ereal (real s\<^sub>1 * real s\<^sub>2 * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))" by simp + + have "bit_count (encode_f2_state (s\<^sub>1, s\<^sub>2, p, y, \i\{..1} \ {..2}. sum_list (map (f2_hash p (y i)) as))) \ + bit_count (N\<^sub>e s\<^sub>1) + bit_count (N\<^sub>e s\<^sub>2) +bit_count (N\<^sub>e p) + + bit_count ((List.product [0..1] [0..2] \\<^sub>e P\<^sub>e p 4) y) + + bit_count ((List.product [0..1] [0..2] \\<^sub>e I\<^sub>e) (\i\{..1} \ {..2}. sum_list (map (f2_hash p (y i)) as)))" + by (simp add:Let_def s\<^sub>1_def s\<^sub>2_def encode_f2_state_def dependent_bit_count add.assoc) + also have "... \ ereal (2 * log 2 (real s\<^sub>1 + 1) + 1) + ereal (2 * log 2 (real s\<^sub>2 + 1) + 1) + ereal (2 * log 2 (2 * real n + 9) + 1) + + (ereal (real s\<^sub>1 * real s\<^sub>2) * (4 + 4 * log 2 (8 + 2 * real n))) + + (ereal (real s\<^sub>1 * real s\<^sub>2) * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1) ))" + by (intro add_mono exp_golomb_bit_count p_bit_count, auto intro: h_bit_count sketch_bit_count) + also have "... = ereal (f2_space_usage (n, length as, \, \))" + by (simp add:distrib_left add.commute s\<^sub>1_def[symmetric] s\<^sub>2_def[symmetric] Let_def) + finally show "bit_count (encode_f2_state (s\<^sub>1, s\<^sub>2, p, y, \i\{..1} \ {..2}. sum_list (map (f2_hash p (y i)) as))) \ + ereal (f2_space_usage (n, length as, \, \))" + by simp + qed + + have "set_pmf \ = {..1} \ {..2} \\<^sub>E bounded_degree_polynomials (Field.mod_ring p) 4" + by (simp add: \_def set_prod_pmf) (simp add: space_def) + thus ?thesis + by (simp add:mean_rv_alg_sketch AE_measure_pmf_iff del:f2_space_usage.simps, metis a) +qed + +end + + +text \Main results of this section:\ + +theorem f2_alg_correct: + assumes "\ \ {0<..<1}" + assumes "\ > 0" + assumes "set as \ {.. \ fold (\a state. state \ f2_update a) as (f2_init \ \ n) \ f2_result" + shows "\

(\ in measure_pmf \. \\ - F 2 as\ \ \ * F 2 as) \ 1-of_rat \" + using f2_alg_correct'[OF assms(1,2,3)] \_def by auto + +theorem f2_exact_space_usage: + assumes "\ \ {0<..<1}" + assumes "\ > 0" + assumes "set as \ {.. fold (\a state. state \ f2_update a) as (f2_init \ \ n)" + shows "AE \ in M. bit_count (encode_f2_state \) \ f2_space_usage (n, length as, \, \)" + using f2_exact_space_usage'[OF assms(1,2,3)] + by (subst (asm) sketch_def[OF assms(1,2,3)], subst M_def, simp) + +theorem f2_asympotic_space_complexity: + "f2_space_usage \ O[at_top \\<^sub>F at_top \\<^sub>F at_right 0 \\<^sub>F at_right 0](\ (n, m, \, \). + (ln (1 / of_rat \)) / (of_rat \)\<^sup>2 * (ln (real n) + ln (real m)))" + (is "_ \ O[?F](?rhs)") +proof - + define n_of :: "nat \ nat \ rat \ rat \ nat" where "n_of = (\(n, m, \, \). n)" + define m_of :: "nat \ nat \ rat \ rat \ nat" where "m_of = (\(n, m, \, \). m)" + define \_of :: "nat \ nat \ rat \ rat \ rat" where "\_of = (\(n, m, \, \). \)" + define \_of :: "nat \ nat \ rat \ rat \ rat" where "\_of = (\(n, m, \, \). \)" + + define g where "g = (\x. (1/ (of_rat (\_of x))\<^sup>2) * (ln (1 / of_rat (\_of x))) * (ln (real (n_of x)) + ln (real (m_of x))))" + + have evt: "(\x. + 0 < real_of_rat (\_of x) \ 0 < real_of_rat (\_of x) \ + 1/real_of_rat (\_of x) \ \ \ 1/real_of_rat (\_of x) \ \ \ + real (n_of x) \ n \ real (m_of x) \ m\ P x) + \ eventually P ?F" (is "(\x. ?prem x \ _) \ _") + for \ \ n m P + apply (rule eventually_mono[where P="?prem" and Q="P"]) + apply (simp add:\_of_def case_prod_beta' \_of_def n_of_def m_of_def) + apply (intro eventually_conj eventually_prod1' eventually_prod2' + sequentially_inf eventually_at_right_less inv_at_right_0_inf) + by (auto simp add:prod_filter_eq_bot) + + have unit_1: "(\_. 1) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" + using one_le_power + by (intro landau_o.big_mono evt[where \="1"], auto simp add:power_one_over[symmetric]) + + have unit_2: "(\_. 1) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" + by (intro landau_o.big_mono evt[where \="exp 1"]) + (auto intro!:iffD2[OF ln_ge_iff] simp add:abs_ge_iff) + + have unit_3: "(\_. 1) \ O[?F](\x. real (n_of x))" + by (intro landau_o.big_mono evt, auto) + + have unit_4: "(\_. 1) \ O[?F](\x. real (m_of x))" + by (intro landau_o.big_mono evt, auto) + + have unit_5: "(\_. 1) \ O[?F](\x. ln (real (n_of x)))" + by (auto intro!: landau_o.big_mono evt[where n="exp 1"]) + (metis abs_ge_self linorder_not_le ln_ge_iff not_exp_le_zero order.trans) + + have unit_6: "(\_. 1) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" + by (intro landau_sum_1 evt unit_5 iffD2[OF ln_ge_iff], auto) + + have unit_7: "(\_. 1) \ O[?F](\x. 1 / real_of_rat (\_of x))" + by (intro landau_o.big_mono evt[where \="1"], auto) + + have unit_8: "(\_. 1) \ O[?F](g)" + unfolding g_def by (intro landau_o.big_mult_1 unit_1 unit_2 unit_6) + + have unit_9: "(\_. 1) \ O[?F](\x. real (n_of x) * real (m_of x))" + by (intro landau_o.big_mult_1 unit_3 unit_4) + + have " (\x. 6 * (1 / (real_of_rat (\_of x))\<^sup>2)) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" + by (subst landau_o.big.cmult_in_iff, simp_all) + hence l1: "(\x. real (nat \6 / (\_of x)\<^sup>2\)) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" + by (intro landau_real_nat landau_rat_ceil[OF unit_1]) (simp_all add:of_rat_divide of_rat_power) + + have "(\x. - ( ln (real_of_rat (\_of x)))) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" + by (intro landau_o.big_mono evt) (subst ln_div, auto) + hence l2: "(\x. real (nat \- (18 * ln (real_of_rat (\_of x)))\)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" + by (intro landau_real_nat landau_ceil[OF unit_2], simp) + + have l3_aux: " (\x. real (m_of x) * (18 + 4 * real (n_of x)) + 1) \ O[?F](\x. real (n_of x) * real (m_of x))" + by (rule sum_in_bigo[OF _unit_9], subst mult.commute) + (intro landau_o.mult sum_in_bigo, auto simp:unit_3) + + have "(\x. ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1)) \ O[?F](\x. ln (real (n_of x) * real (m_of x)))" + apply (rule landau_ln_2[where a="2"], simp, simp) + apply (rule evt[where m="2" and n="1"]) + apply (metis dual_order.trans mult_left_mono mult_of_nat_commute of_nat_0_le_iff verit_prod_simplify(1)) + using l3_aux by simp + also have "(\x. ln (real (n_of x) * real (m_of x))) \ O[?F](\x. ln (real (n_of x)) + ln(real (m_of x)))" + by (intro landau_o.big_mono evt[where m="1" and n="1"], auto simp add:ln_mult) + finally have l3: "(\x. ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1)) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" + using landau_o.big_trans by simp + + have l4: "(\x. ln (8 + 2 * real (n_of x))) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" + by (intro landau_sum_1 evt[where n="2"] landau_ln_2[where a="2"] iffD2[OF ln_ge_iff]) + (auto intro!: sum_in_bigo simp add:unit_3) + + have l5: "(\x. ln (9 + 2 * real (n_of x))) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" + by (intro landau_sum_1 evt[where n="2"] landau_ln_2[where a="2"] iffD2[OF ln_ge_iff]) + (auto intro!: sum_in_bigo simp add:unit_3) + + have l6: "(\x. ln (real (nat \6 / (\_of x)\<^sup>2\) + 1)) \ O[?F](g)" + unfolding g_def + by (intro landau_o.big_mult_1 landau_ln_3 sum_in_bigo unit_6 unit_2 l1 unit_1, simp) + + have l7: "(\x. ln (9 + 2 * real (n_of x))) \ O[?F](g)" + unfolding g_def + by (intro landau_o.big_mult_1' unit_1 unit_2 l5) + + have l8: "(\x. ln (real (nat \- (18 * ln (real_of_rat (\_of x)))\) + 1) ) \ O[?F](g)" + unfolding g_def + by (intro landau_o.big_mult_1 unit_6 landau_o.big_mult_1' unit_1 landau_ln_3 sum_in_bigo l2 unit_2) simp + + have l9: "(\x. 5 + 4 * ln (8 + 2 * real (n_of x)) / ln 2 + 2 * ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1) / ln 2) + \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" + by (intro sum_in_bigo, auto simp: l3 l4 unit_6) + + have l10: "(\x. real (nat \6 / (\_of x)\<^sup>2\) * real (nat \- (18 * ln (real_of_rat (\_of x)))\) * + (5 + 4 * ln (8 + 2 * real (n_of x)) / ln 2 + 2 * ln(real (m_of x) * (18 + 4 * real (n_of x)) + 1) / ln 2)) + \ O[?F](g)" + unfolding g_def by (intro landau_o.mult, auto simp: l1 l2 l9) + + have "f2_space_usage = (\x. f2_space_usage (n_of x, m_of x, \_of x, \_of x))" + by (simp add:case_prod_beta' n_of_def \_of_def \_of_def m_of_def) + also have "... \ O[?F](g)" + by (auto intro!:sum_in_bigo simp:Let_def log_def l6 l7 l8 l10 unit_8) + also have "... = O[?F](?rhs)" + by (simp add:case_prod_beta' g_def n_of_def \_of_def \_of_def m_of_def) + finally show ?thesis by simp +qed + +end diff --git a/thys/Frequency_Moments/Frequency_Moment_k.thy b/thys/Frequency_Moments/Frequency_Moment_k.thy new file mode 100644 --- /dev/null +++ b/thys/Frequency_Moments/Frequency_Moment_k.thy @@ -0,0 +1,1015 @@ +section \Frequency Moment $k$\ + +theory Frequency_Moment_k + imports + Frequency_Moments + Landau_Ext + Lp.Lp + Median_Method.Median + Product_PMF_Ext +begin + +text \This section contains a formalization of the algorithm for the $k$-th frequency moment. +It is based on the algorithm described in \cite[\textsection 2.1]{alon1999}.\ + +type_synonym fk_state = "nat \ nat \ nat \ nat \ (nat \ nat \ (nat \ nat))" + +fun fk_init :: "nat \ rat \ rat \ nat \ fk_state pmf" where + "fk_init k \ \ n = + do { + let s\<^sub>1 = nat \3 * real k * n powr (1-1/real k) / (real_of_rat \)\<^sup>2\; + let s\<^sub>2 = nat \-18 * ln (real_of_rat \)\; + return_pmf (s\<^sub>1, s\<^sub>2, k, 0, (\_ \ {0..1} \ {0..2}. (0,0))) + }" + +fun fk_update :: "nat \ fk_state \ fk_state pmf" where + "fk_update a (s\<^sub>1, s\<^sub>2, k, m, r) = + do { + coins \ prod_pmf ({0..1} \ {0..2}) (\_. bernoulli_pmf (1/(real m+1))); + return_pmf (s\<^sub>1, s\<^sub>2, k, m+1, \i \ {0..1} \ {0..2}. + if coins i then + (a,0) + else ( + let (x,l) = r i in (x, l + of_bool (x=a)) + ) + ) + }" + +fun fk_result :: "fk_state \ rat pmf" where + "fk_result (s\<^sub>1, s\<^sub>2, k, m, r) = + return_pmf (median s\<^sub>2 (\i\<^sub>2 \ {0..2}. + (\i\<^sub>1\{0..1}. rat_of_nat (let t = snd (r (i\<^sub>1, i\<^sub>2)) + 1 in m * (t^k - (t - 1)^k))) / (rat_of_nat s\<^sub>1)) + )" + +lemma bernoulli_pmf_1: "bernoulli_pmf 1 = return_pmf True" + by (rule pmf_eqI, simp add:indicator_def) + +fun fk_space_usage :: "(nat \ nat \ nat \ rat \ rat) \ real" where + "fk_space_usage (k, n, m, \, \) = ( + let s\<^sub>1 = nat \3*real k* (real n) powr (1-1/ real k) / (real_of_rat \)\<^sup>2 \ in + let s\<^sub>2 = nat \-(18 * ln (real_of_rat \))\ in + 4 + + 2 * log 2 (s\<^sub>1 + 1) + + 2 * log 2 (s\<^sub>2 + 1) + + 2 * log 2 (real k + 1) + + 2 * log 2 (real m + 1) + + s\<^sub>1 * s\<^sub>2 * (2 + 2 * log 2 (real n+1) + 2 * log 2 (real m+1)))" + +definition encode_fk_state :: "fk_state \ bool list option" where + "encode_fk_state = + N\<^sub>e \\<^sub>e (\s\<^sub>1. + N\<^sub>e \\<^sub>e (\s\<^sub>2. + N\<^sub>e \\<^sub>e + N\<^sub>e \\<^sub>e + (List.product [0..1] [0..2] \\<^sub>e (N\<^sub>e \\<^sub>e N\<^sub>e))))" + +lemma "inj_on encode_fk_state (dom encode_fk_state)" +proof - + have "is_encoding encode_fk_state" + by (simp add:encode_fk_state_def) + (intro dependent_encoding exp_golomb_encoding fun_encoding) + + thus ?thesis by (rule encoding_imp_inj) +qed + +text \This is an intermediate non-parallel form @{term "fk_update"} used only in the correctness proof.\ + +fun fk_update_2 :: "'a \ (nat \ 'a \ nat) \ (nat \ 'a \ nat) pmf" where + "fk_update_2 a (m,x,l) = + do { + coin \ bernoulli_pmf (1/(real m+1)); + return_pmf (m+1,if coin then (a,0) else (x, l + of_bool (x=a))) + }" + +definition sketch where "sketch as i = (as ! i, count_list (drop (i+1) as) (as ! i))" + +lemma fk_update_2_distr: + assumes "as \ []" + shows "fold (\x s. s \ fk_update_2 x) as (return_pmf (0,0,0)) = + pmf_of_set {.. (\k. return_pmf (length as, sketch as k))" + using assms +proof (induction as rule:rev_nonempty_induct) + case (single x) + show ?case using single + by (simp add:bind_return_pmf pmf_of_set_singleton bernoulli_pmf_1 lessThan_def sketch_def) +next + case (snoc x xs) + let ?h = "(\xs k. count_list (drop (Suc k) xs) (xs ! k))" + let ?q = "(\xs k. (length xs, sketch xs k))" + + have non_empty: " {.. {}" " {.. {}" using snoc by auto + + have fk_update_2_eta:"fk_update_2 x = (\a. fk_update_2 x (fst a, fst (snd a), snd (snd a)))" + by auto + + have "pmf_of_set {.. (\k. bernoulli_pmf (1 / (real (length xs) + 1)) \ + (\coin. return_pmf (if coin then length xs else k))) = + bernoulli_pmf (1 / (real (length xs) + 1)) \ (\y. pmf_of_set {.. + (\k. return_pmf (if y then length xs else k)))" + by (subst bind_commute_pmf, simp) + also have "... = pmf_of_set {.. (\k. bernoulli_pmf (1 / (real (length xs) + 1)) \ + (\coin. return_pmf (if coin then length xs else k))) = pmf_of_set {..x s. (s \ fk_update_2 x)) (xs@[x]) (return_pmf (0,0,0)) = + (pmf_of_set {.. (\k. return_pmf (length xs, sketch xs k))) \ fk_update_2 x" + using snoc by (simp add:case_prod_beta') + also have "... = (pmf_of_set {.. (\k. return_pmf (length xs, sketch xs k))) \ + (\(m,a,l). bernoulli_pmf (1 / (real m + 1)) \ (\coin. + return_pmf (m + 1, if coin then (x, 0) else (a, (l + of_bool (a = x))))))" + by (subst fk_update_2_eta, subst fk_update_2.simps, simp add:case_prod_beta') + also have "... = pmf_of_set {.. (\k. bernoulli_pmf (1 / (real (length xs) + 1)) \ + (\coin. return_pmf (length xs + 1, if coin then (x, 0) else (xs ! k, ?h xs k + of_bool (xs ! k = x)))))" + by (subst bind_assoc_pmf, simp add: bind_return_pmf sketch_def) + also have "... = pmf_of_set {.. (\k. bernoulli_pmf (1 / (real (length xs) + 1)) \ + (\coin. return_pmf (if coin then length xs else k) \ (\k'. return_pmf (?q (xs@[x]) k'))))" + using non_empty + by (intro bind_pmf_cong, auto simp add:bind_return_pmf nth_append count_list_append sketch_def) + also have "... = pmf_of_set {.. (\k. bernoulli_pmf (1 / (real (length xs) + 1)) \ + (\coin. return_pmf (if coin then length xs else k))) \ (\k'. return_pmf (?q (xs@[x]) k'))" + by (subst bind_assoc_pmf, subst bind_assoc_pmf, simp) + also have "... = pmf_of_set {.. (\k'. return_pmf (?q (xs@[x]) k'))" + by (subst b, simp) + finally show ?case by simp +qed + +context + fixes \ \ :: rat + fixes n k :: nat + fixes as + assumes k_ge_1: "k \ 1" + assumes \_range: "\ \ {0<..<1}" + assumes \_range: "\ > 0" + assumes as_range: "set as \ {..1 where "s\<^sub>1 = nat \3 * real k * (real n) powr (1-1/real k) / (real_of_rat \)\<^sup>2\" +definition s\<^sub>2 where "s\<^sub>2 = nat \-(18 * ln (real_of_rat \))\" + +definition "M\<^sub>1 = {(u, v). v < count_list as u}" +definition "\\<^sub>1 = measure_pmf (pmf_of_set M\<^sub>1)" + +definition "M\<^sub>2 = prod_pmf ({0..1} \ {0..2}) (\_. pmf_of_set M\<^sub>1)" +definition "\\<^sub>2 = measure_pmf M\<^sub>2" + +interpretation prob_space "\\<^sub>1" + unfolding \\<^sub>1_def by (simp add:prob_space_measure_pmf) + +interpretation \\<^sub>2:prob_space "\\<^sub>2" + unfolding \\<^sub>2_def by (simp add:prob_space_measure_pmf) + +lemma split_space: "(\a\M\<^sub>1. f (snd a)) = (\u \ set as. (\v \{0..u. {u} \ {v. v < count_list as u})" + + have a: "inj_on snd (A x)" for x + by (simp add:A_def inj_on_def) + + have "\u v. u < count_list as v \ v \ set as" + by (subst count_list_gr_1, force) + hence "M\<^sub>1 = \ (A ` set as)" + by (auto simp add:set_eq_iff A_def M\<^sub>1_def) + hence "(\a\M\<^sub>1. f (snd a)) = sum (f \ snd) (\ (A ` set as))" + by (intro sum.cong, auto) + also have "... = sum (\x. sum (f \ snd) (A x)) (set as)" + by (rule sum.UNION_disjoint, simp, simp add:A_def, simp add:A_def, blast) + also have "... = sum (\x. sum f (snd ` A x)) (set as)" + by (intro sum.cong, auto simp add:sum.reindex[OF a]) + also have "... = (\u \ set as. (\v \{0.. []" + shows fin_space: "finite M\<^sub>1" + and non_empty_space: "M\<^sub>1 \ {}" + and card_space: "card M\<^sub>1 = length as" +proof - + have "M\<^sub>1 \ set as \ {k. k < length as}" + proof (rule subsetI) + fix x + assume a:"x \ M\<^sub>1" + have "fst x \ set as" + using a by (simp add:case_prod_beta count_list_gr_1 M\<^sub>1_def) + moreover have "snd x < length as" + using a count_le_length order_less_le_trans + by (simp add:case_prod_beta M\<^sub>1_def) fast + ultimately show "x \ set as \ {k. k < length as}" + by (simp add:mem_Times_iff) + qed + thus fin_space: "finite M\<^sub>1" + using finite_subset by blast + + have "(as ! 0, 0) \ M\<^sub>1" + using assms(1) unfolding M\<^sub>1_def + by (simp, metis count_list_gr_1 gr0I length_greater_0_conv not_one_le_zero nth_mem) + thus "M\<^sub>1 \ {}" by blast + + show "card M\<^sub>1 = length as" + using fin_space split_space[where f="\_. (1::nat)"] + by (simp add:sum_count_set[where X="set as" and xs="as", simplified]) +qed + +lemma + assumes "as \ []" + shows integrable_1: "integrable \\<^sub>1 (f :: _ \ real)" and + integrable_2: "integrable \\<^sub>2 (g :: _ \ real)" +proof - + have fin_omega: "finite (set_pmf (pmf_of_set M\<^sub>1))" + using fin_space[OF assms] non_empty_space[OF assms] by auto + thus "integrable \\<^sub>1 f" + unfolding \\<^sub>1_def + by (rule integrable_measure_pmf_finite) + + have "finite (set_pmf M\<^sub>2)" + unfolding M\<^sub>2_def using fin_omega + by (subst set_prod_pmf) (auto intro:finite_PiE) + + thus "integrable \\<^sub>2 g" + unfolding \\<^sub>2_def by (intro integrable_measure_pmf_finite) +qed + +lemma sketch_distr: + assumes "as \ []" + shows "pmf_of_set {.. (\k. return_pmf (sketch as k)) = pmf_of_set M\<^sub>1" +proof - + have "x < y \ y < length as \ + count_list (drop (y+1) as) (as ! y) < count_list (drop (x+1) as) (as ! y)" for x y + by (intro count_list_lt_suffix suffix_drop_drop, simp_all) + (metis Suc_diff_Suc diff_Suc_Suc diff_add_inverse lessI less_natE) + hence a1: "inj_on (sketch as) {k. k < length as}" + unfolding sketch_def by (intro inj_onI) (metis Pair_inject mem_Collect_eq nat_neq_iff) + + have "x < length as \ count_list (drop (x+1) as) (as ! x) < count_list as (as ! x)" for x + by (rule count_list_lt_suffix, auto simp add:suffix_drop) + hence "sketch as ` {k. k < length as} \ M\<^sub>1" + by (intro image_subsetI, simp add:sketch_def M\<^sub>1_def) + moreover have "card M\<^sub>1 \ card (sketch as ` {k. k < length as})" + by (simp add: card_space[OF assms(1)] card_image[OF a1]) + ultimately have "sketch as ` {k. k < length as} = M\<^sub>1" + using fin_space[OF assms(1)] by (intro card_seteq, simp_all) + hence "bij_betw (sketch as) {k. k < length as} M\<^sub>1" + using a1 by (simp add:bij_betw_def) + hence "map_pmf (sketch as) (pmf_of_set {k. k < length as}) = pmf_of_set M\<^sub>1" + using assms by (intro map_pmf_of_set_bij_betw, auto) + thus ?thesis by (simp add: sketch_def map_pmf_def lessThan_def) +qed + +lemma fk_update_distr: + "fold (\x s. s \ fk_update x) as (fk_init k \ \ n) = + prod_pmf ({0..1} \ {0..2}) (\_. fold (\x s. s \ fk_update_2 x) as (return_pmf (0,0,0))) + \ (\x. return_pmf (s\<^sub>1,s\<^sub>2,k, length as, \i\{0..1}\{0..2}. snd (x i)))" +proof (induction as rule:rev_induct) + case Nil + then show ?case + by (auto simp:Let_def s\<^sub>1_def[symmetric] s\<^sub>2_def[symmetric] bind_return_pmf) +next + case (snoc x xs) + + have fk_update_2_eta:"fk_update_2 x = (\a. fk_update_2 x (fst a, fst (snd a), snd (snd a)))" + by auto + + have a: "fk_update x (s\<^sub>1, s\<^sub>2, k, length xs, \i\{0..1} \ {0..2}. snd (f i)) = + prod_pmf ({0..1} \ {0..2}) (\i. fk_update_2 x (f i)) \ + (\a. return_pmf (s\<^sub>1,s\<^sub>2, k, Suc (length xs), \i\{0..1} \ {0..2}. snd (a i)))" + if b: "f \ set_pmf (prod_pmf ({0..1} \ {0..2}) + (\_. fold (\a s. s \ fk_update_2 a) xs (return_pmf (0, 0, 0))))" for f + proof - + have c:"fst (f i) = length xs" if d:"i \ {0..1} \{0..2}" for i + proof (cases "xs = []") + case True + then show ?thesis using b d by (simp add: set_Pi_pmf) + next + case False + hence "{.. {}" by force + thus ?thesis using b d + by (simp add:set_Pi_pmf fk_update_2_distr[OF False] PiE_dflt_def) force + qed + show ?thesis + apply (subst fk_update_2_eta, subst fk_update_2.simps, simp) + apply (simp add: Pi_pmf_bind_return[where d'="undefined"] bind_assoc_pmf) + apply (rule bind_pmf_cong, simp add:c cong:Pi_pmf_cong) + by (auto simp add:bind_return_pmf case_prod_beta) + qed + + have "fold (\x s. s \ fk_update x) (xs @ [x]) (fk_init k \ \ n) = + prod_pmf ({0..1} \ {0..2}) (\_. fold (\x s. s \ fk_update_2 x) xs (return_pmf (0,0,0))) + \ (\\. return_pmf (s\<^sub>1,s\<^sub>2,k, length xs, \i\{0..1}\{0..2}. snd (\ i)) \ fk_update x)" + using snoc + by (simp add:restrict_def bind_assoc_pmf del:fk_init.simps) + also have "... = prod_pmf ({0..1} \ {0..2}) + (\_. fold (\a s. s \ fk_update_2 a) xs (return_pmf (0, 0, 0))) \ + (\f. prod_pmf ({0..1} \ {0..2}) (\i. fk_update_2 x (f i)) \ + (\a. return_pmf (s\<^sub>1, s\<^sub>2, k, Suc (length xs), \i\{0..1} \ {0..2}. snd (a i))))" + using a + by (intro bind_pmf_cong, simp_all add:bind_return_pmf del:fk_update.simps) + also have "... = prod_pmf ({0..1} \ {0..2}) + (\_. fold (\a s. s \ fk_update_2 a) xs (return_pmf (0, 0, 0))) \ + (\f. prod_pmf ({0..1} \ {0..2}) (\i. fk_update_2 x (f i))) \ + (\a. return_pmf (s\<^sub>1, s\<^sub>2, k, Suc (length xs), \i\{0..1} \ {0..2}. snd (a i)))" + by (simp add:bind_assoc_pmf) + also have "... = (prod_pmf ({0..1} \ {0..2}) + (\_. fold (\a s. s \ fk_update_2 a) (xs@[x]) (return_pmf (0,0,0))) + \ (\a. return_pmf (s\<^sub>1,s\<^sub>2,k, length (xs@[x]), \i\{0..1}\{0..2}. snd (a i))))" + by (simp, subst Pi_pmf_bind, auto) + + finally show ?case by blast +qed + +lemma power_diff_sum: + fixes a b :: "'a :: {comm_ring_1,power}" + assumes "k > 0" + shows "a^k - b^k = (a-b) * (\i = 0.. insert m {Suc m..i. a * (a^i * b^(k-1-i))) {0..i. b * (a^i * b^(k-1-i))) {0..i. (a^i * b^(k-i))) \ (\i. i+1)) {0..i. (a^i * (b^(1+(k-1-i))))) {0..i. (a^i * b^(k-i))) \ (\i. i+1)) {0..i. (a^i * b^(k-i))) {0..i. (a^i * b^(k-i))) (insert k {1..i. (a^i * b^(k-i))) (insert 0 {Suc 0.. 0" + assumes "(a :: real) \ b" + assumes "b \ 0" + shows "a^k -b^k \ (a-b) * k * a^(k-1)" +proof - + have " \i. i < k \ a ^ i * b ^ (k - 1 - i) \ a ^ i * a ^ (k-1-i)" + using assms by (intro mult_left_mono power_mono) auto + also have "\i. i < k \ a ^ i * a ^ (k - 1 - i) = a ^ (k - Suc 0)" + using assms(1) by (subst power_add[symmetric], simp) + finally have a: "\i. i < k \ a ^ i * b ^ (k - 1 - i) \ a ^ (k - Suc 0)" + by blast + have "a^k - b^k = (a-b) * (\i = 0.. (a-b) * (\i = 0..Specialization of the Hoelder inquality for sums.\ +lemma Holder_inequality_sum: + assumes "p > (0::real)" "q > 0" "1/p + 1/q = 1" + assumes "finite A" + shows "\\x\A. f x * g x\ \ (\x\A. \f x\ powr p) powr (1/p) * (\x\A. \g x\ powr q) powr (1/q)" +proof - + have "\LINT x|count_space A. f x * g x\ \ + (LINT x|count_space A. \f x\ powr p) powr (1 / p) * + (LINT x|count_space A. \g x\ powr q) powr (1 / q)" + using assms integrable_count_space + by (intro Lp.Holder_inequality, auto) + thus ?thesis + using assms by (simp add: lebesgue_integral_count_space_finite[symmetric]) +qed + +lemma real_count_list_pos: + assumes "x \ set as" + shows "real (count_list as x) > 0" + using count_list_gr_1 assms by force + +lemma fk_estimate: + assumes "as \ []" + shows "length as * of_rat (F (2*k-1) as) \ n powr (1 - 1 / real k) * (of_rat (F k as))^2" + (is "?lhs \ ?rhs") +proof (cases "k \ 2") + case True + define M where "M = Max (count_list as ` set as)" + have "M \ count_list as ` set as" + unfolding M_def using assms by (intro Max_in, auto) + then obtain m where m_in: "m \ set as" and m_def: "M = count_list as m" + by blast + + have a: "real M > 0" using m_in count_list_gr_1 by (simp add:m_def, force) + have b: "2*k-1 = (k-1) + k" by simp + + have " 0 < real (count_list as m)" + using m_in count_list_gr_1 by force + hence "M powr k = real (count_list as m) ^ k" + by (simp add: powr_realpow m_def) + also have "... \ (\x\set as. real (count_list as x) ^ k)" + using m_in by (intro member_le_sum, simp_all) + also have "... \ real_of_rat (F k as)" + by (simp add:F_def of_rat_sum of_rat_power) + finally have d: "M powr k \ real_of_rat (F k as)" by simp + + have e: "0 \ real_of_rat (F k as)" + using F_gr_0[OF assms(1)] by (simp add: order_le_less) + + have "real (k - 1) / real k + 1 = real (k - 1) / real k + real k / real k" + using assms True by simp + also have "... = real (2 * k - 1) / real k" + using b by (subst add_divide_distrib[symmetric], force) + finally have f: "real (k - 1) / real k + 1 = real (2 * k - 1) / real k" + by blast + + have "real_of_rat (F (2*k-1) as) = + (\x\set as. real (count_list as x) ^ (k - 1) * real (count_list as x) ^ k)" + using b by (simp add:F_def of_rat_sum sum_distrib_left of_rat_mult power_add of_rat_power) + also have "... \ (\x\set as. real M ^ (k - 1) * real (count_list as x) ^ k)" + by (intro sum_mono mult_right_mono power_mono of_nat_mono) (auto simp:M_def) + also have "... = M powr (k-1) * of_rat (F k as)" using a + by (simp add:sum_distrib_left F_def of_rat_mult of_rat_sum of_rat_power powr_realpow) + also have "... = (M powr k) powr (real (k - 1) / real k) * of_rat (F k as) powr 1" + using e by (simp add:powr_powr) + also have "... \ (real_of_rat (F k as)) powr ((k-1)/k) * (real_of_rat (F k as) powr 1)" + using d by (intro mult_right_mono powr_mono2, auto) + also have "... = (real_of_rat (F k as)) powr ((2*k-1) / k)" + by (subst powr_add[symmetric], subst f, simp) + finally have a: "real_of_rat (F (2*k-1) as) \ (real_of_rat (F k as)) powr ((2*k-1) / k)" + by blast + + have g: "card (set as) \ n" + using card_mono[OF _ as_range] by simp + + have "length as = abs (sum (\x. real (count_list as x)) (set as))" + by (subst of_nat_sum[symmetric], simp add: sum_count_set) + also have "... \ card (set as) powr ((k-Suc 0)/k) * + (sum (\x. \real (count_list as x)\ powr k) (set as)) powr (1/k)" + using assms True + by (intro Holder_inequality_sum[where p="k/(k-1)" and q="k" and f="\_.1", simplified]) + (auto simp add:algebra_simps add_divide_distrib[symmetric]) + also have "... = (card (set as)) powr ((k-1) / real k) * of_rat (F k as) powr (1/ k)" + using real_count_list_pos + by (simp add:F_def of_rat_sum of_rat_power powr_realpow) + also have "... = (card (set as)) powr (1 - 1 / real k) * of_rat (F k as) powr (1/ k)" + using k_ge_1 + by (subst of_nat_diff[OF k_ge_1], subst diff_divide_distrib, simp) + also have "... \ n powr (1 - 1 / real k) * of_rat (F k as) powr (1/ k)" + using k_ge_1 g + by (intro mult_right_mono powr_mono2, auto) + finally have h: "length as \ n powr (1 - 1 / real k) * of_rat (F k as) powr (1/real k)" + by blast + + have i:"1 / real k + real (2 * k - 1) / real k = real 2" + using True by (subst add_divide_distrib[symmetric], simp_all add:of_nat_diff) + + have "?lhs \ n powr (1 - 1/k) * of_rat (F k as) powr (1/k) * (of_rat (F k as)) powr ((2*k-1) / k)" + using a h F_ge_0 by (intro mult_mono mult_nonneg_nonneg, auto) + also have "... = ?rhs" + using i F_gr_0[OF assms] by (simp add:powr_add[symmetric] powr_realpow[symmetric]) + finally show ?thesis + by blast +next + case False + have "n = 0 \ False" + using as_range assms by auto + hence "n > 0" + by auto + moreover have "k = 1" + using assms k_ge_1 False by linarith + moreover have "length as = real_of_rat (F (Suc 0) as)" + by (simp add:F_def sum_count_set of_nat_sum[symmetric] del:of_nat_sum) + ultimately show ?thesis + by (simp add:power2_eq_square) +qed + +definition result + where "result a = of_nat (length as) * of_nat (Suc (snd a) ^ k - snd a ^ k)" + +lemma result_exp_1: + assumes "as \ []" + shows "expectation result = real_of_rat (F k as)" +proof - + have "expectation result = (\a\M\<^sub>1. result a * pmf (pmf_of_set M\<^sub>1) a)" + unfolding \\<^sub>1_def using non_empty_space assms fin_space + by (subst integral_measure_pmf_real) auto + also have "... = (\a\M\<^sub>1. result a / real (length as))" + using non_empty_space assms fin_space card_space by simp + also have "... = (\a\M\<^sub>1. real (Suc (snd a) ^ k - snd a ^ k))" + using assms by (simp add:result_def) + also have "... = (\u\set as. \v = 0..u\set as. real (count_list as u)^k)" + using k_ge_1 by (subst sum_Suc_diff') (auto simp add:zero_power) + also have "... = of_rat (F k as)" + by (simp add:F_def of_rat_sum of_rat_power) + finally show ?thesis by simp +qed + +lemma result_var_1: + assumes "as \ []" + shows "variance result \ (of_rat (F k as))\<^sup>2 * k * n powr (1 - 1 / real k)" +proof - + have k_gt_0: "k > 0" using k_ge_1 by linarith + + have c:"real (Suc v ^ k) - real (v ^ k) \ k * real (count_list as a) ^ (k - Suc 0)" + if c_1: "v < count_list as a" for a v + proof - + have "real (Suc v ^ k) - real (v ^ k) \ (real (v+1) - real v) * k * (1 + real v) ^ (k - Suc 0)" + using k_gt_0 power_diff_est[where a="Suc v" and b="v"] by simp + moreover have "(real (v+1) - real v) = 1" by auto + ultimately have "real (Suc v ^ k) - real (v ^ k) \ k * (1 + real v) ^ (k - Suc 0)" + by auto + also have "... \ k * real (count_list as a) ^ (k- Suc 0)" + using c_1 by (intro mult_left_mono power_mono, auto) + finally show ?thesis by blast + qed + + have "length as * (\a\ M\<^sub>1. (real (Suc (snd a) ^ k - (snd a) ^ k))\<^sup>2) = + length as * (\a\ set as. (\v \ {0.. length as * (\a\ set as. (\v \ {0..a\ set as. real (count_list as a) ^ (k-1) * + (\v \ {0..a\ set as. real (count_list as a ^ (2*k-1)))" + using assms k_ge_1 + by (subst sum_Suc_diff', auto simp: zero_power[OF k_gt_0] mult_2 power_add[symmetric]) + also have "... = k * (length as * of_rat (F (2*k-1) as))" + by (simp add:sum_distrib_left[symmetric] F_def of_rat_sum of_rat_power) + also have "... \ k * (of_rat (F k as)^2 * n powr (1 - 1 / real k))" + using fk_estimate[OF assms] by (intro mult_left_mono) (auto simp: mult.commute) + finally have b: "real (length as) * (\a\ M\<^sub>1. (real (Suc (snd a) ^ k - (snd a) ^ k))\<^sup>2) \ + k * ((of_rat (F k as))\<^sup>2 * n powr (1 - 1 / real k))" + by blast + + have "expectation (\\. (result \ :: real)^2) - (expectation result)^2 \ expectation (\\. result \^2)" + by simp + also have "... = (\a\M\<^sub>1. (length as * real (Suc (snd a) ^ k - snd a ^ k))\<^sup>2 * pmf (pmf_of_set M\<^sub>1) a)" + using fin_space non_empty_space assms unfolding \\<^sub>1_def result_def + by (subst integral_measure_pmf_real[where A="M\<^sub>1"], auto) + also have "... = (\a\M\<^sub>1. length as * (real (Suc (snd a) ^ k - snd a ^ k))\<^sup>2)" + using assms non_empty_space fin_space by (subst pmf_of_set) + (simp_all add:card_space power_mult_distrib power2_eq_square ac_simps) + also have "... \ k * ((of_rat (F k as))\<^sup>2 * n powr (1 - 1 / real k))" + using b by (simp add:sum_distrib_left[symmetric]) + also have "... = of_rat (F k as)^2 * k * n powr (1 - 1 / real k)" + by (simp add:ac_simps) + finally have "expectation (\\. result \^2) - (expectation result)^2 \ + of_rat (F k as)^2 * k * n powr (1 - 1 / real k)" + by blast + + thus ?thesis + using integrable_1[OF assms] by (simp add:variance_eq) +qed + +theorem fk_alg_sketch: + assumes "as \ []" + shows "fold (\a state. state \ fk_update a) as (fk_init k \ \ n) = + map_pmf (\x. (s\<^sub>1,s\<^sub>2,k,length as, x)) M\<^sub>2" (is "?lhs = ?rhs") +proof - + have "?lhs = prod_pmf ({0..1} \ {0..2}) + (\_. fold (\x s. s \ fk_update_2 x) as (return_pmf (0, 0, 0))) \ + (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, \i\{0..1} \ {0..2}. snd (x i)))" + by (subst fk_update_distr, simp) + also have "... = prod_pmf ({0..1} \ {0..2}) (\_. pmf_of_set {.. + (\k. return_pmf (length as, sketch as k))) \ + (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, \i\{0..1} \ {0..2}. snd (x i)))" + by (subst fk_update_2_distr[OF assms], simp) + also have "... = prod_pmf ({0..1} \ {0..2}) (\_. pmf_of_set {.. + (\k. return_pmf (sketch as k)) \ (\s. return_pmf (length as, s))) \ + (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, \i\{0..1} \ {0..2}. snd (x i)))" + by (subst bind_assoc_pmf, subst bind_return_pmf, simp) + also have "... = prod_pmf ({0..1} \ {0..2}) (\_. pmf_of_set {.. + (\k. return_pmf (sketch as k))) \ + (\x. return_pmf (\i \ {0..1} \ {0..2}. (length as, x i))) \ + (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, \i\{0..1} \ {0..2}. snd (x i)))" + by (subst Pi_pmf_bind_return[where d'="undefined"], simp, simp add:restrict_def) + also have "... = prod_pmf ({0..1} \ {0..2}) (\_. pmf_of_set {.. + (\k. return_pmf (sketch as k))) \ + (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, restrict x ({0..1} \ {0..2})))" + by (subst bind_assoc_pmf, simp add:bind_return_pmf cong:restrict_cong) + also have "... = M\<^sub>2 \ + (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, restrict x ({0..1} \ {0..2})))" + by (subst sketch_distr[OF assms], simp add:M\<^sub>2_def) + also have "... = M\<^sub>2 \ (\x. return_pmf (s\<^sub>1, s\<^sub>2, k, length as, x))" + by (rule bind_pmf_cong, auto simp add:PiE_dflt_def M\<^sub>2_def set_Pi_pmf) + also have "... = ?rhs" + by (simp add:map_pmf_def) + finally show ?thesis by simp +qed + +definition mean_rv + where "mean_rv \ i\<^sub>2 = (\i\<^sub>1 = 0..1. result (\ (i\<^sub>1, i\<^sub>2))) / of_nat s\<^sub>1" + +definition median_rv + where "median_rv \ = median s\<^sub>2 (\i\<^sub>2. mean_rv \ i\<^sub>2)" + +lemma fk_alg_correct': + defines "M \ fold (\a state. state \ fk_update a) as (fk_init k \ \ n) \ fk_result" + shows "\

(\ in measure_pmf M. \\ - F k as\ \ \ * F k as) \ 1 - of_rat \" +proof (cases "as = []") + case True + have a: "nat \- (18 * ln (real_of_rat \))\ > 0" using \_range by simp + show ?thesis using True \_range + by (simp add:F_def M_def bind_return_pmf median_const[OF a] Let_def) +next + case False + + have "set as \ {}" using assms False by blast + hence n_nonzero: "n > 0" using as_range by fastforce + + have fk_nonzero: "F k as > 0" + using F_gr_0[OF False] by simp + + have s1_nonzero: "s\<^sub>1 > 0" + using \_range k_ge_1 n_nonzero by (simp add:s\<^sub>1_def) + have s2_nonzero: "s\<^sub>2 > 0" + using \_range by (simp add:s\<^sub>2_def) + + have real_of_rat_mean_rv: "\x i. mean_rv x = (\i. real_of_rat (mean_rv x i))" + by (rule ext, simp add:of_rat_divide of_rat_sum of_rat_mult result_def mean_rv_def) + have real_of_rat_median_rv: "\x. median_rv x = real_of_rat (median_rv x)" + unfolding median_rv_def using s2_nonzero + by (subst real_of_rat_mean_rv, simp add: median_rat median_restrict) + + + have space_\\<^sub>2: "space \\<^sub>2 = UNIV" by (simp add:\\<^sub>2_def) + + have fk_result_eta: "fk_result = (\(x,y,z,u,v). fk_result (x,y,z,u,v))" + by auto + + have a:"fold (\x state. state \ fk_update x) as (fk_init k \ \ n) = + map_pmf (\x. (s\<^sub>1,s\<^sub>2,k,length as, x)) M\<^sub>2" + by (subst fk_alg_sketch[OF False]) (simp add:s\<^sub>1_def[symmetric] s\<^sub>2_def[symmetric]) + + have "M = map_pmf (\x. (s\<^sub>1, s\<^sub>2, k, length as, x)) M\<^sub>2 \ fk_result" + by (subst M_def, subst a, simp) + also have "... = M\<^sub>2 \ return_pmf \ median_rv" + by (subst fk_result_eta) + (auto simp add:map_pmf_def bind_assoc_pmf bind_return_pmf median_rv_def mean_rv_def comp_def + M\<^sub>1_def result_def median_restrict) + finally have b: "M = M\<^sub>2 \ return_pmf \ median_rv" + by simp + + have result_exp: + "i\<^sub>1 < s\<^sub>1 \ i\<^sub>2 < s\<^sub>2 \ \\<^sub>2.expectation (\x. result (x (i\<^sub>1, i\<^sub>2))) = real_of_rat (F k as)" + for i\<^sub>1 i\<^sub>2 + unfolding \\<^sub>2_def M\<^sub>2_def + using integrable_1[OF False] result_exp_1[OF False] + by (subst expectation_Pi_pmf_slice, auto simp:\\<^sub>1_def) + + + have result_var: "\\<^sub>2.variance (\\. result (\ (i\<^sub>1, i\<^sub>2))) \ of_rat (\ * F k as)^2 * real s\<^sub>1 / 3" + if result_var_assms: "i\<^sub>1 < s\<^sub>1" "i\<^sub>2 < s\<^sub>2" for i\<^sub>1 i\<^sub>2 + proof - + have "3 * real k * n powr (1 - 1 / real k) = + (of_rat \)\<^sup>2 * (3 * real k * n powr (1 - 1 / real k) / (of_rat \)\<^sup>2)" + using \_range by simp + also have "... \ (real_of_rat \)\<^sup>2 * (real s\<^sub>1)" + unfolding s\<^sub>1_def + by (intro mult_mono of_nat_ceiling, simp_all) + finally have f2_var_2: "3 * real k * n powr (1 - 1 / real k) \ (of_rat \)\<^sup>2 * (real s\<^sub>1)" + by blast + + have "\\<^sub>2.variance (\\. result (\ (i\<^sub>1, i\<^sub>2)) :: real) = variance result" + using result_var_assms integrable_1[OF False] + unfolding \\<^sub>2_def M\<^sub>2_def \\<^sub>1_def + by (subst variance_prod_pmf_slice, auto) + also have "... \ of_rat (F k as)^2 * real k * n powr (1 - 1 / real k)" + using assms False result_var_1 \\<^sub>1_def by simp + also have "... = + of_rat (F k as)^2 * (real k * n powr (1 - 1 / real k))" + by (simp add:ac_simps) + also have "... \ of_rat (F k as)^2 * (of_rat \^2 * (real s\<^sub>1 / 3))" + using f2_var_2 by (intro mult_left_mono, auto) + also have "... = of_rat (F k as * \)^2 * (real s\<^sub>1 / 3)" + by (simp add: of_rat_mult power_mult_distrib) + also have "... = of_rat (\ * F k as)^2 * real s\<^sub>1 / 3" + by (simp add:ac_simps) + finally show ?thesis + by simp + qed + + have mean_rv_exp: "\\<^sub>2.expectation (\\. mean_rv \ i) = real_of_rat (F k as)" + if mean_rv_exp_assms: "i < s\<^sub>2" for i + proof - + have "\\<^sub>2.expectation (\\. mean_rv \ i) = \\<^sub>2.expectation (\\. \n = 0..1. result (\ (n, i)) / real s\<^sub>1)" + by (simp add:mean_rv_def sum_divide_distrib) + also have "... = (\n = 0..1. \\<^sub>2.expectation (\\. result (\ (n, i))) / real s\<^sub>1)" + using integrable_2[OF False] + by (subst Bochner_Integration.integral_sum, auto) + also have "... = of_rat (F k as)" + using s1_nonzero mean_rv_exp_assms + by (simp add:result_exp) + finally show ?thesis by simp + qed + + have mean_rv_var: "\\<^sub>2.variance (\\. mean_rv \ i) \ real_of_rat (\ * F k as)^2/3" + if mean_rv_var_assms: "i < s\<^sub>2" for i + proof - + have a:"\\<^sub>2.indep_vars (\_. borel) (\n x. result (x (n, i)) / real s\<^sub>1) {0..1}" + unfolding \\<^sub>2_def M\<^sub>2_def using mean_rv_var_assms + by (intro indep_vars_restrict_intro'[where f="fst"], simp, simp add:restrict_dfl_def, simp, simp) + have "\\<^sub>2.variance (\\. mean_rv \ i) = \\<^sub>2.variance (\\. \j = 0..1. result (\ (j, i)) / real s\<^sub>1)" + by (simp add:mean_rv_def sum_divide_distrib) + also have "... = (\j = 0..1. \\<^sub>2.variance (\\. result (\ (j, i)) / real s\<^sub>1))" + using a integrable_2[OF False] + by (subst \\<^sub>2.var_sum_all_indep, auto simp add:\\<^sub>2_def) + also have "... = (\j = 0..1. \\<^sub>2.variance (\\. result (\ (j, i))) / real s\<^sub>1^2)" + using integrable_2[OF False] + by (subst \\<^sub>2.variance_divide, auto) + also have "... \ (\j = 0..1. ((real_of_rat (\ * F k as))\<^sup>2 * real s\<^sub>1 / 3) / (real s\<^sub>1^2))" + using result_var[OF _ mean_rv_var_assms] + by (intro sum_mono divide_right_mono, auto) + also have "... = real_of_rat (\ * F k as)^2/3" + using s1_nonzero + by (simp add:algebra_simps power2_eq_square) + finally show ?thesis by simp + qed + + have "\\<^sub>2.prob {y. of_rat (\ * F k as) < \mean_rv y i - real_of_rat (F k as)\} \ 1/3" + (is "?lhs \ _") if c_assms: "i < s\<^sub>2" for i + proof - + define a where "a = real_of_rat (\ * F k as)" + have c: "0 < a" unfolding a_def + using assms \_range fk_nonzero + by (metis zero_less_of_rat_iff mult_pos_pos) + have "?lhs \ \\<^sub>2.prob {y \ space \\<^sub>2. a \ \mean_rv y i - \\<^sub>2.expectation (\\. mean_rv \ i)\}" + by (intro \\<^sub>2.pmf_mono[OF \\<^sub>2_def], simp add:a_def mean_rv_exp[OF c_assms] space_\\<^sub>2) + also have "... \ \\<^sub>2.variance (\\. mean_rv \ i)/a^2" + by (intro \\<^sub>2.Chebyshev_inequality integrable_2 c False) (simp add:\\<^sub>2_def) + also have "... \ 1/3" using c + using mean_rv_var[OF c_assms] + by (simp add:algebra_simps, simp add:a_def) + finally show ?thesis + by blast + qed + + moreover have "\\<^sub>2.indep_vars (\_. borel) (\i \. mean_rv \ i) {0..2}" + using s1_nonzero unfolding \\<^sub>2_def M\<^sub>2_def + by (intro indep_vars_restrict_intro'[where f="snd"] finite_cartesian_product) + (simp_all add:mean_rv_def restrict_dfl_def space_\\<^sub>2) + moreover have " - (18 * ln (real_of_rat \)) \ real s\<^sub>2" + by (simp add:s\<^sub>2_def, linarith) + ultimately have "1 - of_rat \ \ + \\<^sub>2.prob {y \ space \\<^sub>2. \median s\<^sub>2 (mean_rv y) - real_of_rat (F k as)\ \ of_rat (\ * F k as)}" + using \_range + by (intro \\<^sub>2.median_bound_2, simp_all add:space_\\<^sub>2) + also have "... = \\<^sub>2.prob {y. \median_rv y - real_of_rat (F k as)\ \ real_of_rat (\ * F k as)}" + by (simp add:median_rv_def space_\\<^sub>2) + also have "... = \\<^sub>2.prob {y. \median_rv y - F k as\ \ \ * F k as}" + by (simp add:real_of_rat_median_rv of_rat_less_eq flip: of_rat_diff) + also have "... = \

(\ in measure_pmf M. \\ - F k as\ \ \ * F k as)" + by (simp add: b comp_def map_pmf_def[symmetric] \\<^sub>2_def) + finally show ?thesis by simp +qed + +lemma fk_exact_space_usage': + defines "M \ fold (\a state. state \ fk_update a) as (fk_init k \ \ n)" + shows "AE \ in M. bit_count (encode_fk_state \) \ fk_space_usage (k, n, length as, \, \)" + (is "AE \ in M. (_ \ ?rhs)") +proof - + define H where "H = (if as = [] then return_pmf (\i\ {0..1}\{0..2}. (0,0)) else M\<^sub>2)" + + have a:"M = map_pmf (\x.(s\<^sub>1,s\<^sub>2,k,length as, x)) H" + proof (cases "as \ []") + case True + then show ?thesis + unfolding M_def fk_alg_sketch[OF True] H_def + by (simp add:M\<^sub>2_def) + next + case False + then show ?thesis + by (simp add:H_def M_def s\<^sub>1_def[symmetric] Let_def s\<^sub>2_def[symmetric] map_pmf_def bind_return_pmf) + qed + + have "bit_count (encode_fk_state (s\<^sub>1, s\<^sub>2, k, length as, y)) \ ?rhs" + if b:"y \ set_pmf H" for y + proof - + have b0:" as \ [] \ y \ {0..1} \ {0..2} \\<^sub>E M\<^sub>1" + using b non_empty_space fin_space by (simp add:H_def M\<^sub>2_def set_prod_pmf) + + have "bit_count ((N\<^sub>e \\<^sub>e N\<^sub>e) (y x)) \ + ereal (2 * log 2 (real n + 1) + 1) + ereal (2 * log 2 (real (length as) + 1) + 1)" + (is "_ \ ?rhs1") + if b1_assms: "x \ {0..1}\{0..2}" for x + proof - + have "fst (y x) \ n" + proof (cases "as = []") + case True + then show ?thesis using b b1_assms by (simp add:H_def) + next + case False + hence "1 \ count_list as (fst (y x))" + using b0 b1_assms by (simp add:PiE_iff case_prod_beta M\<^sub>1_def, fastforce) + hence "fst (y x) \ set as" + using count_list_gr_1 by metis + then show ?thesis + by (meson lessThan_iff less_imp_le_nat subsetD as_range) + qed + moreover have "snd (y x) \ length as" + proof (cases "as = []") + case True + then show ?thesis using b b1_assms by (simp add:H_def) + next + case False + hence "(y x) \ M\<^sub>1" + using b0 b1_assms by auto + hence "snd (y x) \ count_list as (fst (y x))" + by (simp add:M\<^sub>1_def case_prod_beta) + then show ?thesis using count_le_length by (metis order_trans) + qed + ultimately have "bit_count (N\<^sub>e (fst (y x))) + bit_count (N\<^sub>e (snd (y x))) \ ?rhs1" + using exp_golomb_bit_count_est by (intro add_mono, auto) + thus ?thesis + by (subst dependent_bit_count_2, simp) + qed + + moreover have "y \ extensional ({0..1} \ {0..2})" + using b0 b PiE_iff by (cases "as = []", auto simp:H_def PiE_iff) + + ultimately have "bit_count ((List.product [0..1] [0..2] \\<^sub>e N\<^sub>e \\<^sub>e N\<^sub>e) y) \ + ereal (real s\<^sub>1 * real s\<^sub>2) * (ereal (2 * log 2 (real n + 1) + 1) + + ereal (2 * log 2 (real (length as) + 1) + 1))" + by (intro fun_bit_count_est[where xs="(List.product [0..1] [0..2])", simplified], auto) + hence "bit_count (encode_fk_state (s\<^sub>1, s\<^sub>2, k, length as, y)) \ + ereal (2 * log 2 (real s\<^sub>1 + 1) + 1) + + (ereal (2 * log 2 (real s\<^sub>2 + 1) + 1) + + (ereal (2 * log 2 (real k + 1) + 1) + + (ereal (2 * log 2 (real (length as) + 1) + 1) + + (ereal (real s\<^sub>1 * real s\<^sub>2) * (ereal (2 * log 2 (real n+1) + 1) + + ereal (2 * log 2 (real (length as)+1) + 1))))))" + unfolding encode_fk_state_def dependent_bit_count + by (intro add_mono exp_golomb_bit_count, auto) + also have "... \ ?rhs" + by (simp add: s\<^sub>1_def[symmetric] s\<^sub>2_def[symmetric] Let_def) (simp add:ac_simps) + finally show "bit_count (encode_fk_state (s\<^sub>1, s\<^sub>2, k, length as, y)) \ ?rhs" + by blast + qed + thus ?thesis + by (simp add: a AE_measure_pmf_iff del:fk_space_usage.simps) +qed + +end + +text \Main results of this section:\ + +theorem fk_alg_correct: + assumes "k \ 1" + assumes "\ \ {0<..<1}" + assumes "\ > 0" + assumes "set as \ {.. fold (\a state. state \ fk_update a) as (fk_init k \ \ n) \ fk_result" + shows "\

(\ in measure_pmf M. \\ - F k as\ \ \ * F k as) \ 1 - of_rat \" + unfolding M_def using fk_alg_correct'[OF assms(1-4)] by blast + +theorem fk_exact_space_usage: + assumes "k \ 1" + assumes "\ \ {0<..<1}" + assumes "\ > 0" + assumes "set as \ {.. fold (\a state. state \ fk_update a) as (fk_init k \ \ n)" + shows "AE \ in M. bit_count (encode_fk_state \) \ fk_space_usage (k, n, length as, \, \)" + unfolding M_def using fk_exact_space_usage'[OF assms(1-4)] by blast + +theorem fk_asympotic_space_complexity: + "fk_space_usage \ + O[at_top \\<^sub>F at_top \\<^sub>F at_top \\<^sub>F at_right (0::rat) \\<^sub>F at_right (0::rat)](\ (k, n, m, \, \). + real k * real n powr (1-1/ real k) / (of_rat \)\<^sup>2 * (ln (1 / of_rat \)) * (ln (real n) + ln (real m)))" + (is "_ \ O[?F](?rhs)") +proof - + define k_of :: "nat \ nat \ nat \ rat \ rat \ nat" where "k_of = (\(k, n, m, \, \). k)" + define n_of :: "nat \ nat \ nat \ rat \ rat \ nat" where "n_of = (\(k, n, m, \, \). n)" + define m_of :: "nat \ nat \ nat \ rat \ rat \ nat" where "m_of = (\(k, n, m, \, \). m)" + define \_of :: "nat \ nat \ nat \ rat \ rat \ rat" where "\_of = (\(k, n, m, \, \). \)" + define \_of :: "nat \ nat \ nat \ rat \ rat \ rat" where "\_of = (\(k, n, m, \, \). \)" + + define g1 where + "g1 = (\x. real (k_of x)*(real (n_of x)) powr (1-1/ real (k_of x)) * (1 / of_rat (\_of x)^2))" + + define g where + "g = (\x. g1 x * (ln (1 / of_rat (\_of x))) * (ln (real (n_of x)) + ln (real (m_of x))))" + + define s1_of where "s1_of = (\x. + nat \3 * real (k_of x) * real (n_of x) powr (1 - 1 / real (k_of x)) / (real_of_rat (\_of x))\<^sup>2\)" + define s2_of where "s2_of = (\x. nat \- (18 * ln (real_of_rat (\_of x)))\)" + + have evt: "(\x. + 0 < real_of_rat (\_of x) \ 0 < real_of_rat (\_of x) \ + 1/real_of_rat (\_of x) \ \ \ 1/real_of_rat (\_of x) \ \ \ + real (n_of x) \ n \ real (k_of x) \ k \ real (m_of x) \ m\ P x) + \ eventually P ?F" (is "(\x. ?prem x \ _) \ _") + for \ \ n k m P + apply (rule eventually_mono[where P="?prem" and Q="P"]) + apply (simp add:\_of_def case_prod_beta' \_of_def n_of_def k_of_def m_of_def) + apply (intro eventually_conj eventually_prod1' eventually_prod2' + sequentially_inf eventually_at_right_less inv_at_right_0_inf) + by (auto simp add:prod_filter_eq_bot) + + have 1: + "(\_. 1) \ O[?F](\x. real (n_of x))" + "(\_. 1) \ O[?F](\x. real (m_of x))" + "(\_. 1) \ O[?F](\x. real (k_of x))" + by (intro landau_o.big_mono eventually_mono[OF evt], auto)+ + + + have "(\x. ln (real (m_of x) + 1)) \ O[?F](\x. ln (real (m_of x)))" + by (intro landau_ln_2[where a="2"] evt[where m="2"] sum_in_bigo 1, auto) + hence 2: " (\x. log 2 (real (m_of x) + 1)) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" + by (intro landau_sum_2 eventually_mono[OF evt[where n="1" and m="1"]]) + (auto simp add:log_def) + + have 3: "(\_. 1) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" + using order_less_le_trans[OF exp_gt_zero] ln_ge_iff + by (intro landau_o.big_mono evt[where \="exp 1"]) + (simp add: abs_ge_iff, blast) + + have 4: "(\_. 1) \ O[?F](\x. 1 / (real_of_rat (\_of x))\<^sup>2)" + using one_le_power + by (intro landau_o.big_mono evt[where \="1"]) + (simp add:power_one_over[symmetric], blast) + + have "(\x. 1) \ O[?F](\x. ln (real (n_of x)))" + using order_less_le_trans[OF exp_gt_zero] ln_ge_iff + by (intro landau_o.big_mono evt[where n="exp 1"]) + (simp add: abs_ge_iff, blast) + + hence 5: "(\x. 1) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" + by (intro landau_sum_1 evt[where n="1" and m="1"], auto) + + have "(\x. -ln(of_rat (\_of x))) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" + by (intro landau_o.big_mono evt) (auto simp add:ln_div) + hence 6: "(\x. real (s2_of x)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" + unfolding s2_of_def + by (intro landau_nat_ceil 3, simp) + + have 7: "(\_. 1) \ O[?F](\x. real (n_of x) powr (1 - 1 / real (k_of x)))" + by (intro landau_o.big_mono evt[where n="1" and k="1"]) + (auto simp add: ge_one_powr_ge_zero) + + have 8: "(\_. 1) \ O[?F](g1)" + unfolding g1_def by (intro landau_o.big_mult_1 1 7 4) + + have "(\x. 3 * (real (k_of x) * (n_of x) powr (1 - 1 / real (k_of x)) / (of_rat (\_of x))\<^sup>2)) + \ O[?F](g1)" + by (subst landau_o.big.cmult_in_iff, simp, simp add:g1_def) + hence 9: "(\x. real (s1_of x)) \ O[?F](g1)" + unfolding s1_of_def by (intro landau_nat_ceil 8, auto simp:ac_simps) + + have 10: "(\_. 1) \ O[?F](g)" + unfolding g_def by (intro landau_o.big_mult_1 8 3 5) + + have "(\x. real (s1_of x)) \ O[?F](g)" + unfolding g_def by (intro landau_o.big_mult_1 5 3 9) + hence "(\x. ln (real (s1_of x) + 1)) \ O[?F](g)" + using 10 by (intro landau_ln_3 sum_in_bigo, auto) + hence 11: "(\x. log 2 (real (s1_of x) + 1)) \ O[?F](g)" + by (simp add:log_def) + + have 12: " (\x. ln (real (s2_of x) + 1)) \ O[?F](\x. ln (1 / real_of_rat (\_of x)))" + using evt[where \="2"] 6 3 + by (intro landau_ln_3 sum_in_bigo, auto) + + have 13: "(\x. log 2 (real (s2_of x) + 1)) \ O[?F](g)" + unfolding g_def + by (rule landau_o.big_mult_1, rule landau_o.big_mult_1', auto simp add: 8 5 12 log_def) + + have "(\x. real (k_of x)) \ O[?F](g1)" + unfolding g1_def using 7 4 + by (intro landau_o.big_mult_1, simp_all) + hence "(\x. log 2 (real (k_of x) + 1)) \ O[?F](g1)" + by (simp add:log_def) (intro landau_ln_3 sum_in_bigo 8, auto) + hence 14: "(\x. log 2 (real (k_of x) + 1)) \ O[?F](g)" + unfolding g_def by (intro landau_o.big_mult_1 3 5) + + have 15: "(\x. log 2 (real (m_of x) + 1)) \ O[?F](g)" + unfolding g_def using 2 8 3 + by (intro landau_o.big_mult_1', simp_all) + + have "(\x. ln (real (n_of x) + 1)) \ O[?F](\x. ln (real (n_of x)))" + by (intro landau_ln_2[where a="2"] eventually_mono[OF evt[where n="2"]] sum_in_bigo 1, auto) + hence " (\x. log 2 (real (n_of x) + 1)) \ O[?F](\x. ln (real (n_of x)) + ln (real (m_of x)))" + by (intro landau_sum_1 evt[where n="1" and m="1"]) + (auto simp add:log_def) + hence 16: "(\x. real (s1_of x) * real (s2_of x) * + (2 + 2 * log 2 (real (n_of x) + 1) + 2 * log 2 (real (m_of x) + 1))) \ O[?F](g)" + unfolding g_def using 9 6 5 2 + by (intro landau_o.mult sum_in_bigo, auto) + + have "fk_space_usage = (\x. fk_space_usage (k_of x, n_of x, m_of x, \_of x, \_of x))" + by (simp add:case_prod_beta' k_of_def n_of_def \_of_def \_of_def m_of_def) + also have "... \ O[?F](g)" + using 10 11 13 14 15 16 + by (simp add:fun_cong[OF s1_of_def[symmetric]] fun_cong[OF s2_of_def[symmetric]] Let_def) + (intro sum_in_bigo, auto) + also have "... = O[?F](?rhs)" + by (simp add:case_prod_beta' g1_def g_def n_of_def \_of_def \_of_def m_of_def k_of_def) + finally show ?thesis by simp +qed + +end diff --git a/thys/Frequency_Moments/Frequency_Moments.thy b/thys/Frequency_Moments/Frequency_Moments.thy new file mode 100644 --- /dev/null +++ b/thys/Frequency_Moments/Frequency_Moments.thy @@ -0,0 +1,115 @@ +section "Frequency Moments" + +theory Frequency_Moments + imports + Frequency_Moments_Preliminary_Results + Universal_Hash_Families.Field + Interpolation_Polynomials_HOL_Algebra.Interpolation_Polynomial_Cardinalities +begin + +text \This section contains a definition of the frequency moments of a stream and a few general results about +frequency moments..\ + +definition F where + "F k xs = (\ x \ set xs. (rat_of_nat (count_list xs x)^k))" + +lemma F_ge_0: "F k as \ 0" + unfolding F_def by (rule sum_nonneg, simp) + +lemma F_gr_0: + assumes "as \ []" + shows "F k as > 0" +proof - + have "rat_of_nat 1 \ rat_of_nat (card (set as))" + using assms card_0_eq[where A="set as"] + by (intro of_nat_mono) + (metis List.finite_set One_nat_def Suc_leI neq0_conv set_empty) + also have "... = (\x\set as. 1)" by simp + also have "... \ (\x\set as. rat_of_nat (count_list as x) ^ k)" + by (intro sum_mono one_le_power) + (metis count_list_gr_1 of_nat_1 of_nat_le_iff) + also have "... \ F k as" + by (simp add:F_def) + finally show ?thesis by simp +qed + +definition P\<^sub>e :: "nat \ nat \ nat list \ bool list option" where + "P\<^sub>e p n f = (if p > 1 \ f \ bounded_degree_polynomials (Field.mod_ring p) n then + ([0..\<^sub>e Nb\<^sub>e p) (\i \ {..e p n)" +proof (cases "p > 1") + case True + interpret cring "Field.mod_ring p" + using mod_ring_is_cring True by blast + have a:"inj_on (\x. (\i \ {.. bounded_degree_polynomials (mod_ring p) n" + assume c:"y \ bounded_degree_polynomials (mod_ring p) n" + assume d:"restrict (coeff x) {.. n" by linarith + have "coeff x i = \\<^bsub>mod_ring p\<^esub>" + using b e by (subst coeff_length, auto simp:bounded_degree_polynomials_length) + also have "... = coeff y i" + using c e by (subst coeff_length, auto simp:bounded_degree_polynomials_length) + finally show ?thesis by simp + qed + then show "x = y" + using b c univ_poly_carrier + by (subst coeff_iff_polynomial_cond) (auto simp:bounded_degree_polynomials_length) + qed + + have "is_encoding (\f. P\<^sub>e p n f)" + unfolding P\<^sub>e_def using a True + by (intro encoding_compose[where f="([0..\<^sub>e Nb\<^sub>e p)"] fun_encoding bounded_nat_encoding) + auto + thus ?thesis by simp +next + case False + hence "is_encoding (\f. P\<^sub>e p n f)" + unfolding P\<^sub>e_def using encoding_triv by simp + then show ?thesis by simp +qed + +lemma bounded_degree_polynomial_bit_count: + assumes "p > 1" + assumes "x \ bounded_degree_polynomials (Field.mod_ring p) n" + shows "bit_count (P\<^sub>e p n x) \ ereal (real n * (log 2 p + 1))" +proof - + interpret cring "Field.mod_ring p" + using mod_ring_is_cring assms by blast + + have a: "x \ carrier (poly_ring (mod_ring p))" + using assms(2) by (simp add:bounded_degree_polynomials_def) + + have "real_of_int \log 2 (p-1)\+1 \ log 2 (p-1) + 1" + using floor_eq_iff by (intro add_mono, auto) + also have "... \ log 2 p + 1" + using assms by (intro add_mono, auto) + finally have b: "\log 2 (p-1)\+1 \ log 2 p + 1" + by simp + + have "bit_count (P\<^sub>e p n x) = (\ k \ [0..e p (coeff x k)))" + using assms restrict_extensional + by (auto intro!:arg_cong[where f="sum_list"] simp add:P\<^sub>e_def fun_bit_count lessThan_atLeast0) + also have "... = (\ k \ [0..log 2 (p-1)\+1)" + using assms(1) by (simp add:floorlog_def) + also have "... \ ereal (real n * (log 2 p + 1))" + by (subst ereal_less_eq, intro mult_left_mono b, auto) + finally show ?thesis by simp +qed + +end \ No newline at end of file diff --git a/thys/Frequency_Moments/Frequency_Moments_Preliminary_Results.thy b/thys/Frequency_Moments/Frequency_Moments_Preliminary_Results.thy new file mode 100644 --- /dev/null +++ b/thys/Frequency_Moments/Frequency_Moments_Preliminary_Results.thy @@ -0,0 +1,500 @@ +section \Preliminary Results\ + +theory Frequency_Moments_Preliminary_Results + imports + "HOL.Transcendental" + "HOL-Computational_Algebra.Primes" + "HOL-Library.Extended_Real" + "HOL-Library.Multiset" + "HOL-Library.Sublist" + Prefix_Free_Code_Combinators.Prefix_Free_Code_Combinators + Bertrands_Postulate.Bertrand +begin + +text \This section contains various preliminary results.\ + +lemma card_ordered_pairs: + fixes M :: "('a ::linorder) set" + assumes "finite M" + shows "2 * card {(x,y) \ M \ M. x < y} = card M * (card M - 1)" +proof - + have a: "finite (M \ M)" using assms by simp + + have inj_swap: "inj (\x. (snd x, fst x))" + by (rule inj_onI, simp add: prod_eq_iff) + + have "2 * card {(x,y) \ M \ M. x < y} = + card {(x,y) \ M \ M. x < y} + card ((\x. (snd x, fst x))`{(x,y) \ M \ M. x < y})" + by (simp add: card_image[OF inj_on_subset[OF inj_swap]]) + also have "... = card {(x,y) \ M \ M. x < y} + card {(x,y) \ M \ M. y < x}" + by (auto intro: arg_cong[where f="card"] simp add:set_eq_iff image_iff) + also have "... = card ({(x,y) \ M \ M. x < y} \ {(x,y) \ M \ M. y < x})" + by (intro card_Un_disjoint[symmetric] a finite_subset[where B="M \ M"] subsetI) auto + also have "... = card ((M \ M) - {(x,y) \ M \ M. x = y})" + by (auto intro: arg_cong[where f="card"] simp add:set_eq_iff) + also have "... = card (M \ M) - card {(x,y) \ M \ M. x = y}" + by (intro card_Diff_subset a finite_subset[where B="M \ M"] subsetI) auto + also have "... = card M ^ 2 - card ((\x. (x,x)) ` M)" + using assms + by (intro arg_cong2[where f="(-)"] arg_cong[where f="card"]) + (auto simp:power2_eq_square set_eq_iff image_iff) + also have "... = card M ^ 2 - card M" + by (intro arg_cong2[where f="(-)"] card_image inj_onI, auto) + also have "... = card M * (card M - 1)" + by (cases "card M \ 0", auto simp:power2_eq_square algebra_simps) + finally show ?thesis by simp +qed + +lemma ereal_mono: "x \ y \ ereal x \ ereal y" + by simp + +lemma log_mono: "a > 1 \ x \ y \ 0 < x \ log a x \ log a y" + by (subst log_le_cancel_iff, auto) + +lemma abs_ge_iff: "((x::real) \ abs y) = (x \ y \ x \ -y)" + by linarith + +lemma count_list_gr_1: + "(x \ set xs) = (count_list xs x \ 1)" + by (induction xs, simp, simp) + +lemma count_list_append: "count_list (xs@ys) v = count_list xs v + count_list ys v" + by (induction xs, simp, simp) + +lemma count_list_lt_suffix: + assumes "suffix a b" + assumes "x \ {b ! i| i. i < length b - length a}" + shows "count_list a x < count_list b x" +proof - + have "length a \ length b" using assms(1) + by (simp add: suffix_length_le) + hence "x \ set (nths b {i. i < length b - length a})" + using assms diff_commute by (auto simp add:set_nths) + hence a:"x \ set (take (length b - length a) b)" + by (subst (asm) lessThan_def[symmetric], simp) + have "b = (take (length b - length a) b)@drop (length b - length a) b" + by simp + also have "... = (take (length b - length a) b)@a" + using assms(1) suffix_take by auto + finally have b:"b = (take (length b - length a) b)@a" by simp + + have "count_list a x < 1 + count_list a x" by simp + also have "... \ count_list (take (length b - length a) b) x + count_list a x" + using a count_list_gr_1 + by (intro add_mono, fast, simp) + also have "... = count_list b x" + using b count_list_append by metis + finally show ?thesis by simp +qed + +lemma suffix_drop_drop: + assumes "x \ y" + shows "suffix (drop x a) (drop y a)" +proof - + have "drop y a = take (x - y) (drop y a)@drop (x- y) (drop y a)" + by (subst append_take_drop_id, simp) + also have " ... = take (x-y) (drop y a)@drop x a" + using assms by simp + finally have "drop y a = take (x-y) (drop y a)@drop x a" by simp + thus ?thesis + by (auto simp add:suffix_def) +qed + +lemma count_list_card: "count_list xs x = card {k. k < length xs \ xs ! k = x}" +proof - + have "count_list xs x = length (filter ((=) x) xs)" + by (induction xs, simp, simp) + also have "... = card {k. k < length xs \ xs ! k = x}" + by (subst length_filter_conv_card, metis) + finally show ?thesis by simp +qed + +lemma card_gr_1_iff: + assumes "finite S" "x \ S" "y \ S" "x \ y" + shows "card S > 1" + using assms card_le_Suc0_iff_eq leI by auto + +lemma count_list_ge_2_iff: + assumes "y < z" + assumes "z < length xs" + assumes "xs ! y = xs ! z" + shows "count_list xs (xs ! y) > 1" +proof - + have " 1 < card {k. k < length xs \ xs ! k = xs ! y}" + using assms by (intro card_gr_1_iff[where x="y" and y="z"], auto) + + thus ?thesis + by (simp add: count_list_card) +qed + +text \Results about multisets and sorting\ + +text \This is a induction scheme over the distinct elements of a multisets: +We can represent each multiset as a sum like: +@{text "replicate_mset n\<^sub>1 x\<^sub>1 + replicate_mset n\<^sub>2 x\<^sub>2 + ... + replicate_mset n\<^sub>k x\<^sub>k"} where the +@{term "x\<^sub>i"} are distinct.\ + +lemma disj_induct_mset: + assumes "P {#}" + assumes "\n M x. P M \ \(x \# M) \ n > 0 \ P (M + replicate_mset n x)" + shows "P M" +proof (induction "size M" arbitrary: M rule:nat_less_induct) + case 1 + show ?case + proof (cases "M = {#}") + case True + then show ?thesis using assms by simp + next + case False + then obtain x where x_def: "x \# M" using multiset_nonemptyE by auto + define M1 where "M1 = M - replicate_mset (count M x) x" + then have M_def: "M = M1 + replicate_mset (count M x) x" + by (metis count_le_replicate_mset_subset_eq dual_order.refl subset_mset.diff_add) + have "size M1 < size M" + by (metis M_def x_def count_greater_zero_iff less_add_same_cancel1 size_replicate_mset size_union) + hence "P M1" using 1 by blast + then show "P M" + apply (subst M_def, rule assms(2), simp) + by (simp add:M1_def x_def count_eq_zero_iff[symmetric])+ + qed +qed + +lemma prod_mset_conv: + fixes f :: "'a \ 'b::{comm_monoid_mult}" + shows "prod_mset (image_mset f A) = prod (\x. f x^(count A x)) (set_mset A)" +proof (induction A rule: disj_induct_mset) + case 1 + then show ?case by simp +next + case (2 n M x) + moreover have "count M x = 0" using 2 by (simp add: count_eq_zero_iff) + moreover have "\y. y \ set_mset M \ y \ x" using 2 by blast + ultimately show ?case by (simp add:algebra_simps) +qed + +lemma sum_collapse: + fixes f :: "'a \ 'b::{comm_monoid_add}" + assumes "finite A" + assumes "z \ A" + assumes "\y. y \ A \ y \ z \ f y = 0" + shows "sum f A = f z" + using sum.union_disjoint[where A="A-{z}" and B="{z}" and g="f"] + by (simp add: assms sum.insert_if) + +text \There is a version @{thm [source] sum_list_map_eq_sum_count} but it doesn't work +if the function maps into the reals.\ + +lemma sum_list_eval: + fixes f :: "'a \ 'b::{ring,semiring_1}" + shows "sum_list (map f xs) = (\x \ set xs. of_nat (count_list xs x) * f x)" +proof - + define M where "M = mset xs" + have "sum_mset (image_mset f M) = (\x \ set_mset M. of_nat (count M x) * f x)" + proof (induction "M" rule:disj_induct_mset) + case 1 + then show ?case by simp + next + case (2 n M x) + have a:"\y. y \ set_mset M \ y \ x" using 2(2) by blast + show ?case using 2 by (simp add:a count_eq_zero_iff[symmetric]) + qed + moreover have "\x. count_list xs x = count (mset xs) x" + by (induction xs, simp, simp) + ultimately show ?thesis + by (simp add:M_def sum_mset_sum_list[symmetric]) +qed + +lemma prod_list_eval: + fixes f :: "'a \ 'b::{ring,semiring_1,comm_monoid_mult}" + shows "prod_list (map f xs) = (\x \ set xs. (f x)^(count_list xs x))" +proof - + define M where "M = mset xs" + have "prod_mset (image_mset f M) = (\x \ set_mset M. f x ^ (count M x))" + proof (induction "M" rule:disj_induct_mset) + case 1 + then show ?case by simp + next + case (2 n M x) + have a:"\y. y \ set_mset M \ y \ x" using 2(2) by blast + have b:"count M x = 0" using 2 by (subst count_eq_zero_iff) blast + show ?case using 2 by (simp add:a b mult.commute) + qed + moreover have "\x. count_list xs x = count (mset xs) x" + by (induction xs, simp, simp) + ultimately show ?thesis + by (simp add:M_def prod_mset_prod_list[symmetric]) +qed + +lemma sorted_sorted_list_of_multiset: "sorted (sorted_list_of_multiset M)" + by (induction M, auto simp:sorted_insort) + +lemma count_mset: "count (mset xs) a = count_list xs a" + by (induction xs, auto) + +lemma swap_filter_image: "filter_mset g (image_mset f A) = image_mset f (filter_mset (g \ f) A)" + by (induction A, auto) + +lemma list_eq_iff: + assumes "mset xs = mset ys" + assumes "sorted xs" + assumes "sorted ys" + shows "xs = ys" + using assms properties_for_sort by blast + +lemma sorted_list_of_multiset_image_commute: + assumes "mono f" + shows "sorted_list_of_multiset (image_mset f M) = map f (sorted_list_of_multiset M)" +proof - + have "sorted (sorted_list_of_multiset (image_mset f M))" + by (simp add:sorted_sorted_list_of_multiset) + moreover have " sorted_wrt (\x y. f x \ f y) (sorted_list_of_multiset M)" + by (rule sorted_wrt_mono_rel[where P="\x y. x \ y"]) + (auto intro: monoD[OF assms] sorted_sorted_list_of_multiset) + hence "sorted (map f (sorted_list_of_multiset M))" + by (subst sorted_wrt_map) + ultimately show ?thesis + by (intro list_eq_iff, auto) +qed + +text \Results about rounding and floating point numbers\ + +lemma round_down_ge: + "x \ round_down prec x + 2 powr (-prec)" + using round_down_correct by (simp, meson diff_diff_eq diff_eq_diff_less_eq) + +lemma truncate_down_ge: + "x \ truncate_down prec x + abs x * 2 powr (-prec)" +proof (cases "abs x > 0") + case True + have "x \ round_down (int prec - \log 2 \x\\) x + 2 powr (-real_of_int(int prec - \log 2 \x\\))" + by (rule round_down_ge) + also have "... \ truncate_down prec x + 2 powr ( \log 2 \x\\) * 2 powr (-real prec)" + by (rule add_mono, simp_all add:powr_add[symmetric] truncate_down_def) + also have "... \ truncate_down prec x + \x\ * 2 powr (-real prec)" + using True + by (intro add_mono mult_right_mono, simp_all add:le_log_iff[symmetric]) + finally show ?thesis by simp +next + case False + then show ?thesis by simp +qed + +lemma truncate_down_pos: + assumes "x \ 0" + shows "x * (1 - 2 powr (-prec)) \ truncate_down prec x" + by (simp add:right_diff_distrib diff_le_eq) + (metis truncate_down_ge assms abs_of_nonneg) + +lemma truncate_down_eq: + assumes "truncate_down r x = truncate_down r y" + shows "abs (x-y) \ max (abs x) (abs y) * 2 powr (-real r)" +proof - + have "x - y \ truncate_down r x + abs x * 2 powr (-real r) - y" + by (rule diff_right_mono, rule truncate_down_ge) + also have "... \ y + abs x * 2 powr (-real r) - y" + using truncate_down_le + by (intro diff_right_mono add_mono, subst assms(1), simp_all) + also have "... \ abs x * 2 powr (-real r)" by simp + also have "... \ max (abs x) (abs y) * 2 powr (-real r)" by simp + finally have a:"x - y \ max (abs x) (abs y) * 2 powr (-real r)" by simp + + have "y - x \ truncate_down r y + abs y * 2 powr (-real r) - x" + by (rule diff_right_mono, rule truncate_down_ge) + also have "... \ x + abs y * 2 powr (-real r) - x" + using truncate_down_le + by (intro diff_right_mono add_mono, subst assms(1)[symmetric], auto) + also have "... \ abs y * 2 powr (-real r)" by simp + also have "... \ max (abs x) (abs y) * 2 powr (-real r)" by simp + finally have b:"y - x \ max (abs x) (abs y) * 2 powr (-real r)" by simp + + show ?thesis + using abs_le_iff a b by linarith +qed + +definition rat_of_float :: "float \ rat" where + "rat_of_float f = of_int (mantissa f) * + (if exponent f \ 0 then 2 ^ (nat (exponent f)) else 1 / 2 ^ (nat (-exponent f)))" + +lemma real_of_rat_of_float: "real_of_rat (rat_of_float x) = real_of_float x" +proof - + have "real_of_rat (rat_of_float x) = mantissa x * (2 powr (exponent x))" + by (simp add:rat_of_float_def of_rat_mult of_rat_divide of_rat_power powr_realpow[symmetric] powr_minus_divide) + also have "... = real_of_float x" + using mantissa_exponent by simp + finally show ?thesis by simp +qed + +lemma log_est: "log 2 (real n + 1) \ n" +proof - + have "1 + real n = real (n + 1)" + by simp + also have "... \ real (2 ^ n)" + by (intro of_nat_mono suc_n_le_2_pow_n) + also have "... = 2 powr (real n)" + by (simp add:powr_realpow) + finally have "1 + real n \ 2 powr (real n)" + by simp + thus ?thesis + by (simp add: Transcendental.log_le_iff) +qed + +lemma truncate_mantissa_bound: + "abs (\x * 2 powr (real r - real_of_int \log 2 \x\\)\) \ 2 ^ (r+1)" (is "?lhs \ _") +proof - + define q where "q = \x * 2 powr (real r - real_of_int (\log 2 \x\\))\" + + have "abs q \ 2 ^ (r + 1)" if a:"x > 0" + proof - + have "abs q = q" + using a by (intro abs_of_nonneg, simp add:q_def) + also have "... \ x * 2 powr (real r - real_of_int \log 2 \x\\)" + unfolding q_def using of_int_floor_le by blast + also have "... = x * 2 powr real_of_int (int r - \log 2 \x\\)" + by auto + also have "... = 2 powr (log 2 x + real_of_int (int r - \log 2 \x\\))" + using a by (simp add:powr_add) + also have "... \ 2 powr (real r + 1)" + using a by (intro powr_mono, linarith+) + also have "... = 2 ^ (r+1)" + by (subst powr_realpow[symmetric], simp_all add:add.commute) + finally show "abs q \ 2 ^ (r+1)" + by (metis of_int_le_iff of_int_numeral of_int_power) + qed + + moreover have "abs q \ (2 ^ (r + 1))" if a: "x < 0" + proof - + have "-(2 ^ (r+1) + 1) = -(2 powr (real r + 1)+1)" + by (subst powr_realpow[symmetric], simp_all add: add.commute) + also have "... < -(2 powr (log 2 (- x) + (r - \log 2 \x\\)) + 1)" + using a by (simp, linarith) + also have "... = x * 2 powr (r - \log 2 \x\\) - 1" + using a by (simp add:powr_add) + also have "... \ q" + by (simp add:q_def) + also have "... = - abs q" + using a + by (subst abs_of_neg, simp_all add: mult_pos_neg2 q_def) + finally have "-(2 ^ (r+1)+1) < - abs q" using of_int_less_iff by fastforce + hence "-(2 ^ (r+1)) \ - abs q" by linarith + thus "abs q \ 2^(r+1)" by linarith + qed + + moreover have "x = 0 \ abs q \ 2^(r+1)" + by (simp add:q_def) + ultimately have "abs q \ 2^(r+1)" + by fastforce + thus ?thesis using q_def by blast +qed + +lemma truncate_float_bit_count: + "bit_count (F\<^sub>e (float_of (truncate_down r x))) \ 10 + 4 * real r + 2*log 2 (2 + \log 2 \x\\)" + (is "?lhs \ ?rhs") +proof - + define m where "m = \x * 2 powr (real r - real_of_int \log 2 \x\\)\" + define e where "e = \log 2 \x\\ - int r" + + have a: "(real_of_int \log 2 \x\\ - real r) = e" + by (simp add:e_def) + have "abs m + 2 \ 2 ^ (r + 1) + 2^1" + using truncate_mantissa_bound + by (intro add_mono, simp_all add:m_def) + also have "... \ 2 ^ (r+2)" + by simp + finally have b:"abs m + 2 \ 2 ^ (r+2)" by simp + hence "real_of_int (\m\ + 2) \ real_of_int (4 * 2 ^ r)" + by (subst of_int_le_iff, simp) + hence "\real_of_int m\ + 2 \ 4 * 2 ^ r" + by simp + hence c:"log 2 (real_of_int (\m\ + 2)) \ r+2" + by (simp add: Transcendental.log_le_iff powr_add powr_realpow) + + have "real_of_int (abs e + 1) \ real_of_int \\log 2 \x\\\ + real_of_int r + 1" + by (simp add:e_def) + also have "... \ 1 + abs (log 2 (abs x)) + real_of_int r + 1" + by (simp add:abs_le_iff, linarith) + also have "... \ (real_of_int r+ 1) * (2 + abs (log 2 (abs x)))" + by (simp add:distrib_left distrib_right) + finally have d:"real_of_int (abs e + 1) \ (real_of_int r+ 1) * (2 + abs (log 2 (abs x)))" by simp + + have "log 2 (real_of_int (abs e + 1)) \ log 2 (real_of_int r + 1) + log 2 (2 + abs (log 2 (abs x)))" + using d by (simp add: log_mult[symmetric]) + also have "... \ r + log 2 (2 + abs (log 2 (abs x)))" + using log_est by (intro add_mono, simp_all add:add.commute) + finally have e: "log 2 (real_of_int (abs e + 1)) \ r + log 2 (2 + abs (log 2 (abs x)))" by simp + + have "?lhs = bit_count (F\<^sub>e (float_of (real_of_int m * 2 powr real_of_int e)))" + by (simp add:truncate_down_def round_down_def m_def[symmetric] a) + also have "... \ ereal (6 + (2 * log 2 (real_of_int (\m\ + 2)) + 2 * log 2 (real_of_int (\e\ + 1))))" + using float_bit_count_2 by simp + also have "... \ ereal (6 + (2 * real (r+2) + 2 * (r + log 2 (2 + abs (log 2 (abs x))))))" + using c e + by (subst ereal_less_eq, intro add_mono mult_left_mono, linarith+) + also have "... = ?rhs" by simp + finally show ?thesis by simp +qed + +definition prime_above :: "nat \ nat" + where "prime_above n = (SOME x. x \ {n..(2*n+2)} \ prime x)" + +text \The term @{term"prime_above n"} returns a prime between @{term "n::nat"} and @{term "2*(n::nat)+2"}. +Because of Bertrand's postulate there always is such a value. In a refinement of the algorithms, it may make sense to +replace this with an algorithm, that finds such a prime exactly or approximately. + +The definition is intentionally inexact, to allow refinement with various algorithms, without modifying the +high-level mathematical correctness proof.\ + +lemma ex_subset: + assumes "\x \ A. P x" + assumes "A \ B" + shows "\x \ B. P x" + using assms by auto + +lemma + shows prime_above_prime: "prime (prime_above n)" + and prime_above_range: "prime_above n \ {n..(2*n+2)}" +proof - + define r where "r = (\x. x \ {n..(2*n+2)} \ prime x)" + have "\x. r x" + proof (cases "n>2") + case True + hence "n-1 > 1" by simp + hence "\x \ {(n-1)<..<(2*(n-1))}. prime x" + using bertrand by simp + moreover have "{n - 1<..<2 * (n - 1)} \ {n..2 * n + 2}" + by (intro subsetI, auto) + ultimately have "\x \ {n..(2*n+2)}. prime x" + by (rule ex_subset) + then show ?thesis by (simp add:r_def Bex_def) + next + case False + hence "2 \ {n..(2*n+2)}" + by simp + moreover have "prime (2::nat)" + using two_is_prime_nat by blast + ultimately have "r 2" + using r_def by simp + then show ?thesis by (rule exI) + qed + moreover have "prime_above n = (SOME x. r x)" + by (simp add:prime_above_def r_def) + ultimately have a:"r (prime_above n)" + using someI_ex by metis + show "prime (prime_above n)" + using a unfolding r_def by blast + show "prime_above n \ {n..(2*n+2)}" + using a unfolding r_def by blast +qed + +lemma prime_above_min: "prime_above n \ 2" + using prime_above_prime + by (simp add: prime_ge_2_nat) + +lemma prime_above_lower_bound: "prime_above n \ n" + using prime_above_range + by simp + +lemma prime_above_upper_bound: "prime_above n \ 2*n+2" + using prime_above_range + by simp + +end diff --git a/thys/Frequency_Moments/K_Smallest.thy b/thys/Frequency_Moments/K_Smallest.thy new file mode 100644 --- /dev/null +++ b/thys/Frequency_Moments/K_Smallest.thy @@ -0,0 +1,385 @@ +section \Ranks, $k$ smallest element and elements\ + +theory K_Smallest + imports + Frequency_Moments_Preliminary_Results + Interpolation_Polynomials_HOL_Algebra.Interpolation_Polynomial_Cardinalities +begin + +text \This section contains definitions and results for the selection of the $k$ smallest elements, the $k$-th smallest element, rank of an element in an ordered set.\ + +definition rank_of :: "'a :: linorder \ 'a set \ nat" where "rank_of x S = card {y \ S. y < x}" +text \The function @{term "rank_of"} returns the rank of an element within a set.\ + +lemma rank_mono: + assumes "finite S" + shows "x \ y \ rank_of x S \ rank_of y S" + unfolding rank_of_def using assms by (intro card_mono, auto) + +lemma rank_mono_2: + assumes "finite S" + shows "S' \ S \ rank_of x S' \ rank_of x S" + unfolding rank_of_def using assms by (intro card_mono, auto) + +lemma rank_mono_commute: + assumes "finite S" + assumes "S \ T" + assumes "strict_mono_on f T" + assumes "x \ T" + shows "rank_of x S = rank_of (f x) (f ` S)" +proof - + have a: "inj_on f T" + by (metis assms(3) strict_mono_on_imp_inj_on) + + have "rank_of (f x) (f ` S) = card (f ` {y \ S. f y < f x})" + unfolding rank_of_def by (intro arg_cong[where f="card"], auto) + also have "... = card (f ` {y \ S. y < x})" + using assms by (intro arg_cong[where f="card"] arg_cong[where f="(`) f"]) + (meson in_mono linorder_not_le strict_mono_onD strict_mono_on_leD set_eq_iff) + also have "... = card {y \ S. y < x}" + using assms by (intro card_image inj_on_subset[OF a], blast) + also have "... = rank_of x S" + by (simp add:rank_of_def) + finally show ?thesis + by simp +qed + +definition least where "least k S = {y \ S. rank_of y S < k}" +text \The function @{term "least"} returns the k smallest elements of a finite set.\ + +lemma rank_strict_mono: + assumes "finite S" + shows "strict_mono_on (\x. rank_of x S) S" +proof - + have "\x y. x \ S \ y \ S \ x < y \ rank_of x S < rank_of y S" + unfolding rank_of_def using assms + by (intro psubset_card_mono, auto) + + thus ?thesis + by (simp add:rank_of_def strict_mono_on_def) +qed + +lemma rank_of_image: + assumes "finite S" + shows "(\x. rank_of x S) ` S = {0..x. x \ S \ card {y \ S. y < x} < card S" + by (rule psubset_card_mono, metis assms, blast) + thus "(\x. rank_of x S) ` S \ {0..x. rank_of x S) S" + by (metis strict_mono_on_imp_inj_on rank_strict_mono assms) + thus "card {0.. card ((\x. rank_of x S) ` S)" + by (simp add:card_image) +qed + +lemma card_least: + assumes "finite S" + shows "card (least k S) = min k (card S)" +proof (cases "card S < k") + case True + have "\t. rank_of t S \ card S" + unfolding rank_of_def using assms + by (intro card_mono, auto) + hence "\t. rank_of t S < k" + by (metis True not_less_iff_gr_or_eq order_less_le_trans) + hence "least k S = S" + by (simp add:least_def) + then show ?thesis using True by simp +next + case False + hence a:"card S \ k" using leI by blast + hence "card ((\x. rank_of x S) -` {0.. S) = card {0.. S" + by (simp add:least_def) + +lemma least_mono_commute: + assumes "finite S" + assumes "strict_mono_on f S" + shows "f ` least k S = least k (f ` S)" +proof - + have a:"inj_on f S" + using strict_mono_on_imp_inj_on[OF assms(2)] by simp + + have "card (least k (f ` S)) = min k (card (f ` S))" + by (subst card_least, auto simp add:assms) + also have "... = min k (card S)" + by (subst card_image, metis a, auto) + also have "... = card (least k S)" + by (subst card_least, auto simp add:assms) + also have "... = card (f ` least k S)" + by (subst card_image[OF inj_on_subset[OF a]], simp_all add:least_def) + finally have b: "card (least k (f ` S)) \ card (f ` least k S)" by simp + + have c: "f ` least k S \least k (f ` S)" + using assms by (intro image_subsetI) + (simp add:least_def rank_mono_commute[symmetric, where T="S"]) + + show ?thesis + using b c assms by (intro card_seteq, simp_all add:least_def) +qed + +lemma least_eq_iff: + assumes "finite B" + assumes "A \ B" + assumes "\x. x \ B \ rank_of x B < k \ x \ A" + shows "least k A = least k B" +proof - + have "least k B \ least k A" + using assms rank_mono_2[OF assms(1,2)] order_le_less_trans + by (simp add:least_def, blast) + moreover have "card (least k B) \ card (least k A)" + using assms finite_subset[OF assms(2,1)] card_mono[OF assms(1,2)] + by (simp add: card_least min_le_iff_disj) + moreover have "finite (least k A)" + using finite_subset least_subset assms(1,2) by metis + ultimately show ?thesis + by (intro card_seteq[symmetric], simp_all) +qed + +lemma least_insert: + assumes "finite S" + shows "least k (insert x (least k S)) = least k (insert x S)" (is "?lhs = ?rhs") +proof (rule least_eq_iff) + show "finite (insert x S)" + using assms(1) by simp + show "insert x (least k S) \ insert x S" + using least_subset by blast + show "y \ insert x (least k S)" if a: "y \ insert x S" and b: "rank_of y (insert x S) < k" for y + proof - + have "rank_of y S \ rank_of y (insert x S)" + using assms by (intro rank_mono_2, auto) + also have "... < k" using b by simp + finally have "rank_of y S < k" by simp + hence "y = x \ (y \ S \ rank_of y S < k)" + using a by simp + thus ?thesis by (simp add:least_def) + qed +qed + + +definition count_le where "count_le x M = size {#y \# M. y \ x#}" +definition count_less where "count_less x M = size {#y \# M. y < x#}" + +definition nth_mset :: "nat \ ('a :: linorder) multiset \ 'a" where + "nth_mset k M = sorted_list_of_multiset M ! k" + +lemma nth_mset_bound_left: + assumes "k < size M" + assumes "count_less x M \ k" + shows "x \ nth_mset k M" +proof (rule ccontr) + define xs where "xs = sorted_list_of_multiset M" + have s_xs: "sorted xs" by (simp add:xs_def sorted_sorted_list_of_multiset) + have l_xs: "k < length xs" + using assms(1) by (simp add:xs_def size_mset[symmetric]) + have M_xs: "M = mset xs" by (simp add:xs_def) + hence a:"\i. i \ k \ xs ! i \ xs ! k" + using s_xs l_xs sorted_iff_nth_mono by blast + + assume "\(x \ nth_mset k M)" + hence "x > nth_mset k M" by simp + hence b:"x > xs ! k" by (simp add:nth_mset_def xs_def[symmetric]) + + have "k < card {0..k}" by simp + also have "... \ card {i. i < length xs \ xs ! i < x}" + using a b l_xs order_le_less_trans + by (intro card_mono subsetI) auto + also have "... = length (filter (\y. y < x) xs)" + by (subst length_filter_conv_card, simp) + also have "... = size (mset (filter (\y. y < x) xs))" + by (subst size_mset, simp) + also have "... = count_less x M" + by (simp add:count_less_def M_xs) + also have "... \ k" + using assms by simp + finally show "False" by simp +qed + +lemma nth_mset_bound_left_excl: + assumes "k < size M" + assumes "count_le x M \ k" + shows "x < nth_mset k M" +proof (rule ccontr) + define xs where "xs = sorted_list_of_multiset M" + have s_xs: "sorted xs" by (simp add:xs_def sorted_sorted_list_of_multiset) + have l_xs: "k < length xs" + using assms(1) by (simp add:xs_def size_mset[symmetric]) + have M_xs: "M = mset xs" by (simp add:xs_def) + hence a:"\i. i \ k \ xs ! i \ xs ! k" + using s_xs l_xs sorted_iff_nth_mono by blast + + assume "\(x < nth_mset k M)" + hence "x \ nth_mset k M" by simp + hence b:"x \ xs ! k" by (simp add:nth_mset_def xs_def[symmetric]) + + have "k+1 \ card {0..k}" by simp + also have "... \ card {i. i < length xs \ xs ! i \ xs ! k}" + using a b l_xs order_le_less_trans + by (intro card_mono subsetI, auto) + also have "... \ card {i. i < length xs \ xs ! i \ x}" + using b by (intro card_mono subsetI, auto) + also have "... = length (filter (\y. y \ x) xs)" + by (subst length_filter_conv_card, simp) + also have "... = size (mset (filter (\y. y \ x) xs))" + by (subst size_mset, simp) + also have "... = count_le x M" + by (simp add:count_le_def M_xs) + also have "... \ k" + using assms by simp + finally show "False" by simp +qed + +lemma nth_mset_bound_right: + assumes "k < size M" + assumes "count_le x M > k" + shows "nth_mset k M \ x" +proof (rule ccontr) + define xs where "xs = sorted_list_of_multiset M" + have s_xs: "sorted xs" by (simp add:xs_def sorted_sorted_list_of_multiset) + have l_xs: "k < length xs" + using assms(1) by (simp add:xs_def size_mset[symmetric]) + have M_xs: "M = mset xs" by (simp add:xs_def) + + assume "\(nth_mset k M \ x)" + hence "x < nth_mset k M" by simp + hence "x < xs ! k" + by (simp add:nth_mset_def xs_def[symmetric]) + hence a:"\i. i < length xs \ xs ! i \ x \ i < k" + using s_xs l_xs sorted_iff_nth_mono leI by fastforce + have "count_le x M = size (mset (filter (\y. y \ x) xs))" + by (simp add:count_le_def M_xs) + also have "... = length (filter (\y. y \ x) xs)" + by (subst size_mset, simp) + also have "... = card {i. i < length xs \ xs ! i \ x}" + by (subst length_filter_conv_card, simp) + also have "... \ card {i. i < k}" + using a by (intro card_mono subsetI, auto) + also have "... = k" by simp + finally have "count_le x M \ k" by simp + thus "False" using assms by simp +qed + +lemma nth_mset_commute_mono: + assumes "mono f" + assumes "k < size M" + shows "f (nth_mset k M) = nth_mset k (image_mset f M)" +proof - + have a:"k < length (sorted_list_of_multiset M)" + by (metis assms(2) mset_sorted_list_of_multiset size_mset) + show ?thesis + using a by (simp add:nth_mset_def sorted_list_of_multiset_image_commute[OF assms(1)]) +qed + +lemma nth_mset_max: + assumes "size A > k" + assumes "\x. x \ nth_mset k A \ count A x \ 1" + shows "nth_mset k A = Max (least (k+1) (set_mset A))" and "card (least (k+1) (set_mset A)) = k+1" +proof - + define xs where "xs = sorted_list_of_multiset A" + have k_bound: "k < length xs" unfolding xs_def + by (metis size_mset mset_sorted_list_of_multiset assms(1)) + + have A_def: "A = mset xs" by (simp add:xs_def) + have s_xs: "sorted xs" by (simp add:xs_def sorted_sorted_list_of_multiset) + have "\x. x \ xs ! k \ count A x \ Suc 0" + using assms(2) by (simp add:xs_def[symmetric] nth_mset_def) + hence no_col: "\x. x \ xs ! k \ count_list xs x \ 1" + by (simp add:A_def count_mset) + + have inj_xs: "inj_on (\k. xs ! k) {0..k}" + by (rule inj_onI, simp) (metis (full_types) count_list_ge_2_iff k_bound no_col + le_neq_implies_less linorder_not_le order_le_less_trans s_xs sorted_iff_nth_mono) + + have "\y. y < length xs \ rank_of (xs ! y) (set xs) < k+1 \ y < k+1" + proof (rule ccontr) + fix y + assume b:"y < length xs" + assume "\y < k +1" + hence a:"k + 1 \ y" by simp + + have d:"Suc k < length xs" using a b by simp + + have "k+1 = card ((!) xs ` {0..k})" + by (subst card_image[OF inj_xs], simp) + also have "... \ rank_of (xs ! (k+1)) (set xs)" + unfolding rank_of_def using k_bound + by (intro card_mono image_subsetI conjI, simp_all) (metis count_list_ge_2_iff no_col not_le le_imp_less_Suc s_xs + sorted_iff_nth_mono d order_less_le) + also have "... \ rank_of (xs ! y) (set xs)" + unfolding rank_of_def + by (intro card_mono subsetI, simp_all) + (metis Suc_eq_plus1 a b s_xs order_less_le_trans sorted_iff_nth_mono) + also assume "... < k+1" + finally show "False" by force + qed + + moreover have "rank_of (xs ! y) (set xs) < k+1" if a:"y < k + 1" for y + proof - + have "rank_of (xs ! y) (set xs) \ card ((\k. xs ! k) ` {k. k < length xs \ xs ! k < xs ! y})" + unfolding rank_of_def + by (intro card_mono subsetI, simp) + (metis (no_types, lifting) imageI in_set_conv_nth mem_Collect_eq) + also have "... \ card {k. k < length xs \ xs ! k < xs ! y}" + by (rule card_image_le, simp) + also have "... \ card {k. k < y}" + by (intro card_mono subsetI, simp_all add:not_less) + (metis sorted_iff_nth_mono s_xs linorder_not_less) + also have "... = y" by simp + also have "... < k + 1" using a by simp + finally show "rank_of (xs ! y) (set xs) < k+1" by simp + qed + + ultimately have rank_conv: "\y. y < length xs \ rank_of (xs ! y) (set xs) < k+1 \ y < k+1" + by blast + + have "y \ xs ! k" if a:"y \ least (k+1) (set xs)" for y + proof - + have "y \ set xs" using a least_subset by blast + then obtain i where i_bound: "i < length xs" and y_def: "y = xs ! i" using in_set_conv_nth by metis + hence "rank_of (xs ! i) (set xs) < k+1" + using a y_def i_bound by (simp add: least_def) + hence "i < k+1" + using rank_conv i_bound by blast + hence "i \ k" by linarith + hence "xs ! i \ xs ! k" + using s_xs i_bound k_bound sorted_nth_mono by blast + thus "y \ xs ! k" using y_def by simp + qed + + moreover have "xs ! k \ least (k+1) (set xs)" + using k_bound rank_conv by (simp add:least_def) + + ultimately have "Max (least (k+1) (set xs)) = xs ! k" + by (intro Max_eqI finite_subset[OF least_subset], auto) + + hence "nth_mset k A = Max (K_Smallest.least (Suc k) (set xs))" + by (simp add:nth_mset_def xs_def[symmetric]) + also have "... = Max (least (k+1) (set_mset A))" + by (simp add:A_def) + finally show "nth_mset k A = Max (least (k+1) (set_mset A))" by simp + + have "k + 1 = card ((\i. xs ! i) ` {0..k})" + by (subst card_image[OF inj_xs], simp) + also have "... \ card (least (k+1) (set xs))" + using rank_conv k_bound + by (intro card_mono image_subsetI finite_subset[OF least_subset], simp_all add:least_def) + finally have "card (least (k+1) (set xs)) \ k+1" by simp + moreover have "card (least (k+1) (set xs)) \ k+1" + by (subst card_least, simp, simp) + ultimately have "card (least (k+1) (set xs)) = k+1" by simp + thus "card (least (k+1) (set_mset A)) = k+1" by (simp add:A_def) +qed + +end diff --git a/thys/Frequency_Moments/Landau_Ext.thy b/thys/Frequency_Moments/Landau_Ext.thy new file mode 100644 --- /dev/null +++ b/thys/Frequency_Moments/Landau_Ext.thy @@ -0,0 +1,243 @@ +section \Landau Symbols\ + +theory Landau_Ext + imports + "HOL-Library.Landau_Symbols" + "HOL.Topological_Spaces" +begin + +text \This section contains results about Landau Symbols in addition to "HOL-Library.Landau".\ + +lemma landau_sum: + assumes "eventually (\x. g1 x \ (0::real)) F" + assumes "eventually (\x. g2 x \ 0) F" + assumes "f1 \ O[F](g1)" + assumes "f2 \ O[F](g2)" + shows "(\x. f1 x + f2 x) \ O[F](\x. g1 x + g2 x)" +proof - + obtain c1 where a1: "c1 > 0" and b1: "eventually (\x. abs (f1 x) \ c1 * abs (g1 x)) F" + using assms(3) by (simp add:bigo_def, blast) + obtain c2 where a2: "c2 > 0" and b2: "eventually (\x. abs (f2 x) \ c2 * abs (g2 x)) F" + using assms(4) by (simp add:bigo_def, blast) + have "eventually (\x. abs (f1 x + f2 x) \ (max c1 c2) * abs (g1 x + g2 x)) F" + proof (rule eventually_mono[OF eventually_conj[OF b1 eventually_conj[OF b2 eventually_conj[OF assms(1,2)]]]]) + fix x + assume a: "\f1 x\ \ c1 * \g1 x\ \ \f2 x\ \ c2 * \g2 x\ \ 0 \ g1 x \ 0 \ g2 x" + have "\f1 x + f2 x\ \ \f1 x \ + \f2 x\" using abs_triangle_ineq by blast + also have "... \ c1 * \g1 x\ + c2 * \g2 x\" using a add_mono by blast + also have "... \ max c1 c2 * \g1 x\ + max c1 c2 * \g2 x\" + by (intro add_mono mult_right_mono) auto + also have "... = max c1 c2 * (\g1 x\ + \g2 x\)" + by (simp add:algebra_simps) + also have "... \ max c1 c2 * (\g1 x + g2 x\)" + using a a1 a2 by (intro mult_left_mono) auto + finally show "\f1 x + f2 x\ \ max c1 c2 * \g1 x + g2 x\" + by (simp add:algebra_simps) + qed + hence " 0 < max c1 c2 \ (\\<^sub>F x in F. \f1 x + f2 x\ \ max c1 c2 * \g1 x + g2 x\)" + using a1 a2 by linarith + thus ?thesis + by (simp add: bigo_def, blast) +qed + +lemma landau_sum_1: + assumes "eventually (\x. g1 x \ (0::real)) F" + assumes "eventually (\x. g2 x \ 0) F" + assumes "f \ O[F](g1)" + shows "f \ O[F](\x. g1 x + g2 x)" +proof - + have "f = (\x. f x + 0)" by simp + also have "... \ O[F](\x. g1 x + g2 x)" + using assms zero_in_bigo by (intro landau_sum) + finally show ?thesis by simp +qed + +lemma landau_sum_2: + assumes "eventually (\x. g1 x \ (0::real)) F" + assumes "eventually (\x. g2 x \ 0) F" + assumes "f \ O[F](g2)" + shows "f \ O[F](\x. g1 x + g2 x)" +proof - + have "f = (\x. 0 + f x)" by simp + also have "... \ O[F](\x. g1 x + g2 x)" + using assms zero_in_bigo by (intro landau_sum) + finally show ?thesis by simp +qed + +lemma landau_ln_3: + assumes "eventually (\x. (1::real) \ f x) F" + assumes "f \ O[F](g)" + shows "(\x. ln (f x)) \ O[F](g)" +proof - + have "1 \ x \ \ln x\ \ \x\" for x :: real + using ln_bound by auto + hence "(\x. ln (f x)) \ O[F](f)" + by (intro landau_o.big_mono eventually_mono[OF assms(1)]) simp + thus ?thesis + using assms(2) landau_o.big_trans by blast +qed + +lemma landau_ln_2: + assumes "a > (1::real)" + assumes "eventually (\x. 1 \ f x) F" + assumes "eventually (\x. a \ g x) F" + assumes "f \ O[F](g)" + shows "(\x. ln (f x)) \ O[F](\x. ln (g x))" +proof - + obtain c where a: "c > 0" and b: "eventually (\x. abs (f x) \ c * abs (g x)) F" + using assms(4) by (simp add:bigo_def, blast) + define d where "d = 1 + (max 0 (ln c)) / ln a" + have d:"eventually (\x. abs (ln (f x)) \ d * abs (ln (g x))) F" + proof (rule eventually_mono[OF eventually_conj[OF b eventually_conj[OF assms(3,2)]]]) + fix x + assume c:"\f x\ \ c * \g x\ \ a \ g x \ 1 \ f x" + have "abs (ln (f x)) = ln (f x)" + by (subst abs_of_nonneg, rule ln_ge_zero, metis c, simp) + also have "... \ ln (c * abs (g x))" + using c assms(1) mult_pos_pos[OF a] by auto + also have "... \ ln c + ln (abs (g x))" + using c assms(1) + by (simp add: ln_mult[OF a]) + also have "... \ (d-1)*ln a + ln (g x)" + using assms(1) c + by (intro add_mono iffD2[OF ln_le_cancel_iff], simp_all add:d_def) + also have "... \ (d-1)* ln (g x) + ln (g x)" + using assms(1) c + by (intro add_mono mult_left_mono iffD2[OF ln_le_cancel_iff], simp_all add:d_def) + also have "... = d * ln (g x)" by (simp add:algebra_simps) + also have "... = d * abs (ln (g x))" + using c assms(1) by auto + finally show "abs (ln (f x)) \ d * abs (ln (g x))" by simp + qed + hence "\\<^sub>F x in F. \ln (f x)\ \ d * \ln (g x)\" + by simp + moreover have "0 < d" + unfolding d_def using assms(1) + by (intro add_pos_nonneg divide_nonneg_pos, auto) + ultimately show ?thesis + by (auto simp:bigo_def) +qed + +lemma landau_real_nat: + fixes f :: "'a \ int" + assumes "(\x. of_int (f x)) \ O[F](g)" + shows "(\x. real (nat (f x))) \ O[F](g)" +proof - + obtain c where a: "c > 0" and b: "eventually (\x. abs (of_int (f x)) \ c * abs (g x)) F" + using assms(1) by (simp add:bigo_def, blast) + have "\\<^sub>F x in F. real (nat (f x)) \ c * \g x\" + by (rule eventually_mono[OF b], simp) + thus ?thesis using a + by (auto simp:bigo_def) +qed + +lemma landau_ceil: + assumes "(\_. 1) \ O[F'](g)" + assumes "f \ O[F'](g)" + shows "(\x. real_of_int \f x\) \ O[F'](g)" +proof - + have "(\x. real_of_int \f x\) \ O[F'](\x. 1 + abs (f x))" + by (intro landau_o.big_mono always_eventually allI, simp, linarith) + also have "(\x. 1 + abs(f x)) \ O[F'](g)" + using assms(2) by (intro sum_in_bigo assms(1), auto) + finally show ?thesis by simp +qed + +lemma landau_rat_ceil: + assumes "(\_. 1) \ O[F'](g)" + assumes "(\x. real_of_rat (f x)) \ O[F'](g)" + shows "(\x. real_of_int \f x\) \ O[F'](g)" +proof - + have a:"\real_of_int \x\\ \ 1 + real_of_rat \x\" for x :: rat + proof (cases "x \ 0") + case True + then show ?thesis + by (simp, metis add.commute of_int_ceiling_le_add_one of_rat_ceiling) + next + case False + have "real_of_rat x - 1 \ real_of_rat x" + by simp + also have "... \ real_of_int \x\" + by (metis ceiling_correct of_rat_ceiling) + finally have " real_of_rat (x)-1 \ real_of_int \x\" by simp + + hence "- real_of_int \x\ \ 1 + real_of_rat (- x)" + by (simp add: of_rat_minus) + then show ?thesis using False by simp + qed + have "(\x. real_of_int \f x\) \ O[F'](\x. 1 + abs (real_of_rat (f x)))" + using a + by (intro landau_o.big_mono always_eventually allI, simp) + also have "(\x. 1 + abs (real_of_rat (f x))) \ O[F'](g)" + using assms + by (intro sum_in_bigo assms(1), subst landau_o.big.abs_in_iff, simp) + finally show ?thesis by simp +qed + +lemma landau_nat_ceil: + assumes "(\_. 1) \ O[F'](g)" + assumes "f \ O[F'](g)" + shows "(\x. real (nat \f x\)) \ O[F'](g)" + using assms + by (intro landau_real_nat landau_ceil, auto) + +lemma eventually_prod1': + assumes "B \ bot" + assumes " (\\<^sub>F x in A. P x)" + shows "(\\<^sub>F x in A \\<^sub>F B. P (fst x))" +proof - + have "(\\<^sub>F x in A \\<^sub>F B. P (fst x)) = (\\<^sub>F (x,y) in A \\<^sub>F B. P x)" + by (simp add:case_prod_beta') + also have "... = (\\<^sub>F x in A. P x)" + by (subst eventually_prod1[OF assms(1)], simp) + finally show ?thesis using assms(2) by simp +qed + +lemma eventually_prod2': + assumes "A \ bot" + assumes " (\\<^sub>F x in B. P x)" + shows "(\\<^sub>F x in A \\<^sub>F B. P (snd x))" +proof - + have "(\\<^sub>F x in A \\<^sub>F B. P (snd x)) = (\\<^sub>F (x,y) in A \\<^sub>F B. P y)" + by (simp add:case_prod_beta') + also have "... = (\\<^sub>F x in B. P x)" + by (subst eventually_prod2[OF assms(1)], simp) + finally show ?thesis using assms(2) by simp +qed + +lemma sequentially_inf: "\\<^sub>F x in sequentially. n \ real x" + by (meson eventually_at_top_linorder nat_ceiling_le_eq) + +instantiation rat :: linorder_topology +begin + +definition open_rat :: "rat set \ bool" + where "open_rat = generate_topology (range (\a. {..< a}) \ range (\a. {a <..}))" + +instance + by standard (rule open_rat_def) +end + +lemma inv_at_right_0_inf: + "\\<^sub>F x in at_right 0. c \ 1 / real_of_rat x" +proof - + have a:" c \ 1 / real_of_rat x" if b:" x \ {0<..<1 / rat_of_int (max \c\ 1)}" for x + proof - + have "c * real_of_rat x \ real_of_int (max \c\ 1) * real_of_rat x" + using b by (intro mult_right_mono, linarith, auto) + also have "... < real_of_int (max \c\ 1) * real_of_rat (1/rat_of_int (max \c\ 1) )" + using b by (intro mult_strict_left_mono iffD2[OF of_rat_less], auto) + also have "... \ 1" + by (simp add:of_rat_divide) + finally have "c * real_of_rat x \ 1" by simp + moreover have "0 < real_of_rat x" + using b by simp + ultimately show ?thesis by (subst pos_le_divide_eq, auto) + qed + + show ?thesis + using a + by (intro eventually_at_rightI[where b="1/rat_of_int (max \c\ 1)"], simp_all) +qed + +end \ No newline at end of file diff --git a/thys/Frequency_Moments/Probability_Ext.thy b/thys/Frequency_Moments/Probability_Ext.thy new file mode 100644 --- /dev/null +++ b/thys/Frequency_Moments/Probability_Ext.thy @@ -0,0 +1,328 @@ +section \Probability Spaces\ + +text \Some additional results about probability spaces in addition to "HOL-Probability".\ + +theory Probability_Ext + imports + "HOL-Probability.Stream_Space" + Universal_Hash_Families.Carter_Wegman_Hash_Family + Frequency_Moments_Preliminary_Results +begin + +text \Random variables that depend on disjoint sets of the components of a product space are +independent.\ + +lemma make_ext: + assumes "\x. P x = P (restrict x I)" + shows "(\x \ Pi I A. P x) = (\x \ PiE I A. P x)" + using assms by (simp add:PiE_def Pi_def set_eq_iff, force) + +lemma PiE_reindex: + assumes "inj_on f I" + shows "PiE I (A \ f) = (\a. restrict (a \ f) I) ` PiE (f ` I) A" (is "?lhs = ?g ` ?rhs") +proof - + have "?lhs \ ?g` ?rhs" + proof (rule subsetI) + fix x + assume a:"x \ Pi\<^sub>E I (A \ f)" + define y where y_def: "y = (\k. if k \ f ` I then x (the_inv_into I f k) else undefined)" + have b:"y \ PiE (f ` I) A" + using a assms the_inv_into_f_eq[OF assms] + by (simp add: y_def PiE_iff extensional_def) + have c: "x = (\a. restrict (a \ f) I) y" + using a assms the_inv_into_f_eq extensional_arb + by (intro ext, simp add:y_def PiE_iff, fastforce) + show "x \ ?g ` ?rhs" using b c by blast + qed + moreover have "?g ` ?rhs \ ?lhs" + by (rule image_subsetI, simp add:Pi_def PiE_def) + ultimately show ?thesis by blast +qed + +context prob_space +begin + +lemma indep_sets_reindex: + assumes "inj_on f I" + shows "indep_sets A (f ` I) = indep_sets (\i. A (f i)) I" +proof - + have a:"\J g. J \ I \ (\j \ f ` J. g j) = (\j \ J. g (f j))" + by (metis assms prod.reindex_cong subset_inj_on) + + have "J \ I \ (\\<^sub>E i \ J. A (f i)) = (\a. restrict (a \ f) J) ` PiE (f ` J) A" for J + using assms inj_on_subset + by (subst PiE_reindex[symmetric]) auto + + hence b:"\P J. J \ I \ (\x. P x = P (restrict x J)) \ (\A' \ \\<^sub>E i \ J. A (f i). P A') = (\A'\PiE (f ` J) A. P (A' \ f))" + by simp + + have c:"\J. J \ I \ finite (f ` J) = finite J" + by (meson assms finite_image_iff inj_on_subset) + + show ?thesis + by (simp add:indep_sets_def all_subset_image a c) + (simp add:make_ext b cong:restrict_cong)+ +qed + +lemma indep_vars_cong_AE: + assumes "AE x in M. (\i \ I. X' i x = Y' i x)" + assumes "indep_vars M' X' I" + assumes "\i. i \ I \ random_variable (M' i) (Y' i)" + shows "indep_vars M' Y' I" +proof (cases "I \ {}") + case True + + have a: "AE x in M. (\i\I. Y' i x) = (\i\I. X' i x)" + by (rule AE_mp[OF assms(1)], rule AE_I2, simp cong:restrict_cong) + have b: "\i. i \ I \ random_variable (M' i) (X' i)" + using assms(2) by (simp add:indep_vars_def2) + have c: "\x. x \ I \ AE xa in M. X' x xa = Y' x xa" + by (rule AE_mp[OF assms(1)], rule AE_I2, simp) + + have "distr M (Pi\<^sub>M I M') (\x. \i\I. Y' i x) = distr M (Pi\<^sub>M I M') (\x. \i\I. X' i x)" + by (intro distr_cong_AE measurable_restrict a b assms(3)) auto + also have "... = Pi\<^sub>M I (\i. distr M (M' i) (X' i))" + using assms True b by (subst indep_vars_iff_distr_eq_PiM'[symmetric]) auto + also have "... = Pi\<^sub>M I (\i. distr M (M' i) (Y' i))" + by (intro PiM_cong distr_cong_AE c assms(3) b) auto + finally have "distr M (Pi\<^sub>M I M') (\x. \i\I. Y' i x) = Pi\<^sub>M I (\i. distr M (M' i) (Y' i))" + by simp + + thus ?thesis + using True assms(3) + by (subst indep_vars_iff_distr_eq_PiM') auto +next + case False + then show ?thesis + by (simp add:indep_vars_def2 indep_sets_def) +qed + +lemma indep_vars_reindex: + assumes "inj_on f I" + assumes "indep_vars M' X' (f ` I)" + shows "indep_vars (M' \ f) (\k \. X' (f k) \) I" + using assms by (simp add:indep_vars_def2 indep_sets_reindex) + +lemma variance_divide: + fixes f :: "'a \ real" + assumes "integrable M f" + shows "variance (\\. f \ / r) = variance f / r^2" + using assms + by (subst Bochner_Integration.integral_divide[OF assms(1)]) + (simp add:diff_divide_distrib[symmetric] power2_eq_square algebra_simps) + +lemma pmf_mono: + assumes "M = measure_pmf p" + assumes "\x. x \ P \ x \ set_pmf p \ x \ Q" + shows "prob P \ prob Q" +proof - + have "prob P = prob (P \ (set_pmf p))" + by (rule measure_pmf_eq[OF assms(1)], blast) + also have "... \ prob Q" + using assms by (intro finite_measure.finite_measure_mono, auto) + finally show ?thesis by simp +qed + +lemma pmf_add: + assumes "M = measure_pmf p" + assumes "\x. x \ P \ x \ set_pmf p \ x \ Q \ x \ R" + shows "prob P \ prob Q + prob R" +proof - + have [simp]:"events = UNIV" by (subst assms(1), simp) + have "prob P \ prob (Q \ R)" + using assms by (intro pmf_mono[OF assms(1)], blast) + also have "... \ prob Q + prob R" + by (rule measure_subadditive, auto) + finally show ?thesis by simp +qed + +lemma pmf_add_2: + assumes "M = measure_pmf p" + assumes "prob {\. P \} \ r1" + assumes "prob {\. Q \} \ r2" + shows "prob {\. P \ \ Q \} \ r1 + r2" (is "?lhs \ ?rhs") +proof - + have "?lhs \ prob {\. P \} + prob {\. Q \}" + by (intro pmf_add[OF assms(1)], auto) + also have "... \ ?rhs" + by (intro add_mono assms(2-3)) + finally show ?thesis + by simp +qed + +definition covariance where + "covariance f g = expectation (\\. (f \ - expectation f) * (g \ - expectation g))" + +lemma real_prod_integrable: + fixes f g :: "'a \ real" + assumes [measurable]: "f \ borel_measurable M" "g \ borel_measurable M" + assumes sq_int: "integrable M (\\. f \^2)" "integrable M (\\. g \^2)" + shows "integrable M (\\. f \ * g \)" + unfolding integrable_iff_bounded +proof + have "(\\<^sup>+ \. ennreal (norm (f \ * g \)) \M)\<^sup>2 = (\\<^sup>+ \. ennreal \f \\ * ennreal \g \\ \M)\<^sup>2" + by (simp add: abs_mult ennreal_mult) + also have "... \ (\\<^sup>+ \. ennreal \f \\^2 \M) * (\\<^sup>+ \. ennreal \g \\^2 \M)" + by (rule Cauchy_Schwarz_nn_integral, auto) + also have "... < \" + using sq_int by (auto simp: integrable_iff_bounded ennreal_power ennreal_mult_less_top) + finally have "(\\<^sup>+ x. ennreal (norm (f x * g x)) \M)\<^sup>2 < \" + by simp + thus "(\\<^sup>+ x. ennreal (norm (f x * g x)) \M) < \" + by (simp add: power_less_top_ennreal) +qed auto + +lemma covariance_eq: + fixes f :: "'a \ real" + assumes "f \ borel_measurable M" "g \ borel_measurable M" + assumes "integrable M (\\. f \^2)" "integrable M (\\. g \^2)" + shows "covariance f g = expectation (\\. f \ * g \) - expectation f * expectation g" +proof - + have "integrable M f" using square_integrable_imp_integrable assms by auto + moreover have "integrable M g" using square_integrable_imp_integrable assms by auto + ultimately show ?thesis + using assms real_prod_integrable + by (simp add:covariance_def algebra_simps prob_space) +qed + +lemma covar_integrable: + fixes f g :: "'a \ real" + assumes "f \ borel_measurable M" "g \ borel_measurable M" + assumes "integrable M (\\. f \^2)" "integrable M (\\. g \^2)" + shows "integrable M (\\. (f \ - expectation f) * (g \ - expectation g))" +proof - + have "integrable M f" using square_integrable_imp_integrable assms by auto + moreover have "integrable M g" using square_integrable_imp_integrable assms by auto + ultimately show ?thesis using assms real_prod_integrable by (simp add: algebra_simps) +qed + +lemma sum_square_int: + fixes f :: "'b \ 'a \ real" + assumes "finite I" + assumes "\i. i \ I \ f i \ borel_measurable M" + assumes "\i. i \ I \ integrable M (\\. f i \^2)" + shows "integrable M (\\. (\i \ I. f i \)\<^sup>2)" +proof - + have " integrable M (\\. \i\I. \j\I. f j \ * f i \)" + using assms + by (intro Bochner_Integration.integrable_sum real_prod_integrable, auto) + thus ?thesis + by (simp add:power2_eq_square sum_distrib_left sum_distrib_right) +qed + +lemma var_sum_1: + fixes f :: "'b \ 'a \ real" + assumes "finite I" + assumes "\i. i \ I \ f i \ borel_measurable M" + assumes "\i. i \ I \ integrable M (\\. f i \^2)" + shows + "variance (\\. (\i \ I. f i \)) = (\i \ I. (\j \ I. covariance (f i) (f j)))" +proof - + have a:"\i j. i \ I \ j \ I \ integrable M (\\. (f i \ - expectation (f i)) * (f j \ - expectation (f j)))" + using assms covar_integrable by simp + have "variance (\\. (\i \ I. f i \)) = expectation (\\. (\i\I. f i \ - expectation (f i))\<^sup>2)" + using square_integrable_imp_integrable[OF assms(2,3)] + by (simp add: Bochner_Integration.integral_sum sum_subtractf) + also have "... = expectation (\\. (\i \ I. (\j \ I. (f i \ - expectation (f i)) * (f j \ - expectation (f j)))))" + by (simp add: power2_eq_square sum_distrib_right sum_distrib_left mult.commute) + also have "... = (\i \ I. (\j \ I. covariance (f i) (f j)))" + using a by (simp add: Bochner_Integration.integral_sum covariance_def) + finally show ?thesis by simp +qed + +lemma covar_self_eq: + fixes f :: "'a \ real" + shows "covariance f f = variance f" + by (simp add:covariance_def power2_eq_square) + +lemma covar_indep_eq_zero: + fixes f g :: "'a \ real" + assumes "integrable M f" + assumes "integrable M g" + assumes "indep_var borel f borel g" + shows "covariance f g = 0" +proof - + have a:"indep_var borel ((\t. t - expectation f) \ f) borel ((\t. t - expectation g) \ g)" + by (rule indep_var_compose[OF assms(3)], auto) + + have b:"expectation (\\. (f \ - expectation f) * (g \ - expectation g)) = 0" + using a assms by (subst indep_var_lebesgue_integral, auto simp add:comp_def prob_space) + + thus ?thesis by (simp add:covariance_def) +qed + +lemma var_sum_2: + fixes f :: "'b \ 'a \ real" + assumes "finite I" + assumes "\i. i \ I \ f i \ borel_measurable M" + assumes "\i. i \ I \ integrable M (\\. f i \^2)" + shows "variance (\\. (\i \ I. f i \)) = + (\i \ I. variance (f i)) + (\i \ I. \j \ I - {i}. covariance (f i) (f j))" +proof - + have "variance (\\. (\i \ I. f i \)) = (\i\I. \j\I. covariance (f i) (f j))" + by (simp add: var_sum_1[OF assms(1,2,3)]) + also have "... = (\i\I. covariance (f i) (f i) + (\j\I-{i}. covariance (f i) (f j)))" + using assms by (subst sum.insert[symmetric], auto simp add:insert_absorb) + also have "... = (\i\I. variance (f i)) + (\i \ I. (\j\I-{i}. covariance (f i) (f j)))" + by (simp add: covar_self_eq[symmetric] sum.distrib) + finally show ?thesis by simp +qed + +lemma var_sum_pairwise_indep: + fixes f :: "'b \ 'a \ real" + assumes "finite I" + assumes "\i. i \ I \ f i \ borel_measurable M" + assumes "\i. i \ I \ integrable M (\\. f i \^2)" + assumes "\i j. i \ I \ j \ I \ i \ j \ indep_var borel (f i) borel (f j)" + shows "variance (\\. (\i \ I. f i \)) = (\i \ I. variance (f i))" +proof - + have "\i j. i \ I \ j \ I - {i} \ covariance (f i) (f j) = 0" + using covar_indep_eq_zero assms(4) square_integrable_imp_integrable[OF assms(2,3)] by auto + hence a:"(\i \ I. \j \ I - {i}. covariance (f i) (f j)) = 0" + by simp + thus ?thesis by (simp add: var_sum_2[OF assms(1,2,3)]) +qed + +lemma indep_var_from_indep_vars: + assumes "i \ j" + assumes "indep_vars (\_. M') f {i, j}" + shows "indep_var M' (f i) M' (f j)" +proof - + have a:"inj (case_bool i j)" using assms(1) + by (simp add: bool.case_eq_if inj_def) + have b:"range (case_bool i j) = {i,j}" + by (simp add: UNIV_bool insert_commute) + have c:"indep_vars (\_. M') f (range (case_bool i j))" using assms(2) b by simp + + have "True = indep_vars (\x. M') (\x. f (case_bool i j x)) UNIV" + using indep_vars_reindex[OF a c] + by (simp add:comp_def) + also have "... = indep_vars (\x. case_bool M' M' x) (\x. case_bool (f i) (f j) x) UNIV" + by (rule indep_vars_cong, auto simp:bool.case_distrib bool.case_eq_if) + also have "... = ?thesis" + by (simp add: indep_var_def) + finally show ?thesis by simp +qed + +lemma var_sum_pairwise_indep_2: + fixes f :: "'b \ 'a \ real" + assumes "finite I" + assumes "\i. i \ I \ f i \ borel_measurable M" + assumes "\i. i \ I \ integrable M (\\. f i \^2)" + assumes "\J. J \ I \ card J = 2 \ indep_vars (\ _. borel) f J" + shows "variance (\\. (\i \ I. f i \)) = (\i \ I. variance (f i))" + using assms(4) + by (intro var_sum_pairwise_indep[OF assms(1,2,3)] indep_var_from_indep_vars, auto) + +lemma var_sum_all_indep: + fixes f :: "'b \ 'a \ real" + assumes "finite I" + assumes "\i. i \ I \ f i \ borel_measurable M" + assumes "\i. i \ I \ integrable M (\\. f i \^2)" + assumes "indep_vars (\ _. borel) f I" + shows "variance (\\. (\i \ I. f i \)) = (\i \ I. variance (f i))" + by (intro var_sum_pairwise_indep_2[OF assms(1,2,3)] indep_vars_subset[OF assms(4)], auto) + +end + +end diff --git a/thys/Frequency_Moments/Product_PMF_Ext.thy b/thys/Frequency_Moments/Product_PMF_Ext.thy new file mode 100644 --- /dev/null +++ b/thys/Frequency_Moments/Product_PMF_Ext.thy @@ -0,0 +1,208 @@ +section \Indexed Products of Probability Mass Functions\ + +theory Product_PMF_Ext + imports Main Probability_Ext "HOL-Probability.Product_PMF" Universal_Hash_Families.Preliminary_Results +begin + +text \This section introduces a restricted version of @{term "Pi_pmf"} where the default value is @{term "undefined"} +and contains some additional results about that case in addition to @{theory "HOL-Probability.Product_PMF"}\ + +abbreviation prod_pmf where "prod_pmf I M \ Pi_pmf I undefined M" + +lemma pmf_prod_pmf: + assumes "finite I" + shows "pmf (prod_pmf I M) x = (if x \ extensional I then \i \ I. (pmf (M i)) (x i) else 0)" + by (simp add: pmf_Pi[OF assms(1)] extensional_def) + +lemma PiE_defaut_undefined_eq: "PiE_dflt I undefined M = PiE I M" + by (simp add:PiE_dflt_def PiE_def extensional_def Pi_def set_eq_iff) blast + +lemma set_prod_pmf: + assumes "finite I" + shows "set_pmf (prod_pmf I M) = PiE I (set_pmf \ M)" + by (simp add:set_Pi_pmf[OF assms] PiE_defaut_undefined_eq) + +text \A more general version of @{thm [source] "measure_Pi_pmf_Pi"}.\ +lemma prob_prod_pmf': + assumes "finite I" + assumes "J \ I" + shows "measure (measure_pmf (Pi_pmf I d M)) (Pi J A) = (\ i \ J. measure (M i) (A i))" +proof - + have a:"Pi J A = Pi I (\i. if i \ J then A i else UNIV)" + using assms by (simp add:Pi_def set_eq_iff, blast) + show ?thesis + using assms + by (simp add:if_distrib a measure_Pi_pmf_Pi[OF assms(1)] prod.If_cases[OF assms(1)] Int_absorb1) +qed + +lemma prob_prod_pmf_slice: + assumes "finite I" + assumes "i \ I" + shows "measure (measure_pmf (prod_pmf I M)) {\. P (\ i)} = measure (M i) {\. P \}" + using prob_prod_pmf'[OF assms(1), where J="{i}" and M="M" and A="\_. Collect P"] + by (simp add:assms Pi_def) + + +definition restrict_dfl where "restrict_dfl f A d = (\x. if x \ A then f x else d)" + +lemma pi_pmf_decompose: + assumes "finite I" + shows "Pi_pmf I d M = map_pmf (\\. restrict_dfl (\i. \ (f i) i) I d) (Pi_pmf (f ` I) (\_. d) (\j. Pi_pmf (f -` {j} \ I) d M))" +proof - + have fin_F_I:"finite (f ` I)" using assms by blast + + have "finite I \ ?thesis" + using fin_F_I + proof (induction "f ` I" arbitrary: I rule:finite_induct) + case empty + then show ?case by (simp add:restrict_dfl_def) + next + case (insert x F) + have a: "(f -` {x} \ I) \ (f -` F \ I) = I" + using insert(4) by blast + have b: "F = f ` (f -` F \ I) " using insert(2,4) + by (auto simp add:set_eq_iff image_def vimage_def) + have c: "finite (f -` F \ I)" using insert by blast + have d: "\j. j \ F \ (f -` {j} \ (f -` F \ I)) = (f -` {j} \ I)" + using insert(4) by blast + + have " Pi_pmf I d M = Pi_pmf ((f -` {x} \ I) \ (f -` F \ I)) d M" + by (simp add:a) + also have "... = map_pmf (\(g, h) i. if i \ f -` {x} \ I then g i else h i) + (pair_pmf (Pi_pmf (f -` {x} \ I) d M) (Pi_pmf (f -` F \ I) d M))" + using insert by (subst Pi_pmf_union) auto + also have "... = map_pmf (\(g,h) i. if f i = x \ i \ I then g i else if f i \ F \ i \ I then h (f i) i else d) + (pair_pmf (Pi_pmf (f -` {x} \ I) d M) (Pi_pmf F (\_. d) (\j. Pi_pmf (f -` {j} \ (f -` F \ I)) d M)))" + by (simp add:insert(3)[OF b c] map_pmf_comp case_prod_beta' apsnd_def map_prod_def + pair_map_pmf2 b[symmetric] restrict_dfl_def) (metis fst_conv snd_conv) + also have "... = map_pmf (\(g,h) i. if i \ I then (h(x := g)) (f i) i else d) + (pair_pmf (Pi_pmf (f -` {x} \ I) d M) (Pi_pmf F (\_. d) (\j. Pi_pmf (f -` {j} \ I) d M)))" + using insert(4) d + by (intro arg_cong2[where f="map_pmf"] ext) (auto simp add:case_prod_beta' cong:Pi_pmf_cong) + also have "... = map_pmf (\\ i. if i \ I then \ (f i) i else d) (Pi_pmf (insert x F) (\_. d) (\j. Pi_pmf (f -` {j} \ I) d M))" + by (simp add:Pi_pmf_insert[OF insert(1,2)] map_pmf_comp case_prod_beta') + finally show ?case by (simp add:insert(4) restrict_dfl_def) + qed + thus ?thesis using assms by blast +qed + +lemma restrict_dfl_iter: "restrict_dfl (restrict_dfl f I d) J d = restrict_dfl f (I \ J) d" + by (rule ext, simp add:restrict_dfl_def) + +lemma indep_vars_restrict': + assumes "finite I" + shows "prob_space.indep_vars (Pi_pmf I d M) (\_. discrete) (\i \. restrict_dfl \ (f -` {i} \ I) d) (f ` I)" +proof - + let ?Q = "(Pi_pmf (f ` I) (\_. d) (\i. Pi_pmf (I \ f -` {i}) d M))" + have a:"prob_space.indep_vars ?Q (\_. discrete) (\i \. \ i) (f ` I)" + using assms by (intro indep_vars_Pi_pmf, blast) + have b: "AE x in measure_pmf ?Q. \i\f ` I. x i = restrict_dfl (\i. x (f i) i) (I \ f -` {i}) d" + using assms + by (auto simp add:PiE_dflt_def restrict_dfl_def AE_measure_pmf_iff set_Pi_pmf comp_def Int_commute) + have "prob_space.indep_vars ?Q (\_. discrete) (\i x. restrict_dfl (\i. x (f i) i) (I \ f -` {i}) d) (f ` I)" + by (rule prob_space.indep_vars_cong_AE[OF prob_space_measure_pmf b a], simp) + thus ?thesis + using prob_space_measure_pmf + by (auto intro!:prob_space.indep_vars_distr simp:pi_pmf_decompose[OF assms, where f="f"] + map_pmf_rep_eq comp_def restrict_dfl_iter Int_commute) +qed + +lemma indep_vars_restrict_intro': + assumes "finite I" + assumes "\i \. i \ J \ X' i \ = X' i (restrict_dfl \ (f -` {i} \ I) d)" + assumes "J = f ` I" + assumes "\\ i. i \ J \ X' i \ \ space (M' i)" + shows "prob_space.indep_vars (measure_pmf (Pi_pmf I d p)) M' (\i \. X' i \) J" +proof - + define M where "M \ measure_pmf (Pi_pmf I d p)" + interpret prob_space "M" + using M_def prob_space_measure_pmf by blast + have "indep_vars (\_. discrete) (\i x. restrict_dfl x (f -` {i} \ I) d) (f ` I)" + unfolding M_def by (rule indep_vars_restrict'[OF assms(1)]) + hence "indep_vars M' (\i \. X' i (restrict_dfl \ ( f -` {i} \ I) d)) (f ` I)" + using assms(4) + by (intro indep_vars_compose2[where Y="X'" and N="M'" and M'="\_. discrete"]) (auto simp:assms(3)) + hence "indep_vars M' (\i \. X' i \) (f ` I)" + using assms(2)[symmetric] + by (simp add:assms(3) cong:indep_vars_cong) + thus ?thesis + unfolding M_def using assms(3) by simp +qed + +lemma + fixes f :: "'b \ ('c :: {second_countable_topology,banach,real_normed_field})" + assumes "finite I" + assumes "i \ I" + assumes "integrable (measure_pmf (M i)) f" + shows integrable_Pi_pmf_slice: "integrable (Pi_pmf I d M) (\x. f (x i))" + and expectation_Pi_pmf_slice: "integral\<^sup>L (Pi_pmf I d M) (\x. f (x i)) = integral\<^sup>L (M i) f" +proof - + have a:"distr (Pi_pmf I d M) (M i) (\\. \ i) = distr (Pi_pmf I d M) discrete (\\. \ i)" + by (rule distr_cong, auto) + + have b: "measure_pmf.random_variable (M i) borel f" + using assms(3) by simp + + have c:"integrable (distr (Pi_pmf I d M) (M i) (\\. \ i)) f" + using assms(1,2,3) + by (subst a, subst map_pmf_rep_eq[symmetric], subst Pi_pmf_component, auto) + + show "integrable (Pi_pmf I d M) (\x. f (x i))" + by (rule integrable_distr[where f="f" and M'="M i"]) (auto intro: c) + + have "integral\<^sup>L (Pi_pmf I d M) (\x. f (x i)) = integral\<^sup>L (distr (Pi_pmf I d M) (M i) (\\. \ i)) f" + using b by (intro integral_distr[symmetric], auto) + also have "... = integral\<^sup>L (map_pmf (\\. \ i) (Pi_pmf I d M)) f" + by (subst a, subst map_pmf_rep_eq[symmetric], simp) + also have "... = integral\<^sup>L (M i) f" + using assms(1,2) by (simp add: Pi_pmf_component) + finally show "integral\<^sup>L (Pi_pmf I d M) (\x. f (x i)) = integral\<^sup>L (M i) f" by simp +qed + +text \This is an improved version of @{thm [source] "expectation_prod_Pi_pmf"}. +It works for general normed fields instead of non-negative real functions .\ + +lemma expectation_prod_Pi_pmf: + fixes f :: "'a \ 'b \ ('c :: {second_countable_topology,banach,real_normed_field})" + assumes "finite I" + assumes "\i. i \ I \ integrable (measure_pmf (M i)) (f i)" + shows "integral\<^sup>L (Pi_pmf I d M) (\x. (\i \ I. f i (x i))) = (\ i \ I. integral\<^sup>L (M i) (f i))" +proof - + have a: "prob_space.indep_vars (measure_pmf (Pi_pmf I d M)) (\_. borel) (\i \. f i (\ i)) I" + by (intro prob_space.indep_vars_compose2[where Y="f" and M'="\_. discrete"] + prob_space_measure_pmf indep_vars_Pi_pmf assms(1)) auto + have "integral\<^sup>L (Pi_pmf I d M) (\x. (\i \ I. f i (x i))) = (\ i \ I. integral\<^sup>L (Pi_pmf I d M) (\x. f i (x i)))" + by (intro prob_space.indep_vars_lebesgue_integral prob_space_measure_pmf assms(1,2) + a integrable_Pi_pmf_slice) auto + also have "... = (\ i \ I. integral\<^sup>L (M i) (f i))" + by (intro prod.cong expectation_Pi_pmf_slice assms(1,2)) auto + finally show ?thesis by simp +qed + +lemma variance_prod_pmf_slice: + fixes f :: "'a \ real" + assumes "i \ I" "finite I" + assumes "integrable (measure_pmf (M i)) (\\. f \^2)" + shows "prob_space.variance (Pi_pmf I d M) (\\. f (\ i)) = prob_space.variance (M i) f" +proof - + have a:"integrable (measure_pmf (M i)) f" + using assms(3) measure_pmf.square_integrable_imp_integrable by auto + have b:" integrable (measure_pmf (Pi_pmf I d M)) (\x. (f (x i))\<^sup>2)" + by (rule integrable_Pi_pmf_slice[OF assms(2) assms(1)], metis assms(3)) + have c:" integrable (measure_pmf (Pi_pmf I d M)) (\x. (f (x i)))" + by (rule integrable_Pi_pmf_slice[OF assms(2) assms(1)], metis a) + + have "measure_pmf.expectation (Pi_pmf I d M) (\x. (f (x i))\<^sup>2) - (measure_pmf.expectation (Pi_pmf I d M) (\x. f (x i)))\<^sup>2 = + measure_pmf.expectation (M i) (\x. (f x)\<^sup>2) - (measure_pmf.expectation (M i) f)\<^sup>2" + using assms a b c by ((subst expectation_Pi_pmf_slice[OF assms(2,1)])?, simp)+ + + thus ?thesis + using assms a b c by (simp add: measure_pmf.variance_eq) +qed + +lemma Pi_pmf_bind_return: + assumes "finite I" + shows "Pi_pmf I d (\i. M i \ (\x. return_pmf (f i x))) = Pi_pmf I d' M \ (\x. return_pmf (\i. if i \ I then f i (x i) else d))" + using assms by (simp add: Pi_pmf_bind[where d'="d'"]) + +end diff --git a/thys/Frequency_Moments/ROOT b/thys/Frequency_Moments/ROOT new file mode 100644 --- /dev/null +++ b/thys/Frequency_Moments/ROOT @@ -0,0 +1,26 @@ +chapter AFP + +session Frequency_Moments (AFP) = "HOL-Probability" + + options [timeout = 1200] + sessions + Bertrands_Postulate + Equivalence_Relation_Enumeration + "HOL-Algebra" + Interpolation_Polynomials_HOL_Algebra + Lp + Prefix_Free_Code_Combinators + Median_Method + Universal_Hash_Families + theories + Frequency_Moments_Preliminary_Results + Frequency_Moments + Frequency_Moment_0 + Frequency_Moment_2 + Frequency_Moment_k + Landau_Ext + K_Smallest + Probability_Ext + Product_PMF_Ext + document_files + "root.tex" + "root.bib" diff --git a/thys/Frequency_Moments/document/root.bib b/thys/Frequency_Moments/document/root.bib new file mode 100644 --- /dev/null +++ b/thys/Frequency_Moments/document/root.bib @@ -0,0 +1,28 @@ +@InProceedings{baryossef2002, + author="Bar-Yossef, Ziv + and Jayram, T. S. + and Kumar, Ravi + and Sivakumar, D. + and Trevisan, Luca", + editor="Rolim, Jos{\'e} D. P. + and Vadhan, Salil", + title="Counting Distinct Elements in a Data Stream", + booktitle="Randomization and Approximation Techniques in Computer Science", + year="2002", + publisher="Springer Berlin Heidelberg", + _address="Berlin, Heidelberg", + pages="1--10", +} + +@article{alon1999, + title = {The Space Complexity of Approximating the Frequency Moments}, + journal = {Journal of Computer and System Sciences}, + volume = {58}, + number = {1}, + pages = {137-147}, + year = {1999}, + issn = {0022-0000}, + _doi = {https://doi.org/10.1006/jcss.1997.1545}, + _url = {https://www.sciencedirect.com/science/article/pii/S0022000097915452}, + author = {Noga Alon and Yossi Matias and Mario Szegedy}, +} diff --git a/thys/Frequency_Moments/document/root.tex b/thys/Frequency_Moments/document/root.tex new file mode 100644 --- /dev/null +++ b/thys/Frequency_Moments/document/root.tex @@ -0,0 +1,166 @@ +\documentclass[11pt,a4paper]{article} +\usepackage[T1]{fontenc} +\usepackage{isabelle,isabellesym} +\usepackage{amsmath} +\usepackage{amsthm} +\newcommand{\size}[1]{\lvert#1\rvert} +\newcommand{\var}{\mathrm{Var}} +\newcommand{\expectation}{\mathrm{E}} +\usepackage{amssymb} +\usepackage{pdfsetup} + +\urlstyle{rm} +\isabellestyle{it} + +\begin{document} + +\title{Formalization of Randomized Approximation Algorithms for Frequency Moments} +\author{Emin Karayel} +\maketitle +\begin{abstract} +In 1999 Alon et.\ al.\ introduced the still active research topic of approximating the frequency moments of a data stream using randomized algorithms with minimal space usage. +This includes the problem of estimating the cardinality of the stream elements---the zeroth frequency moment. +But, also higher-order frequency moments that provide information about the skew of the data stream. +(The $k$-th frequency moment of a data stream is the sum of the $k$-th powers of the occurrence counts of each element in the stream.) +This entry formalizes three randomized algorithms for the approximation of $F_0$, $F_2$ and $F_k$ for $k \geq 3$ based on \cite{alon1999,baryossef2002} and verifies their expected accuracy, success probability and space usage. +\end{abstract} + +\tableofcontents + +\parindent 0pt\parskip 0.5ex + +\input{session} +\appendix +\section{Informal proof of correctness for the $F_0$ algorithm\label{sec:f0_proof}} +This appendix contains a detailed informal proof for the new Rounding-KMV algorithm that approximates $F_0$ +introduced in Section~\ref{sec:f0} for reference. It follows the same reasoning as the formalized proof. + +Because of the amplification result about medians (see for example \cite[\textsection 2.1]{alon1999}) it is enough to show that each of the estimates the median is taken from is within the desired interval with success probability $\frac{2}{3}$. +To verify the latter, let $a_1, \ldots, a_m$ be the stream elements, where we assume that the elements are a subset of $\{0,\ldots,n-1\}$ and $0 < \delta < 1$ be the desired relative accuracy. +Let $p$ be the smallest prime such that $p \geq \max (n,19)$ and let $h$ be a random polynomial over $GF(p)$ with degree strictly less than $2$. +The algoritm also introduces the internal parameters $t, r$ defined by: +\begin{align*} + t & := \lceil 80\delta^{-2} \rceil & + r & := 4 \log_2 \lceil \delta^{-1} \rceil + 23 +\end{align*} +The estimate the algorithm obtains is $R$, defined using: +\begin{align*} + H := & \left\{ \lfloor h(a) \rfloor_r \middle \vert a \in A \right\} & + R := & \begin{cases} t p \left(\mathrm{min}_t (H) \right)^{-1} & \textrm{ if } \size{H} \geq t \\ + \size{H} & \textrm{ othewise,} \end{cases} & +\end{align*} +where $A := \left\{ a_1, \ldots, a_m \right\}$, $\mathrm{min}_t(H)$ denotes the $t$-th smallest element of $H$ and $\lfloor x \rfloor_r$ denotes the largest binary floating point number smaller or equal to $x$ with a mantissa that requires at most $r$ bits to represent.\footnote{This rounding operation is called \isa{truncate{\isacharunderscore}down} in Isabelle, it is defined in HOL\nobreakdash-Library.Float.} +With these definitions, it is possible to state the main theorem as: +\[ + P(\size{R - F_0} \leq \delta \size{F_0}) \geq \frac{2}{3} \textrm{.} +\] +which is shown separately in the following two subsections for the cases $F_0 \geq t$ and $F_0 < t$. +\subsection{Case $F_0 \geq t$} +Let us introduce: +\begin{align*} + H^* & := \left\{ h(a) \middle \vert a \in A \right\}^{\#} & + R^* & := tp \left( \mathrm{min}^{\#}_t(H^*) \right)^{-1} +\end{align*} +These definitions are modified versions of the definitions for $H$ and $R$: +The set $H^*$ is a multiset, this means that each element also has a multiplicity, counting the +number of \emph{distinct} elements of $A$ being mapped by $h$ to the same value. +Note that by definition: $\size{H^*}=\size{A}$. +Similarly the operation $\mathrm{min}^{\#}_t$ obtains the $t$-th element of the multiset $H$ (taking multiplicities into account). +Note also that there is no rounding operation $\lfloor \cdot \rfloor_r$ in the definition of $H^*$. +The key reason for the introduction of these alternative versions of $H, R$ is that it is easier to +show probabilistic bounds on the distances $\size{R^* - F_0}$ and $\size{R^* - R}$ as opposed to +$\size{R - F_0}$ directly. +In particular the plan is to show: +\begin{eqnarray} + P\left(\size{R^*-F_0} > \delta' F_0\right) & \leq & \frac{2}{9} \textrm{, and} \label{eq:r_star_dev} \\ + P\left(\size{R^*-F_0} \leq \delta' F_0 \wedge \size{R-R^*} > \frac{\delta}{4} F_0\right) & \leq & \frac{1}{9} \label{eq:r_star_r} +\end{eqnarray} +where $\delta' := \frac{3}{4} \delta$. +I.e. the probability that $R^*$ has not the relative accuracy of $\frac{3}{4}\delta$ is less that $\frac{2}{9}$ and the probability +that assuming $R^*$ has the relative accuracy of $\frac{3}{4}\delta$ but that $R$ deviates by more that $\frac{1}{4}\delta F_0$ is at most $\frac{1}{9}$. +Hence, the probability that neither of these events happen is at least $\frac{2}{3}$ but in that case: +\begin{equation} + \label{eq:concl} + \size{R-F_0} \leq \size{R - R^*} + \size{R^*-F_0} \leq \frac{\delta}{4} F_0 + \frac{3 \delta}{4} F_0 = \delta F_0 \textrm{.} +\end{equation} +Thus we only need to show \autoref{eq:r_star_dev} and~\ref{eq:r_star_r}. For the verification of \autoref{eq:r_star_dev} let +\[ + Q(u) = \size{\left\{h(a) < u \mid a \in A \right\}} +\] +and observe that $\mathrm{min}_t^{\#}(H^*) < u$ if $Q(u) \geq t$ and $\mathrm{min}_t^{\#}(H^*) \geq v$ if $Q(v) \leq t-1$. +To see why this is true note that, if at least $t$ elements of $A$ are mapped by $h$ below a certain value, then the $t$-smallest element must also be within them, and thus also be below that value. +And that the opposite direction of this conclusion is also true. +Note that this relies on the fact that $H^*$ is a multiset and that multiplicities are being taken into account, when computing the $t$-th smallest element. +Alternatively, it is also possible to write $Q(u) = \sum_{a \in A} 1_{\{h(a) < u\}}$\footnote{The notation $1_A$ is shorthand for the indicator function of $A$, i.e., $1_A(x)=1$ if $x \in A$ and $0$ otherwise.}, i.e., $Q$ is a sum of pairwise independent $\{0,1\}$-valued random variables, with expectation $\frac{u}{p}$ and variance $\frac{u}{p} - \frac{u^2}{p^2}$. +\footnote{A consequence of $h$ being chosen uniformly from a $2$-independent hash family.} +Using lineariy of expectation and Bienaym\'e's identity, it follows that $\var \, Q(u) \leq \expectation \, Q(u) = |A|u p^{-1} = F_0 u p^{-1}$ for $u \in \{0,\ldots,p\}$. + +\noindent For $v = \left\lfloor \frac{tp}{(1-\delta') F_0} \right\rfloor$ it is possible to conclude: +\begin{equation*} + t-1 \leq\footnotemark \frac{t}{(1-\delta')} - 3\sqrt{\frac{t}{(1-\delta')}} - 1 + \leq \frac{F_0 v}{p} - 3 \sqrt{\frac{F_0 v}{p}} \leq \expectation Q(v) - 3 \sqrt{\var Q(v)} +\end{equation*} +\footnotetext{The verification of this inequality is a lengthy but straightforward calculcation using the definition of $\delta'$ and $t$.} +and thus using Tchebyshev's inequality: +\begin{align} + P\left(R^* < \left(1-\delta'\right) F_0\right) & = P\left(\mathrm{rank}_t^{\#}(H^*) > \frac{tp}{(1-\delta')F_0}\right) \nonumber \\ + & \leq P(\mathrm{rank}_t^{\#}(H^*) \geq v) = P(Q(v) \leq t-1) \label{eq:r_star_upper_bound} \\ + & \leq P\left(Q(v) \leq \expectation Q(v) - 3 \sqrt{\var Q(v)}\right) \leq \frac{1}{9} \textrm{.} \nonumber +\end{align} +Similarly for $u = \left\lceil \frac{tp}{(1+\delta') F_0} \right\rceil$ it is possible to conclude: +\begin{equation*} + t \geq \frac{t}{(1+\delta')} + 3\sqrt{\frac{t}{(1+\delta')}+1} + 1 + \geq \frac{F_0 u}{p} + 3 \sqrt{\frac{F_0 u}{p}} \geq \expectation Q(u) + 3 \sqrt{\var Q(v)} +\end{equation*} +and thus using Tchebyshev's inequality: +\begin{align} + P\left(R^* > \left(1+\delta'\right) F_0\right) & = P\left(\mathrm{rank}_t^{\#}(H^*) < \frac{tp}{(1+\delta')F_0}\right) \nonumber \\ + & \leq P(\mathrm{rank}_t^{\#}(H^*) < u) = P(Q(u) \geq t) \label{eq:r_star_lower_bound} \\ + & \leq P\left(Q(u) \geq \expectation Q(u) + 3 \sqrt{\var Q(u)}\right) \leq \frac{1}{9} \textrm{.} \nonumber +\end{align} +Note that \autoref{eq:r_star_upper_bound} and~\ref{eq:r_star_lower_bound} confirm \autoref{eq:r_star_dev}. To verfiy \autoref{eq:r_star_r}, note that +\begin{equation} + \label{eq:rank_eq} + \mathrm{min}_t(H) = \lfloor \mathrm{min}_t^{\#}(H^*) \rfloor_r +\end{equation} +if there are no collisions, induced by the application of $\lfloor h(\cdot) \rfloor_r$ on the elements of $A$. +Even more carefully, note that the equation would remain true, as long as there are no collision within the smallest $t$ elements of $H^*$. +Because \autoref{eq:r_star_r} needs to be shown only in the case where $R^* \geq (1-\delta') F_0$, i.e., when $\mathrm{min}_t^{\#}(H^*) \leq v$, +it is enough to bound the probability of a collision in the range $[0; v]$. +Moreover \autoref{eq:rank_eq} implies $\size{\mathrm{min}_t(H) - \mathrm{min}_t^{\#}(H^*)} \leq \max(\mathrm{min}_t^{\#}(H^*), \mathrm{min}_t(H)) 2^{-r}$ from +which it is possible to derive $\size{R^*-R} \leq \frac{\delta}{4} F_0$. +Another important fact is that $h$ is injective with probability $1-\frac{1}{p}$, this is because $h$ is chosen uniformly from the polynomials of degree less than $2$. +If it is a degree $1$ polynomial it is a linear function on $GF(p)$ and thus injective. +Because $p \geq 18$ the probability that $h$ is not injective can be bounded by $1/18$. With these in mind, we can conclude: +\begin{eqnarray*} + & & P\left( \size{R^*-F_0} \leq \delta' F_0 \wedge \size{R-R^*} > \frac{\delta}{4} F_0 \right) \\ + & \leq & P\left( R^* \geq (1-\delta') F_0 \wedge \mathrm{min}_t^{\#}(H^*) \neq \mathrm{min}_t(H) \wedge h \textrm{ inj.}\right) + P(\neg h \textrm{ inj.}) \\ + & \leq & P\left( \exists a \neq b \in A. \lfloor h(a) \rfloor_r = \lfloor h(b) \rfloor_r \leq v \wedge h(a) \neq h(b) \right) + \frac{1}{18} \\ + & \leq & \frac{1}{18} + \sum_{a \neq b \in A} P\left(\lfloor h(a) \rfloor_r = \lfloor h(b) \rfloor_r \leq v \wedge h(a) \neq h(b) \right) \\ + & \leq & \frac{1}{18} + \sum_{a \neq b \in A} P\left(\size{h(a) - h(b)} \leq v 2^{-r} \wedge h(a) \leq v (1+2^{-r}) \wedge h(a) \neq h(b) \right) \\ + & \leq & \frac{1}{18} + \sum_{a \neq b \in A} \sum_{\substack{a', b' \in \{0,\ldots, p-1\} \wedge a' \neq b' \\ \size{a'-b'} \leq v 2^{-r} \wedge a' \leq v (1+2^{-r})}} P(h(a) = a') P(h(b)= b') \\ + & \leq & \frac{1}{18} + \frac{5 F_0^2 v^2}{2 p^2} 2^{-r} \leq \frac{1}{9} \textrm{.} +\end{eqnarray*} +which shows that \autoref{eq:r_star_r} is true.\subsection{Case $F_0 < t$} +Note that in this case $\size{H} \leq F_0 < t$ and thus $R = \size{H}$, hence the goal is to show that: +$P(\size{H} \neq F_0) \leq \frac{1}{3}$. +The latter can only happen, if there is a collision induced by the application of $\lfloor h(\cdot)\rfloor_r$. As before $h$ is not injective with probability at most $\frac{1}{18}$, hence: +\begin{eqnarray*} + & & P\left( \size{R - F_0} > \delta F_0\right) \leq P\left( R \neq F_0 \right) \\ + & \leq & \frac{1}{18} + P\left( R \neq F_0 \wedge h \textrm{ inj.} \right) \\ + & \leq & \frac{1}{18} + P\left( \exists a \neq b \in A. \lfloor h(a) \rfloor_r = \lfloor h(b) \rfloor_r \wedge h \textrm{ inj.} \right) \\ + & \leq & \frac{1}{18} + \sum_{a \neq b \in A} P\left(\lfloor h(a) \rfloor_r = \lfloor h(b) \rfloor_r \wedge h(a) \neq h(b) \right) \\ + & \leq & \frac{1}{18} + \sum_{a \neq b \in A} P\left(\size{h(a) - h(b)} \leq p 2^{-r} \wedge h(a) \neq h(b) \right) \\ + & \leq & \frac{1}{18} + \sum_{a \neq b \in A} \sum_{\substack{a', b' \in \{0,\ldots, p-1\} \\ a' \neq b' \wedge \size{a'-b'} \leq p 2^{-r}}} P(h(a) = a') P(h(b)= b') \\ + & \leq & \frac{1}{18} + F_0^2 2^{-r+1} \leq \frac{1}{18} + t^2 2^{-r+1} \leq \frac{1}{9} \textrm{.} +\end{eqnarray*} +Which concludes the proof. \qed + +\bibliographystyle{abbrv} +\bibliography{root} +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff --git a/thys/ROOTS b/thys/ROOTS --- a/thys/ROOTS +++ b/thys/ROOTS @@ -1,674 +1,675 @@ ADS_Functor AI_Planning_Languages_Semantics AODV AVL-Trees AWN Abortable_Linearizable_Modules Abs_Int_ITP2012 Abstract-Hoare-Logics Abstract-Rewriting Abstract_Completeness Abstract_Soundness Ackermanns_not_PR Actuarial_Mathematics Adaptive_State_Counting Affine_Arithmetic Aggregation_Algebras Akra_Bazzi Algebraic_Numbers Algebraic_VCs Allen_Calculus Amicable_Numbers Amortized_Complexity AnselmGod Applicative_Lifting Approximation_Algorithms Architectural_Design_Patterns Aristotles_Assertoric_Syllogistic Arith_Prog_Rel_Primes ArrowImpossibilityGS Attack_Trees Auto2_HOL Auto2_Imperative_HOL AutoFocus-Stream Automated_Stateful_Protocol_Verification Automatic_Refinement AxiomaticCategoryTheory BDD BD_Security_Compositional BNF_CC BNF_Operations BTree Banach_Steinhaus Belief_Revision Bell_Numbers_Spivey BenOr_Kozen_Reif Berlekamp_Zassenhaus Bernoulli Bertrands_Postulate Bicategory BinarySearchTree Binding_Syntax_Theory Binomial-Heaps Binomial-Queues BirdKMP Blue_Eyes Bondy Boolean_Expression_Checkers Bounded_Deducibility_Security Buchi_Complementation Budan_Fourier Buffons_Needle Buildings BytecodeLogicJmlTypes C2KA_DistributedSystems CAVA_Automata CAVA_LTL_Modelchecker CCS CISC-Kernel CRDT CSP_RefTK CYK CZH_Elementary_Categories CZH_Foundations CZH_Universal_Constructions CakeML CakeML_Codegen Call_Arity Card_Equiv_Relations Card_Multisets Card_Number_Partitions Card_Partitions Cartan_FP Case_Labeling Catalan_Numbers Category Category2 Category3 Cauchy Cayley_Hamilton Certification_Monads Chandy_Lamport Chord_Segments Circus Clean ClockSynchInst Closest_Pair_Points CoCon CoSMeDis CoSMed CofGroups Coinductive Coinductive_Languages Collections Combinatorics_Words Combinatorics_Words_Graph_Lemma Combinatorics_Words_Lyndon Comparison_Sort_Lower_Bound Compiling-Exceptions-Correctly Complete_Non_Orders Completeness Complex_Bounded_Operators Complex_Geometry Complx ComponentDependencies ConcurrentGC ConcurrentIMP Concurrent_Ref_Alg Concurrent_Revisions Conditional_Simplification Conditional_Transfer_Rule Consensus_Refined Constructive_Cryptography Constructive_Cryptography_CM Constructor_Funs Containers CoreC++ Core_DOM Core_SC_DOM Correctness_Algebras Cotangent_PFD_Formula Count_Complex_Roots CryptHOL CryptoBasedCompositionalProperties Cubic_Quartic_Equations DFS_Framework DOM_Components DPT-SAT-Solver DataRefinementIBP Datatype_Order_Generator Decl_Sem_Fun_PL Decreasing-Diagrams Decreasing-Diagrams-II Dedekind_Real Deep_Learning Delta_System_Lemma Density_Compiler Dependent_SIFUM_Refinement Dependent_SIFUM_Type_Systems Depth-First-Search Derangements Deriving Descartes_Sign_Rule Design_Theory Dict_Construction Differential_Dynamic_Logic Differential_Game_Logic Dijkstra_Shortest_Path Diophantine_Eqns_Lin_Hom Dirichlet_L Dirichlet_Series DiscretePricing Discrete_Summation DiskPaxos Dominance_CHK DynamicArchitectures Dynamic_Tables E_Transcendental Echelon_Form EdmondsKarp_Maxflow Efficient-Mergesort Elliptic_Curves_Group_Law Encodability_Process_Calculi Epistemic_Logic Equivalence_Relation_Enumeration Ergodic_Theory Error_Function Euler_MacLaurin Euler_Partition Eval_FO Example-Submission Extended_Finite_State_Machine_Inference Extended_Finite_State_Machines FFT FLP FOL-Fitting FOL_Axiomatic FOL_Harrison FOL_Seq_Calc1 FOL_Seq_Calc2 FOL_Seq_Calc3 Factor_Algebraic_Polynomial Factored_Transition_System_Bounding Falling_Factorial_Sum Farkas FeatherweightJava Featherweight_OCL Fermat3_4 FileRefinement FinFun Finger-Trees Finite-Map-Extras Finite_Automata_HF Finitely_Generated_Abelian_Groups First_Order_Terms First_Welfare_Theorem Fishburn_Impossibility Fisher_Yates Flow_Networks Floyd_Warshall Flyspeck-Tame FocusStreamsCaseStudies Forcing Formal_Puiseux_Series Formal_SSA Formula_Derivatives Foundation_of_geometry Fourier FO_Theory_Rewriting Free-Boolean-Algebra Free-Groups +Frequency_Moments Fresh_Identifiers FunWithFunctions FunWithTilings Functional-Automata Functional_Ordered_Resolution_Prover Furstenberg_Topology GPU_Kernel_PL Gabow_SCC GaleStewart_Games Gale_Shapley Game_Based_Crypto Gauss-Jordan-Elim-Fun Gauss_Jordan Gauss_Sums Gaussian_Integers GenClock General-Triangle Generalized_Counting_Sort Generic_Deriving Generic_Join GewirthPGCProof Girth_Chromatic GoedelGod Goedel_HFSet_Semantic Goedel_HFSet_Semanticless Goedel_Incompleteness Goodstein_Lambda GraphMarkingIBP Graph_Saturation Graph_Theory Green Groebner_Bases Groebner_Macaulay Gromov_Hyperbolicity Grothendieck_Schemes Group-Ring-Module HOL-CSP HOLCF-Prelude HRB-Slicing Hahn_Jordan_Decomposition Heard_Of Hello_World HereditarilyFinite Hermite Hermite_Lindemann Hidden_Markov_Models Higher_Order_Terms Hoare_Time Hood_Melville_Queue HotelKeyCards Huffman Hybrid_Logic Hybrid_Multi_Lane_Spatial_Logic Hybrid_Systems_VCs HyperCTL Hyperdual IEEE_Floating_Point IFC_Tracking IMAP-CRDT IMO2019 IMP2 IMP2_Binary_Heap IMP_Compiler IP_Addresses Imperative_Insertion_Sort Impossible_Geometry Incompleteness Incredible_Proof_Machine Independence_CH Inductive_Confidentiality Inductive_Inference InfPathElimination InformationFlowSlicing InformationFlowSlicing_Inter Integration Interpolation_Polynomials_HOL_Algebra Interpreter_Optimizations Interval_Arithmetic_Word32 Intro_Dest_Elim Iptables_Semantics Irrational_Series_Erdos_Straus Irrationality_J_Hancl Irrationals_From_THEBOOK IsaGeoCoq Isabelle_C Isabelle_Marries_Dirac Isabelle_Meta_Model Jacobson_Basic_Algebra Jinja JinjaDCI JinjaThreads JiveDataStoreModel Jordan_Hoelder Jordan_Normal_Form KAD KAT_and_DRA KBPs KD_Tree Key_Agreement_Strong_Adversaries Kleene_Algebra Knights_Tour Knot_Theory Knuth_Bendix_Order Knuth_Morris_Pratt Koenigsberg_Friendship Kruskal Kuratowski_Closure_Complement LLL_Basis_Reduction LLL_Factorization LOFT LTL LTL_Master_Theorem LTL_Normal_Form LTL_to_DRA LTL_to_GBA Lam-ml-Normalization LambdaAuth LambdaMu Lambda_Free_EPO Lambda_Free_KBOs Lambda_Free_RPOs Lambert_W Landau_Symbols Laplace_Transform Latin_Square LatticeProperties Launchbury Laws_of_Large_Numbers Lazy-Lists-II Lazy_Case Lehmer Lifting_Definition_Option Lifting_the_Exponent LightweightJava LinearQuantifierElim Linear_Inequalities Linear_Programming Linear_Recurrences Liouville_Numbers List-Index List-Infinite List_Interleaving List_Inversions List_Update LocalLexing Localization_Ring Locally-Nameless-Sigma Logging_Independent_Anonymity Lowe_Ontological_Argument Lower_Semicontinuous Lp LP_Duality Lucas_Theorem MDP-Algorithms MDP-Rewards MFMC_Countable MFODL_Monitor_Optimized MFOTL_Monitor MSO_Regex_Equivalence Markov_Models Marriage Mason_Stothers Matrices_for_ODEs Matrix Matrix_Tensor Matroids Max-Card-Matching Median_Method Median_Of_Medians_Selection Menger Mereology Mersenne_Primes Metalogic_ProofChecker MiniML MiniSail Minimal_SSA Minkowskis_Theorem Minsky_Machines Modal_Logics_for_NTS Modular_Assembly_Kit_Security Modular_arithmetic_LLL_and_HNF_algorithms Monad_Memo_DP Monad_Normalisation MonoBoolTranAlgebra MonoidalCategory Monomorphic_Monad MuchAdoAboutTwo Multi_Party_Computation Multirelations Myhill-Nerode Name_Carrying_Type_Inference Nash_Williams Nat-Interval-Logic Native_Word Nested_Multisets_Ordinals Network_Security_Policy_Verification Neumann_Morgenstern_Utility No_FTL_observers Nominal2 Noninterference_CSP Noninterference_Concurrent_Composition Noninterference_Generic_Unwinding Noninterference_Inductive_Unwinding Noninterference_Ipurge_Unwinding Noninterference_Sequential_Composition NormByEval Nullstellensatz Octonions OpSets Open_Induction Optics Optimal_BST Orbit_Stabiliser Order_Lattice_Props Ordered_Resolution_Prover Ordinal Ordinal_Partitions Ordinals_and_Cardinals Ordinary_Differential_Equations PAC_Checker PAL PCF PLM POPLmark-deBruijn PSemigroupsConvolution Padic_Ints Pairing_Heap Paraconsistency Parity_Game Partial_Function_MR Partial_Order_Reduction Password_Authentication_Protocol Pell Perfect-Number-Thm Perron_Frobenius Physical_Quantities Pi_Calculus Pi_Transcendental Planarity_Certificates Poincare_Bendixson Poincare_Disc Polynomial_Factorization Polynomial_Interpolation Polynomials Pop_Refinement Posix-Lexing Possibilistic_Noninterference Power_Sum_Polynomials Pratt_Certificate Prefix_Free_Code_Combinators Presburger-Automata Prim_Dijkstra_Simple Prime_Distribution_Elementary Prime_Harmonic_Series Prime_Number_Theorem Priority_Queue_Braun Priority_Search_Trees Probabilistic_Noninterference Probabilistic_Prime_Tests Probabilistic_System_Zoo Probabilistic_Timed_Automata Probabilistic_While Program-Conflict-Analysis Progress_Tracking Projective_Geometry Projective_Measurements Promela Proof_Strategy_Language PropResPI Propositional_Proof_Systems Prpu_Maxflow PseudoHoops Psi_Calculi Ptolemys_Theorem Public_Announcement_Logic QHLProver QR_Decomposition Quantales Quasi_Borel_Spaces Quaternions Quick_Sort_Cost RIPEMD-160-SPARK ROBDD RSAPSS Ramsey-Infinite Random_BSTs Random_Graph_Subgraph_Threshold Randomised_BSTs Randomised_Social_Choice Rank_Nullity_Theorem Real_Impl Real_Power Recursion-Addition Recursion-Theory-I Refine_Imperative_HOL Refine_Monadic RefinementReactive Regex_Equivalence Registers Regression_Test_Selection Regular-Sets Regular_Algebras Regular_Tree_Relations Relation_Algebra Relational-Incorrectness-Logic Relational_Disjoint_Set_Forests Relational_Forests Relational_Method Relational_Minimum_Spanning_Trees Relational_Paths Rep_Fin_Groups ResiduatedTransitionSystem Residuated_Lattices Resolution_FOL Rewriting_Z Ribbon_Proofs Robbins-Conjecture Robinson_Arithmetic Root_Balanced_Tree Roth_Arithmetic_Progressions Routing Roy_Floyd_Warshall SATSolverVerification SC_DOM_Components SDS_Impossibility SIFPL SIFUM_Type_Systems SPARCv8 Safe_Distance Safe_OCL Saturation_Framework Saturation_Framework_Extensions Schutz_Spacetime Secondary_Sylow Security_Protocol_Refinement Selection_Heap_Sort SenSocialChoice Separata Separation_Algebra Separation_Logic_Imperative_HOL SequentInvertibility Shadow_DOM Shadow_SC_DOM Shivers-CFA ShortestPath Show Sigma_Commit_Crypto Signature_Groebner Simpl Simple_Firewall Simplex Simplicial_complexes_and_boolean_functions SimplifiedOntologicalArgument Skew_Heap Skip_Lists Slicing Sliding_Window_Algorithm Smith_Normal_Form Smooth_Manifolds Sort_Encodings Source_Coding_Theorem SpecCheck Special_Function_Bounds Splay_Tree Sqrt_Babylonian Stable_Matching Statecharts Stateful_Protocol_Composition_and_Typing Stellar_Quorums Stern_Brocot Stewart_Apollonius Stirling_Formula Stochastic_Matrices Stone_Algebras Stone_Kleene_Relation_Algebras Stone_Relation_Algebras Store_Buffer_Reduction Stream-Fusion Stream_Fusion_Code Strong_Security Sturm_Sequences Sturm_Tarski Stuttering_Equivalence Subresultants Subset_Boolean_Algebras SumSquares Sunflowers SuperCalc Surprise_Paradox Symmetric_Polynomials Syntax_Independent_Logic Szemeredi_Regularity Szpilrajn TESL_Language TLA Tail_Recursive_Functions Tarskis_Geometry Taylor_Models Three_Circles Timed_Automata Topological_Semantics Topology TortoiseHare Transcendence_Series_Hancl_Rucki Transformer_Semantics Transition_Systems_and_Automata Transitive-Closure Transitive-Closure-II Transitive_Models Treaps Tree-Automata Tree_Decomposition Triangle Trie Twelvefold_Way Tycon Types_Tableaus_and_Goedels_God Types_To_Sets_Extension UPF UPF_Firewall UTP Universal_Hash_Families Universal_Turing_Machine UpDown_Scheme Valuation Van_Emde_Boas_Trees Van_der_Waerden VectorSpace VeriComp Verified-Prover Verified_SAT_Based_AI_Planning VerifyThis2018 VerifyThis2019 Vickrey_Clarke_Groves Virtual_Substitution VolpanoSmith VYDRA_MDL WHATandWHERE_Security WOOT_Strong_Eventual_Consistency WebAssembly Weight_Balanced_Trees Weighted_Path_Order Well_Quasi_Orders Wetzels_Problem Winding_Number_Eval Word_Lib WorkerWrapper X86_Semantics XML Youngs_Inequality ZFC_in_HOL Zeta_3_Irrational Zeta_Function pGCL