diff --git a/thys/KD_Tree/Balanced.thy b/thys/KD_Tree/Balanced.thy --- a/thys/KD_Tree/Balanced.thy +++ b/thys/KD_Tree/Balanced.thy @@ -1,597 +1,597 @@ (* File: Balanced.thy Author: Martin Rau, TU München *) section "Building a balanced \k\-d Tree from a List of Points" theory Balanced imports KDTree Median_Of_Medians_Selection.Median_Of_Medians_Selection begin text \ Build a balanced \k\-d Tree by recursively partition the points into two lists. The partitioning criteria will be the median at a particular axis \ws\. The left list will contain all points \p\ with @{term "p!ws \ median"}. The right list will contain all points with median at axis @{term "ws \ p!a"}. The left and right list differ in length by one or none. The axis \ws\ will the widest spread axis. The axis which has the widest spread of point values. \ subsection "Widest Spread of a List of Points" definition spread_set :: "axis \ point set \ real" where "spread_set a ps = ( let as = (\p. p!a) ` ps in Max as - Min as )" definition spread :: "axis \ point list \ real" where "spread a ps = ( let as = map (\p. p!a) (tl ps) in fold max as (hd ps !a) - fold min as (hd ps!a) )" definition is_widest_spread :: "axis \ dimension \ point set \ bool" where "is_widest_spread a k ps = (\a' < k. spread_set a' ps \ spread_set a ps)" fun widest_spread_rec :: "axis \ point list \ axis * real" where "widest_spread_rec 0 ps = (0, spread 0 ps)" | "widest_spread_rec a ps = ( let (a', s') = widest_spread_rec (a - 1) ps in let s = spread a ps in if s \ s' then (a', s') else (a, s) )" definition widest_spread :: "point list \ axis" where "widest_spread ps = ( let (a, _) = widest_spread_rec (dim (hd ps) - 1) ps in a )" fun widest_spread_invar :: "dimension \ kdt \ bool" where "widest_spread_invar _ (Leaf _) \ True" | "widest_spread_invar k (Node a s l r) \ is_widest_spread a k (set_kdt l \ set_kdt r) \ widest_spread_invar k l \ widest_spread_invar k r" lemma spread_is_spread_set: "ps \ [] \ spread_set a (set ps) = spread a ps" using Max.set_eq_fold[of "hd ps !a" _] Min.set_eq_fold[of "hd ps !a"] apply (auto simp add: Let_def spread_def spread_set_def) by (metis (no_types, lifting) hd_Cons_tl list.simps(15) list.simps(9) set_map) lemma widest_spread_rec_is_spread: "(ws, s) = widest_spread_rec a ps \ s = spread ws ps" by (induction a) (auto simp add: Let_def split: prod.splits if_splits) lemma is_widest_spread_k_le_ws: "is_widest_spread ws k ps \ spread_set k ps \ spread_set ws ps \ is_widest_spread ws (k+1) ps" using is_widest_spread_def less_Suc_eq by auto lemma is_widest_spread_k_gt_ws: "is_widest_spread ws k ps \ \ (spread_set k ps \ spread_set ws ps) \ is_widest_spread k (k+1) ps" using is_widest_spread_def less_Suc_eq by auto lemma widest_spread_rec_le_a: "ps \ [] \ (ws, s) = widest_spread_rec a ps \ ws \ a" by (induction a arbitrary: ws s) (auto simp add: Let_def le_Suc_eq split: prod.splits if_splits) lemma widest_spread_rec_is_widest_spread: "ps \ [] \ (ws, s) = widest_spread_rec a ps \ is_widest_spread ws (a+1) (set ps)" proof (induction a arbitrary: ws s) case 0 thus ?case using is_widest_spread_def by simp next case (Suc a) then obtain ws' s' where *: "(ws', s') = widest_spread_rec a ps" by (metis surj_pair) hence "is_widest_spread ws' (Suc a) (set ps)" using Suc.IH Suc.prems(1) by simp then show ?case using Suc.prems * spread_is_spread_set widest_spread_rec_is_spread is_widest_spread_k_le_ws[of ws' "Suc a" "set ps"] is_widest_spread_k_gt_ws[of ws' "Suc a" "set ps"] by (auto simp add: Let_def split: prod.splits if_splits) qed lemma widest_spread_lt_k: "\p \ set ps. dim p = k \ 0 < k \ ps \ [] \ widest_spread ps < k" using widest_spread_def widest_spread_rec_le_a apply (auto split: prod.splits) by (metis Suc_le_lessD Suc_le_mono Suc_pred) lemma widest_spread_is_widest_spread: assumes "ps \ []" "\p \ set ps. dim p = k" shows "is_widest_spread (widest_spread ps) k (set ps)" proof (cases k) case 0 then show ?thesis using is_widest_spread_def by simp next case (Suc n) obtain ws s where *: "(ws, s) = widest_spread_rec (dim (hd ps) - 1) ps" using prod.collapse by blast moreover have "dim (hd ps) = k" using assms(1,2) by simp ultimately have "is_widest_spread ws (k - 1 + 1) (set ps)" using widest_spread_rec_is_widest_spread assms(1) by simp hence "is_widest_spread ws k (set ps)" using Suc by simp thus ?thesis using * widest_spread_def by (auto split: prod.split) qed subsection "Partitioning a List of Points by Median" definition axis_median :: "axis \ point list \ real" where "axis_median a ps = ( let n = (length ps - 1) div 2 in let ps' = map (\p. p!a) ps in fast_select n ps' )" lemma size_mset_map_P: "size {# y \# mset (map f xs). P y #} = size {# x \# mset xs. P (f x) #}" by (induction xs) auto lemma size_axis_median_length: assumes "0 < length ps" shows "size {# p \# mset ps. p!a < axis_median a ps #} \ (length ps - 1) div 2" (is "?thesis1") and "size {# p \# mset ps. axis_median a ps < p!a #} \ length ps div 2" (is "?thesis2") proof - let ?n = "(length ps - 1) div 2" let ?ps' = "map (\p. p!a) ps" let ?m = "fast_select ?n ?ps'" have "length ps = length ?ps'" by simp moreover have "?n < length ?ps'" using assms calculation by linarith ultimately have *: "median ?ps' = ?m" using median_def fast_select_correct by metis have "size {# a \# mset ?ps'. a < ?m #} \ (length ps - 1) div 2" using * size_less_than_median[of ?ps'] by simp hence "size {# p \# mset ps. p!a < ?m #} \ (length ps - 1) div 2" using size_mset_map_P[of "\a. a < ?m"] by metis thus ?thesis1 using axis_median_def by metis have "size {# a \# mset ?ps'. ?m < a #} \ length ps div 2" using * size_greater_than_median[of ?ps'] by simp hence "size {# p \# mset ps. ?m < p!a #} \ length ps div 2" using size_mset_map_P[of "\a. ?m < a"] by metis thus ?thesis2 using axis_median_def by metis qed subsubsection "Threeway Partitioning a List of Points" fun partition :: "axis \ real \ point list \ point list * point list * point list" where "partition _ _ [] = ([], [], [])" | "partition a m (p # ps) = ( let (lt, eq, gt) = partition a m ps in if p!a < m then (p # lt, eq, gt) else if p!a = m then (lt, p # eq, gt) else (lt, eq, p # gt) )" lemma partition_set: assumes "(lt, eq, gt) = partition a m ps" shows "set ps = set lt \ set eq \ set gt" using assms by (induction ps arbitrary: lt eq gt) (auto split: prod.splits if_splits) lemma partition_mset: assumes "(lt, eq, gt) = partition a m ps" shows "mset lt = {# p \# mset ps. p!a < m #}" and "mset eq = {# p \# mset ps. p!a = m #}" and "mset gt = {# p \# mset ps. m < p!a #}" using assms by (induction ps arbitrary: lt eq gt) (auto split: prod.splits if_splits) lemma partition_filter: assumes "(lt, eq, gt) = partition a m ps" shows "\p \ set lt. p!a < m" and "\p \ set eq. p!a = m" and "\p \ set gt. m < p!a" using assms by (induction ps arbitrary: lt eq gt) (auto split: prod.splits if_splits) lemma partition_length: assumes "(lt, eq, gt) = partition a m ps" shows "length ps = length lt + length eq + length gt" and "length lt = size {# p \# mset ps. p!a < m #}" and "length eq = size {# p \# mset ps. p!a = m #}" and "length gt = size {# p \# mset ps. m < p!a #}" using assms by (induction ps arbitrary: lt eq gt) (auto split: prod.splits if_splits) subsubsection "Partitioning a List of Points balanced by Median" definition partition_by_median :: "axis \ point list \ point list * real * point list" where "partition_by_median a ps = ( let m = axis_median a ps in let (lt, eq, gt) = partition a m ps in let rem = length ps div 2 - length lt in (lt @ take rem eq, m, drop rem eq @ gt) )" lemma set_take_drop: "set xs = set (take n xs) \ set (drop n xs)" by (metis append_take_drop_id set_append) lemma partition_by_median_set: assumes "(l, m, r) = partition_by_median a ps" shows "set ps = set l \ set r" using assms unfolding partition_by_median_def apply (simp add: Let_def split: prod.splits) using set_take_drop by (metis partition_set inf_sup_aci(6)) lemma partition_by_median_filter: assumes "(l, m, r) = partition_by_median a ps" shows "\p \ set l. p!a \ m" and "\p \ set r. m \ p!a" using assms unfolding partition_by_median_def by (auto simp add: Let_def le_less split: prod.splits dest!: in_set_takeD in_set_dropD) (metis partition_filter)+ -lemma partition_by_median_length_1: +lemma partition_by_median_length1: assumes "(l, m, r) = partition_by_median a ps" shows "length ps = length l + length r" using assms unfolding partition_by_median_def apply (simp add: Let_def min_def split: prod.splits) by (metis (no_types, lifting) add.assoc partition_length(1)) -lemma partition_by_median_length_2: +lemma partition_by_median_length2: assumes "(l, m, r) = partition_by_median a ps" "0 < length ps" shows "length r - length l \ 1" and "length l \ length r" proof - let ?m = "axis_median a ps" let ?leg = "partition a ?m ps" let ?lt = "fst ?leg" let ?eq = "fst (snd ?leg)" let ?gt = "snd (snd ?leg)" let ?rem = "length ps div 2 - length ?lt" let ?l = "?lt @ take ?rem ?eq" let ?r = "drop ?rem ?eq @ ?gt" have *: "(?lt, ?eq, ?gt) = partition a ?m ps" by simp have "length ?lt \ (length ps - 1) div 2" using assms * partition_length(2) size_axis_median_length(1) by presburger moreover have "length ?gt \ length ps div 2" using assms * partition_length(4) size_axis_median_length(2) by presburger moreover have "length ps = length ?lt + length ?eq + length ?gt" using * partition_length(1) by simp ultimately have L: "length ?l = length ps div 2" by simp have **: "(?l, ?m, ?r) = partition_by_median a ps" by (auto simp add: Let_def partition_by_median_def split: prod.splits) hence "length ps = length ?l + length ?r" - using partition_by_median_length_1 by blast + using partition_by_median_length1 by blast hence "length ?l \ length ?r" "length ?r - length ?l \ 1" using L by linarith+ thus "length l \ length r" "length r - length l \ 1" using ** by (metis Pair_inject assms(1))+ qed -lemma partition_by_median_length_3: +lemma partition_by_median_length3: assumes "(l, m, r) = partition_by_median a ps" "0 < length ps" shows "length l < length ps" proof (rule ccontr) assume *: "\ (length l < length ps)" have "length ps = length l + length r" - using partition_by_median_length_1 assms(1) by simp + using partition_by_median_length1 assms(1) by simp hence "length l = length ps" "length r = 0" using * by simp_all moreover have "length l \ length r" - using partition_by_median_length_2 assms by fastforce+ + using partition_by_median_length2 assms by fastforce+ ultimately show "False" using assms(2) by simp qed -lemma partition_by_median_length_4: +lemma partition_by_median_length4: assumes "(l, m, r) = partition_by_median a ps" "1 < length ps" shows "length r < length ps" proof (rule ccontr) assume *: "\ (length r < length ps)" have "length ps = length l + length r" - using partition_by_median_length_1 assms(1) by simp + using partition_by_median_length1 assms(1) by simp hence "length r = length ps" "length l = 0" using * by simp_all moreover have "length r - length l \ 1" - using partition_by_median_length_2 assms by fastforce+ + using partition_by_median_length2 assms by fastforce+ ultimately show "False" using assms(2) by simp qed -lemma partition_by_median_length_5: +lemma partition_by_median_length5: assumes "(l, m, r) = partition_by_median a ps" "1 < length ps" shows "0 < length l" and "0 < length r" - using assms partition_by_median_length_1 partition_by_median_length_4 apply simp - using assms partition_by_median_length_1 partition_by_median_length_3 by fastforce + using assms partition_by_median_length1 partition_by_median_length4 apply simp + using assms partition_by_median_length1 partition_by_median_length3 by fastforce lemmas partition_by_median_length = - partition_by_median_length_1 partition_by_median_length_2 - partition_by_median_length_3 partition_by_median_length_4 - partition_by_median_length_5 + partition_by_median_length1 partition_by_median_length2 + partition_by_median_length3 partition_by_median_length4 + partition_by_median_length5 subsection \Building the Tree\ function (sequential) build :: "dimension \ point list \ kdt" where "build _ [] = undefined" (* We never hit this case recursively. Only if the original input is really [].*) | "build k [p] = Leaf p" | "build k ps = ( let a = widest_spread ps in let (l, m, r) = partition_by_median a ps in Node a m (build k l) (build k r) )" by pat_completeness auto termination build using partition_by_median_length(4,5) apply (relation "measure (\( _, ps). length ps)") apply (auto) apply fastforce+ done text \ Setting up different build simps for length induct. \ -lemma build_simp_1: +lemma build_simp1: "ps = [p] \ build k ps = Leaf p" by simp -lemma build_simp_2: +lemma build_simp2: "ps = p\<^sub>0 # p\<^sub>1 # ps' \ a = widest_spread ps \ (l, m, r) = partition_by_median a ps \ build k ps = Node a m (build k l) (build k r)" using build.simps(3) by (auto simp add: Let_def split: prod.splits) lemma length_ps_gt_1: "1 < length ps \ \p\<^sub>0 p\<^sub>1 ps'. ps = p\<^sub>0 # p\<^sub>1 # ps'" by (induction ps) (auto simp add: neq_Nil_conv) -lemma build_simp_3: +lemma build_simp3: "1 < length ps \ a = widest_spread ps \ (l, m, r) = partition_by_median a ps \ build k ps = Node a m (build k l) (build k r)" - using build_simp_2 length_ps_gt_1 by fast + using build_simp2 length_ps_gt_1 by fast -lemmas build_simps[simp] = build_simp_1 build_simp_3 +lemmas build_simps[simp] = build_simp1 build_simp3 declare build.simps[simp del] text \ The main theorems. \ theorem build_set: assumes "0 < length ps" shows "set ps = set_kdt (build k ps)" using assms proof (induction ps rule: length_induct) case (1 ps) then show ?case proof (cases "length ps \ 1") case True then obtain p where "ps = [p]" using "1.prems" by (cases ps) auto thus ?thesis by simp next case False let ?a = "widest_spread ps" let ?lmr = "partition_by_median ?a ps" let ?l = "fst ?lmr" let ?m = "fst (snd ?lmr)" let ?r = "snd (snd ?lmr)" have "set ?l = set_kdt (build k ?l)" "set ?r = set_kdt (build k ?r)" using False partition_by_median_length(4,5,6,7)[of ?l ?m ?r ?a ps] "1.IH" by force+ moreover have "set ps = set ?l \ set ?r" using partition_by_median_set by (metis prod.collapse) moreover have "build k ps = Node ?a ?m (build k ?l) (build k ?r)" using False by simp ultimately show ?thesis by auto qed qed theorem build_invar: assumes "0 < length ps" "\p \ set ps. dim p = k" "distinct ps" "0 < k" shows "invar k (build k ps)" using assms proof (induction ps rule: length_induct) case (1 ps) then show ?case proof (cases "length ps \ 1") case True then obtain p where P: "ps = [p]" using "1.prems"(1) by (cases ps) auto hence "dim p = k" using "1.prems"(2) by simp thus ?thesis using P by simp next case False let ?a = "widest_spread ps" let ?lmr = "partition_by_median ?a ps" let ?l = "fst ?lmr" let ?m = "fst (snd ?lmr)" let ?r = "snd (snd ?lmr)" have 1: "length ps = length ?l + length ?r" using partition_by_median_length(1) by (metis prod.collapse)+ hence 2: "length ?l < length ps" "length ?r < length ps" using False partition_by_median_length(4,5) not_le_imp_less "1.prems" by (metis prod.collapse)+ hence 3: "0 < length ?l" "0 < length ?r" using 1 False partition_by_median_length(6,7) by simp_all moreover have 4: "set ps = set ?l \ set ?r" using partition_by_median_set by (metis prod.collapse) moreover have "distinct ?l" "distinct ?r" and 5: "set ?l \ set ?r = {}" using "1.prems"(3) 1 4 by (metis card_distinct distinct_append distinct_card length_append set_append)+ moreover have "\p \ set ?l .dim p = k" "\p \ set ?r .dim p = k" using "1.prems"(2) 4 5 by simp_all ultimately have "invar k (build k ?l)" "invar k (build k ?r)" using "1.IH" 2 by (simp_all add: "1.prems"(4)) moreover have "\p \ set ?l. p ! ?a \ ?m" "\p \ set ?r. ?m \ p ! ?a" using partition_by_median_filter by (metis prod.collapse)+ moreover have "build k ps = Node ?a ?m (build k ?l) (build k ?r)" using False by simp moreover have "?a < k" using "1.prems"(1,2,4) widest_spread_lt_k by auto ultimately show ?thesis using 3 5 build_set by auto qed qed theorem build_widest_spread: assumes "0 < length ps" "\p \ set ps. dim p = k" shows "widest_spread_invar k (build k ps)" using assms proof (induction ps rule: length_induct) case (1 ps) then show ?case proof (cases "length ps \ 1") case True then obtain p where "ps = [p]" using "1.prems" by (cases ps) auto thus ?thesis by simp next case False let ?a = "widest_spread ps" let ?lmr = "partition_by_median ?a ps" let ?l = "fst ?lmr" let ?m = "fst (snd ?lmr)" let ?r = "snd (snd ?lmr)" have 1: "length ps = length ?l + length ?r" using partition_by_median_length(1) by (metis prod.collapse)+ hence 2: "length ?l < length ps" "length ?r < length ps" using False partition_by_median_length(4,5) not_le_imp_less "1.prems" apply (auto simp add: not_le) apply (metis "1" not_add_less1 not_add_less2 prod.collapse)+ done hence 3: "0 < length ?l" "0 < length ?r" using 1 False partition_by_median_length(6,7) by simp_all moreover have 4: "set ps = set ?l \ set ?r" using partition_by_median_set by (metis prod.collapse) moreover have "\p \ set ?l .dim p = k" "\p \ set ?r .dim p = k" using "1.prems"(2) 4 by simp_all ultimately have "widest_spread_invar k (build k ?l)" "widest_spread_invar k (build k ?r)" using "1.IH" 2 by simp_all moreover have "is_widest_spread ?a k (set_kdt (build k ?l) \ set_kdt (build k ?r))" using widest_spread_is_widest_spread "1.prems" 3 4 build_set by fastforce moreover have "build k ps = Node ?a ?m (build k ?l) (build k ?r)" using False by simp ultimately show ?thesis by auto qed qed theorem build_size: assumes "0 < length ps" shows "size_kdt (build k ps) = length ps" using assms proof (induction ps rule: length_induct) case (1 ps) then show ?case proof (cases "length ps \ 1") case True then obtain p where "ps = [p]" using "1.prems" by (cases ps) auto thus ?thesis by simp next case False let ?a = "widest_spread ps" let ?lmr = "partition_by_median ?a ps" let ?l = "fst ?lmr" let ?m = "fst (snd ?lmr)" let ?r = "snd (snd ?lmr)" have "size_kdt (build k ?l) = length ?l" "size_kdt (build k ?r) = length ?r" using False partition_by_median_length(4,5,6,7)[of ?l ?m ?r ?a ps] "1.IH" by force+ moreover have "build k ps = Node ?a ?m (build k ?l) (build k ?r)" using False by simp ultimately show ?thesis using partition_by_median_length(1) by (metis prod.collapse size_kdt.simps(2)) qed qed theorem build_balanced: assumes "0 < length ps" shows "balanced (build k ps)" using assms proof (induction ps rule: length_induct) case (1 ps) show ?case proof (cases "length ps \ 1") case True then obtain p where "ps = [p]" using "1.prems" by (cases ps) auto thus ?thesis unfolding balanced_def by simp next case False let ?a = "widest_spread ps" let ?lmr = "partition_by_median ?a ps" let ?l = "fst ?lmr" let ?m = "fst (snd ?lmr)" let ?r = "snd (snd ?lmr)" have 0: "length ps = length ?l + length ?r" "length ?r - length ?l \ 1" "length ?l \ length ?r" using partition_by_median_length(1,2,3) "1.prems" by (metis prod.collapse)+ hence 1: "length ?l + 1 = length ?r \ length ?l = length ?r" by linarith moreover have 2: "length ?l < length ps" "length ?r < length ps" using False 0 by auto moreover have 3: "0 < length ?l" "0 < length ?r" using "1.prems" 0 1 2 by linarith+ ultimately have 4: "balanced (build k ?l)" "balanced (build k ?r)" using "1.IH" by simp_all have "build k ps = Node ?a ?m (build k ?l) (build k ?r)" using False by simp moreover have "size_kdt (build k ?l) + 1 = size_kdt (build k ?r) \ size_kdt (build k ?l) = size_kdt (build k ?r)" using 1 3 build_size by simp ultimately show ?thesis using 4 balanced_Node_if_wbal2 by auto qed qed lemma complete_if_balanced_size_2powh: assumes "balanced kdt" "size_kdt kdt = 2 ^ h" shows "complete kdt" proof (rule ccontr) assume "\ complete kdt" hence "2 ^ (min_height kdt) < size_kdt kdt" "size_kdt kdt < 2 ^ height kdt" by (simp_all add: min_height_size_if_incomplete size_height_if_incomplete) hence "height kdt - min_height kdt > 1" using assms(2) by simp hence "\ balanced kdt" using balanced_def by simp thus "False" using assms(1) by simp qed theorem build_complete: assumes "length ps = 2 ^ h" shows "complete (build k ps)" using assms complete_if_balanced_size_2powh by (simp add: build_balanced build_size) corollary build_height: "length ps = 2 ^ h \ length ps = 2 ^ (height (build k ps))" using build_complete build_size complete_iff_size by auto end \ No newline at end of file diff --git a/thys/KD_Tree/KDTree.thy b/thys/KD_Tree/KDTree.thy --- a/thys/KD_Tree/KDTree.thy +++ b/thys/KD_Tree/KDTree.thy @@ -1,441 +1,441 @@ (* File: KDTree.thy Author: Martin Rau, TU München *) section "Definition of the \k\-d Tree" theory KDTree imports Complex_Main begin text \ A \k\-d tree is a space-partitioning data structure for organizing points in a \k\-dimensional space. In principle the \k\-d tree is a binary tree. The leafs hold the \k\-dimensional points and the nodes contain left and right subtrees as well as a discriminator \s\ at a particular axis \a\. Every node divides the space into two parts by splitting along a hyperplane. Consider a node \n\ with associated discriminator \s\ at axis \a\. All points in the left subtree must have a value at axis \a\ that is less than or equal to \s\ and all points in the right subtree must have a value at axis \a\ that is greater than or equal to \s\. Deviations from the papers: The chosen tree representation is taken from @{cite "DBLP:journals/toms/FriedmanBF77"} with one minor adjustment. Originally the leafs hold buckets of points of size \b\. This representation fixes the bucket size to \b = 1\, a single point per Leaf. This is only a minor adjustment since the paper proves that \b = 1\ is the optimal bucket size for minimizing the runtime of the nearest neighbor algorithm @{cite "DBLP:journals/toms/FriedmanBF77"}, only simplifies building the optimized \k\-d trees @{cite "DBLP:journals/toms/FriedmanBF77"} and has little influence on the search algorithm @{cite "DBLP:journals/cacm/Bentley75"}. \ type_synonym point = "real list" type_synonym axis = nat type_synonym dimension = nat type_synonym discriminator = real datatype kdt = Leaf point | Node axis discriminator kdt kdt subsection "Definition of the \k\-d Tree Invariant and Related Functions" definition dim :: "point \ nat" where "dim p = length p" declare dim_def[simp] fun set_kdt :: "kdt \ point set" where "set_kdt (Leaf p) = {p}" | "set_kdt (Node _ _ l r) = set_kdt l \ set_kdt r" fun invar :: "dimension \ kdt \ bool" where "invar k (Leaf p) \ dim p = k" | "invar k (Node a s l r) \ (\p \ set_kdt l. p!a \ s) \ (\p \ set_kdt r. s \ p!a) \ invar k l \ invar k r \ a < k \ set_kdt l \ set_kdt r = {}" fun size_kdt :: "kdt \ nat" where "size_kdt (Leaf _) = 1" | "size_kdt (Node _ _ l r) = size_kdt l + size_kdt r" fun height :: "kdt \ nat" where "height (Leaf _) = 0" | "height (Node _ _ l r) = max (height l) (height r) + 1" fun min_height :: "kdt \ nat" where "min_height (Leaf _) = 0" | "min_height (Node _ _ l r) = min (min_height l) (min_height r) + 1" definition balanced :: "kdt \ bool" where "balanced kdt \ height kdt - min_height kdt \ 1" fun complete :: "kdt \ bool" where "complete (Leaf _) = True" | "complete (Node _ _ l r) \ complete l \ complete r \ height l = height r" lemma invar_l: "invar k (Node a s l r) \ invar k l" by simp lemma invar_r: "invar k (Node a s l r) \ invar k r" by simp lemma invar_axis_lt_d: "invar k (Node a s l r) \ a < k" by simp lemma invar_dim: "invar k kdt \ \p \ set_kdt kdt. dim p = k" by (induction kdt) auto lemma invar_l_le_a: "invar k (Node a s l r) \ \p \ set_kdt l. p!a \ s" by simp lemma invar_r_ge_a: "invar k (Node a s l r) \ \p \ set_kdt r. s \ p!a" by simp lemma invar_distinct: "invar k kdt \ kdt = Node a s l r \ set_kdt l \ set_kdt r = {}" by fastforce lemma invar_set: "set_kdt (Node a s l r) = set_kdt l \ set_kdt r" by simp subsection "Lemmas adapted from \HOL-Library.Tree\ to \k\-d Tree" -lemma size_ge0[simp]: +lemma size_ge_0[simp]: "0 < size_kdt kdt" by (induction kdt) auto lemma eq_size_1[simp]: "size_kdt kdt = 1 \ (\p. kdt = Leaf p)" apply (induction kdt) apply (auto) - using size_ge0 nat_less_le apply blast+ + using size_ge_0 nat_less_le apply blast+ done lemma eq_1_size[simp]: "1 = size_kdt kdt \ (\p. kdt = Leaf p)" using eq_size_1 by metis lemma neq_Leaf_iff: "(\p. kdt = Leaf p) = (\a s l r. kdt = Node a s l r)" by (cases kdt) auto lemma eq_height_0[simp]: "height kdt = 0 \ (\p. kdt = Leaf p)" by (cases kdt) auto lemma eq_0_height[simp]: "0 = height kdt \ (\p. kdt = Leaf p)" by (cases kdt) auto lemma eq_min_height_0[simp]: "min_height kdt = 0 \ (\p. kdt = Leaf p)" by (cases kdt) auto lemma eq_0_min_height[simp]: "0 = min_height kdt \ (\p. kdt = Leaf p)" by (cases kdt) auto lemma size_height: "size_kdt kdt \ 2 ^ height kdt" proof(induction kdt) case (Node a s l r) show ?case proof (cases "height l \ height r") case True have "size_kdt (Node a s l r) = size_kdt l + size_kdt r" by simp also have "\ \ 2 ^ height l + 2 ^ height r" using Node.IH by arith also have "\ \ 2 ^ height r + 2 ^ height r" using True by simp also have "\ = 2 ^ height (Node a s l r)" using True by (auto simp: max_def mult_2) finally show ?thesis . next case False have "size_kdt (Node a s l r) = size_kdt l + size_kdt r" by simp also have "\ \ 2 ^ height l + 2 ^ height r" using Node.IH by arith also have "\ \ 2 ^ height l + 2 ^ height l" using False by simp finally show ?thesis using False by (auto simp: max_def mult_2) qed qed simp lemma min_height_le_height: "min_height kdt \ height kdt" by (induction kdt) auto lemma min_height_size: "2 ^ min_height kdt \ size_kdt kdt" proof(induction kdt) case (Node a s l r) have "(2::nat) ^ min_height (Node a s l r) \ 2 ^ min_height l + 2 ^ min_height r" by (simp add: min_def) also have "\ \ size_kdt (Node a s l r)" using Node.IH by simp finally show ?case . qed simp lemma complete_iff_height: "complete kdt \ (min_height kdt = height kdt)" apply (induction kdt) apply simp apply (simp add: min_def max_def) by (metis le_antisym le_trans min_height_le_height) lemma size_if_complete: "complete kdt \ size_kdt kdt = 2 ^ height kdt" by (induction kdt) auto lemma complete_if_size_height: "size_kdt kdt = 2 ^ height kdt \ complete kdt" proof (induction "height kdt" arbitrary: kdt) case 0 thus ?case by auto next case (Suc h) hence "\p. kdt = Leaf p" by auto then obtain a s l r where [simp]: "kdt = Node a s l r" using neq_Leaf_iff by auto have 1: "height l \ h" and 2: "height r \ h" using Suc(2) by(auto) have 3: "\ height l < h" proof assume 0: "height l < h" have "size_kdt kdt = size_kdt l + size_kdt r" by simp also have "\ \ 2 ^ height l + 2 ^ height r" using size_height[of l] size_height[of r] by arith also have " \ < 2 ^ h + 2 ^ height r" using 0 by (simp) also have " \ \ 2 ^ h + 2 ^ h" using 2 by (simp) also have "\ = 2 ^ (Suc h)" by (simp) also have "\ = size_kdt kdt" using Suc(2,3) by simp finally have "size_kdt kdt < size_kdt kdt" . thus False by (simp) qed have 4: "\ height r < h" proof assume 0: "height r < h" have "size_kdt kdt = size_kdt l + size_kdt r" by simp also have "\ \ 2 ^ height l + 2 ^ height r" using size_height[of l] size_height[of r] by arith also have " \ < 2 ^ height l + 2 ^ h" using 0 by (simp) also have " \ \ 2 ^ h + 2 ^ h" using 1 by (simp) also have "\ = 2 ^ (Suc h)" by (simp) also have "\ = size_kdt kdt" using Suc(2,3) by simp finally have "size_kdt kdt < size_kdt kdt" . thus False by (simp) qed from 1 2 3 4 have *: "height l = h" "height r = h" by linarith+ hence "size_kdt l = 2 ^ height l" "size_kdt r = 2 ^ height r" using Suc(3) size_height[of l] size_height[of r] by (auto) with * Suc(1) show ?case by simp qed lemma complete_if_size_min_height: "size_kdt kdt = 2 ^ min_height kdt \ complete kdt" proof (induct "min_height kdt" arbitrary: kdt) case 0 thus ?case by auto next case (Suc h) hence "\p. kdt = Leaf p" by auto then obtain a s l r where [simp]: "kdt = Node a s l r" using neq_Leaf_iff by auto have 1: "h \ min_height l" and 2: "h \ min_height r" using Suc(2) by (auto) have 3: "\ h < min_height l" proof assume 0: "h < min_height l" have "size_kdt kdt = size_kdt l + size_kdt r" by simp also note min_height_size[of l] also(xtrans) note min_height_size[of r] also(xtrans) have "(2::nat) ^ min_height l > 2 ^ h" using 0 by (simp add: diff_less_mono) also(xtrans) have "(2::nat) ^ min_height r \ 2 ^ h" using 2 by simp also(xtrans) have "(2::nat) ^ h + 2 ^ h = 2 ^ (Suc h)" by (simp) also have "\ = size_kdt kdt" using Suc(2,3) by simp finally show False by (simp add: diff_le_mono) qed have 4: "\ h < min_height r" proof assume 0: "h < min_height r" have "size_kdt kdt = size_kdt l + size_kdt r" by simp also note min_height_size[of l] also(xtrans) note min_height_size[of r] also(xtrans) have "(2::nat) ^ min_height r > 2 ^ h" using 0 by (simp add: diff_less_mono) also(xtrans) have "(2::nat) ^ min_height l \ 2 ^ h" using 1 by simp also(xtrans) have "(2::nat) ^ h + 2 ^ h = 2 ^ (Suc h)" by (simp) also have "\ = size_kdt kdt" using Suc(2,3) by simp finally show False by (simp add: diff_le_mono) qed from 1 2 3 4 have *: "min_height l = h" "min_height r = h" by linarith+ hence "size_kdt l = 2 ^ min_height l" "size_kdt r = 2 ^ min_height r" using Suc(3) min_height_size[of l] min_height_size[of r] by (auto) with * Suc(1) show ?case by (simp add: complete_iff_height) qed lemma complete_iff_size: "complete kdt \ size_kdt kdt = 2 ^ height kdt" using complete_if_size_height size_if_complete by blast lemma size_height_if_incomplete: "\ complete kdt \ size_kdt kdt < 2 ^ height kdt" by (meson antisym_conv complete_iff_size not_le size_height) lemma min_height_size_if_incomplete: "\ complete kdt \ 2 ^ min_height kdt < size_kdt kdt" by (metis complete_if_size_min_height le_less min_height_size) lemma balanced_subtreeL: "balanced (Node a s l r) \ balanced l" by (simp add: balanced_def) lemma balanced_subtreeR: "balanced (Node a s l r) \ balanced r" by (simp add: balanced_def) lemma balanced_optimal: assumes "balanced kdt" "size_kdt kdt \ size_kdt kdt'" shows "height kdt \ height kdt'" proof (cases "complete kdt") case True have "(2::nat) ^ height kdt \ 2 ^ height kdt'" proof - have "2 ^ height kdt = size_kdt kdt" using True by (simp add: complete_iff_height size_if_complete) also have "\ \ size_kdt kdt'" using assms(2) by simp also have "\ \ 2 ^ height kdt'" by (rule size_height) finally show ?thesis . qed thus ?thesis by (simp) next case False have "(2::nat) ^ min_height kdt < 2 ^ height kdt'" proof - have "(2::nat) ^ min_height kdt < size_kdt kdt" by(rule min_height_size_if_incomplete[OF False]) also have "\ \ size_kdt kdt'" using assms(2) by simp also have "\ \ 2 ^ height kdt'" by(rule size_height) finally have "(2::nat) ^ min_height kdt < (2::nat) ^ height kdt'" . thus ?thesis . qed hence *: "min_height kdt < height kdt'" by simp have "min_height kdt + 1 = height kdt" using min_height_le_height[of kdt] assms(1) False by (simp add: complete_iff_height balanced_def) with * show ?thesis by arith qed subsection "Lemmas adapted from \HOL-Library.Tree_Real\ to \k\-d Tree" lemma size_height_log: "log 2 (size_kdt kdt) \ height kdt" by (simp add: log2_of_power_le size_height) lemma min_height_size_log: "min_height kdt \ log 2 (size_kdt kdt)" by (simp add: le_log2_of_power min_height_size) lemma size_log_if_complete: "complete kdt \ height kdt = log 2 (size_kdt kdt)" by (simp add: size_if_complete) lemma min_height_size_log_if_incomplete: "\ complete kdt \ min_height kdt < log 2 (size_kdt kdt)" by (simp add: less_log2_of_power min_height_size_if_incomplete) lemma min_height_balanced: assumes "balanced kdt" shows "min_height kdt = nat(floor(log 2 (size_kdt kdt)))" proof cases assume *: "complete kdt" hence "size_kdt kdt = 2 ^ min_height kdt" by (simp add: complete_iff_height size_if_complete) from log2_of_power_eq[OF this] show ?thesis by linarith next assume *: "\ complete kdt" hence "height kdt = min_height kdt + 1" using assms min_height_le_height[of kdt] by(auto simp add: balanced_def complete_iff_height) hence "size_kdt kdt < 2 ^ (min_height kdt + 1)" by (metis * size_height_if_incomplete) hence "log 2 (size_kdt kdt) < min_height kdt + 1" - using log2_of_power_less size_ge0 by blast + using log2_of_power_less size_ge_0 by blast thus ?thesis using min_height_size_log[of kdt] by linarith qed lemma height_balanced: assumes "balanced kdt" shows "height kdt = nat(ceiling(log 2 (size_kdt kdt)))" proof cases assume *: "complete kdt" hence "size_kdt kdt = 2 ^ height kdt" by (simp add: size_if_complete) from log2_of_power_eq[OF this] show ?thesis by linarith next assume *: "\ complete kdt" hence **: "height kdt = min_height kdt + 1" using assms min_height_le_height[of kdt] by(auto simp add: balanced_def complete_iff_height) hence "size_kdt kdt \ 2 ^ (min_height kdt + 1)" by (metis size_height) - from log2_of_power_le[OF this size_ge0] min_height_size_log_if_incomplete[OF *] ** + from log2_of_power_le[OF this size_ge_0] min_height_size_log_if_incomplete[OF *] ** show ?thesis by linarith qed lemma balanced_Node_if_wbal1: assumes "balanced l" "balanced r" "size_kdt l = size_kdt r + 1" shows "balanced (Node a s l r)" proof - from assms(3) have [simp]: "size_kdt l = size_kdt r + 1" by simp have "nat \log 2 (1 + size_kdt r)\ \ nat \log 2 (size_kdt r)\" by(rule nat_mono[OF ceiling_mono]) simp hence 1: "height(Node a s l r) = nat \log 2 (1 + size_kdt r)\ + 1" using height_balanced[OF assms(1)] height_balanced[OF assms(2)] by (simp del: nat_ceiling_le_eq add: max_def) have "nat \log 2 (1 + size_kdt r)\ \ nat \log 2 (size_kdt r)\" by(rule nat_mono[OF floor_mono]) simp hence 2: "min_height(Node a s l r) = nat \log 2 (size_kdt r)\ + 1" using min_height_balanced[OF assms(1)] min_height_balanced[OF assms(2)] by (simp) have "size_kdt r \ 1" by (simp add: Suc_leI) then obtain i where i: "2 ^ i \ size_kdt r" "size_kdt r < 2 ^ (i + 1)" using ex_power_ivl1[of 2 "size_kdt r"] by auto hence i1: "2 ^ i < size_kdt r + 1" "size_kdt r + 1 \ 2 ^ (i + 1)" by auto from 1 2 floor_log_nat_eq_if[OF i] ceiling_log_nat_eq_if[OF i1] show ?thesis by(simp add:balanced_def) qed lemma balanced_sym: "balanced (Node a s l r) \ balanced (Node a' s' r l)" by (auto simp: balanced_def) lemma balanced_Node_if_wbal2: assumes "balanced l" "balanced r" "abs(int(size_kdt l) - int(size_kdt r)) \ 1" shows "balanced (Node a s l r)" proof - have "size_kdt l = size_kdt r \ (size_kdt l = size_kdt r + 1 \ size_kdt r = size_kdt l + 1)" (is "?A \ ?B") using assms(3) by linarith thus ?thesis proof assume "?A" thus ?thesis using assms(1,2) apply(simp add: balanced_def min_def max_def) by (metis assms(1,2) balanced_optimal le_antisym le_less) next assume "?B" thus ?thesis by (meson assms(1,2) balanced_sym balanced_Node_if_wbal1) qed qed end \ No newline at end of file