diff --git a/thys/KD_Tree/Balanced.thy b/thys/KD_Tree/Balanced.thy deleted file mode 100644 --- a/thys/KD_Tree/Balanced.thy +++ /dev/null @@ -1,597 +0,0 @@ -(* - File: Balanced.thy - Author: Martin Rau, TU München -*) - -section "Building a balanced \k\-d Tree from a List of Points" - -theory Balanced -imports - KDTree - Median_Of_Medians_Selection.Median_Of_Medians_Selection -begin - -text \ - Build a balanced \k\-d Tree by recursively partition the points into two lists. - The partitioning criteria will be the median at a particular axis \ws\. - The left list will contain all points \p\ with @{term "p!ws \ median"}. - The right list will contain all points with median at axis @{term "ws \ p!a"}. - The left and right list differ in length by one or none. - The axis \ws\ will the widest spread axis. The axis which has the widest spread of point values. -\ - - -subsection "Widest Spread of a List of Points" - -definition spread_set :: "axis \ point set \ real" where - "spread_set a ps = ( - let as = (\p. p!a) ` ps in - Max as - Min as - )" - -definition spread :: "axis \ point list \ real" where - "spread a ps = ( - let as = map (\p. p!a) (tl ps) in - fold max as (hd ps !a) - fold min as (hd ps!a) - )" - -definition is_widest_spread :: "axis \ dimension \ point set \ bool" where - "is_widest_spread a k ps = (\a' < k. spread_set a' ps \ spread_set a ps)" - -fun widest_spread_rec :: "axis \ point list \ axis * real" where - "widest_spread_rec 0 ps = (0, spread 0 ps)" -| "widest_spread_rec a ps = ( - let (a', s') = widest_spread_rec (a - 1) ps in - let s = spread a ps in - if s \ s' then (a', s') else (a, s) - )" - -definition widest_spread :: "point list \ axis" where - "widest_spread ps = ( - let (a, _) = widest_spread_rec (dim (hd ps) - 1) ps in - a - )" - -fun widest_spread_invar :: "dimension \ kdt \ bool" where - "widest_spread_invar _ (Leaf _) \ True" -| "widest_spread_invar k (Node a s l r) \ is_widest_spread a k (set_kdt l \ set_kdt r) \ widest_spread_invar k l \ widest_spread_invar k r" - -lemma spread_is_spread_set: - "ps \ [] \ spread_set a (set ps) = spread a ps" - using Max.set_eq_fold[of "hd ps !a" _] Min.set_eq_fold[of "hd ps !a"] - apply (auto simp add: Let_def spread_def spread_set_def) - by (metis (no_types, lifting) hd_Cons_tl list.simps(15) list.simps(9) set_map) - -lemma widest_spread_rec_is_spread: - "(ws, s) = widest_spread_rec a ps \ s = spread ws ps" - by (induction a) (auto simp add: Let_def split: prod.splits if_splits) - -lemma is_widest_spread_k_le_ws: - "is_widest_spread ws k ps \ spread_set k ps \ spread_set ws ps \ is_widest_spread ws (k+1) ps" - using is_widest_spread_def less_Suc_eq by auto - -lemma is_widest_spread_k_gt_ws: - "is_widest_spread ws k ps \ \ (spread_set k ps \ spread_set ws ps) \ is_widest_spread k (k+1) ps" - using is_widest_spread_def less_Suc_eq by auto - -lemma widest_spread_rec_le_a: - "ps \ [] \ (ws, s) = widest_spread_rec a ps \ ws \ a" - by (induction a arbitrary: ws s) (auto simp add: Let_def le_Suc_eq split: prod.splits if_splits) - -lemma widest_spread_rec_is_widest_spread: - "ps \ [] \ (ws, s) = widest_spread_rec a ps \ is_widest_spread ws (a+1) (set ps)" -proof (induction a arbitrary: ws s) - case 0 - thus ?case - using is_widest_spread_def by simp -next - case (Suc a) - then obtain ws' s' where *: "(ws', s') = widest_spread_rec a ps" - by (metis surj_pair) - hence "is_widest_spread ws' (Suc a) (set ps)" - using Suc.IH Suc.prems(1) by simp - then show ?case - using Suc.prems * spread_is_spread_set widest_spread_rec_is_spread - is_widest_spread_k_le_ws[of ws' "Suc a" "set ps"] is_widest_spread_k_gt_ws[of ws' "Suc a" "set ps"] - by (auto simp add: Let_def split: prod.splits if_splits) -qed - -lemma widest_spread_lt_k: - "\p \ set ps. dim p = k \ 0 < k \ ps \ [] \ widest_spread ps < k" - using widest_spread_def widest_spread_rec_le_a - apply (auto split: prod.splits) - by (metis Suc_le_lessD Suc_le_mono Suc_pred) - -lemma widest_spread_is_widest_spread: - assumes "ps \ []" "\p \ set ps. dim p = k" - shows "is_widest_spread (widest_spread ps) k (set ps)" -proof (cases k) - case 0 - then show ?thesis - using is_widest_spread_def by simp -next - case (Suc n) - obtain ws s where *: "(ws, s) = widest_spread_rec (dim (hd ps) - 1) ps" - using prod.collapse by blast - moreover have "dim (hd ps) = k" - using assms(1,2) by simp - ultimately have "is_widest_spread ws (k - 1 + 1) (set ps)" - using widest_spread_rec_is_widest_spread assms(1) by simp - hence "is_widest_spread ws k (set ps)" - using Suc by simp - thus ?thesis - using * widest_spread_def by (auto split: prod.split) -qed - - -subsection "Partitioning a List of Points by Median" - -definition axis_median :: "axis \ point list \ real" where - "axis_median a ps = ( - let n = (length ps - 1) div 2 in - let ps' = map (\p. p!a) ps in - fast_select n ps' - )" - -lemma size_mset_map_P: - "size {# y \# mset (map f xs). P y #} = size {# x \# mset xs. P (f x) #}" - by (induction xs) auto - -lemma size_axis_median_length: - assumes "0 < length ps" - shows "size {# p \# mset ps. p!a < axis_median a ps #} \ (length ps - 1) div 2" (is "?thesis1") - and "size {# p \# mset ps. axis_median a ps < p!a #} \ length ps div 2" (is "?thesis2") -proof - - let ?n = "(length ps - 1) div 2" - let ?ps' = "map (\p. p!a) ps" - let ?m = "fast_select ?n ?ps'" - - have "length ps = length ?ps'" - by simp - moreover have "?n < length ?ps'" - using assms calculation by linarith - ultimately have *: "median ?ps' = ?m" - using median_def fast_select_correct by metis - - have "size {# a \# mset ?ps'. a < ?m #} \ (length ps - 1) div 2" - using * size_less_than_median[of ?ps'] by simp - hence "size {# p \# mset ps. p!a < ?m #} \ (length ps - 1) div 2" - using size_mset_map_P[of "\a. a < ?m"] by metis - thus ?thesis1 - using axis_median_def by metis - - have "size {# a \# mset ?ps'. ?m < a #} \ length ps div 2" - using * size_greater_than_median[of ?ps'] by simp - hence "size {# p \# mset ps. ?m < p!a #} \ length ps div 2" - using size_mset_map_P[of "\a. ?m < a"] by metis - thus ?thesis2 - using axis_median_def by metis -qed - - -subsubsection "Threeway Partitioning a List of Points" - -fun partition :: "axis \ real \ point list \ point list * point list * point list" where - "partition _ _ [] = ([], [], [])" -| "partition a m (p # ps) = ( - let (lt, eq, gt) = partition a m ps in - if p!a < m then - (p # lt, eq, gt) - else if p!a = m then - (lt, p # eq, gt) - else - (lt, eq, p # gt) - )" - -lemma partition_set: - assumes "(lt, eq, gt) = partition a m ps" - shows "set ps = set lt \ set eq \ set gt" - using assms by (induction ps arbitrary: lt eq gt) (auto split: prod.splits if_splits) - -lemma partition_mset: - assumes "(lt, eq, gt) = partition a m ps" - shows "mset lt = {# p \# mset ps. p!a < m #}" - and "mset eq = {# p \# mset ps. p!a = m #}" - and "mset gt = {# p \# mset ps. m < p!a #}" - using assms by (induction ps arbitrary: lt eq gt) (auto split: prod.splits if_splits) - -lemma partition_filter: - assumes "(lt, eq, gt) = partition a m ps" - shows "\p \ set lt. p!a < m" - and "\p \ set eq. p!a = m" - and "\p \ set gt. m < p!a" - using assms by (induction ps arbitrary: lt eq gt) (auto split: prod.splits if_splits) - -lemma partition_length: - assumes "(lt, eq, gt) = partition a m ps" - shows "length ps = length lt + length eq + length gt" - and "length lt = size {# p \# mset ps. p!a < m #}" - and "length eq = size {# p \# mset ps. p!a = m #}" - and "length gt = size {# p \# mset ps. m < p!a #}" - using assms by (induction ps arbitrary: lt eq gt) (auto split: prod.splits if_splits) - - -subsubsection "Partitioning a List of Points balanced by Median" - -definition partition_by_median :: "axis \ point list \ point list * real * point list" where - "partition_by_median a ps = ( - let m = axis_median a ps in - let (lt, eq, gt) = partition a m ps in - let rem = length ps div 2 - length lt in - (lt @ take rem eq, m, drop rem eq @ gt) - )" - -lemma set_take_drop: "set xs = set (take n xs) \ set (drop n xs)" - by (metis append_take_drop_id set_append) - -lemma partition_by_median_set: - assumes "(l, m, r) = partition_by_median a ps" - shows "set ps = set l \ set r" - using assms unfolding partition_by_median_def - apply (simp add: Let_def split: prod.splits) - using set_take_drop by (metis partition_set inf_sup_aci(6)) - -lemma partition_by_median_filter: - assumes "(l, m, r) = partition_by_median a ps" - shows "\p \ set l. p!a \ m" - and "\p \ set r. m \ p!a" - using assms unfolding partition_by_median_def - by (auto simp add: Let_def le_less split: prod.splits dest!: in_set_takeD in_set_dropD) - (metis partition_filter)+ - -lemma partition_by_median_length1: - assumes "(l, m, r) = partition_by_median a ps" - shows "length ps = length l + length r" - using assms unfolding partition_by_median_def - apply (simp add: Let_def min_def split: prod.splits) - by (metis (no_types, lifting) add.assoc partition_length(1)) - -lemma partition_by_median_length2: - assumes "(l, m, r) = partition_by_median a ps" "0 < length ps" - shows "length r - length l \ 1" - and "length l \ length r" -proof - - let ?m = "axis_median a ps" - let ?leg = "partition a ?m ps" - let ?lt = "fst ?leg" - let ?eq = "fst (snd ?leg)" - let ?gt = "snd (snd ?leg)" - let ?rem = "length ps div 2 - length ?lt" - let ?l = "?lt @ take ?rem ?eq" - let ?r = "drop ?rem ?eq @ ?gt" - - have *: "(?lt, ?eq, ?gt) = partition a ?m ps" - by simp - have "length ?lt \ (length ps - 1) div 2" - using assms * partition_length(2) size_axis_median_length(1) by presburger - moreover have "length ?gt \ length ps div 2" - using assms * partition_length(4) size_axis_median_length(2) by presburger - moreover have "length ps = length ?lt + length ?eq + length ?gt" - using * partition_length(1) by simp - ultimately have L: "length ?l = length ps div 2" - by simp - - have **: "(?l, ?m, ?r) = partition_by_median a ps" - by (auto simp add: Let_def partition_by_median_def split: prod.splits) - hence "length ps = length ?l + length ?r" - using partition_by_median_length1 by blast - hence "length ?l \ length ?r" "length ?r - length ?l \ 1" - using L by linarith+ - - thus "length l \ length r" "length r - length l \ 1" - using ** by (metis Pair_inject assms(1))+ -qed - -lemma partition_by_median_length3: - assumes "(l, m, r) = partition_by_median a ps" "0 < length ps" - shows "length l < length ps" -proof (rule ccontr) - assume *: "\ (length l < length ps)" - have "length ps = length l + length r" - using partition_by_median_length1 assms(1) by simp - hence "length l = length ps" "length r = 0" - using * by simp_all - moreover have "length l \ length r" - using partition_by_median_length2 assms by fastforce+ - ultimately show "False" using assms(2) by simp -qed - -lemma partition_by_median_length4: - assumes "(l, m, r) = partition_by_median a ps" "1 < length ps" - shows "length r < length ps" -proof (rule ccontr) - assume *: "\ (length r < length ps)" - have "length ps = length l + length r" - using partition_by_median_length1 assms(1) by simp - hence "length r = length ps" "length l = 0" - using * by simp_all - moreover have "length r - length l \ 1" - using partition_by_median_length2 assms by fastforce+ - ultimately show "False" using assms(2) by simp -qed - -lemma partition_by_median_length5: - assumes "(l, m, r) = partition_by_median a ps" "1 < length ps" - shows "0 < length l" - and "0 < length r" - using assms partition_by_median_length1 partition_by_median_length4 apply simp - using assms partition_by_median_length1 partition_by_median_length3 by fastforce - -lemmas partition_by_median_length = - partition_by_median_length1 partition_by_median_length2 - partition_by_median_length3 partition_by_median_length4 - partition_by_median_length5 - - -subsection \Building the Tree\ - -function (sequential) build :: "dimension \ point list \ kdt" where - "build _ [] = undefined" (* We never hit this case recursively. Only if the original input is really [].*) -| "build k [p] = Leaf p" -| "build k ps = ( - let a = widest_spread ps in - let (l, m, r) = partition_by_median a ps in - Node a m (build k l) (build k r) - )" - by pat_completeness auto -termination build - using partition_by_median_length(4,5) - apply (relation "measure (\( _, ps). length ps)") - apply (auto) - apply fastforce+ - done - - -text \ - Setting up different build simps for length induct. -\ - -lemma build_simp1: - "ps = [p] \ build k ps = Leaf p" - by simp - -lemma build_simp2: - "ps = p\<^sub>0 # p\<^sub>1 # ps' \ a = widest_spread ps \ (l, m, r) = partition_by_median a ps \ build k ps = Node a m (build k l) (build k r)" - using build.simps(3) by (auto simp add: Let_def split: prod.splits) - -lemma length_ps_gt_1: - "1 < length ps \ \p\<^sub>0 p\<^sub>1 ps'. ps = p\<^sub>0 # p\<^sub>1 # ps'" - by (induction ps) (auto simp add: neq_Nil_conv) - -lemma build_simp3: - "1 < length ps \ a = widest_spread ps \ (l, m, r) = partition_by_median a ps \ build k ps = Node a m (build k l) (build k r)" - using build_simp2 length_ps_gt_1 by fast - -lemmas build_simps[simp] = build_simp1 build_simp3 - -declare build.simps[simp del] - - -text \ - The main theorems. -\ - -theorem build_set: - assumes "0 < length ps" - shows "set ps = set_kdt (build k ps)" - using assms -proof (induction ps rule: length_induct) - case (1 ps) - then show ?case - proof (cases "length ps \ 1") - case True - then obtain p where "ps = [p]" - using "1.prems" by (cases ps) auto - thus ?thesis - by simp - next - case False - - let ?a = "widest_spread ps" - let ?lmr = "partition_by_median ?a ps" - let ?l = "fst ?lmr" - let ?m = "fst (snd ?lmr)" - let ?r = "snd (snd ?lmr)" - - have "set ?l = set_kdt (build k ?l)" "set ?r = set_kdt (build k ?r)" - using False partition_by_median_length(4,5,6,7)[of ?l ?m ?r ?a ps] "1.IH" by force+ - moreover have "set ps = set ?l \ set ?r" - using partition_by_median_set by (metis prod.collapse) - moreover have "build k ps = Node ?a ?m (build k ?l) (build k ?r)" - using False by simp - ultimately show ?thesis - by auto - qed -qed - -theorem build_invar: - assumes "0 < length ps" "\p \ set ps. dim p = k" "distinct ps" "0 < k" - shows "invar k (build k ps)" - using assms -proof (induction ps rule: length_induct) - case (1 ps) - then show ?case - proof (cases "length ps \ 1") - case True - then obtain p where P: "ps = [p]" - using "1.prems"(1) by (cases ps) auto - hence "dim p = k" - using "1.prems"(2) by simp - thus ?thesis - using P by simp - next - case False - - let ?a = "widest_spread ps" - let ?lmr = "partition_by_median ?a ps" - let ?l = "fst ?lmr" - let ?m = "fst (snd ?lmr)" - let ?r = "snd (snd ?lmr)" - - have 1: "length ps = length ?l + length ?r" - using partition_by_median_length(1) by (metis prod.collapse)+ - hence 2: "length ?l < length ps" "length ?r < length ps" - using False partition_by_median_length(4,5) not_le_imp_less "1.prems" by (metis prod.collapse)+ - hence 3: "0 < length ?l" "0 < length ?r" - using 1 False partition_by_median_length(6,7) by simp_all - moreover have 4: "set ps = set ?l \ set ?r" - using partition_by_median_set by (metis prod.collapse) - moreover have "distinct ?l" "distinct ?r" and 5: "set ?l \ set ?r = {}" - using "1.prems"(3) 1 4 by (metis card_distinct distinct_append distinct_card length_append set_append)+ - moreover have "\p \ set ?l .dim p = k" "\p \ set ?r .dim p = k" - using "1.prems"(2) 4 5 by simp_all - ultimately have "invar k (build k ?l)" "invar k (build k ?r)" - using "1.IH" 2 by (simp_all add: "1.prems"(4)) - moreover have "\p \ set ?l. p ! ?a \ ?m" "\p \ set ?r. ?m \ p ! ?a" - using partition_by_median_filter by (metis prod.collapse)+ - moreover have "build k ps = Node ?a ?m (build k ?l) (build k ?r)" - using False by simp - moreover have "?a < k" - using "1.prems"(1,2,4) widest_spread_lt_k by auto - ultimately show ?thesis - using 3 5 build_set by auto - qed -qed - -theorem build_widest_spread: - assumes "0 < length ps" "\p \ set ps. dim p = k" - shows "widest_spread_invar k (build k ps)" - using assms -proof (induction ps rule: length_induct) - case (1 ps) - then show ?case - proof (cases "length ps \ 1") - case True - then obtain p where "ps = [p]" - using "1.prems" by (cases ps) auto - thus ?thesis by simp - next - case False - - let ?a = "widest_spread ps" - let ?lmr = "partition_by_median ?a ps" - let ?l = "fst ?lmr" - let ?m = "fst (snd ?lmr)" - let ?r = "snd (snd ?lmr)" - - have 1: "length ps = length ?l + length ?r" - using partition_by_median_length(1) by (metis prod.collapse)+ - hence 2: "length ?l < length ps" "length ?r < length ps" - using False partition_by_median_length(4,5) not_le_imp_less "1.prems" - apply (auto simp add: not_le) - apply (metis "1" not_add_less1 not_add_less2 prod.collapse)+ - done - hence 3: "0 < length ?l" "0 < length ?r" - using 1 False partition_by_median_length(6,7) by simp_all - moreover have 4: "set ps = set ?l \ set ?r" - using partition_by_median_set by (metis prod.collapse) - moreover have "\p \ set ?l .dim p = k" "\p \ set ?r .dim p = k" - using "1.prems"(2) 4 by simp_all - ultimately have "widest_spread_invar k (build k ?l)" "widest_spread_invar k (build k ?r)" - using "1.IH" 2 by simp_all - moreover have "is_widest_spread ?a k (set_kdt (build k ?l) \ set_kdt (build k ?r))" - using widest_spread_is_widest_spread "1.prems" 3 4 build_set by fastforce - moreover have "build k ps = Node ?a ?m (build k ?l) (build k ?r)" - using False by simp - ultimately show ?thesis - by auto - qed -qed - -theorem build_size: - assumes "0 < length ps" - shows "size_kdt (build k ps) = length ps" - using assms -proof (induction ps rule: length_induct) - case (1 ps) - then show ?case - proof (cases "length ps \ 1") - case True - then obtain p where "ps = [p]" - using "1.prems" by (cases ps) auto - thus ?thesis by simp - next - case False - - let ?a = "widest_spread ps" - let ?lmr = "partition_by_median ?a ps" - let ?l = "fst ?lmr" - let ?m = "fst (snd ?lmr)" - let ?r = "snd (snd ?lmr)" - - have "size_kdt (build k ?l) = length ?l" "size_kdt (build k ?r) = length ?r" - using False partition_by_median_length(4,5,6,7)[of ?l ?m ?r ?a ps] "1.IH" by force+ - moreover have "build k ps = Node ?a ?m (build k ?l) (build k ?r)" - using False by simp - ultimately show ?thesis - using partition_by_median_length(1) by (metis prod.collapse size_kdt.simps(2)) - qed -qed - -theorem build_balanced: - assumes "0 < length ps" - shows "balanced (build k ps)" - using assms -proof (induction ps rule: length_induct) - case (1 ps) - show ?case - proof (cases "length ps \ 1") - case True - then obtain p where "ps = [p]" - using "1.prems" by (cases ps) auto - thus ?thesis - unfolding balanced_def by simp - next - case False - - let ?a = "widest_spread ps" - let ?lmr = "partition_by_median ?a ps" - let ?l = "fst ?lmr" - let ?m = "fst (snd ?lmr)" - let ?r = "snd (snd ?lmr)" - - have 0: "length ps = length ?l + length ?r" "length ?r - length ?l \ 1" "length ?l \ length ?r" - using partition_by_median_length(1,2,3) "1.prems" by (metis prod.collapse)+ - hence 1: "length ?l + 1 = length ?r \ length ?l = length ?r" - by linarith - moreover have 2: "length ?l < length ps" "length ?r < length ps" - using False 0 by auto - moreover have 3: "0 < length ?l" "0 < length ?r" - using "1.prems" 0 1 2 by linarith+ - ultimately have 4: "balanced (build k ?l)" "balanced (build k ?r)" - using "1.IH" by simp_all - have "build k ps = Node ?a ?m (build k ?l) (build k ?r)" - using False by simp - moreover have "size_kdt (build k ?l) + 1 = size_kdt (build k ?r) \ size_kdt (build k ?l) = size_kdt (build k ?r)" - using 1 3 build_size by simp - ultimately show ?thesis - using 4 balanced_Node_if_wbal2 by auto - qed -qed - -lemma complete_if_balanced_size_2powh: - assumes "balanced kdt" "size_kdt kdt = 2 ^ h" - shows "complete kdt" -proof (rule ccontr) - assume "\ complete kdt" - hence "2 ^ (min_height kdt) < size_kdt kdt" "size_kdt kdt < 2 ^ height kdt" - by (simp_all add: min_height_size_if_incomplete size_height_if_incomplete) - hence "height kdt - min_height kdt > 1" - using assms(2) by simp - hence "\ balanced kdt" - using balanced_def by simp - thus "False" - using assms(1) by simp -qed - -theorem build_complete: - assumes "length ps = 2 ^ h" - shows "complete (build k ps)" - using assms complete_if_balanced_size_2powh - by (simp add: build_balanced build_size) - -corollary build_height: - "length ps = 2 ^ h \ length ps = 2 ^ (height (build k ps))" - using build_complete build_size complete_iff_size by auto - -end \ No newline at end of file diff --git a/thys/KD_Tree/Build.thy b/thys/KD_Tree/Build.thy new file mode 100644 --- /dev/null +++ b/thys/KD_Tree/Build.thy @@ -0,0 +1,464 @@ +(* + File: Build.thy + Author: Martin Rau, TU München +*) + +section \Building a balanced \k\-d Tree from a List of Points\ + +theory Build +imports + KD_Tree + Median_Of_Medians_Selection.Median_Of_Medians_Selection +begin + +text \ + Build a balanced \k\-d Tree by recursively partition the points into two lists. + The partitioning criteria will be the median at a particular axis \k\. + The left list will contain all points \p\ with @{term "p$k \ median"}. + The right list will contain all points with median at axis @{term "median < p$k"}. + The left and right list differ in length by one or none. + The axis \k\ will the widest spread axis. +\ + +subsection "Auxiliary Lemmas" + +lemma length_filter_mset_sorted_nth: + assumes "distinct xs" "n < length xs" "sorted xs" + shows "{# x \# mset xs. x \ xs ! n #} = mset (take (n + 1) xs)" + using assms +proof (induction xs arbitrary: n rule: list.induct) + case (Cons x xs) + thus ?case + proof (cases n) + case 0 + thus ?thesis + using Cons.prems(1,3) filter_mset_is_empty_iff by fastforce + next + case (Suc n') + thus ?thesis + using Cons by simp + qed +qed auto + +lemma length_filter_sort_nth: + assumes "distinct xs" "n < length xs" + shows "length (filter (\x. x \ sort xs ! n) xs) = n + 1" +proof - + have "length (filter (\x. x \ sort xs ! n) xs) = length (filter (\x. x \ sort xs ! n) (sort xs))" + by (simp add: filter_sort) + also have "... = size (mset (filter (\x. x \ sort xs ! n) (sort xs)))" + using size_mset by metis + also have "... = size ({# x \# mset (sort xs). x \ sort xs ! n #})" + using mset_filter by simp + also have "... = size (mset (take (n + 1) (sort xs)))" + using length_filter_mset_sorted_nth assms sorted_sort distinct_sort length_sort by metis + finally show ?thesis + using assms(2) by auto +qed + + +subsection \Widest Spread Axis\ + +definition calc_spread :: "('k::finite) \ 'k point list \ real" where + "calc_spread k ps = (case ps of [] \ 0 | ps \ + let ks = map (\p. p$k) (tl ps) in + fold max ks ((hd ps)$k) - fold min ks ((hd ps)$k) + )" + +fun widest_spread :: "('k::finite) list \ 'k point list \ 'k \ real" where + "widest_spread [] _ = undefined" +| "widest_spread [k] ps = (k, calc_spread k ps)" +| "widest_spread (k # ks) ps = ( + let (k', s') = widest_spread ks ps in + let s = calc_spread k ps in + if s \ s' then (k', s') else (k, s) + )" + +lemma calc_spread_spec: + "calc_spread k ps = spread k (set ps)" + using Max.set_eq_fold[of "(hd ps)$k"] Min.set_eq_fold[of "(hd ps)$k"] + by (auto simp: Let_def spread_def calc_spread_def split: list.splits, metis set_map) + +lemma widest_spread_calc_spread: + "ks \ [] \ (k, s) = widest_spread ks ps \ s = calc_spread k ps" + by (induction ks ps rule: widest_spread.induct) (auto simp: Let_def split: prod.splits if_splits) + +lemma widest_spread_axis_Un: + shows "widest_spread_axis k K P \ spread k' P \ spread k P \ widest_spread_axis k (K \ { k' }) P" + and "widest_spread_axis k K P \ spread k P \ spread k' P \ widest_spread_axis k' (K \ { k' }) P" + unfolding widest_spread_axis_def by auto + +lemma widest_spread_spec: + "(k, s) = widest_spread ks ps \ widest_spread_axis k (set ks) (set ps)" +proof (induction ks ps arbitrary: k s rule: widest_spread.induct) + case (3 k\<^sub>0 k\<^sub>1 ks ps) + obtain K' S' where K'_def: "(K', S') = widest_spread (k\<^sub>1 # ks) ps" + by (metis surj_pair) + hence IH: "widest_spread_axis K' (set (k\<^sub>1 # ks)) (set ps)" + using "3.IH" by blast + hence 0: "S' = spread K' (set ps)" + using K'_def widest_spread_calc_spread calc_spread_spec by blast + define S where "S = calc_spread k\<^sub>0 ps" + hence 1: "S = spread k\<^sub>0 (set ps)" + using calc_spread_spec by blast + show ?case + proof (cases "S \ S'") + case True + hence "widest_spread_axis K' (set (k\<^sub>0 # k\<^sub>1 # ks)) (set ps)" + using 0 1 widest_spread_axis_Un(1)[OF IH, of k\<^sub>0] by auto + thus ?thesis + using True K'_def S_def "3.prems" by (auto split: prod.splits) + next + case False + hence "widest_spread_axis k\<^sub>0 (set (k\<^sub>0 # k\<^sub>1 # ks)) (set ps)" + using 0 1 widest_spread_axis_Un(2)[OF IH, of k\<^sub>0] "3.prems"(1) by auto + thus ?thesis + using False K'_def S_def "3.prems" by (auto split: prod.splits) + qed +qed (auto simp: widest_spread_axis_def) + + +subsection \Fast Axis Median\ + +definition axis_median :: "('k::finite) \ 'k point list \ real" where + "axis_median k ps = (let n = (length ps - 1) div 2 in fast_select n (map (\p. p$k) ps))" + +lemma length_filter_le_axis_median: + assumes "0 < length ps" "\k. distinct (map (\p. p$k) ps)" + shows "length (filter (\p. p$k \ axis_median k ps) ps) = (length ps - 1) div 2 + 1" +proof - + let ?n = "(length ps - 1) div 2" + let ?ps = "map (\p. p$k) ps" + let ?m = "fast_select ?n ?ps" + have 0: "?n < length ?ps" + using assms(1) by (auto, linarith) + have 1: "distinct ?ps" + using assms(2) by blast + have "?m = select ?n ?ps" + using fast_select_correct[OF 0] by blast + hence "length (filter (\p. p$k \ axis_median k ps) ps) = + length (filter (\p. p$k \ sort ?ps ! ?n) ps)" + unfolding axis_median_def by (auto simp add: Let_def select_def simp del: fast_select.simps) + also have "... = length (filter (\v. v \ sort ?ps ! ?n) ?ps)" + by (induction ps) (auto, metis comp_apply) + also have "... = ?n + 1" + using length_filter_sort_nth[OF 1 0] by blast + finally show ?thesis . +qed + +definition partition_by_median :: "('k::finite) \ 'k point list \ 'k point list \ real \ 'k point list" where + "partition_by_median k ps = ( + let m = axis_median k ps in + let (l, r) = partition (\p. p$k \ m) ps in + (l, m, r) + )" + +lemma set_partition_by_median: + "(l, m, r) = partition_by_median k ps \ set ps = set l \ set r" + unfolding partition_by_median_def by (auto simp: Let_def) + +lemma filter_partition_by_median: + assumes "(l, m, r) = partition_by_median k ps" + shows "\p \ set l. p$k \ m" + and "\p \ set r. \p$k \ m" + using assms unfolding partition_by_median_def by (auto simp: Let_def) + +lemma sum_length_partition_by_median: + assumes "(l, m, r) = partition_by_median k ps" + shows "length ps = length l + length r" + using assms sum_length_filter_compl[of "(\p. p $ k \ axis_median k ps)"] + unfolding partition_by_median_def by (simp add: Let_def o_def) + +lemma length_l_partition_by_median: + assumes "0 < length ps" "\k. distinct (map (\p. p$k) ps)" "(l, m, r) = partition_by_median k ps" + shows "length l = (length ps - 1) div 2 + 1" + using assms unfolding partition_by_median_def by (auto simp: Let_def length_filter_le_axis_median) + +corollary lengths_partition_by_median_1: + assumes "0 < length ps" "\k. distinct (map (\p. p$k) ps)" "(l, m, r) = partition_by_median k ps" + shows "length l - length r \ 1" + and "length r \ length l" + and "0 < length l" + and "length r < length ps" + using length_l_partition_by_median[OF assms] sum_length_partition_by_median[OF assms(3)] by auto + +corollary lengths_partition_by_median_2: + assumes "1 < length ps" "\k. distinct (map (\p. p$k) ps)" "(l, m, r) = partition_by_median k ps" + shows "0 < length r" + and "length l < length ps" +proof - + have *: "0 < length ps" + using assms(1) by auto + show "0 < length r" "length l < length ps" + using length_l_partition_by_median[OF * assms(2,3)] sum_length_partition_by_median[OF assms(3)] + using assms(1) by linarith+ +qed + +lemmas length_partition_by_median = + sum_length_partition_by_median length_l_partition_by_median + lengths_partition_by_median_1 lengths_partition_by_median_2 + + +subsection \Building the Tree\ + +function (domintros, sequential) build :: "('k::finite) list \ 'k point list \ 'k kdt" where + "build _ [] = undefined" +| "build _ [p] = Leaf p" +| "build ks ps = ( + let (k, _) = widest_spread ks ps in + let (l, m, r) = partition_by_median k ps in + Node k m (build ks l) (build ks r) + )" + by pat_completeness auto + +lemma build_domintros3: + assumes "(k, s) = widest_spread ks (x # y # zs)" "(l, m, r) = partition_by_median k (x # y # zs)" + assumes "build_dom (ks, l)" "build_dom (ks, r)" + shows "build_dom (ks, x # y # zs)" +proof - + { + fix k s l m r + assume "(k, s) = widest_spread ks (x # y # zs)" "(l, m, r) = partition_by_median k (x # y # zs)" + hence "build_dom (ks, l)" "build_dom (ks, r)" + using assms by (metis Pair_inject)+ + } + thus ?thesis + by (simp add: build.domintros(3)) +qed + +lemma build_termination: + assumes "\k. distinct (map (\p. p$k) ps)" + shows "build_dom (ks, ps)" + using assms +proof (induction ps rule: length_induct) + case (1 xs) + consider (A) "xs = []" | (B) "\x. xs = [x]" | (C) "\x y zs. xs = x # y # zs" + by (induction xs rule: induct_list012) auto + then show ?case + proof cases + case C + then obtain x y zs where xyzs_def: "xs = x # y # zs" + by blast + obtain k s where ks_def: "(k, s) = widest_spread ks xs" + by (metis surj_pair) + obtain l m r where lmr_def: "(l, m, r) = partition_by_median k xs" + by (metis prod_cases3) + note defs = xyzs_def ks_def lmr_def + have "\k. distinct (map (\p. p $ k) l)" "\k. distinct (map (\p. p $ k) r)" + using lmr_def unfolding partition_by_median_def + by (auto simp: Let_def "1.prems" distinct_map_filter) + moreover have "length l < length xs" "length r < length xs" + using length_partition_by_median(8)[OF _ "1.prems"] length_partition_by_median(6)[OF _ "1.prems"] + using defs by auto + ultimately have "build_dom (ks, l)" "build_dom (ks, r)" + using "1.IH" by blast+ + thus ?thesis + using build_domintros3 defs by blast + qed (auto intro: build.domintros) +qed + +lemma build_psimp_1: + "ps = [p] \ build k ps = Leaf p" + by (simp add: build.domintros(2) build.psimps(2)) + +lemma build_psimp_2: + assumes "(k, s) = widest_spread ks (x # y # zs)" "(l, m, r) = partition_by_median k (x # y # zs)" + assumes "build_dom (ks, l)" "build_dom (ks, r)" + shows "build ks (x # y # zs) = Node k m (build ks l) (build ks r)" +proof - + have 0: "build_dom (ks, x # y # zs)" + using assms build_domintros3 by blast + thus ?thesis + using build.psimps(3)[OF 0] assms(1,2) by (auto split: prod.splits) +qed + +lemma length_xs_gt_1: + "1 < length xs \ \x y ys. xs = x # y # ys" + by (cases xs, auto simp: neq_Nil_conv) + +lemma build_psimp_3: + assumes "1 < length ps" "(k, s) = widest_spread ks ps" "(l, m, r) = partition_by_median k ps" + assumes "build_dom (ks, l)" "build_dom (ks, r)" + shows "build ks ps = Node k m (build ks l) (build ks r)" + using build_psimp_2 length_xs_gt_1 assms by blast + +lemmas build_psimps[simp] = build_psimp_1 build_psimp_3 + + +subsection \Main Theorems\ + +theorem set_build: + "0 < length ps \ \k. distinct (map (\p. p$k) ps) \ set ps = set_kdt (build ks ps)" +proof (induction ps rule: length_induct) + case (1 ps) + show ?case + proof (cases "1 < length ps") + case True + obtain k s where ks_def: "(k, s) = widest_spread ks ps" + by (metis surj_pair) + obtain l m r where lmr_def: "(l, m, r) = partition_by_median k ps" + by (metis prod_cases3) + have D: "\k. distinct (map (\p. p$k) l)" "\k. distinct (map (\p. p$k) r)" + using lmr_def unfolding partition_by_median_def + by (auto simp: "1.prems"(2) Let_def distinct_map_filter) + moreover have "length l < length ps" "0 < length l" + "length r < length ps" "0 < length r" + using length_partition_by_median(8)[OF True "1.prems"(2)] + length_partition_by_median(5)[OF "1.prems"(1) "1.prems"(2)] + length_partition_by_median(6)[OF "1.prems"(1) "1.prems"(2)] + length_partition_by_median(7)[OF True "1.prems"(2)] + lmr_def by blast+ + ultimately have "set l = set_kdt (build ks l)" "set r = set_kdt (build ks r)" + using "1.IH" by blast+ + moreover have "set ps = set l \ set r" + using lmr_def unfolding partition_by_median_def by (auto simp: Let_def) + moreover have "build ks ps = Node k m (build ks l) (build ks r)" + using build_psimp_3[OF True ks_def lmr_def] build_termination D by blast + ultimately show ?thesis + by simp + next + case False + thus ?thesis + using "1.prems" by (cases ps) auto + qed +qed + +theorem invar_build: + "0 < length ps \ \k. distinct (map (\p. p$k) ps) \ set ks = UNIV \ invar (build ks ps)" +proof (induction ps rule: length_induct) + case (1 ps) + show ?case + proof (cases "1 < length ps") + case True + obtain k s where ks_def: "(k, s) = widest_spread ks ps" + by (metis surj_pair) + obtain l m r where lmr_def: "(l, m, r) = partition_by_median k ps" + by (metis prod_cases3) + have D: "\k. distinct (map (\p. p$k) l)" "\k. distinct (map (\p. p$k) r)" + using lmr_def unfolding partition_by_median_def + by (auto simp: "1.prems"(2) Let_def distinct_map_filter) + moreover have "length l < length ps" "0 < length l" + "length r < length ps" "0 < length r" + using length_partition_by_median(8)[OF True "1.prems"(2)] + length_partition_by_median(5)[OF "1.prems"(1) "1.prems"(2)] + length_partition_by_median(6)[OF "1.prems"(1) "1.prems"(2)] + length_partition_by_median(7)[OF True "1.prems"(2)] + lmr_def by blast+ + ultimately have "invar (build ks l)" "invar (build ks r)" + using "1.IH" "1.prems"(3) by blast+ + moreover have "\p \ set l. p$k \ m" "\p \ set r. m < p$k" + using filter_partition_by_median(1)[OF lmr_def] + filter_partition_by_median(2)[OF lmr_def] by auto + moreover have "widest_spread_axis k UNIV (set l \ set r)" + using widest_spread_spec[OF ks_def] "1.prems"(3) set_partition_by_median[OF lmr_def] by simp + moreover have "build ks ps = Node k m (build ks l) (build ks r)" + using build_psimp_3[OF True ks_def lmr_def] build_termination D by blast + ultimately show ?thesis + using set_build[OF \0 < length l\ D(1)] set_build[OF \0 < length r\ D(2)] by simp + next + case False + thus ?thesis + using "1.prems" by (cases ps) auto + qed +qed + +theorem size_build: + "0 < length ps \ \k. distinct (map (\p. p$k) ps) \ size_kdt (build ks ps) = length ps" +proof (induction ps rule: length_induct) + case (1 ps) + show ?case + proof (cases "1 < length ps") + case True + obtain k s where ks_def: "(k, s) = widest_spread ks ps" + by (metis surj_pair) + obtain l m r where lmr_def: "(l, m, r) = partition_by_median k ps" + by (metis prod_cases3) + have D: "\k. distinct (map (\p. p$k) l)" "\k. distinct (map (\p. p$k) r)" + using lmr_def unfolding partition_by_median_def + by (auto simp: "1.prems"(2) Let_def distinct_map_filter) + moreover have "length l < length ps" "0 < length l" + "length r < length ps" "0 < length r" + using length_partition_by_median(8)[OF True "1.prems"(2)] + length_partition_by_median(5)[OF "1.prems"(1) "1.prems"(2)] + length_partition_by_median(6)[OF "1.prems"(1) "1.prems"(2)] + length_partition_by_median(7)[OF True "1.prems"(2)] + lmr_def by blast+ + ultimately have "size_kdt (build ks l) = length l" "size_kdt (build ks r) = length r" + using "1.IH" by blast+ + moreover have "build ks ps = Node k m (build ks l) (build ks r)" + using build_psimp_3[OF True ks_def lmr_def] build_termination D by blast + ultimately show ?thesis + using length_partition_by_median(1)[OF lmr_def] by simp + next + case False + thus ?thesis + using "1.prems" by (cases ps) auto + qed +qed + +theorem balanced_build: + "0 < length ps \ \k. distinct (map (\p. p$k) ps) \ balanced (build ks ps)" +proof (induction ps rule: length_induct) + case (1 ps) + show ?case + proof (cases "1 < length ps") + case True + obtain k s where ks_def: "(k, s) = widest_spread ks ps" + by (metis surj_pair) + obtain l m r where lmr_def: "(l, m, r) = partition_by_median k ps" + by (metis prod_cases3) + have D: "\k. distinct (map (\p. p$k) l)" "\k. distinct (map (\p. p$k) r)" + using lmr_def unfolding partition_by_median_def + by (auto simp: "1.prems"(2) Let_def distinct_map_filter) + moreover have "length l < length ps" "0 < length l" + "length r < length ps" "0 < length r" + using length_partition_by_median(8)[OF True "1.prems"(2)] + length_partition_by_median(5)[OF "1.prems"(1) "1.prems"(2)] + length_partition_by_median(6)[OF "1.prems"(1) "1.prems"(2)] + length_partition_by_median(7)[OF True "1.prems"(2)] + lmr_def by blast+ + ultimately have IH: "balanced (build ks l)" "balanced (build ks r)" + using "1.IH" by blast+ + have "build ks ps = Node k m (build ks l) (build ks r)" + using build_psimp_3[OF True ks_def lmr_def] build_termination D by blast + moreover have "length r + 1 = length l \ length r = length l" + using length_partition_by_median(1)[OF lmr_def] + length_partition_by_median(3)[OF "1.prems"(1) "1.prems"(2) lmr_def] + length_partition_by_median(4)[OF "1.prems"(1) "1.prems"(2) lmr_def] + by linarith + ultimately show ?thesis + using balanced_Node_if_wbal1[OF IH] balanced_Node_if_wbal2[OF IH] + size_build[OF \0 < length l\ D(1)] size_build[OF \0 < length r\ D(2)] + by auto + next + case False + thus ?thesis + using "1.prems" by (cases ps) (auto simp: balanced_def) + qed +qed + +lemma complete_if_balanced_size_2powh: + assumes "balanced kdt" "size_kdt kdt = 2 ^ h" + shows "complete kdt" +proof (rule ccontr) + assume "\ complete kdt" + hence "2 ^ (min_height kdt) < size_kdt kdt" "size_kdt kdt < 2 ^ height kdt" + by (simp_all add: min_height_size_if_incomplete size_height_if_incomplete) + hence "height kdt - min_height kdt > 1" + using assms(2) by simp + hence "\ balanced kdt" + using balanced_def by force + thus "False" + using assms(1) by simp +qed + +theorem complete_build: + "length ps = 2 ^ h \ \k. distinct (map (\p. p$k) ps) \ complete (build k ps)" + by (simp add: balanced_build complete_if_balanced_size_2powh size_build) + +corollary height_build: + assumes "length ps = 2 ^ h" "\k. distinct (map (\p. p$k) ps)" + shows "h = height (build k ps)" + using complete_build[OF assms] size_build[OF _ assms(2)] by (simp add: assms(1) complete_iff_size) + +end diff --git a/thys/KD_Tree/KDTree.thy b/thys/KD_Tree/KDTree.thy deleted file mode 100644 --- a/thys/KD_Tree/KDTree.thy +++ /dev/null @@ -1,441 +0,0 @@ -(* - File: KDTree.thy - Author: Martin Rau, TU München -*) - -section "Definition of the \k\-d Tree" - -theory KDTree -imports - Complex_Main -begin - - -text \ - A \k\-d tree is a space-partitioning data structure for organizing points in a \k\-dimensional space. - In principle the \k\-d tree is a binary tree. The leafs hold the \k\-dimensional points and the nodes - contain left and right subtrees as well as a discriminator \s\ at a particular axis \a\. - Every node divides the space into two parts by splitting along a hyperplane. - Consider a node \n\ with associated discriminator \s\ at axis \a\. - All points in the left subtree must have a value at axis \a\ that is less than or - equal to \s\ and all points in the right subtree must have a value at axis \a\ that is - greater than or equal to \s\. - - Deviations from the papers: - - The chosen tree representation is taken from @{cite "DBLP:journals/toms/FriedmanBF77"} with one minor - adjustment. Originally the leafs hold buckets of points of size \b\. This representation fixes the - bucket size to \b = 1\, a single point per Leaf. This is only a minor adjustment since the paper - proves that \b = 1\ is the optimal bucket size for minimizing the runtime of the nearest neighbor - algorithm @{cite "DBLP:journals/toms/FriedmanBF77"}, only simplifies building the optimized - \k\-d trees @{cite "DBLP:journals/toms/FriedmanBF77"} and has little influence on the - search algorithm @{cite "DBLP:journals/cacm/Bentley75"}. -\ - -type_synonym point = "real list" -type_synonym axis = nat -type_synonym dimension = nat -type_synonym discriminator = real - -datatype kdt = - Leaf point -| Node axis discriminator kdt kdt - - -subsection "Definition of the \k\-d Tree Invariant and Related Functions" - -definition dim :: "point \ nat" where - "dim p = length p" - -declare dim_def[simp] - -fun set_kdt :: "kdt \ point set" where - "set_kdt (Leaf p) = {p}" -| "set_kdt (Node _ _ l r) = set_kdt l \ set_kdt r" - -fun invar :: "dimension \ kdt \ bool" where - "invar k (Leaf p) \ dim p = k" -| "invar k (Node a s l r) \ (\p \ set_kdt l. p!a \ s) \ (\p \ set_kdt r. s \ p!a) \ - invar k l \ invar k r \ a < k \ set_kdt l \ set_kdt r = {}" - -fun size_kdt :: "kdt \ nat" where - "size_kdt (Leaf _) = 1" -| "size_kdt (Node _ _ l r) = size_kdt l + size_kdt r" - -fun height :: "kdt \ nat" where - "height (Leaf _) = 0" -| "height (Node _ _ l r) = max (height l) (height r) + 1" - -fun min_height :: "kdt \ nat" where - "min_height (Leaf _) = 0" -| "min_height (Node _ _ l r) = min (min_height l) (min_height r) + 1" - -definition balanced :: "kdt \ bool" where - "balanced kdt \ height kdt - min_height kdt \ 1" - -fun complete :: "kdt \ bool" where - "complete (Leaf _) = True" -| "complete (Node _ _ l r) \ complete l \ complete r \ height l = height r" - - -lemma invar_l: - "invar k (Node a s l r) \ invar k l" - by simp - -lemma invar_r: - "invar k (Node a s l r) \ invar k r" - by simp - -lemma invar_axis_lt_d: - "invar k (Node a s l r) \ a < k" - by simp - -lemma invar_dim: - "invar k kdt \ \p \ set_kdt kdt. dim p = k" - by (induction kdt) auto - -lemma invar_l_le_a: - "invar k (Node a s l r) \ \p \ set_kdt l. p!a \ s" - by simp - -lemma invar_r_ge_a: - "invar k (Node a s l r) \ \p \ set_kdt r. s \ p!a" - by simp - -lemma invar_distinct: - "invar k kdt \ kdt = Node a s l r \ set_kdt l \ set_kdt r = {}" - by fastforce - -lemma invar_set: - "set_kdt (Node a s l r) = set_kdt l \ set_kdt r" - by simp - - -subsection "Lemmas adapted from \HOL-Library.Tree\ to \k\-d Tree" - -lemma size_ge_0[simp]: - "0 < size_kdt kdt" - by (induction kdt) auto - -lemma eq_size_1[simp]: - "size_kdt kdt = 1 \ (\p. kdt = Leaf p)" - apply (induction kdt) - apply (auto) - using size_ge_0 nat_less_le apply blast+ - done - -lemma eq_1_size[simp]: - "1 = size_kdt kdt \ (\p. kdt = Leaf p)" - using eq_size_1 by metis - -lemma neq_Leaf_iff: - "(\p. kdt = Leaf p) = (\a s l r. kdt = Node a s l r)" - by (cases kdt) auto - -lemma eq_height_0[simp]: - "height kdt = 0 \ (\p. kdt = Leaf p)" - by (cases kdt) auto - -lemma eq_0_height[simp]: - "0 = height kdt \ (\p. kdt = Leaf p)" - by (cases kdt) auto - -lemma eq_min_height_0[simp]: - "min_height kdt = 0 \ (\p. kdt = Leaf p)" - by (cases kdt) auto - -lemma eq_0_min_height[simp]: - "0 = min_height kdt \ (\p. kdt = Leaf p)" - by (cases kdt) auto - -lemma size_height: - "size_kdt kdt \ 2 ^ height kdt" -proof(induction kdt) - case (Node a s l r) - show ?case - proof (cases "height l \ height r") - case True - have "size_kdt (Node a s l r) = size_kdt l + size_kdt r" by simp - also have "\ \ 2 ^ height l + 2 ^ height r" using Node.IH by arith - also have "\ \ 2 ^ height r + 2 ^ height r" using True by simp - also have "\ = 2 ^ height (Node a s l r)" - using True by (auto simp: max_def mult_2) - finally show ?thesis . - next - case False - have "size_kdt (Node a s l r) = size_kdt l + size_kdt r" by simp - also have "\ \ 2 ^ height l + 2 ^ height r" using Node.IH by arith - also have "\ \ 2 ^ height l + 2 ^ height l" using False by simp - finally show ?thesis using False by (auto simp: max_def mult_2) - qed -qed simp - -lemma min_height_le_height: - "min_height kdt \ height kdt" - by (induction kdt) auto - -lemma min_height_size: - "2 ^ min_height kdt \ size_kdt kdt" -proof(induction kdt) - case (Node a s l r) - have "(2::nat) ^ min_height (Node a s l r) \ 2 ^ min_height l + 2 ^ min_height r" - by (simp add: min_def) - also have "\ \ size_kdt (Node a s l r)" using Node.IH by simp - finally show ?case . -qed simp - -lemma complete_iff_height: - "complete kdt \ (min_height kdt = height kdt)" - apply (induction kdt) - apply simp - apply (simp add: min_def max_def) - by (metis le_antisym le_trans min_height_le_height) - -lemma size_if_complete: - "complete kdt \ size_kdt kdt = 2 ^ height kdt" - by (induction kdt) auto - -lemma complete_if_size_height: - "size_kdt kdt = 2 ^ height kdt \ complete kdt" -proof (induction "height kdt" arbitrary: kdt) - case 0 thus ?case by auto -next - case (Suc h) - hence "\p. kdt = Leaf p" - by auto - then obtain a s l r where [simp]: "kdt = Node a s l r" - using neq_Leaf_iff by auto - have 1: "height l \ h" and 2: "height r \ h" using Suc(2) by(auto) - have 3: "\ height l < h" - proof - assume 0: "height l < h" - have "size_kdt kdt = size_kdt l + size_kdt r" by simp - also have "\ \ 2 ^ height l + 2 ^ height r" - using size_height[of l] size_height[of r] by arith - also have " \ < 2 ^ h + 2 ^ height r" using 0 by (simp) - also have " \ \ 2 ^ h + 2 ^ h" using 2 by (simp) - also have "\ = 2 ^ (Suc h)" by (simp) - also have "\ = size_kdt kdt" using Suc(2,3) by simp - finally have "size_kdt kdt < size_kdt kdt" . - thus False by (simp) - qed - have 4: "\ height r < h" - proof - assume 0: "height r < h" - have "size_kdt kdt = size_kdt l + size_kdt r" by simp - also have "\ \ 2 ^ height l + 2 ^ height r" - using size_height[of l] size_height[of r] by arith - also have " \ < 2 ^ height l + 2 ^ h" using 0 by (simp) - also have " \ \ 2 ^ h + 2 ^ h" using 1 by (simp) - also have "\ = 2 ^ (Suc h)" by (simp) - also have "\ = size_kdt kdt" using Suc(2,3) by simp - finally have "size_kdt kdt < size_kdt kdt" . - thus False by (simp) - qed - from 1 2 3 4 have *: "height l = h" "height r = h" by linarith+ - hence "size_kdt l = 2 ^ height l" "size_kdt r = 2 ^ height r" - using Suc(3) size_height[of l] size_height[of r] by (auto) - with * Suc(1) show ?case by simp -qed - -lemma complete_if_size_min_height: - "size_kdt kdt = 2 ^ min_height kdt \ complete kdt" -proof (induct "min_height kdt" arbitrary: kdt) - case 0 thus ?case by auto -next - case (Suc h) - hence "\p. kdt = Leaf p" - by auto - then obtain a s l r where [simp]: "kdt = Node a s l r" - using neq_Leaf_iff by auto - have 1: "h \ min_height l" and 2: "h \ min_height r" using Suc(2) by (auto) - have 3: "\ h < min_height l" - proof - assume 0: "h < min_height l" - have "size_kdt kdt = size_kdt l + size_kdt r" by simp - also note min_height_size[of l] - also(xtrans) note min_height_size[of r] - also(xtrans) have "(2::nat) ^ min_height l > 2 ^ h" - using 0 by (simp add: diff_less_mono) - also(xtrans) have "(2::nat) ^ min_height r \ 2 ^ h" using 2 by simp - also(xtrans) have "(2::nat) ^ h + 2 ^ h = 2 ^ (Suc h)" by (simp) - also have "\ = size_kdt kdt" using Suc(2,3) by simp - finally show False by (simp add: diff_le_mono) - qed - have 4: "\ h < min_height r" - proof - assume 0: "h < min_height r" - have "size_kdt kdt = size_kdt l + size_kdt r" by simp - also note min_height_size[of l] - also(xtrans) note min_height_size[of r] - also(xtrans) have "(2::nat) ^ min_height r > 2 ^ h" - using 0 by (simp add: diff_less_mono) - also(xtrans) have "(2::nat) ^ min_height l \ 2 ^ h" using 1 by simp - also(xtrans) have "(2::nat) ^ h + 2 ^ h = 2 ^ (Suc h)" by (simp) - also have "\ = size_kdt kdt" using Suc(2,3) by simp - finally show False by (simp add: diff_le_mono) - qed - from 1 2 3 4 have *: "min_height l = h" "min_height r = h" by linarith+ - hence "size_kdt l = 2 ^ min_height l" "size_kdt r = 2 ^ min_height r" - using Suc(3) min_height_size[of l] min_height_size[of r] by (auto) - with * Suc(1) show ?case - by (simp add: complete_iff_height) -qed - -lemma complete_iff_size: - "complete kdt \ size_kdt kdt = 2 ^ height kdt" - using complete_if_size_height size_if_complete by blast - -lemma size_height_if_incomplete: - "\ complete kdt \ size_kdt kdt < 2 ^ height kdt" - by (meson antisym_conv complete_iff_size not_le size_height) - -lemma min_height_size_if_incomplete: - "\ complete kdt \ 2 ^ min_height kdt < size_kdt kdt" - by (metis complete_if_size_min_height le_less min_height_size) - -lemma balanced_subtreeL: - "balanced (Node a s l r) \ balanced l" - by (simp add: balanced_def) - -lemma balanced_subtreeR: - "balanced (Node a s l r) \ balanced r" - by (simp add: balanced_def) - -lemma balanced_optimal: - assumes "balanced kdt" "size_kdt kdt \ size_kdt kdt'" - shows "height kdt \ height kdt'" -proof (cases "complete kdt") - case True - have "(2::nat) ^ height kdt \ 2 ^ height kdt'" - proof - - have "2 ^ height kdt = size_kdt kdt" - using True by (simp add: complete_iff_height size_if_complete) - also have "\ \ size_kdt kdt'" using assms(2) by simp - also have "\ \ 2 ^ height kdt'" by (rule size_height) - finally show ?thesis . - qed - thus ?thesis by (simp) -next - case False - have "(2::nat) ^ min_height kdt < 2 ^ height kdt'" - proof - - have "(2::nat) ^ min_height kdt < size_kdt kdt" - by(rule min_height_size_if_incomplete[OF False]) - also have "\ \ size_kdt kdt'" using assms(2) by simp - also have "\ \ 2 ^ height kdt'" by(rule size_height) - finally have "(2::nat) ^ min_height kdt < (2::nat) ^ height kdt'" . - thus ?thesis . - qed - hence *: "min_height kdt < height kdt'" by simp - have "min_height kdt + 1 = height kdt" - using min_height_le_height[of kdt] assms(1) False - by (simp add: complete_iff_height balanced_def) - with * show ?thesis by arith -qed - - -subsection "Lemmas adapted from \HOL-Library.Tree_Real\ to \k\-d Tree" - -lemma size_height_log: - "log 2 (size_kdt kdt) \ height kdt" - by (simp add: log2_of_power_le size_height) - -lemma min_height_size_log: - "min_height kdt \ log 2 (size_kdt kdt)" - by (simp add: le_log2_of_power min_height_size) - -lemma size_log_if_complete: - "complete kdt \ height kdt = log 2 (size_kdt kdt)" - by (simp add: size_if_complete) - -lemma min_height_size_log_if_incomplete: - "\ complete kdt \ min_height kdt < log 2 (size_kdt kdt)" - by (simp add: less_log2_of_power min_height_size_if_incomplete) - -lemma min_height_balanced: - assumes "balanced kdt" - shows "min_height kdt = nat(floor(log 2 (size_kdt kdt)))" -proof cases - assume *: "complete kdt" - hence "size_kdt kdt = 2 ^ min_height kdt" - by (simp add: complete_iff_height size_if_complete) - from log2_of_power_eq[OF this] show ?thesis by linarith -next - assume *: "\ complete kdt" - hence "height kdt = min_height kdt + 1" - using assms min_height_le_height[of kdt] - by(auto simp add: balanced_def complete_iff_height) - hence "size_kdt kdt < 2 ^ (min_height kdt + 1)" - by (metis * size_height_if_incomplete) - hence "log 2 (size_kdt kdt) < min_height kdt + 1" - using log2_of_power_less size_ge_0 by blast - thus ?thesis using min_height_size_log[of kdt] by linarith -qed - -lemma height_balanced: - assumes "balanced kdt" - shows "height kdt = nat(ceiling(log 2 (size_kdt kdt)))" -proof cases - assume *: "complete kdt" - hence "size_kdt kdt = 2 ^ height kdt" - by (simp add: size_if_complete) - from log2_of_power_eq[OF this] show ?thesis - by linarith -next - assume *: "\ complete kdt" - hence **: "height kdt = min_height kdt + 1" - using assms min_height_le_height[of kdt] - by(auto simp add: balanced_def complete_iff_height) - hence "size_kdt kdt \ 2 ^ (min_height kdt + 1)" by (metis size_height) - from log2_of_power_le[OF this size_ge_0] min_height_size_log_if_incomplete[OF *] ** - show ?thesis by linarith -qed - -lemma balanced_Node_if_wbal1: - assumes "balanced l" "balanced r" "size_kdt l = size_kdt r + 1" - shows "balanced (Node a s l r)" -proof - - from assms(3) have [simp]: "size_kdt l = size_kdt r + 1" by simp - have "nat \log 2 (1 + size_kdt r)\ \ nat \log 2 (size_kdt r)\" - by(rule nat_mono[OF ceiling_mono]) simp - hence 1: "height(Node a s l r) = nat \log 2 (1 + size_kdt r)\ + 1" - using height_balanced[OF assms(1)] height_balanced[OF assms(2)] - by (simp del: nat_ceiling_le_eq add: max_def) - have "nat \log 2 (1 + size_kdt r)\ \ nat \log 2 (size_kdt r)\" - by(rule nat_mono[OF floor_mono]) simp - hence 2: "min_height(Node a s l r) = nat \log 2 (size_kdt r)\ + 1" - using min_height_balanced[OF assms(1)] min_height_balanced[OF assms(2)] - by (simp) - have "size_kdt r \ 1" by (simp add: Suc_leI) - then obtain i where i: "2 ^ i \ size_kdt r" "size_kdt r < 2 ^ (i + 1)" - using ex_power_ivl1[of 2 "size_kdt r"] by auto - hence i1: "2 ^ i < size_kdt r + 1" "size_kdt r + 1 \ 2 ^ (i + 1)" by auto - from 1 2 floor_log_nat_eq_if[OF i] ceiling_log_nat_eq_if[OF i1] - show ?thesis by(simp add:balanced_def) -qed - -lemma balanced_sym: - "balanced (Node a s l r) \ balanced (Node a' s' r l)" - by (auto simp: balanced_def) - -lemma balanced_Node_if_wbal2: - assumes "balanced l" "balanced r" "abs(int(size_kdt l) - int(size_kdt r)) \ 1" - shows "balanced (Node a s l r)" -proof - - have "size_kdt l = size_kdt r \ (size_kdt l = size_kdt r + 1 \ size_kdt r = size_kdt l + 1)" (is "?A \ ?B") - using assms(3) by linarith - thus ?thesis - proof - assume "?A" - thus ?thesis using assms(1,2) - apply(simp add: balanced_def min_def max_def) - by (metis assms(1,2) balanced_optimal le_antisym le_less) - next - assume "?B" - thus ?thesis - by (meson assms(1,2) balanced_sym balanced_Node_if_wbal1) - qed -qed - -end \ No newline at end of file diff --git a/thys/KD_Tree/KD_Tree.thy b/thys/KD_Tree/KD_Tree.thy new file mode 100644 --- /dev/null +++ b/thys/KD_Tree/KD_Tree.thy @@ -0,0 +1,434 @@ +(* + File: KD_Tree.thy + Author: Martin Rau, TU München +*) + +section "Definition of the \k\-d Tree" + +theory KD_Tree +imports + Complex_Main + "HOL-Analysis.Finite_Cartesian_Product" + "HOL-Analysis.Topology_Euclidean_Space" +begin + + +text \ + A \k\-d tree is a space-partitioning data structure for organizing points in a \k\-dimensional space. + In principle the \k\-d tree is a binary tree. The leafs hold the \k\-dimensional points and the nodes + contain left and right subtrees as well as a discriminator \v\ at a particular axis \k\. + Every node divides the space into two parts by splitting along a hyperplane. + Consider a node \n\ with associated discriminator \v\ at axis \k\. + All points in the left subtree must have a value at axis \k\ that is less than or + equal to \v\ and all points in the right subtree must have a value at axis \k\ that is + greater than \v\. + + Deviations from the papers: + + The chosen tree representation is taken from @{cite "DBLP:journals/toms/FriedmanBF77"} with one minor + adjustment. Originally the leafs hold buckets of points of size \b\. This representation fixes the + bucket size to \b = 1\, a single point per Leaf. This is only a minor adjustment since the paper + proves that \b = 1\ is the optimal bucket size for minimizing the running time of the nearest neighbor + algorithm @{cite "DBLP:journals/toms/FriedmanBF77"}, only simplifies building the optimized + \k\-d trees @{cite "DBLP:journals/toms/FriedmanBF77"} and has little influence on the + search algorithm @{cite "DBLP:journals/cacm/Bentley75"}. +\ + +type_synonym 'k point = "(real, 'k) vec" + +lemma dist_point_def: + fixes p\<^sub>0 :: "('k::finite) point" + shows "dist p\<^sub>0 p\<^sub>1 = sqrt (\k \ UNIV. (p\<^sub>0$k - p\<^sub>1$k)\<^sup>2)" + unfolding dist_vec_def L2_set_def dist_real_def by simp + +datatype 'k kdt = + Leaf "'k point" +| Node 'k real "'k kdt" "'k kdt" + + +subsection \Definition of the \k\-d Tree Invariant and Related Functions\ + +fun set_kdt :: "'k kdt \ ('k point) set" where + "set_kdt (Leaf p) = { p }" +| "set_kdt (Node _ _ l r) = set_kdt l \ set_kdt r" + +definition spread :: "('k::finite) \ 'k point set \ real" where + "spread k P = (if P = {} then 0 else let V = (\p. p$k) ` P in Max V - Min V)" + +definition widest_spread_axis :: "('k::finite) \ 'k set \ 'k point set \ bool" where + "widest_spread_axis k K ps \ (\k' \ K. spread k' ps \ spread k ps)" + +fun invar :: "('k::finite) kdt \ bool" where + "invar (Leaf p) \ True" +| "invar (Node k v l r) \ (\p \ set_kdt l. p$k \ v) \ (\p \ set_kdt r. v < p$k) \ + widest_spread_axis k UNIV (set_kdt l \ set_kdt r) \ invar l \ invar r" + +fun size_kdt :: "'k kdt \ nat" where + "size_kdt (Leaf _) = 1" +| "size_kdt (Node _ _ l r) = size_kdt l + size_kdt r" + +fun height :: "'k kdt \ nat" where + "height (Leaf _) = 0" +| "height (Node _ _ l r) = max (height l) (height r) + 1" + +fun min_height :: "'k kdt \ nat" where + "min_height (Leaf _) = 0" +| "min_height (Node _ _ l r) = min (min_height l) (min_height r) + 1" + +definition balanced :: "'k kdt \ bool" where + "balanced kdt \ height kdt - min_height kdt \ 1" + +fun complete :: "'k kdt \ bool" where + "complete (Leaf _) = True" +| "complete (Node _ _ l r) \ complete l \ complete r \ height l = height r" + + +lemma invar_l: + "invar (Node k v l r) \ invar l" + by simp + +lemma invar_r: + "invar (Node k v l r) \ invar r" + by simp + +lemma invar_l_le_k: + "invar (Node k v l r) \ \p \ set_kdt l. p$k \ v" + by simp + +lemma invar_r_ge_k: + "invar (Node k v l r) \ \p \ set_kdt r. v < p$k" + by simp + +lemma invar_set: + "set_kdt (Node k v l r) = set_kdt l \ set_kdt r" + by simp + + +subsection \Lemmas adapted from \HOL-Library.Tree\ to \k\-d Tree\ + +lemma size_ge0[simp]: + "0 < size_kdt kdt" + by (induction kdt) auto + +lemma eq_size_1[simp]: + "size_kdt kdt = 1 \ (\p. kdt = Leaf p)" + apply (induction kdt) + apply (auto) + using size_ge0 nat_less_le apply blast+ + done + +lemma eq_1_size[simp]: + "1 = size_kdt kdt \ (\p. kdt = Leaf p)" + using eq_size_1 by metis + +lemma neq_Leaf_iff: + "(\p. kdt = Leaf p) = (\k v l r. kdt = Node k v l r)" + by (cases kdt) auto + +lemma eq_height_0[simp]: + "height kdt = 0 \ (\p. kdt = Leaf p)" + by (cases kdt) auto + +lemma eq_0_height[simp]: + "0 = height kdt \ (\p. kdt = Leaf p)" + by (cases kdt) auto + +lemma eq_min_height_0[simp]: + "min_height kdt = 0 \ (\p. kdt = Leaf p)" + by (cases kdt) auto + +lemma eq_0_min_height[simp]: + "0 = min_height kdt \ (\p. kdt = Leaf p)" + by (cases kdt) auto + +lemma size_height: + "size_kdt kdt \ 2 ^ height kdt" +proof(induction kdt) + case (Node k v l r) + show ?case + proof (cases "height l \ height r") + case True + have "size_kdt (Node k v l r) = size_kdt l + size_kdt r" by simp + also have "\ \ 2 ^ height l + 2 ^ height r" using Node.IH by arith + also have "\ \ 2 ^ height r + 2 ^ height r" using True by simp + also have "\ = 2 ^ height (Node k v l r)" + using True by (auto simp: max_def mult_2) + finally show ?thesis . + next + case False + have "size_kdt (Node k v l r) = size_kdt l + size_kdt r" by simp + also have "\ \ 2 ^ height l + 2 ^ height r" using Node.IH by arith + also have "\ \ 2 ^ height l + 2 ^ height l" using False by simp + finally show ?thesis using False by (auto simp: max_def mult_2) + qed +qed simp + +lemma min_height_le_height: + "min_height kdt \ height kdt" + by (induction kdt) auto + +lemma min_height_size: + "2 ^ min_height kdt \ size_kdt kdt" +proof(induction kdt) + case (Node k v l r) + have "(2::nat) ^ min_height (Node k v l r) \ 2 ^ min_height l + 2 ^ min_height r" + by (simp add: min_def) + also have "\ \ size_kdt (Node k v l r)" using Node.IH by simp + finally show ?case . +qed simp + +lemma complete_iff_height: + "complete kdt \ (min_height kdt = height kdt)" + apply (induction kdt) + apply simp + apply (simp add: min_def max_def) + by (metis le_antisym le_trans min_height_le_height) + +lemma size_if_complete: + "complete kdt \ size_kdt kdt = 2 ^ height kdt" + by (induction kdt) auto + +lemma complete_if_size_height: + "size_kdt kdt = 2 ^ height kdt \ complete kdt" +proof (induction "height kdt" arbitrary: kdt) + case 0 thus ?case by auto +next + case (Suc h) + hence "\p. kdt = Leaf p" + by auto + then obtain k v l r where [simp]: "kdt = Node k v l r" + using neq_Leaf_iff by metis + have 1: "height l \ h" and 2: "height r \ h" using Suc(2) by(auto) + have 3: "\ height l < h" + proof + assume 0: "height l < h" + have "size_kdt kdt = size_kdt l + size_kdt r" by simp + also have "\ \ 2 ^ height l + 2 ^ height r" + using size_height[of l] size_height[of r] by arith + also have " \ < 2 ^ h + 2 ^ height r" using 0 by (simp) + also have " \ \ 2 ^ h + 2 ^ h" using 2 by (simp) + also have "\ = 2 ^ (Suc h)" by (simp) + also have "\ = size_kdt kdt" using Suc(2,3) by simp + finally have "size_kdt kdt < size_kdt kdt" . + thus False by (simp) + qed + have 4: "\ height r < h" + proof + assume 0: "height r < h" + have "size_kdt kdt = size_kdt l + size_kdt r" by simp + also have "\ \ 2 ^ height l + 2 ^ height r" + using size_height[of l] size_height[of r] by arith + also have " \ < 2 ^ height l + 2 ^ h" using 0 by (simp) + also have " \ \ 2 ^ h + 2 ^ h" using 1 by (simp) + also have "\ = 2 ^ (Suc h)" by (simp) + also have "\ = size_kdt kdt" using Suc(2,3) by simp + finally have "size_kdt kdt < size_kdt kdt" . + thus False by (simp) + qed + from 1 2 3 4 have *: "height l = h" "height r = h" by linarith+ + hence "size_kdt l = 2 ^ height l" "size_kdt r = 2 ^ height r" + using Suc(3) size_height[of l] size_height[of r] by (auto) + with * Suc(1) show ?case by simp +qed + +lemma complete_if_size_min_height: + "size_kdt kdt = 2 ^ min_height kdt \ complete kdt" +proof (induct "min_height kdt" arbitrary: kdt) + case 0 thus ?case by auto +next + case (Suc h) + hence "\p. kdt = Leaf p" + by auto + then obtain k v l r where [simp]: "kdt = Node k v l r" + using neq_Leaf_iff by metis + have 1: "h \ min_height l" and 2: "h \ min_height r" using Suc(2) by (auto) + have 3: "\ h < min_height l" + proof + assume 0: "h < min_height l" + have "size_kdt kdt = size_kdt l + size_kdt r" by simp + also note min_height_size[of l] + also(xtrans) note min_height_size[of r] + also(xtrans) have "(2::nat) ^ min_height l > 2 ^ h" + using 0 by (simp add: diff_less_mono) + also(xtrans) have "(2::nat) ^ min_height r \ 2 ^ h" using 2 by simp + also(xtrans) have "(2::nat) ^ h + 2 ^ h = 2 ^ (Suc h)" by (simp) + also have "\ = size_kdt kdt" using Suc(2,3) by simp + finally show False by (simp add: diff_le_mono) + qed + have 4: "\ h < min_height r" + proof + assume 0: "h < min_height r" + have "size_kdt kdt = size_kdt l + size_kdt r" by simp + also note min_height_size[of l] + also(xtrans) note min_height_size[of r] + also(xtrans) have "(2::nat) ^ min_height r > 2 ^ h" + using 0 by (simp add: diff_less_mono) + also(xtrans) have "(2::nat) ^ min_height l \ 2 ^ h" using 1 by simp + also(xtrans) have "(2::nat) ^ h + 2 ^ h = 2 ^ (Suc h)" by (simp) + also have "\ = size_kdt kdt" using Suc(2,3) by simp + finally show False by (simp add: diff_le_mono) + qed + from 1 2 3 4 have *: "min_height l = h" "min_height r = h" by linarith+ + hence "size_kdt l = 2 ^ min_height l" "size_kdt r = 2 ^ min_height r" + using Suc(3) min_height_size[of l] min_height_size[of r] by (auto) + with * Suc(1) show ?case + by (simp add: complete_iff_height) +qed + +lemma complete_iff_size: + "complete kdt \ size_kdt kdt = 2 ^ height kdt" + using complete_if_size_height size_if_complete by blast + +lemma size_height_if_incomplete: + "\ complete kdt \ size_kdt kdt < 2 ^ height kdt" + by (meson antisym_conv complete_iff_size not_le size_height) + +lemma min_height_size_if_incomplete: + "\ complete kdt \ 2 ^ min_height kdt < size_kdt kdt" + by (metis complete_if_size_min_height le_less min_height_size) + +lemma balanced_subtreeL: + "balanced (Node k v l r) \ balanced l" + by (simp add: balanced_def) + +lemma balanced_subtreeR: + "balanced (Node k v l r) \ balanced r" + by (simp add: balanced_def) + +lemma balanced_optimal: + assumes "balanced kdt" "size_kdt kdt \ size_kdt kdt'" + shows "height kdt \ height kdt'" +proof (cases "complete kdt") + case True + have "(2::nat) ^ height kdt \ 2 ^ height kdt'" + proof - + have "2 ^ height kdt = size_kdt kdt" + using True by (simp add: complete_iff_height size_if_complete) + also have "\ \ size_kdt kdt'" using assms(2) by simp + also have "\ \ 2 ^ height kdt'" by (rule size_height) + finally show ?thesis . + qed + thus ?thesis by (simp) +next + case False + have "(2::nat) ^ min_height kdt < 2 ^ height kdt'" + proof - + have "(2::nat) ^ min_height kdt < size_kdt kdt" + by(rule min_height_size_if_incomplete[OF False]) + also have "\ \ size_kdt kdt'" using assms(2) by simp + also have "\ \ 2 ^ height kdt'" by(rule size_height) + finally have "(2::nat) ^ min_height kdt < (2::nat) ^ height kdt'" . + thus ?thesis . + qed + hence *: "min_height kdt < height kdt'" by simp + have "min_height kdt + 1 = height kdt" + using min_height_le_height[of kdt] assms(1) False + by (simp add: complete_iff_height balanced_def) + with * show ?thesis by arith +qed + + +subsection \Lemmas adapted from \HOL-Library.Tree_Real\ to \k\-d Tree\ + +lemma size_height_log: + "log 2 (size_kdt kdt) \ height kdt" + by (simp add: log2_of_power_le size_height) + +lemma min_height_size_log: + "min_height kdt \ log 2 (size_kdt kdt)" + by (simp add: le_log2_of_power min_height_size) + +lemma size_log_if_complete: + "complete kdt \ height kdt = log 2 (size_kdt kdt)" + using complete_iff_size log2_of_power_eq by blast + +lemma min_height_size_log_if_incomplete: + "\ complete kdt \ min_height kdt < log 2 (size_kdt kdt)" + by (simp add: less_log2_of_power min_height_size_if_incomplete) + +lemma min_height_balanced: + assumes "balanced kdt" + shows "min_height kdt = nat(floor(log 2 (size_kdt kdt)))" +proof cases + assume *: "complete kdt" + hence "size_kdt kdt = 2 ^ min_height kdt" + by (simp add: complete_iff_height size_if_complete) + from log2_of_power_eq[OF this] show ?thesis by linarith +next + assume *: "\ complete kdt" + hence "height kdt = min_height kdt + 1" + using assms min_height_le_height[of kdt] + by(auto simp add: balanced_def complete_iff_height) + hence "size_kdt kdt < 2 ^ (min_height kdt + 1)" + by (metis * size_height_if_incomplete) + hence "log 2 (size_kdt kdt) < min_height kdt + 1" + using log2_of_power_less size_ge0 by blast + thus ?thesis using min_height_size_log[of kdt] by linarith +qed + +lemma height_balanced: + assumes "balanced kdt" + shows "height kdt = nat(ceiling(log 2 (size_kdt kdt)))" +proof cases + assume *: "complete kdt" + hence "size_kdt kdt = 2 ^ height kdt" + by (simp add: size_if_complete) + from log2_of_power_eq[OF this] show ?thesis + by linarith +next + assume *: "\ complete kdt" + hence **: "height kdt = min_height kdt + 1" + using assms min_height_le_height[of kdt] + by(auto simp add: balanced_def complete_iff_height) + hence "size_kdt kdt \ 2 ^ (min_height kdt + 1)" by (metis size_height) + from log2_of_power_le[OF this size_ge0] min_height_size_log_if_incomplete[OF *] ** + show ?thesis by linarith +qed + +lemma balanced_Node_if_wbal1: + assumes "balanced l" "balanced r" "size_kdt l = size_kdt r + 1" + shows "balanced (Node k v l r)" +proof - + from assms(3) have [simp]: "size_kdt l = size_kdt r + 1" by simp + have "nat \log 2 (1 + size_kdt r)\ \ nat \log 2 (size_kdt r)\" + by(rule nat_mono[OF ceiling_mono]) simp + hence 1: "height(Node k v l r) = nat \log 2 (1 + size_kdt r)\ + 1" + using height_balanced[OF assms(1)] height_balanced[OF assms(2)] + by (simp del: nat_ceiling_le_eq add: max_def) + have "nat \log 2 (1 + size_kdt r)\ \ nat \log 2 (size_kdt r)\" + by(rule nat_mono[OF floor_mono]) simp + hence 2: "min_height(Node k v l r) = nat \log 2 (size_kdt r)\ + 1" + using min_height_balanced[OF assms(1)] min_height_balanced[OF assms(2)] + by (simp) + have "size_kdt r \ 1" by (simp add: Suc_leI) + then obtain i where i: "2 ^ i \ size_kdt r" "size_kdt r < 2 ^ (i + 1)" + using ex_power_ivl1[of 2 "size_kdt r"] by auto + hence i1: "2 ^ i < size_kdt r + 1" "size_kdt r + 1 \ 2 ^ (i + 1)" by auto + from 1 2 floor_log_nat_eq_if[OF i] ceiling_log_nat_eq_if[OF i1] + show ?thesis by(simp add:balanced_def) +qed + +lemma balanced_sym: + "balanced (Node k v l r) \ balanced (Node k' v' r l)" + by (auto simp: balanced_def) + +lemma balanced_Node_if_wbal2: + assumes "balanced l" "balanced r" "abs(int(size_kdt l) - int(size_kdt r)) \ 1" + shows "balanced (Node k v l r)" +proof - + have "size_kdt l = size_kdt r \ (size_kdt l = size_kdt r + 1 \ size_kdt r = size_kdt l + 1)" (is "?A \ ?B") + using assms(3) by linarith + thus ?thesis + proof + assume "?A" + thus ?thesis using assms(1,2) + apply(simp add: balanced_def min_def max_def) + by (metis assms(1,2) balanced_optimal le_antisym le_less) + next + assume "?B" + thus ?thesis + by (meson assms(1,2) balanced_sym balanced_Node_if_wbal1) + qed +qed + +end