diff --git a/thys/KD_Tree/Balanced.thy b/thys/KD_Tree/Balanced.thy
deleted file mode 100644
--- a/thys/KD_Tree/Balanced.thy
+++ /dev/null
@@ -1,597 +0,0 @@
-(*
-  File:     Balanced.thy
-  Author:   Martin Rau, TU München
-*)
-
-section "Building a balanced \<open>k\<close>-d Tree from a List of Points"
-
-theory Balanced
-imports
-  KDTree
-  Median_Of_Medians_Selection.Median_Of_Medians_Selection
-begin
-
-text \<open>
-  Build a balanced \<open>k\<close>-d Tree by recursively partition the points into two lists.
-  The partitioning criteria will be the median at a particular axis \<open>ws\<close>.
-  The left list will contain all points \<open>p\<close> with @{term "p!ws \<le> median"}.
-  The right list will contain all points with median at axis @{term "ws \<le> p!a"}.
-  The left and right list differ in length by one or none.
-  The axis \<open>ws\<close> will the widest spread axis. The axis which has the widest spread of point values.
-\<close>
-
-
-subsection "Widest Spread of a List of Points"
-
-definition spread_set :: "axis \<Rightarrow> point set \<Rightarrow> real" where
-  "spread_set a ps = (
-    let as = (\<lambda>p. p!a) ` ps in
-    Max as - Min as
-  )"
-
-definition spread :: "axis \<Rightarrow> point list \<Rightarrow> real" where
-  "spread a ps = (
-    let as = map (\<lambda>p. p!a) (tl ps) in
-    fold max as (hd ps !a) - fold min as (hd ps!a)
-  )"
-
-definition is_widest_spread :: "axis \<Rightarrow> dimension \<Rightarrow> point set \<Rightarrow> bool" where
-  "is_widest_spread a k ps = (\<forall>a' < k. spread_set a' ps \<le> spread_set a ps)"
-
-fun widest_spread_rec :: "axis \<Rightarrow> point list \<Rightarrow> axis * real" where
-  "widest_spread_rec 0 ps = (0, spread 0 ps)"
-| "widest_spread_rec a ps = (
-    let (a', s') = widest_spread_rec (a - 1) ps in
-    let s = spread a ps in
-    if s \<le> s' then (a', s') else (a, s)
-  )"
-
-definition widest_spread :: "point list \<Rightarrow> axis" where
-  "widest_spread ps = (
-    let (a, _) = widest_spread_rec (dim (hd ps) - 1) ps in
-    a
-  )"
-
-fun widest_spread_invar :: "dimension \<Rightarrow> kdt \<Rightarrow> bool" where
-  "widest_spread_invar _ (Leaf _) \<longleftrightarrow> True"
-| "widest_spread_invar k (Node a s l r) \<longleftrightarrow> is_widest_spread a k (set_kdt l \<union> set_kdt r) \<and> widest_spread_invar k l \<and> widest_spread_invar k r"
-
-lemma spread_is_spread_set:
-  "ps \<noteq> [] \<Longrightarrow> spread_set a (set ps) = spread a ps"
-  using Max.set_eq_fold[of "hd ps !a" _] Min.set_eq_fold[of "hd ps !a"]
-  apply (auto simp add: Let_def spread_def spread_set_def)
-  by (metis (no_types, lifting) hd_Cons_tl list.simps(15) list.simps(9) set_map)
-
-lemma widest_spread_rec_is_spread:
-  "(ws, s) = widest_spread_rec a ps \<Longrightarrow> s = spread ws ps"
-  by (induction a) (auto simp add: Let_def split: prod.splits if_splits)
-
-lemma is_widest_spread_k_le_ws:
-  "is_widest_spread ws k ps \<Longrightarrow> spread_set k ps \<le> spread_set ws ps \<Longrightarrow> is_widest_spread ws (k+1) ps"
-  using is_widest_spread_def less_Suc_eq by auto
-
-lemma is_widest_spread_k_gt_ws:
-  "is_widest_spread ws k ps \<Longrightarrow> \<not> (spread_set k ps \<le> spread_set ws ps) \<Longrightarrow> is_widest_spread k (k+1) ps"
-  using is_widest_spread_def less_Suc_eq by auto
-
-lemma widest_spread_rec_le_a:
-  "ps \<noteq> [] \<Longrightarrow> (ws, s) = widest_spread_rec a ps \<Longrightarrow> ws \<le> a"
-  by (induction a arbitrary: ws s) (auto simp add: Let_def le_Suc_eq split: prod.splits if_splits)
-
-lemma widest_spread_rec_is_widest_spread:
-  "ps \<noteq> [] \<Longrightarrow> (ws, s) = widest_spread_rec a ps \<Longrightarrow> is_widest_spread ws (a+1) (set ps)"
-proof (induction a arbitrary: ws s)
-  case 0
-  thus ?case
-    using is_widest_spread_def by simp
-next
-  case (Suc a)
-  then obtain ws' s' where *: "(ws', s') = widest_spread_rec a ps"
-    by (metis surj_pair)
-  hence "is_widest_spread ws' (Suc a) (set ps)"
-    using Suc.IH Suc.prems(1) by simp
-  then show ?case 
-    using Suc.prems * spread_is_spread_set widest_spread_rec_is_spread 
-    is_widest_spread_k_le_ws[of ws' "Suc a" "set ps"] is_widest_spread_k_gt_ws[of ws' "Suc a" "set ps"]
-    by (auto simp add: Let_def split: prod.splits if_splits)
-qed
-
-lemma widest_spread_lt_k:
-  "\<forall>p \<in> set ps. dim p = k \<Longrightarrow> 0 < k \<Longrightarrow> ps \<noteq> [] \<Longrightarrow> widest_spread ps < k"
-  using widest_spread_def widest_spread_rec_le_a
-  apply (auto split: prod.splits)
-  by (metis Suc_le_lessD Suc_le_mono Suc_pred)
-
-lemma widest_spread_is_widest_spread:
-  assumes "ps \<noteq> []" "\<forall>p \<in> set ps. dim p = k"
-  shows "is_widest_spread (widest_spread ps) k (set ps)"
-proof (cases k)
-  case 0
-  then show ?thesis
-    using is_widest_spread_def by simp
-next
-  case (Suc n)
-  obtain ws s where *: "(ws, s) = widest_spread_rec (dim (hd ps) - 1) ps"
-    using prod.collapse by blast
-  moreover have "dim (hd ps) = k"
-    using assms(1,2) by simp
-  ultimately have "is_widest_spread ws (k - 1 + 1) (set ps)"
-    using widest_spread_rec_is_widest_spread assms(1) by simp 
-  hence "is_widest_spread ws k (set ps)"
-    using Suc by simp
-  thus ?thesis
-    using * widest_spread_def by (auto split: prod.split)
-qed
-
-
-subsection "Partitioning a List of Points by Median"
-
-definition axis_median :: "axis \<Rightarrow> point list \<Rightarrow> real" where
-  "axis_median a ps = (
-    let n = (length ps - 1) div 2  in
-    let ps' = map (\<lambda>p. p!a) ps in
-    fast_select n ps'
-  )"
-
-lemma size_mset_map_P:
-  "size {# y \<in># mset (map f xs). P y #} = size {# x \<in># mset xs. P (f x) #}"
-  by (induction xs) auto
-
-lemma size_axis_median_length:
-  assumes "0 < length ps"
-  shows "size {# p \<in># mset ps. p!a < axis_median a ps #} \<le> (length ps - 1) div 2" (is "?thesis1")
-    and "size {# p \<in># mset ps. axis_median a ps < p!a #} \<le> length ps div 2"       (is "?thesis2")
-proof -
-  let ?n = "(length ps - 1) div 2"
-  let ?ps' = "map (\<lambda>p. p!a) ps"
-  let ?m = "fast_select ?n ?ps'"
-
-  have "length ps = length ?ps'"
-    by simp
-  moreover have "?n < length ?ps'"
-    using assms calculation by linarith
-  ultimately have *: "median ?ps' = ?m"
-    using median_def fast_select_correct by metis
-
-  have "size {# a \<in># mset ?ps'. a < ?m #} \<le> (length ps - 1) div 2"
-    using * size_less_than_median[of ?ps'] by simp
-  hence "size {# p \<in># mset ps. p!a < ?m #} \<le> (length ps - 1) div 2"
-    using size_mset_map_P[of "\<lambda>a. a < ?m"] by metis
-  thus ?thesis1
-    using axis_median_def by metis
-  
-  have "size {# a \<in># mset ?ps'. ?m < a #} \<le> length ps div 2"
-    using * size_greater_than_median[of ?ps'] by simp
-  hence "size {# p \<in># mset ps. ?m < p!a #} \<le> length ps div 2"
-    using size_mset_map_P[of "\<lambda>a. ?m < a"] by metis
-  thus ?thesis2 
-    using axis_median_def by metis
-qed
-
-
-subsubsection "Threeway Partitioning a List of Points"
-
-fun partition :: "axis \<Rightarrow> real \<Rightarrow> point list \<Rightarrow> point list * point list * point list" where
-  "partition _ _ [] = ([], [], [])"
-| "partition a m (p # ps) = (
-    let (lt, eq, gt) = partition a m ps in
-    if p!a < m then 
-      (p # lt, eq, gt)
-    else if p!a = m then 
-      (lt, p # eq, gt)
-    else 
-      (lt, eq, p # gt)
-  )"
-
-lemma partition_set:
-  assumes "(lt, eq, gt) = partition a m ps"
-  shows "set ps = set lt \<union> set eq \<union> set gt"
-  using assms by (induction ps arbitrary: lt eq gt) (auto split: prod.splits if_splits)
-
-lemma partition_mset:
-  assumes "(lt, eq, gt) = partition a m ps"
-  shows "mset lt = {# p \<in># mset ps. p!a < m #}"
-    and "mset eq = {# p \<in># mset ps. p!a = m #}"
-    and "mset gt = {# p \<in># mset ps. m < p!a #}"
-  using assms by (induction ps arbitrary: lt eq gt) (auto split: prod.splits if_splits)
-
-lemma partition_filter:
-  assumes "(lt, eq, gt) = partition a m ps"
-  shows "\<forall>p \<in> set lt. p!a < m"
-    and "\<forall>p \<in> set eq. p!a = m"
-    and "\<forall>p \<in> set gt. m < p!a"
-  using assms by (induction ps arbitrary: lt eq gt) (auto split: prod.splits if_splits)
-
-lemma partition_length:
-  assumes "(lt, eq, gt) = partition a m ps"
-  shows "length ps = length lt + length eq + length gt"
-    and "length lt = size {# p \<in># mset ps. p!a < m #}"
-    and "length eq = size {# p \<in># mset ps. p!a = m #}"
-    and "length gt = size {# p \<in># mset ps. m < p!a #}"
-  using assms by (induction ps arbitrary: lt eq gt) (auto split: prod.splits if_splits)
-
-
-subsubsection "Partitioning a List of Points balanced by Median"
-
-definition partition_by_median :: "axis \<Rightarrow> point list \<Rightarrow> point list * real * point list" where
-  "partition_by_median a ps = (
-     let m = axis_median a ps in
-     let (lt, eq, gt) = partition a m ps in
-     let rem = length ps div 2 - length lt in
-     (lt @ take rem eq, m, drop rem eq @ gt)
-  )"
-
-lemma set_take_drop: "set xs = set (take n xs) \<union> set (drop n xs)"
-  by (metis append_take_drop_id set_append)
-
-lemma partition_by_median_set:
-  assumes "(l, m, r) = partition_by_median a ps"
-  shows "set ps = set l \<union> set r"
-  using assms unfolding partition_by_median_def
-  apply (simp add: Let_def split: prod.splits)
-  using set_take_drop by (metis partition_set inf_sup_aci(6))
-
-lemma partition_by_median_filter:
-  assumes "(l, m, r) = partition_by_median a ps"
-  shows "\<forall>p \<in> set l. p!a \<le> m"
-    and "\<forall>p \<in> set r. m \<le> p!a"
-  using assms unfolding partition_by_median_def
-  by (auto simp add: Let_def le_less split: prod.splits dest!: in_set_takeD in_set_dropD)
-    (metis partition_filter)+
-
-lemma partition_by_median_length1:
-  assumes "(l, m, r) = partition_by_median a ps"
-  shows "length ps = length l + length r"
-  using assms unfolding partition_by_median_def
-  apply (simp add: Let_def min_def split: prod.splits)
-  by (metis (no_types, lifting) add.assoc partition_length(1))
-
-lemma partition_by_median_length2:
-  assumes "(l, m, r) = partition_by_median a ps" "0 < length ps"
-  shows "length r - length l \<le> 1"
-    and "length l \<le> length r"
-proof -
-  let ?m = "axis_median a ps"
-  let ?leg = "partition a ?m ps"
-  let ?lt = "fst ?leg"
-  let ?eq = "fst (snd ?leg)"
-  let ?gt = "snd (snd ?leg)"
-  let ?rem = "length ps div 2 - length ?lt"
-  let ?l = "?lt @ take ?rem ?eq"
-  let ?r = "drop ?rem ?eq @ ?gt"
-
-  have *: "(?lt, ?eq, ?gt) = partition a ?m ps"
-    by simp
-  have "length ?lt \<le> (length ps - 1) div 2"
-    using assms * partition_length(2) size_axis_median_length(1) by presburger
-  moreover have "length ?gt \<le> length ps div 2"
-    using assms * partition_length(4) size_axis_median_length(2) by presburger
-  moreover have "length ps = length ?lt + length ?eq + length ?gt"
-    using * partition_length(1) by simp
-  ultimately have L: "length ?l = length ps div 2"
-    by simp
-
-  have **: "(?l, ?m, ?r) = partition_by_median a ps"
-    by (auto simp add: Let_def partition_by_median_def split: prod.splits)
-  hence "length ps = length ?l + length ?r"
-    using partition_by_median_length1 by blast
-  hence "length ?l \<le> length ?r" "length ?r - length ?l \<le> 1"
-    using L by linarith+
-
-  thus "length l \<le> length r" "length r - length l \<le> 1" 
-    using ** by (metis Pair_inject assms(1))+
-qed
-
-lemma partition_by_median_length3: 
-  assumes "(l, m, r) = partition_by_median a ps" "0 < length ps"
-  shows "length l < length ps"
-proof (rule ccontr)
-  assume *: "\<not> (length l < length ps)"
-  have "length ps = length l + length r"
-    using partition_by_median_length1 assms(1) by simp
-  hence "length l = length ps" "length r = 0"
-    using * by simp_all
-  moreover have "length l \<le> length r"
-    using partition_by_median_length2 assms by fastforce+
-  ultimately show "False" using assms(2) by simp
-qed
-
-lemma partition_by_median_length4: 
-  assumes "(l, m, r) = partition_by_median a ps" "1 < length ps"
-  shows "length r < length ps"
-proof (rule ccontr)
-  assume *: "\<not> (length r < length ps)"
-  have "length ps = length l + length r"
-    using partition_by_median_length1 assms(1) by simp
-  hence "length r = length ps" "length l = 0"
-    using * by simp_all
-  moreover have "length r - length l \<le> 1"
-    using partition_by_median_length2 assms by fastforce+
-  ultimately show "False" using assms(2) by simp
-qed
-
-lemma partition_by_median_length5:
-  assumes "(l, m, r) = partition_by_median a ps" "1 < length ps"
-  shows "0 < length l"
-    and "0 < length r"
-  using assms partition_by_median_length1 partition_by_median_length4 apply simp
-  using assms partition_by_median_length1 partition_by_median_length3 by fastforce
-
-lemmas partition_by_median_length = 
-  partition_by_median_length1 partition_by_median_length2
-  partition_by_median_length3 partition_by_median_length4
-  partition_by_median_length5
-
-
-subsection \<open>Building the Tree\<close>
-
-function (sequential) build :: "dimension \<Rightarrow> point list \<Rightarrow> kdt" where
-  "build _ [] = undefined" (* We never hit this case recursively. Only if the original input is really [].*)
-| "build k [p] = Leaf p" 
-| "build k ps = (
-    let a = widest_spread ps in
-    let (l, m, r) = partition_by_median a ps in
-    Node a m (build k l) (build k r)
-  )"
-  by pat_completeness auto
-termination build
-  using partition_by_median_length(4,5)
-  apply (relation "measure (\<lambda>( _, ps). length ps)")
-  apply (auto)
-  apply fastforce+
-  done
-
-
-text \<open>
-  Setting up different build simps for length induct.
-\<close>
-
-lemma build_simp1:
-  "ps = [p] \<Longrightarrow> build k ps = Leaf p"
-  by simp
-
-lemma build_simp2:
-  "ps = p\<^sub>0 # p\<^sub>1 # ps' \<Longrightarrow> a = widest_spread ps \<Longrightarrow> (l, m, r) = partition_by_median a ps \<Longrightarrow> build k ps = Node a m (build k l) (build k r)"
-  using build.simps(3) by (auto simp add: Let_def split: prod.splits)
-
-lemma length_ps_gt_1:
-  "1 < length ps \<Longrightarrow> \<exists>p\<^sub>0 p\<^sub>1 ps'. ps = p\<^sub>0 # p\<^sub>1 # ps'"
-  by (induction ps) (auto simp add: neq_Nil_conv)
-
-lemma build_simp3:
-  "1 < length ps \<Longrightarrow> a = widest_spread ps \<Longrightarrow> (l, m, r) = partition_by_median a ps \<Longrightarrow> build k ps = Node a m (build k l) (build k r)"
-  using build_simp2 length_ps_gt_1 by fast
-
-lemmas build_simps[simp] = build_simp1 build_simp3
-
-declare build.simps[simp del]
-
-
-text \<open>
-  The main theorems.
-\<close>
-
-theorem build_set:
-  assumes "0 < length ps"
-  shows "set ps = set_kdt (build k ps)"
-  using assms
-proof (induction ps rule: length_induct)
-  case (1 ps)
-  then show ?case
-  proof (cases "length ps \<le> 1")
-    case True
-    then obtain p where "ps = [p]"
-      using "1.prems" by (cases ps) auto
-    thus ?thesis
-      by simp
-  next
-    case False
-
-    let ?a = "widest_spread ps"
-    let ?lmr = "partition_by_median ?a ps"
-    let ?l = "fst ?lmr"
-    let ?m = "fst (snd ?lmr)"
-    let ?r = "snd (snd ?lmr)"
-
-    have "set ?l = set_kdt (build k ?l)" "set ?r = set_kdt (build k ?r)" 
-      using False partition_by_median_length(4,5,6,7)[of ?l ?m ?r ?a ps] "1.IH" by force+
-    moreover have "set ps = set ?l \<union> set ?r"
-      using partition_by_median_set by (metis prod.collapse)
-    moreover have "build k ps = Node ?a ?m (build k ?l) (build k ?r)"
-      using False by simp
-    ultimately show ?thesis
-      by auto
-  qed
-qed
-
-theorem build_invar:
-  assumes "0 < length ps" "\<forall>p \<in> set ps. dim p = k" "distinct ps" "0 < k"
-  shows "invar k (build k ps)"
-  using assms
-proof (induction ps rule: length_induct)
-  case (1 ps)
-  then show ?case
-  proof (cases "length ps \<le> 1")
-    case True
-    then obtain p where P: "ps = [p]"
-      using "1.prems"(1) by (cases ps) auto
-    hence "dim p = k"
-      using "1.prems"(2) by simp
-    thus ?thesis
-      using P by simp
-  next
-    case False
-
-    let ?a = "widest_spread ps"
-    let ?lmr = "partition_by_median ?a ps"
-    let ?l = "fst ?lmr"
-    let ?m = "fst (snd ?lmr)"
-    let ?r = "snd (snd ?lmr)"
-
-    have 1: "length ps = length ?l + length ?r"
-      using partition_by_median_length(1) by (metis prod.collapse)+
-    hence 2: "length ?l < length ps" "length ?r < length ps"
-      using False partition_by_median_length(4,5) not_le_imp_less "1.prems" by (metis prod.collapse)+
-    hence 3: "0 < length ?l" "0 < length ?r"
-      using 1 False partition_by_median_length(6,7) by simp_all
-    moreover have 4: "set ps = set ?l \<union> set ?r"
-      using partition_by_median_set by (metis prod.collapse)
-    moreover have "distinct ?l" "distinct ?r" and 5: "set ?l \<inter> set ?r = {}"
-      using "1.prems"(3) 1 4 by (metis card_distinct distinct_append distinct_card length_append set_append)+
-    moreover have "\<forall>p \<in> set ?l .dim p = k" "\<forall>p \<in> set ?r .dim p = k"
-      using "1.prems"(2) 4 5 by simp_all
-    ultimately have "invar k (build k ?l)" "invar k (build k ?r)"
-      using "1.IH" 2 by (simp_all add: "1.prems"(4))
-    moreover have "\<forall>p \<in> set ?l. p ! ?a \<le> ?m" "\<forall>p \<in> set ?r. ?m \<le> p ! ?a"
-      using partition_by_median_filter by (metis prod.collapse)+
-    moreover have "build k ps = Node ?a ?m (build k ?l) (build k ?r)"
-      using False by simp
-    moreover have "?a < k"
-      using "1.prems"(1,2,4) widest_spread_lt_k by auto
-    ultimately show ?thesis 
-      using 3 5 build_set by auto
-  qed
-qed
-
-theorem build_widest_spread:
-  assumes "0 < length ps" "\<forall>p \<in> set ps. dim p = k"
-  shows "widest_spread_invar k (build k ps)"
-  using assms
-proof (induction ps rule: length_induct)
-  case (1 ps)
-  then show ?case
-  proof (cases "length ps \<le> 1")
-    case True
-    then obtain p where "ps = [p]"
-      using "1.prems" by (cases ps) auto
-    thus ?thesis by simp
-  next
-    case False
-
-    let ?a = "widest_spread ps"
-    let ?lmr = "partition_by_median ?a ps"
-    let ?l = "fst ?lmr"
-    let ?m = "fst (snd ?lmr)"
-    let ?r = "snd (snd ?lmr)"
-
-    have 1: "length ps = length ?l + length ?r"
-      using partition_by_median_length(1) by (metis prod.collapse)+
-    hence 2: "length ?l < length ps" "length ?r < length ps"
-      using False partition_by_median_length(4,5) not_le_imp_less "1.prems"
-        apply (auto simp add: not_le)
-       apply (metis "1" not_add_less1 not_add_less2 prod.collapse)+
-      done
-    hence 3: "0 < length ?l" "0 < length ?r"
-      using 1 False partition_by_median_length(6,7) by simp_all
-    moreover have 4: "set ps = set ?l \<union> set ?r"
-      using partition_by_median_set by (metis prod.collapse)
-    moreover have "\<forall>p \<in> set ?l .dim p = k" "\<forall>p \<in> set ?r .dim p = k"
-      using "1.prems"(2) 4 by simp_all
-    ultimately have "widest_spread_invar k (build k ?l)" "widest_spread_invar k (build k ?r)"
-      using "1.IH" 2 by simp_all
-    moreover have "is_widest_spread ?a k (set_kdt (build k ?l) \<union> set_kdt (build k ?r))"
-      using widest_spread_is_widest_spread "1.prems" 3 4 build_set by fastforce
-    moreover have "build k ps = Node ?a ?m (build k ?l) (build k ?r)"
-      using False by simp
-    ultimately show ?thesis
-      by auto
-  qed
-qed
-
-theorem build_size:
-  assumes "0 < length ps"
-  shows "size_kdt (build k ps) = length ps"
-  using assms
-proof (induction ps rule: length_induct)
-  case (1 ps)
-  then show ?case
-  proof (cases "length ps \<le> 1")
-    case True
-    then obtain p where "ps = [p]"
-      using "1.prems" by (cases ps) auto
-    thus ?thesis by simp
-  next
-    case False
-
-    let ?a = "widest_spread ps"
-    let ?lmr = "partition_by_median ?a ps"
-    let ?l = "fst ?lmr"
-    let ?m = "fst (snd ?lmr)"
-    let ?r = "snd (snd ?lmr)"
-
-    have "size_kdt (build k ?l) = length ?l" "size_kdt (build k ?r) = length ?r" 
-      using False partition_by_median_length(4,5,6,7)[of ?l ?m ?r ?a ps] "1.IH" by force+
-    moreover have "build k ps = Node ?a ?m (build k ?l) (build k ?r)"
-      using False by simp
-    ultimately show ?thesis
-      using partition_by_median_length(1) by (metis prod.collapse size_kdt.simps(2))
-  qed
-qed
-
-theorem build_balanced:
-  assumes "0 < length ps"
-  shows "balanced (build k ps)"
-  using assms
-proof (induction ps rule: length_induct)
-  case (1 ps)
-  show ?case
-  proof (cases "length ps \<le> 1")
-    case True
-    then obtain p where "ps = [p]"
-      using "1.prems" by (cases ps) auto
-    thus ?thesis 
-      unfolding balanced_def by simp
-  next
-    case False
-
-    let ?a = "widest_spread ps"
-    let ?lmr = "partition_by_median ?a ps"
-    let ?l = "fst ?lmr"
-    let ?m = "fst (snd ?lmr)"
-    let ?r = "snd (snd ?lmr)"
-
-    have 0: "length ps = length ?l + length ?r" "length ?r - length ?l \<le> 1" "length ?l \<le> length ?r"
-      using partition_by_median_length(1,2,3) "1.prems" by (metis prod.collapse)+
-    hence 1: "length ?l + 1 = length ?r \<or> length ?l = length ?r"
-      by linarith
-    moreover have 2: "length ?l < length ps" "length ?r < length ps"
-      using False 0 by auto
-    moreover have 3: "0 < length ?l" "0 < length ?r"
-      using "1.prems" 0 1 2 by linarith+
-    ultimately have 4: "balanced (build k ?l)" "balanced (build k ?r)"
-      using "1.IH" by simp_all
-    have "build k ps = Node ?a ?m (build k ?l) (build k ?r)"
-      using False by simp
-    moreover have "size_kdt (build k ?l) + 1 = size_kdt (build k ?r) \<or> size_kdt (build k ?l) = size_kdt (build k ?r)"
-      using 1 3 build_size by simp
-    ultimately show ?thesis
-      using 4 balanced_Node_if_wbal2 by auto
-  qed
-qed
-
-lemma complete_if_balanced_size_2powh:
-  assumes "balanced kdt" "size_kdt kdt = 2 ^ h"
-  shows "complete kdt"
-proof (rule ccontr)
-  assume "\<not> complete kdt"
-  hence "2 ^ (min_height kdt) < size_kdt kdt" "size_kdt kdt < 2 ^ height kdt"
-    by (simp_all add: min_height_size_if_incomplete size_height_if_incomplete)
-  hence "height kdt - min_height kdt > 1"
-    using assms(2) by simp
-  hence "\<not> balanced kdt"
-    using balanced_def by simp
-  thus "False"
-    using assms(1) by simp
-qed
-
-theorem build_complete:
-  assumes "length ps = 2 ^ h"
-  shows "complete (build k ps)"
-  using assms complete_if_balanced_size_2powh
-  by (simp add: build_balanced build_size)
-
-corollary build_height:
-  "length ps = 2 ^ h \<Longrightarrow> length ps = 2 ^ (height (build k ps))"
-  using build_complete build_size complete_iff_size by auto
-
-end
\ No newline at end of file
diff --git a/thys/KD_Tree/Build.thy b/thys/KD_Tree/Build.thy
new file mode 100644
--- /dev/null
+++ b/thys/KD_Tree/Build.thy
@@ -0,0 +1,464 @@
+(*
+  File:     Build.thy
+  Author:   Martin Rau, TU München
+*)
+
+section \<open>Building a balanced \<open>k\<close>-d Tree from a List of Points\<close>
+
+theory Build
+imports
+  KD_Tree
+  Median_Of_Medians_Selection.Median_Of_Medians_Selection
+begin
+
+text \<open>
+  Build a balanced \<open>k\<close>-d Tree by recursively partition the points into two lists.
+  The partitioning criteria will be the median at a particular axis \<open>k\<close>.
+  The left list will contain all points \<open>p\<close> with @{term "p$k \<le> median"}.
+  The right list will contain all points with median at axis @{term "median < p$k"}.
+  The left and right list differ in length by one or none.
+  The axis \<open>k\<close> will the widest spread axis.
+\<close>
+
+subsection "Auxiliary Lemmas"
+
+lemma length_filter_mset_sorted_nth:
+  assumes "distinct xs" "n < length xs" "sorted xs"
+  shows "{# x \<in># mset xs. x \<le> xs ! n #} = mset (take (n + 1) xs)"
+  using assms
+proof (induction xs arbitrary: n rule: list.induct)
+  case (Cons x xs)
+  thus ?case
+  proof (cases n)
+    case 0
+    thus ?thesis
+      using Cons.prems(1,3) filter_mset_is_empty_iff by fastforce
+  next
+    case (Suc n')
+    thus ?thesis
+      using Cons by simp
+  qed
+qed auto
+
+lemma length_filter_sort_nth:
+  assumes "distinct xs" "n < length xs"
+  shows "length (filter (\<lambda>x. x \<le> sort xs ! n) xs) = n + 1"
+proof -
+  have "length (filter (\<lambda>x. x \<le> sort xs ! n) xs) = length (filter (\<lambda>x. x \<le> sort xs ! n) (sort xs))"
+    by (simp add: filter_sort)
+  also have "... = size (mset (filter (\<lambda>x. x \<le> sort xs ! n) (sort xs)))"
+    using size_mset by metis
+  also have "... = size ({# x \<in># mset (sort xs). x \<le> sort xs ! n #})"
+    using mset_filter by simp
+  also have "... = size (mset (take (n + 1) (sort xs)))"
+    using length_filter_mset_sorted_nth assms sorted_sort distinct_sort length_sort by metis
+  finally show ?thesis
+    using assms(2) by auto
+qed
+
+
+subsection \<open>Widest Spread Axis\<close>
+
+definition calc_spread :: "('k::finite) \<Rightarrow> 'k point list \<Rightarrow> real" where
+  "calc_spread k ps = (case ps of [] \<Rightarrow> 0 | ps \<Rightarrow>
+    let ks = map (\<lambda>p. p$k) (tl ps) in
+    fold max ks ((hd ps)$k) - fold min ks ((hd ps)$k)
+  )"
+
+fun widest_spread :: "('k::finite) list \<Rightarrow> 'k point list \<Rightarrow> 'k \<times> real" where
+  "widest_spread [] _ = undefined"
+| "widest_spread [k] ps = (k, calc_spread k ps)"
+| "widest_spread (k # ks) ps = (
+    let (k', s') = widest_spread ks ps in
+    let s = calc_spread k ps in
+    if s \<le> s' then (k', s') else (k, s)
+  )"
+
+lemma calc_spread_spec:
+  "calc_spread k ps = spread k (set ps)"
+  using Max.set_eq_fold[of "(hd ps)$k"] Min.set_eq_fold[of "(hd ps)$k"]
+  by (auto simp: Let_def spread_def calc_spread_def split: list.splits, metis set_map)
+
+lemma widest_spread_calc_spread:
+  "ks \<noteq> [] \<Longrightarrow> (k, s) = widest_spread ks ps \<Longrightarrow> s = calc_spread k ps"
+  by (induction ks ps rule: widest_spread.induct) (auto simp: Let_def split: prod.splits if_splits)
+
+lemma widest_spread_axis_Un:
+  shows "widest_spread_axis k K P \<Longrightarrow> spread k' P \<le> spread k P \<Longrightarrow> widest_spread_axis k (K \<union> { k' }) P"
+    and "widest_spread_axis k K P \<Longrightarrow> spread k P \<le> spread k' P \<Longrightarrow> widest_spread_axis k' (K \<union> { k' }) P"
+  unfolding widest_spread_axis_def by auto
+
+lemma widest_spread_spec:
+  "(k, s) = widest_spread ks ps \<Longrightarrow> widest_spread_axis k (set ks) (set ps)"
+proof (induction ks ps arbitrary: k s rule: widest_spread.induct)
+  case (3 k\<^sub>0 k\<^sub>1 ks ps)
+  obtain K' S' where K'_def: "(K', S') = widest_spread (k\<^sub>1 # ks) ps"
+    by (metis surj_pair)
+  hence IH: "widest_spread_axis K' (set (k\<^sub>1 # ks)) (set ps)"
+    using "3.IH" by blast
+  hence 0: "S' = spread K' (set ps)"
+    using K'_def widest_spread_calc_spread calc_spread_spec by blast
+  define S where "S = calc_spread k\<^sub>0 ps"
+  hence 1: "S = spread k\<^sub>0 (set ps)"
+    using calc_spread_spec by blast
+  show ?case
+  proof (cases "S \<le> S'")
+    case True
+    hence "widest_spread_axis K' (set (k\<^sub>0 # k\<^sub>1 # ks)) (set ps)"
+      using 0 1 widest_spread_axis_Un(1)[OF IH, of k\<^sub>0]  by auto
+    thus ?thesis
+      using True K'_def S_def "3.prems" by (auto split: prod.splits)
+  next
+    case False
+    hence "widest_spread_axis k\<^sub>0 (set (k\<^sub>0 # k\<^sub>1 # ks)) (set ps)"
+      using 0 1 widest_spread_axis_Un(2)[OF IH, of k\<^sub>0] "3.prems"(1) by auto
+    thus ?thesis
+      using False K'_def S_def "3.prems" by (auto split: prod.splits)
+  qed
+qed (auto simp: widest_spread_axis_def)
+
+
+subsection \<open>Fast Axis Median\<close>
+
+definition axis_median :: "('k::finite) \<Rightarrow> 'k point list \<Rightarrow> real" where
+  "axis_median k ps = (let n = (length ps - 1) div 2 in fast_select n (map (\<lambda>p. p$k) ps))"
+
+lemma length_filter_le_axis_median:
+  assumes "0 < length ps" "\<forall>k. distinct (map (\<lambda>p. p$k) ps)"
+  shows "length (filter (\<lambda>p. p$k \<le> axis_median k ps) ps) = (length ps - 1) div 2 + 1"
+proof -
+  let ?n = "(length ps - 1) div 2"
+  let ?ps = "map (\<lambda>p. p$k) ps"
+  let ?m = "fast_select ?n ?ps"
+  have 0: "?n < length ?ps"
+    using assms(1) by (auto, linarith)
+  have 1: "distinct ?ps"
+    using assms(2) by blast
+  have "?m = select ?n ?ps"
+    using fast_select_correct[OF 0] by blast
+  hence "length (filter (\<lambda>p. p$k \<le> axis_median k ps) ps) =
+        length (filter (\<lambda>p. p$k \<le> sort ?ps ! ?n) ps)"
+    unfolding axis_median_def by (auto simp add: Let_def select_def simp del: fast_select.simps)
+  also have "... = length (filter (\<lambda>v. v \<le> sort ?ps ! ?n) ?ps)"
+    by (induction ps) (auto, metis comp_apply)
+  also have "... = ?n + 1"
+    using length_filter_sort_nth[OF 1 0] by blast
+  finally show ?thesis .
+qed
+
+definition partition_by_median :: "('k::finite) \<Rightarrow> 'k point list \<Rightarrow> 'k point list \<times> real \<times> 'k point list" where
+  "partition_by_median k ps = (
+     let m = axis_median k ps in
+     let (l, r) = partition (\<lambda>p. p$k \<le> m) ps in
+     (l, m, r)
+  )"
+
+lemma set_partition_by_median:
+  "(l, m, r) = partition_by_median k ps \<Longrightarrow> set ps = set l \<union> set r"
+  unfolding partition_by_median_def by (auto simp: Let_def)
+
+lemma filter_partition_by_median:
+  assumes "(l, m, r) = partition_by_median k ps"
+  shows "\<forall>p \<in> set l. p$k \<le> m"
+    and "\<forall>p \<in> set r. \<not>p$k \<le> m"
+  using assms unfolding partition_by_median_def by (auto simp: Let_def)
+
+lemma sum_length_partition_by_median:
+  assumes "(l, m, r) = partition_by_median k ps"
+  shows "length ps = length l + length r"
+  using assms sum_length_filter_compl[of "(\<lambda>p. p $ k \<le> axis_median k ps)"]
+  unfolding partition_by_median_def by (simp add: Let_def o_def)
+
+lemma length_l_partition_by_median:
+  assumes "0 < length ps" "\<forall>k. distinct (map (\<lambda>p. p$k) ps)" "(l, m, r) = partition_by_median k ps"
+  shows "length l = (length ps - 1) div 2 + 1"
+  using assms unfolding partition_by_median_def by (auto simp: Let_def length_filter_le_axis_median)
+
+corollary lengths_partition_by_median_1:
+  assumes "0 < length ps"  "\<forall>k. distinct (map (\<lambda>p. p$k) ps)" "(l, m, r) = partition_by_median k ps"
+  shows "length l - length r \<le> 1"
+    and "length r \<le> length l"
+    and "0 < length l"
+    and "length r < length ps"
+  using length_l_partition_by_median[OF assms] sum_length_partition_by_median[OF assms(3)] by auto
+
+corollary lengths_partition_by_median_2:
+  assumes "1 < length ps" "\<forall>k. distinct (map (\<lambda>p. p$k) ps)" "(l, m, r) = partition_by_median k ps"
+  shows "0 < length r"
+    and "length l < length ps"
+proof -
+  have *: "0 < length ps"
+    using assms(1) by auto
+  show "0 < length r" "length l < length ps"
+    using length_l_partition_by_median[OF * assms(2,3)] sum_length_partition_by_median[OF assms(3)]
+    using assms(1) by linarith+
+qed
+
+lemmas length_partition_by_median =
+  sum_length_partition_by_median length_l_partition_by_median
+  lengths_partition_by_median_1 lengths_partition_by_median_2
+
+
+subsection \<open>Building the Tree\<close>
+
+function (domintros, sequential) build :: "('k::finite) list \<Rightarrow> 'k point list \<Rightarrow> 'k kdt" where
+  "build _ [] = undefined"
+| "build _ [p] = Leaf p"
+| "build ks ps = (
+    let (k, _) = widest_spread ks ps in
+    let (l, m, r) = partition_by_median k ps in
+    Node k m (build ks l) (build ks r)
+  )"
+  by pat_completeness auto
+
+lemma build_domintros3:
+  assumes "(k, s) = widest_spread ks (x # y # zs)" "(l, m, r) = partition_by_median k (x # y # zs)"
+  assumes "build_dom (ks, l)" "build_dom (ks, r)"
+  shows "build_dom (ks, x # y # zs)"
+proof -
+  {
+    fix k s l m r
+    assume "(k, s) = widest_spread ks (x # y # zs)" "(l, m, r) = partition_by_median k (x # y # zs)"
+    hence "build_dom (ks, l)" "build_dom (ks, r)"
+      using assms by (metis Pair_inject)+
+  }
+  thus ?thesis
+    by (simp add: build.domintros(3))
+qed
+
+lemma build_termination:
+  assumes "\<forall>k. distinct (map (\<lambda>p. p$k) ps)"
+  shows "build_dom (ks, ps)"
+  using assms
+proof (induction ps rule: length_induct)
+  case (1 xs)
+  consider (A) "xs = []" | (B) "\<exists>x. xs = [x]" | (C) "\<exists>x y zs. xs = x # y # zs"
+    by (induction xs rule: induct_list012) auto
+  then show ?case
+  proof cases
+    case C
+    then obtain x y zs where xyzs_def: "xs = x # y # zs"
+      by blast
+    obtain k s where ks_def: "(k, s) = widest_spread ks xs"
+      by (metis surj_pair)
+    obtain l m r where lmr_def: "(l, m, r) = partition_by_median k xs"
+      by (metis prod_cases3)
+    note defs = xyzs_def ks_def lmr_def
+    have "\<forall>k. distinct (map (\<lambda>p. p $ k) l)" "\<forall>k. distinct (map (\<lambda>p. p $ k) r)"
+      using lmr_def unfolding partition_by_median_def
+      by (auto simp: Let_def "1.prems" distinct_map_filter)
+    moreover have "length l < length xs" "length r < length xs"
+      using length_partition_by_median(8)[OF _ "1.prems"] length_partition_by_median(6)[OF _ "1.prems"]
+      using defs by auto
+    ultimately have "build_dom (ks, l)" "build_dom (ks, r)"
+      using "1.IH" by blast+
+    thus ?thesis
+      using build_domintros3 defs by blast
+  qed (auto intro: build.domintros)
+qed
+
+lemma build_psimp_1:
+  "ps = [p] \<Longrightarrow> build k ps = Leaf p"
+  by (simp add: build.domintros(2) build.psimps(2))
+
+lemma build_psimp_2:
+  assumes "(k, s) = widest_spread ks (x # y # zs)" "(l, m, r) = partition_by_median k (x # y # zs)"
+  assumes "build_dom (ks, l)" "build_dom (ks, r)"
+  shows "build ks (x # y # zs) = Node k m (build ks l) (build ks r)"
+proof -
+  have 0: "build_dom (ks, x # y # zs)"
+    using assms build_domintros3 by blast
+  thus ?thesis
+    using build.psimps(3)[OF 0] assms(1,2) by (auto split: prod.splits)
+qed
+
+lemma length_xs_gt_1:
+  "1 < length xs \<Longrightarrow> \<exists>x y ys. xs = x # y # ys"
+  by (cases xs, auto simp: neq_Nil_conv)
+
+lemma build_psimp_3:
+  assumes "1 < length ps" "(k, s) = widest_spread ks ps" "(l, m, r) = partition_by_median k ps"
+  assumes "build_dom (ks, l)" "build_dom (ks, r)"
+  shows "build ks ps = Node k m (build ks l) (build ks r)"
+  using build_psimp_2 length_xs_gt_1 assms by blast
+
+lemmas build_psimps[simp] = build_psimp_1 build_psimp_3
+
+
+subsection \<open>Main Theorems\<close>
+
+theorem set_build:
+  "0 < length ps \<Longrightarrow> \<forall>k. distinct (map (\<lambda>p. p$k) ps) \<Longrightarrow> set ps = set_kdt (build ks ps)"
+proof (induction ps rule: length_induct)
+  case (1 ps)
+  show ?case
+  proof (cases "1 < length ps")
+    case True
+    obtain k s where ks_def: "(k, s) = widest_spread ks ps"
+      by (metis surj_pair)
+    obtain l m r where lmr_def: "(l, m, r) = partition_by_median k ps"
+      by (metis prod_cases3)
+    have D: "\<forall>k. distinct (map (\<lambda>p. p$k) l)" "\<forall>k. distinct (map (\<lambda>p. p$k) r)"
+      using lmr_def unfolding partition_by_median_def
+      by (auto simp: "1.prems"(2) Let_def distinct_map_filter)
+    moreover have "length l < length ps" "0 < length l"
+                  "length r < length ps" "0 < length r"
+      using length_partition_by_median(8)[OF True "1.prems"(2)]
+            length_partition_by_median(5)[OF "1.prems"(1) "1.prems"(2)]
+            length_partition_by_median(6)[OF "1.prems"(1) "1.prems"(2)]
+            length_partition_by_median(7)[OF True "1.prems"(2)]
+            lmr_def by blast+
+    ultimately have "set l = set_kdt (build ks l)" "set r = set_kdt (build ks r)"
+      using "1.IH" by blast+
+    moreover have "set ps = set l \<union> set r"
+      using lmr_def unfolding partition_by_median_def by (auto simp: Let_def)
+    moreover have "build ks ps = Node k m (build ks l) (build ks r)"
+      using build_psimp_3[OF True ks_def lmr_def] build_termination D by blast
+    ultimately show ?thesis
+      by simp
+  next
+    case False
+    thus ?thesis
+      using "1.prems" by (cases ps) auto
+  qed
+qed
+
+theorem invar_build:
+  "0 < length ps \<Longrightarrow> \<forall>k. distinct (map (\<lambda>p. p$k) ps) \<Longrightarrow> set ks = UNIV \<Longrightarrow> invar (build ks ps)"
+proof (induction ps rule: length_induct)
+  case (1 ps)
+  show ?case
+  proof (cases "1 < length ps")
+    case True
+    obtain k s where ks_def: "(k, s) = widest_spread ks ps"
+      by (metis surj_pair)
+    obtain l m r where lmr_def: "(l, m, r) = partition_by_median k ps"
+      by (metis prod_cases3)
+    have D: "\<forall>k. distinct (map (\<lambda>p. p$k) l)" "\<forall>k. distinct (map (\<lambda>p. p$k) r)"
+      using lmr_def unfolding partition_by_median_def
+      by (auto simp: "1.prems"(2) Let_def distinct_map_filter)
+    moreover have "length l < length ps" "0 < length l"
+                  "length r < length ps" "0 < length r"
+      using length_partition_by_median(8)[OF True "1.prems"(2)]
+            length_partition_by_median(5)[OF "1.prems"(1) "1.prems"(2)]
+            length_partition_by_median(6)[OF "1.prems"(1) "1.prems"(2)]
+            length_partition_by_median(7)[OF True "1.prems"(2)]
+            lmr_def by blast+
+    ultimately have "invar (build ks l)" "invar (build ks r)"
+      using "1.IH" "1.prems"(3) by blast+
+    moreover have "\<forall>p \<in> set l. p$k \<le> m" "\<forall>p \<in> set r. m < p$k"
+      using filter_partition_by_median(1)[OF lmr_def]
+            filter_partition_by_median(2)[OF lmr_def] by auto
+    moreover have "widest_spread_axis k UNIV (set l \<union> set r)"
+      using widest_spread_spec[OF ks_def] "1.prems"(3) set_partition_by_median[OF lmr_def] by simp
+    moreover have "build ks ps = Node k m (build ks l) (build ks r)"
+      using build_psimp_3[OF True ks_def lmr_def] build_termination D by blast
+    ultimately show ?thesis
+      using set_build[OF \<open>0 < length l\<close> D(1)] set_build[OF \<open>0 < length r\<close> D(2)] by simp
+  next
+    case False
+    thus ?thesis
+      using "1.prems" by (cases ps) auto
+  qed
+qed
+
+theorem size_build:
+  "0 < length ps \<Longrightarrow> \<forall>k. distinct (map (\<lambda>p. p$k) ps) \<Longrightarrow> size_kdt (build ks ps) = length ps"
+proof (induction ps rule: length_induct)
+  case (1 ps)
+  show ?case
+  proof (cases "1 < length ps")
+    case True
+    obtain k s where ks_def: "(k, s) = widest_spread ks ps"
+      by (metis surj_pair)
+    obtain l m r where lmr_def: "(l, m, r) = partition_by_median k ps"
+      by (metis prod_cases3)
+    have D: "\<forall>k. distinct (map (\<lambda>p. p$k) l)" "\<forall>k. distinct (map (\<lambda>p. p$k) r)"
+      using lmr_def unfolding partition_by_median_def
+      by (auto simp: "1.prems"(2) Let_def distinct_map_filter)
+    moreover have "length l < length ps" "0 < length l"
+                  "length r < length ps" "0 < length r"
+      using length_partition_by_median(8)[OF True "1.prems"(2)]
+            length_partition_by_median(5)[OF "1.prems"(1) "1.prems"(2)]
+            length_partition_by_median(6)[OF "1.prems"(1) "1.prems"(2)]
+            length_partition_by_median(7)[OF True "1.prems"(2)]
+            lmr_def by blast+
+    ultimately have "size_kdt (build ks l) = length l" "size_kdt (build ks r) = length r"
+      using "1.IH" by blast+
+    moreover have "build ks ps = Node k m (build ks l) (build ks r)"
+      using build_psimp_3[OF True ks_def lmr_def] build_termination D by blast
+    ultimately show ?thesis
+      using length_partition_by_median(1)[OF lmr_def] by simp
+  next
+    case False
+    thus ?thesis
+      using "1.prems" by (cases ps) auto
+  qed
+qed
+
+theorem balanced_build:
+  "0 < length ps \<Longrightarrow> \<forall>k. distinct (map (\<lambda>p. p$k) ps) \<Longrightarrow> balanced (build ks ps)"
+proof (induction ps rule: length_induct)
+  case (1 ps)
+  show ?case
+  proof (cases "1 < length ps")
+    case True
+    obtain k s where ks_def: "(k, s) = widest_spread ks ps"
+      by (metis surj_pair)
+    obtain l m r where lmr_def: "(l, m, r) = partition_by_median k ps"
+      by (metis prod_cases3)
+    have D: "\<forall>k. distinct (map (\<lambda>p. p$k) l)" "\<forall>k. distinct (map (\<lambda>p. p$k) r)"
+      using lmr_def unfolding partition_by_median_def
+      by (auto simp: "1.prems"(2) Let_def distinct_map_filter)
+    moreover have "length l < length ps" "0 < length l"
+                  "length r < length ps" "0 < length r"
+      using length_partition_by_median(8)[OF True "1.prems"(2)]
+            length_partition_by_median(5)[OF "1.prems"(1) "1.prems"(2)]
+            length_partition_by_median(6)[OF "1.prems"(1) "1.prems"(2)]
+            length_partition_by_median(7)[OF True "1.prems"(2)]
+            lmr_def by blast+
+    ultimately have IH: "balanced (build ks l)" "balanced (build ks r)"
+      using "1.IH" by blast+
+    have "build ks ps = Node k m (build ks l) (build ks r)"
+      using build_psimp_3[OF True ks_def lmr_def] build_termination D by blast
+    moreover have "length r + 1 = length l \<or> length r = length l"
+      using length_partition_by_median(1)[OF lmr_def]
+            length_partition_by_median(3)[OF "1.prems"(1) "1.prems"(2) lmr_def]
+            length_partition_by_median(4)[OF "1.prems"(1) "1.prems"(2) lmr_def]
+      by linarith
+    ultimately show ?thesis
+      using balanced_Node_if_wbal1[OF IH] balanced_Node_if_wbal2[OF IH]
+            size_build[OF \<open>0 < length l\<close> D(1)] size_build[OF \<open>0 < length r\<close> D(2)]
+      by auto
+  next
+    case False
+    thus ?thesis
+      using "1.prems" by (cases ps) (auto simp: balanced_def)
+  qed
+qed
+
+lemma complete_if_balanced_size_2powh:
+  assumes "balanced kdt" "size_kdt kdt = 2 ^ h"
+  shows "complete kdt"
+proof (rule ccontr)
+  assume "\<not> complete kdt"
+  hence "2 ^ (min_height kdt) < size_kdt kdt" "size_kdt kdt < 2 ^ height kdt"
+    by (simp_all add: min_height_size_if_incomplete size_height_if_incomplete)
+  hence "height kdt - min_height kdt > 1"
+    using assms(2) by simp
+  hence "\<not> balanced kdt"
+    using balanced_def by force
+  thus "False"
+    using assms(1) by simp
+qed
+
+theorem complete_build:
+  "length ps = 2 ^ h \<Longrightarrow> \<forall>k. distinct (map (\<lambda>p. p$k) ps) \<Longrightarrow> complete (build k ps)"
+  by (simp add: balanced_build complete_if_balanced_size_2powh size_build)
+
+corollary height_build:
+  assumes "length ps = 2 ^ h" "\<forall>k. distinct (map (\<lambda>p. p$k) ps)"
+  shows "h = height (build k ps)"
+  using complete_build[OF assms] size_build[OF _ assms(2)] by (simp add: assms(1) complete_iff_size)
+
+end
diff --git a/thys/KD_Tree/KDTree.thy b/thys/KD_Tree/KDTree.thy
deleted file mode 100644
--- a/thys/KD_Tree/KDTree.thy
+++ /dev/null
@@ -1,441 +0,0 @@
-(*
-  File:     KDTree.thy
-  Author:   Martin Rau, TU München
-*)
-
-section "Definition of the \<open>k\<close>-d Tree"
-
-theory KDTree
-imports
-  Complex_Main
-begin
-
-
-text \<open>
-  A \<open>k\<close>-d tree is a space-partitioning data structure for organizing points in a \<open>k\<close>-dimensional space.
-  In principle the \<open>k\<close>-d tree is a binary tree. The leafs hold the \<open>k\<close>-dimensional points and the nodes
-  contain left and right subtrees as well as a discriminator \<open>s\<close> at a particular axis \<open>a\<close>.
-  Every node divides the space into two parts by splitting along a hyperplane.
-  Consider a node \<open>n\<close> with associated discriminator \<open>s\<close> at axis \<open>a\<close>.
-  All points in the left subtree must have a value at axis \<open>a\<close> that is less than or
-  equal to \<open>s\<close> and all points in the right subtree must have a value at axis \<open>a\<close> that is
-  greater than or equal to \<open>s\<close>.
-
-  Deviations from the papers:
-
-  The chosen tree representation is taken from @{cite "DBLP:journals/toms/FriedmanBF77"} with one minor
-  adjustment. Originally the leafs hold buckets of points of size \<open>b\<close>. This representation fixes the
-  bucket size to \<open>b = 1\<close>, a single point per Leaf. This is only a minor adjustment since the paper
-  proves that \<open>b = 1\<close> is the optimal bucket size for minimizing the runtime of the nearest neighbor
-  algorithm @{cite "DBLP:journals/toms/FriedmanBF77"}, only simplifies building the optimized
-  \<open>k\<close>-d trees @{cite "DBLP:journals/toms/FriedmanBF77"} and has little influence on the
-  search algorithm @{cite "DBLP:journals/cacm/Bentley75"}.
-\<close>
-
-type_synonym point = "real list"
-type_synonym axis = nat
-type_synonym dimension = nat
-type_synonym discriminator = real
-
-datatype kdt =
-  Leaf point
-| Node axis discriminator kdt kdt
-
-
-subsection "Definition of the \<open>k\<close>-d Tree Invariant and Related Functions"
-
-definition dim :: "point \<Rightarrow> nat"  where
-  "dim p = length p"
-
-declare dim_def[simp]
-
-fun set_kdt :: "kdt \<Rightarrow> point set" where
-  "set_kdt (Leaf p) = {p}"
-| "set_kdt (Node _ _ l r) = set_kdt l \<union> set_kdt r"
-
-fun invar :: "dimension \<Rightarrow> kdt \<Rightarrow> bool" where
-  "invar k (Leaf p) \<longleftrightarrow> dim p = k"
-| "invar k (Node a s l r) \<longleftrightarrow> (\<forall>p \<in> set_kdt l. p!a \<le> s) \<and> (\<forall>p \<in> set_kdt r. s \<le> p!a) \<and>
-    invar k l \<and> invar k r \<and> a < k \<and> set_kdt l \<inter> set_kdt r = {}"
-
-fun size_kdt :: "kdt \<Rightarrow> nat" where
-  "size_kdt (Leaf _) = 1"
-| "size_kdt (Node _ _ l r) = size_kdt l + size_kdt r"
-
-fun height :: "kdt \<Rightarrow> nat" where
-  "height (Leaf _) = 0"
-| "height (Node _ _ l r) = max (height l) (height r) + 1"
-
-fun min_height :: "kdt \<Rightarrow> nat" where
-  "min_height (Leaf _) = 0"
-| "min_height (Node _ _ l r) = min (min_height l) (min_height r) + 1"
-
-definition balanced :: "kdt \<Rightarrow> bool" where
-  "balanced kdt \<longleftrightarrow> height kdt - min_height kdt \<le> 1"
-
-fun complete :: "kdt \<Rightarrow> bool" where
-  "complete (Leaf _) = True"
-| "complete (Node _ _ l r) \<longleftrightarrow> complete l \<and> complete r \<and> height l = height r"
-
-
-lemma invar_l:
-  "invar k (Node a s l r) \<Longrightarrow> invar k l"
-  by simp
-
-lemma invar_r:
-  "invar k (Node a s l r) \<Longrightarrow> invar k r"
-  by simp
-
-lemma invar_axis_lt_d:
-  "invar k (Node a s l r) \<Longrightarrow> a < k"
-  by simp
-
-lemma invar_dim:
-  "invar k kdt \<Longrightarrow> \<forall>p \<in> set_kdt kdt. dim p = k"
-  by (induction kdt) auto
-
-lemma invar_l_le_a:
-  "invar k (Node a s l r) \<Longrightarrow> \<forall>p \<in> set_kdt l. p!a \<le> s"
-  by simp
-
-lemma invar_r_ge_a:
-  "invar k (Node a s l r) \<Longrightarrow> \<forall>p \<in> set_kdt r. s \<le> p!a"
-  by simp
-
-lemma invar_distinct:
-  "invar k kdt \<Longrightarrow> kdt = Node a s l r \<Longrightarrow> set_kdt l \<inter> set_kdt r = {}"
-  by fastforce
-
-lemma invar_set:
-  "set_kdt (Node a s l r) = set_kdt l \<union> set_kdt r"
-  by simp
-
-
-subsection "Lemmas adapted from \<open>HOL-Library.Tree\<close> to \<open>k\<close>-d Tree"
-
-lemma size_ge_0[simp]: 
-  "0 < size_kdt kdt"
-  by (induction kdt) auto
-
-lemma eq_size_1[simp]:
-  "size_kdt kdt = 1 \<longleftrightarrow> (\<exists>p. kdt = Leaf p)"
-  apply (induction kdt)
-  apply (auto)
-  using size_ge_0 nat_less_le apply blast+
-  done
-
-lemma eq_1_size[simp]:
-  "1 = size_kdt kdt \<longleftrightarrow> (\<exists>p. kdt = Leaf p)"
-  using eq_size_1 by metis
-
-lemma neq_Leaf_iff: 
-  "(\<nexists>p. kdt = Leaf p) = (\<exists>a s l r. kdt = Node a s l r)"
-  by (cases kdt) auto
-
-lemma eq_height_0[simp]: 
-  "height kdt = 0 \<longleftrightarrow> (\<exists>p. kdt = Leaf p)"
-  by (cases kdt) auto
-
-lemma eq_0_height[simp]: 
-  "0 = height kdt \<longleftrightarrow> (\<exists>p. kdt = Leaf p)"
-  by (cases kdt) auto
-
-lemma eq_min_height_0[simp]: 
-  "min_height kdt = 0 \<longleftrightarrow> (\<exists>p. kdt = Leaf p)"
-  by (cases kdt) auto
-
-lemma eq_0_min_height[simp]: 
-  "0 = min_height kdt \<longleftrightarrow> (\<exists>p. kdt = Leaf p)"
-  by (cases kdt) auto
-
-lemma size_height: 
-  "size_kdt kdt \<le> 2 ^ height kdt"
-proof(induction kdt)
-  case (Node a s l r)
-  show ?case
-  proof (cases "height l \<le> height r")
-    case True
-    have "size_kdt (Node a s l r) = size_kdt l + size_kdt r" by simp
-    also have "\<dots> \<le> 2 ^ height l + 2 ^ height r" using Node.IH by arith
-    also have "\<dots> \<le> 2 ^ height r + 2 ^ height r" using True by simp
-    also have "\<dots> = 2 ^ height (Node a s l r)"
-      using True by (auto simp: max_def mult_2)
-    finally show ?thesis .
-  next
-    case False
-    have "size_kdt (Node a s l r) = size_kdt l + size_kdt r" by simp
-    also have "\<dots> \<le> 2 ^ height l + 2 ^ height r" using Node.IH by arith
-    also have "\<dots> \<le> 2 ^ height l + 2 ^ height l" using False by simp
-    finally show ?thesis using False by (auto simp: max_def mult_2)
-  qed
-qed simp
-
-lemma min_height_le_height: 
-  "min_height kdt \<le> height kdt"
-  by (induction kdt) auto
-
-lemma min_height_size: 
-  "2 ^ min_height kdt \<le> size_kdt kdt"
-proof(induction kdt)
-  case (Node a s l r)
-  have "(2::nat) ^ min_height (Node a s l r) \<le> 2 ^ min_height l + 2 ^ min_height r"
-    by (simp add: min_def)
-  also have "\<dots> \<le> size_kdt (Node a s l r)" using Node.IH by simp
-  finally show ?case .
-qed simp
-
-lemma complete_iff_height: 
-  "complete kdt \<longleftrightarrow> (min_height kdt = height kdt)"
-  apply (induction kdt)
-  apply simp
-  apply (simp add: min_def max_def)
-  by (metis le_antisym le_trans min_height_le_height)
-
-lemma size_if_complete: 
-  "complete kdt \<Longrightarrow> size_kdt kdt = 2 ^ height kdt"
-  by (induction kdt) auto
-
-lemma complete_if_size_height: 
-  "size_kdt kdt = 2 ^ height kdt \<Longrightarrow> complete kdt"
-proof (induction "height kdt" arbitrary: kdt)
-  case 0 thus ?case by auto
-next
-  case (Suc h)
-  hence "\<nexists>p. kdt = Leaf p"
-    by auto
-  then obtain a s l r where [simp]: "kdt = Node a s l r"
-    using neq_Leaf_iff by auto
-  have 1: "height l \<le> h" and 2: "height r \<le> h" using Suc(2) by(auto)
-  have 3: "\<not> height l < h"
-  proof
-    assume 0: "height l < h"
-    have "size_kdt kdt = size_kdt l + size_kdt r" by simp
-    also have "\<dots> \<le> 2 ^ height l + 2 ^ height r"
-      using size_height[of l] size_height[of r] by arith
-    also have " \<dots> < 2 ^ h + 2 ^ height r" using 0 by (simp)
-    also have " \<dots> \<le> 2 ^ h + 2 ^ h" using 2 by (simp)
-    also have "\<dots> = 2 ^ (Suc h)" by (simp)
-    also have "\<dots> = size_kdt kdt" using Suc(2,3) by simp
-    finally have "size_kdt kdt < size_kdt kdt" .
-    thus False by (simp)
-  qed
-  have 4: "\<not> height r < h"
-  proof
-    assume 0: "height r < h"
-    have "size_kdt kdt = size_kdt l + size_kdt r" by simp
-    also have "\<dots> \<le> 2 ^ height l + 2 ^ height r"
-      using size_height[of l] size_height[of r] by arith
-    also have " \<dots> < 2 ^ height l + 2 ^ h" using 0 by (simp)
-    also have " \<dots> \<le> 2 ^ h + 2 ^ h" using 1 by (simp)
-    also have "\<dots> = 2 ^ (Suc h)" by (simp)
-    also have "\<dots> = size_kdt kdt" using Suc(2,3) by simp
-    finally have "size_kdt kdt < size_kdt kdt" .
-    thus False by (simp)
-  qed
-  from 1 2 3 4 have *: "height l = h" "height r = h" by linarith+
-  hence "size_kdt l = 2 ^ height l" "size_kdt r = 2 ^ height r"
-    using Suc(3) size_height[of l] size_height[of r] by (auto)
-  with * Suc(1) show ?case by simp
-qed
-
-lemma complete_if_size_min_height: 
-  "size_kdt kdt = 2 ^ min_height kdt \<Longrightarrow> complete kdt"
-proof (induct "min_height kdt" arbitrary: kdt)
-  case 0 thus ?case by auto
-next
-  case (Suc h)
-  hence "\<nexists>p. kdt = Leaf p"
-    by auto
-  then obtain a s l r where [simp]: "kdt = Node a s l r"
-    using neq_Leaf_iff by auto
-  have 1: "h \<le> min_height l" and 2: "h \<le> min_height r" using Suc(2) by (auto)
-  have 3: "\<not> h < min_height l"
-  proof
-    assume 0: "h < min_height l"
-    have "size_kdt kdt = size_kdt l + size_kdt r" by simp
-    also note min_height_size[of l]
-    also(xtrans) note min_height_size[of r]
-    also(xtrans) have "(2::nat) ^ min_height l > 2 ^ h"
-        using 0 by (simp add: diff_less_mono)
-    also(xtrans) have "(2::nat) ^ min_height r \<ge> 2 ^ h" using 2 by simp
-    also(xtrans) have "(2::nat) ^ h + 2 ^ h = 2 ^ (Suc h)" by (simp)
-    also have "\<dots> = size_kdt kdt" using Suc(2,3) by simp
-    finally show False by (simp add: diff_le_mono)
-  qed
-  have 4: "\<not> h < min_height r"
-  proof
-    assume 0: "h < min_height r"
-    have "size_kdt kdt = size_kdt l + size_kdt r" by simp
-    also note min_height_size[of l]
-    also(xtrans) note min_height_size[of r]
-    also(xtrans) have "(2::nat) ^ min_height r > 2 ^ h"
-        using 0 by (simp add: diff_less_mono)
-    also(xtrans) have "(2::nat) ^ min_height l \<ge> 2 ^ h" using 1 by simp
-    also(xtrans) have "(2::nat) ^ h + 2 ^ h = 2 ^ (Suc h)" by (simp)
-    also have "\<dots> = size_kdt kdt" using Suc(2,3) by simp
-    finally show False by (simp add: diff_le_mono)
-  qed
-  from 1 2 3 4 have *: "min_height l = h" "min_height r = h" by linarith+
-  hence "size_kdt l = 2 ^ min_height l" "size_kdt r = 2 ^ min_height r"
-    using Suc(3) min_height_size[of l] min_height_size[of r] by (auto)
-  with * Suc(1) show ?case
-    by (simp add: complete_iff_height)
-qed
-
-lemma complete_iff_size: 
-  "complete kdt \<longleftrightarrow> size_kdt kdt = 2 ^ height kdt"
-  using complete_if_size_height size_if_complete by blast
-
-lemma size_height_if_incomplete:
-  "\<not> complete kdt \<Longrightarrow> size_kdt kdt < 2 ^ height kdt"
-  by (meson antisym_conv complete_iff_size not_le size_height)
-
-lemma min_height_size_if_incomplete:
-  "\<not> complete kdt \<Longrightarrow> 2 ^ min_height kdt < size_kdt kdt"
-  by (metis complete_if_size_min_height le_less min_height_size)
-
-lemma balanced_subtreeL: 
-  "balanced (Node a s l r) \<Longrightarrow> balanced l"
-  by (simp add: balanced_def)
-
-lemma balanced_subtreeR: 
-  "balanced (Node a s l r) \<Longrightarrow> balanced r"
-  by (simp add: balanced_def)
-
-lemma balanced_optimal:
-  assumes "balanced kdt" "size_kdt kdt \<le> size_kdt kdt'"
-  shows "height kdt \<le> height kdt'"
-proof (cases "complete kdt")
-  case True
-  have "(2::nat) ^ height kdt \<le> 2 ^ height kdt'"
-  proof -
-    have "2 ^ height kdt = size_kdt kdt"
-      using True by (simp add: complete_iff_height size_if_complete)
-    also have "\<dots> \<le> size_kdt kdt'" using assms(2) by simp
-    also have "\<dots> \<le> 2 ^ height kdt'" by (rule size_height)
-    finally show ?thesis .
-  qed
-  thus ?thesis by (simp)
-next
-  case False
-  have "(2::nat) ^ min_height kdt < 2 ^ height kdt'"
-  proof -
-    have "(2::nat) ^ min_height kdt < size_kdt kdt"
-      by(rule min_height_size_if_incomplete[OF False])
-    also have "\<dots> \<le> size_kdt kdt'" using assms(2) by simp
-    also have "\<dots> \<le> 2 ^ height kdt'"  by(rule size_height)
-    finally have "(2::nat) ^ min_height kdt < (2::nat) ^ height kdt'" .
-    thus ?thesis .
-  qed
-  hence *: "min_height kdt < height kdt'" by simp
-  have "min_height kdt + 1 = height kdt"
-    using min_height_le_height[of kdt] assms(1) False
-    by (simp add: complete_iff_height balanced_def)
-  with * show ?thesis by arith
-qed
-
-
-subsection "Lemmas adapted from \<open>HOL-Library.Tree_Real\<close> to \<open>k\<close>-d Tree"
-
-lemma size_height_log: 
-  "log 2 (size_kdt kdt) \<le> height kdt"
-  by (simp add: log2_of_power_le size_height)
-
-lemma min_height_size_log: 
-  "min_height kdt \<le> log 2 (size_kdt kdt)"
-  by (simp add: le_log2_of_power min_height_size)
-
-lemma size_log_if_complete: 
-  "complete kdt \<Longrightarrow> height kdt = log 2 (size_kdt kdt)"
-  by (simp add: size_if_complete)
-
-lemma min_height_size_log_if_incomplete:
-  "\<not> complete kdt \<Longrightarrow> min_height kdt < log 2 (size_kdt kdt)"
-  by (simp add: less_log2_of_power min_height_size_if_incomplete)
-
-lemma min_height_balanced: 
-  assumes "balanced kdt"
-  shows "min_height kdt = nat(floor(log 2 (size_kdt kdt)))"
-proof cases
-  assume *: "complete kdt"
-  hence "size_kdt kdt = 2 ^ min_height kdt"
-    by (simp add: complete_iff_height size_if_complete)
-  from log2_of_power_eq[OF this] show ?thesis by linarith
-next
-  assume *: "\<not> complete kdt"
-  hence "height kdt = min_height kdt + 1"
-    using assms min_height_le_height[of kdt]
-    by(auto simp add: balanced_def complete_iff_height)
-  hence "size_kdt kdt < 2 ^ (min_height kdt + 1)"
-    by (metis * size_height_if_incomplete)
-  hence "log 2 (size_kdt kdt) < min_height kdt + 1"
-    using log2_of_power_less size_ge_0 by blast
-  thus ?thesis using min_height_size_log[of kdt] by linarith
-qed
-
-lemma height_balanced: 
-  assumes "balanced kdt"
-  shows "height kdt = nat(ceiling(log 2 (size_kdt kdt)))"
-proof cases
-  assume *: "complete kdt"
-  hence "size_kdt kdt = 2 ^ height kdt"
-    by (simp add: size_if_complete)
-  from log2_of_power_eq[OF this] show ?thesis
-    by linarith
-next
-  assume *: "\<not> complete kdt"
-  hence **: "height kdt = min_height kdt + 1"
-    using assms min_height_le_height[of kdt]
-    by(auto simp add: balanced_def complete_iff_height)
-  hence "size_kdt kdt \<le> 2 ^ (min_height kdt + 1)" by (metis size_height)
-  from  log2_of_power_le[OF this size_ge_0] min_height_size_log_if_incomplete[OF *] **
-  show ?thesis by linarith
-qed
-
-lemma balanced_Node_if_wbal1:
-  assumes "balanced l" "balanced r" "size_kdt l = size_kdt r + 1"
-  shows "balanced (Node a s l r)"
-proof -
-  from assms(3) have [simp]: "size_kdt l = size_kdt r + 1" by simp
-  have "nat \<lceil>log 2 (1 + size_kdt r)\<rceil> \<ge> nat \<lceil>log 2 (size_kdt r)\<rceil>"
-    by(rule nat_mono[OF ceiling_mono]) simp
-  hence 1: "height(Node a s l r) = nat \<lceil>log 2 (1 + size_kdt r)\<rceil> + 1"
-    using height_balanced[OF assms(1)] height_balanced[OF assms(2)]
-    by (simp del: nat_ceiling_le_eq add: max_def)
-  have "nat \<lfloor>log 2 (1 + size_kdt r)\<rfloor> \<ge> nat \<lfloor>log 2 (size_kdt r)\<rfloor>"
-    by(rule nat_mono[OF floor_mono]) simp
-  hence 2: "min_height(Node a s l r) = nat \<lfloor>log 2 (size_kdt r)\<rfloor> + 1"
-    using min_height_balanced[OF assms(1)] min_height_balanced[OF assms(2)]
-    by (simp)
-  have "size_kdt r \<ge> 1" by (simp add: Suc_leI)
-  then obtain i where i: "2 ^ i \<le> size_kdt r" "size_kdt r < 2 ^ (i + 1)"
-    using ex_power_ivl1[of 2 "size_kdt r"] by auto
-  hence i1: "2 ^ i < size_kdt r + 1" "size_kdt r + 1 \<le> 2 ^ (i + 1)" by auto
-  from 1 2 floor_log_nat_eq_if[OF i] ceiling_log_nat_eq_if[OF i1]
-  show ?thesis by(simp add:balanced_def)
-qed
-
-lemma balanced_sym: 
-  "balanced (Node a s l r) \<Longrightarrow> balanced (Node a' s' r l)"
-  by (auto simp: balanced_def)
-
-lemma balanced_Node_if_wbal2:
-  assumes "balanced l" "balanced r" "abs(int(size_kdt l) - int(size_kdt r)) \<le> 1"
-  shows "balanced (Node a s l r)"
-proof -
-  have "size_kdt l = size_kdt r \<or> (size_kdt l = size_kdt r + 1 \<or> size_kdt r = size_kdt l + 1)" (is "?A \<or> ?B")
-    using assms(3) by linarith
-  thus ?thesis
-  proof
-    assume "?A"
-    thus ?thesis using assms(1,2)
-      apply(simp add: balanced_def min_def max_def)
-      by (metis assms(1,2) balanced_optimal le_antisym le_less)
-  next
-    assume "?B"
-    thus ?thesis
-      by (meson assms(1,2) balanced_sym balanced_Node_if_wbal1)
-  qed
-qed
-
-end
\ No newline at end of file
diff --git a/thys/KD_Tree/KD_Tree.thy b/thys/KD_Tree/KD_Tree.thy
new file mode 100644
--- /dev/null
+++ b/thys/KD_Tree/KD_Tree.thy
@@ -0,0 +1,434 @@
+(*
+  File:     KD_Tree.thy
+  Author:   Martin Rau, TU München
+*)
+
+section "Definition of the \<open>k\<close>-d Tree"
+
+theory KD_Tree
+imports
+  Complex_Main
+  "HOL-Analysis.Finite_Cartesian_Product"
+  "HOL-Analysis.Topology_Euclidean_Space"
+begin
+
+
+text \<open>
+  A \<open>k\<close>-d tree is a space-partitioning data structure for organizing points in a \<open>k\<close>-dimensional space.
+  In principle the \<open>k\<close>-d tree is a binary tree. The leafs hold the \<open>k\<close>-dimensional points and the nodes
+  contain left and right subtrees as well as a discriminator \<open>v\<close> at a particular axis \<open>k\<close>.
+  Every node divides the space into two parts by splitting along a hyperplane.
+  Consider a node \<open>n\<close> with associated discriminator \<open>v\<close> at axis \<open>k\<close>.
+  All points in the left subtree must have a value at axis \<open>k\<close> that is less than or
+  equal to \<open>v\<close> and all points in the right subtree must have a value at axis \<open>k\<close> that is
+  greater than \<open>v\<close>.
+
+  Deviations from the papers:
+
+  The chosen tree representation is taken from @{cite "DBLP:journals/toms/FriedmanBF77"} with one minor
+  adjustment. Originally the leafs hold buckets of points of size \<open>b\<close>. This representation fixes the
+  bucket size to \<open>b = 1\<close>, a single point per Leaf. This is only a minor adjustment since the paper
+  proves that \<open>b = 1\<close> is the optimal bucket size for minimizing the running time of the nearest neighbor
+  algorithm @{cite "DBLP:journals/toms/FriedmanBF77"}, only simplifies building the optimized
+  \<open>k\<close>-d trees @{cite "DBLP:journals/toms/FriedmanBF77"} and has little influence on the
+  search algorithm @{cite "DBLP:journals/cacm/Bentley75"}.
+\<close>
+
+type_synonym 'k point = "(real, 'k) vec"
+
+lemma dist_point_def:
+  fixes p\<^sub>0 :: "('k::finite) point"
+  shows "dist p\<^sub>0 p\<^sub>1 = sqrt (\<Sum>k \<in> UNIV. (p\<^sub>0$k - p\<^sub>1$k)\<^sup>2)"
+  unfolding dist_vec_def L2_set_def dist_real_def by simp
+
+datatype 'k kdt =
+  Leaf "'k point"
+| Node 'k real "'k kdt" "'k kdt"
+
+
+subsection \<open>Definition of the \<open>k\<close>-d Tree Invariant and Related Functions\<close>
+
+fun set_kdt :: "'k kdt \<Rightarrow> ('k point) set" where
+  "set_kdt (Leaf p) = { p }"
+| "set_kdt (Node _ _ l r) = set_kdt l \<union> set_kdt r"
+
+definition spread :: "('k::finite) \<Rightarrow> 'k point set \<Rightarrow> real" where
+  "spread k P = (if P = {} then 0 else let V = (\<lambda>p. p$k) ` P in Max V - Min V)"
+
+definition widest_spread_axis :: "('k::finite) \<Rightarrow> 'k set \<Rightarrow> 'k point set \<Rightarrow> bool" where
+  "widest_spread_axis k K ps \<longleftrightarrow> (\<forall>k' \<in> K. spread k' ps \<le> spread k ps)"
+
+fun invar :: "('k::finite) kdt \<Rightarrow> bool" where
+  "invar (Leaf p) \<longleftrightarrow> True"
+| "invar (Node k v l r) \<longleftrightarrow> (\<forall>p \<in> set_kdt l. p$k \<le> v) \<and> (\<forall>p \<in> set_kdt r. v < p$k) \<and>
+    widest_spread_axis k UNIV (set_kdt l \<union> set_kdt r) \<and> invar l \<and> invar r"
+
+fun size_kdt :: "'k kdt \<Rightarrow> nat" where
+  "size_kdt (Leaf _) = 1"
+| "size_kdt (Node _ _ l r) = size_kdt l + size_kdt r"
+
+fun height :: "'k kdt \<Rightarrow> nat" where
+  "height (Leaf _) = 0"
+| "height (Node _ _ l r) = max (height l) (height r) + 1"
+
+fun min_height :: "'k kdt \<Rightarrow> nat" where
+  "min_height (Leaf _) = 0"
+| "min_height (Node _ _ l r) = min (min_height l) (min_height r) + 1"
+
+definition balanced :: "'k kdt \<Rightarrow> bool" where
+  "balanced kdt \<longleftrightarrow> height kdt - min_height kdt \<le> 1"
+
+fun complete :: "'k kdt \<Rightarrow> bool" where
+  "complete (Leaf _) = True"
+| "complete (Node _ _ l r) \<longleftrightarrow> complete l \<and> complete r \<and> height l = height r"
+
+
+lemma invar_l:
+  "invar (Node k v l r) \<Longrightarrow> invar l"
+  by simp
+
+lemma invar_r:
+  "invar (Node k v l r) \<Longrightarrow> invar r"
+  by simp
+
+lemma invar_l_le_k:
+  "invar (Node k v l r) \<Longrightarrow> \<forall>p \<in> set_kdt l. p$k \<le> v"
+  by simp
+
+lemma invar_r_ge_k:
+  "invar (Node k v l r) \<Longrightarrow> \<forall>p \<in> set_kdt r. v < p$k"
+  by simp
+
+lemma invar_set:
+  "set_kdt (Node k v l r) = set_kdt l \<union> set_kdt r"
+  by simp
+
+
+subsection \<open>Lemmas adapted from \<open>HOL-Library.Tree\<close> to \<open>k\<close>-d Tree\<close>
+
+lemma size_ge0[simp]:
+  "0 < size_kdt kdt"
+  by (induction kdt) auto
+
+lemma eq_size_1[simp]:
+  "size_kdt kdt = 1 \<longleftrightarrow> (\<exists>p. kdt = Leaf p)"
+  apply (induction kdt)
+  apply (auto)
+  using size_ge0 nat_less_le apply blast+
+  done
+
+lemma eq_1_size[simp]:
+  "1 = size_kdt kdt \<longleftrightarrow> (\<exists>p. kdt = Leaf p)"
+  using eq_size_1 by metis
+
+lemma neq_Leaf_iff:
+  "(\<nexists>p. kdt = Leaf p) = (\<exists>k v l r. kdt = Node k v l r)"
+  by (cases kdt) auto
+
+lemma eq_height_0[simp]:
+  "height kdt = 0 \<longleftrightarrow> (\<exists>p. kdt = Leaf p)"
+  by (cases kdt) auto
+
+lemma eq_0_height[simp]:
+  "0 = height kdt \<longleftrightarrow> (\<exists>p. kdt = Leaf p)"
+  by (cases kdt) auto
+
+lemma eq_min_height_0[simp]:
+  "min_height kdt = 0 \<longleftrightarrow> (\<exists>p. kdt = Leaf p)"
+  by (cases kdt) auto
+
+lemma eq_0_min_height[simp]:
+  "0 = min_height kdt \<longleftrightarrow> (\<exists>p. kdt = Leaf p)"
+  by (cases kdt) auto
+
+lemma size_height:
+  "size_kdt kdt \<le> 2 ^ height kdt"
+proof(induction kdt)
+  case (Node k v l r)
+  show ?case
+  proof (cases "height l \<le> height r")
+    case True
+    have "size_kdt (Node k v l r) = size_kdt l + size_kdt r" by simp
+    also have "\<dots> \<le> 2 ^ height l + 2 ^ height r" using Node.IH by arith
+    also have "\<dots> \<le> 2 ^ height r + 2 ^ height r" using True by simp
+    also have "\<dots> = 2 ^ height (Node k v l r)"
+      using True by (auto simp: max_def mult_2)
+    finally show ?thesis .
+  next
+    case False
+    have "size_kdt (Node k v l r) = size_kdt l + size_kdt r" by simp
+    also have "\<dots> \<le> 2 ^ height l + 2 ^ height r" using Node.IH by arith
+    also have "\<dots> \<le> 2 ^ height l + 2 ^ height l" using False by simp
+    finally show ?thesis using False by (auto simp: max_def mult_2)
+  qed
+qed simp
+
+lemma min_height_le_height:
+  "min_height kdt \<le> height kdt"
+  by (induction kdt) auto
+
+lemma min_height_size:
+  "2 ^ min_height kdt \<le> size_kdt kdt"
+proof(induction kdt)
+  case (Node k v l r)
+  have "(2::nat) ^ min_height (Node k v l r) \<le> 2 ^ min_height l + 2 ^ min_height r"
+    by (simp add: min_def)
+  also have "\<dots> \<le> size_kdt (Node k v l r)" using Node.IH by simp
+  finally show ?case .
+qed simp
+
+lemma complete_iff_height:
+  "complete kdt \<longleftrightarrow> (min_height kdt = height kdt)"
+  apply (induction kdt)
+  apply simp
+  apply (simp add: min_def max_def)
+  by (metis le_antisym le_trans min_height_le_height)
+
+lemma size_if_complete:
+  "complete kdt \<Longrightarrow> size_kdt kdt = 2 ^ height kdt"
+  by (induction kdt) auto
+
+lemma complete_if_size_height:
+  "size_kdt kdt = 2 ^ height kdt \<Longrightarrow> complete kdt"
+proof (induction "height kdt" arbitrary: kdt)
+  case 0 thus ?case by auto
+next
+  case (Suc h)
+  hence "\<nexists>p. kdt = Leaf p"
+    by auto
+  then obtain k v l r where [simp]: "kdt = Node k v l r"
+    using neq_Leaf_iff by metis
+  have 1: "height l \<le> h" and 2: "height r \<le> h" using Suc(2) by(auto)
+  have 3: "\<not> height l < h"
+  proof
+    assume 0: "height l < h"
+    have "size_kdt kdt = size_kdt l + size_kdt r" by simp
+    also have "\<dots> \<le> 2 ^ height l + 2 ^ height r"
+      using size_height[of l] size_height[of r] by arith
+    also have " \<dots> < 2 ^ h + 2 ^ height r" using 0 by (simp)
+    also have " \<dots> \<le> 2 ^ h + 2 ^ h" using 2 by (simp)
+    also have "\<dots> = 2 ^ (Suc h)" by (simp)
+    also have "\<dots> = size_kdt kdt" using Suc(2,3) by simp
+    finally have "size_kdt kdt < size_kdt kdt" .
+    thus False by (simp)
+  qed
+  have 4: "\<not> height r < h"
+  proof
+    assume 0: "height r < h"
+    have "size_kdt kdt = size_kdt l + size_kdt r" by simp
+    also have "\<dots> \<le> 2 ^ height l + 2 ^ height r"
+      using size_height[of l] size_height[of r] by arith
+    also have " \<dots> < 2 ^ height l + 2 ^ h" using 0 by (simp)
+    also have " \<dots> \<le> 2 ^ h + 2 ^ h" using 1 by (simp)
+    also have "\<dots> = 2 ^ (Suc h)" by (simp)
+    also have "\<dots> = size_kdt kdt" using Suc(2,3) by simp
+    finally have "size_kdt kdt < size_kdt kdt" .
+    thus False by (simp)
+  qed
+  from 1 2 3 4 have *: "height l = h" "height r = h" by linarith+
+  hence "size_kdt l = 2 ^ height l" "size_kdt r = 2 ^ height r"
+    using Suc(3) size_height[of l] size_height[of r] by (auto)
+  with * Suc(1) show ?case by simp
+qed
+
+lemma complete_if_size_min_height:
+  "size_kdt kdt = 2 ^ min_height kdt \<Longrightarrow> complete kdt"
+proof (induct "min_height kdt" arbitrary: kdt)
+  case 0 thus ?case by auto
+next
+  case (Suc h)
+  hence "\<nexists>p. kdt = Leaf p"
+    by auto
+  then obtain k v l r where [simp]: "kdt = Node k v l r"
+    using neq_Leaf_iff by metis
+  have 1: "h \<le> min_height l" and 2: "h \<le> min_height r" using Suc(2) by (auto)
+  have 3: "\<not> h < min_height l"
+  proof
+    assume 0: "h < min_height l"
+    have "size_kdt kdt = size_kdt l + size_kdt r" by simp
+    also note min_height_size[of l]
+    also(xtrans) note min_height_size[of r]
+    also(xtrans) have "(2::nat) ^ min_height l > 2 ^ h"
+        using 0 by (simp add: diff_less_mono)
+    also(xtrans) have "(2::nat) ^ min_height r \<ge> 2 ^ h" using 2 by simp
+    also(xtrans) have "(2::nat) ^ h + 2 ^ h = 2 ^ (Suc h)" by (simp)
+    also have "\<dots> = size_kdt kdt" using Suc(2,3) by simp
+    finally show False by (simp add: diff_le_mono)
+  qed
+  have 4: "\<not> h < min_height r"
+  proof
+    assume 0: "h < min_height r"
+    have "size_kdt kdt = size_kdt l + size_kdt r" by simp
+    also note min_height_size[of l]
+    also(xtrans) note min_height_size[of r]
+    also(xtrans) have "(2::nat) ^ min_height r > 2 ^ h"
+        using 0 by (simp add: diff_less_mono)
+    also(xtrans) have "(2::nat) ^ min_height l \<ge> 2 ^ h" using 1 by simp
+    also(xtrans) have "(2::nat) ^ h + 2 ^ h = 2 ^ (Suc h)" by (simp)
+    also have "\<dots> = size_kdt kdt" using Suc(2,3) by simp
+    finally show False by (simp add: diff_le_mono)
+  qed
+  from 1 2 3 4 have *: "min_height l = h" "min_height r = h" by linarith+
+  hence "size_kdt l = 2 ^ min_height l" "size_kdt r = 2 ^ min_height r"
+    using Suc(3) min_height_size[of l] min_height_size[of r] by (auto)
+  with * Suc(1) show ?case
+    by (simp add: complete_iff_height)
+qed
+
+lemma complete_iff_size:
+  "complete kdt \<longleftrightarrow> size_kdt kdt = 2 ^ height kdt"
+  using complete_if_size_height size_if_complete by blast
+
+lemma size_height_if_incomplete:
+  "\<not> complete kdt \<Longrightarrow> size_kdt kdt < 2 ^ height kdt"
+  by (meson antisym_conv complete_iff_size not_le size_height)
+
+lemma min_height_size_if_incomplete:
+  "\<not> complete kdt \<Longrightarrow> 2 ^ min_height kdt < size_kdt kdt"
+  by (metis complete_if_size_min_height le_less min_height_size)
+
+lemma balanced_subtreeL:
+  "balanced (Node k v l r) \<Longrightarrow> balanced l"
+  by (simp add: balanced_def)
+
+lemma balanced_subtreeR:
+  "balanced (Node k v l r) \<Longrightarrow> balanced r"
+  by (simp add: balanced_def)
+
+lemma balanced_optimal:
+  assumes "balanced kdt" "size_kdt kdt \<le> size_kdt kdt'"
+  shows "height kdt \<le> height kdt'"
+proof (cases "complete kdt")
+  case True
+  have "(2::nat) ^ height kdt \<le> 2 ^ height kdt'"
+  proof -
+    have "2 ^ height kdt = size_kdt kdt"
+      using True by (simp add: complete_iff_height size_if_complete)
+    also have "\<dots> \<le> size_kdt kdt'" using assms(2) by simp
+    also have "\<dots> \<le> 2 ^ height kdt'" by (rule size_height)
+    finally show ?thesis .
+  qed
+  thus ?thesis by (simp)
+next
+  case False
+  have "(2::nat) ^ min_height kdt < 2 ^ height kdt'"
+  proof -
+    have "(2::nat) ^ min_height kdt < size_kdt kdt"
+      by(rule min_height_size_if_incomplete[OF False])
+    also have "\<dots> \<le> size_kdt kdt'" using assms(2) by simp
+    also have "\<dots> \<le> 2 ^ height kdt'"  by(rule size_height)
+    finally have "(2::nat) ^ min_height kdt < (2::nat) ^ height kdt'" .
+    thus ?thesis .
+  qed
+  hence *: "min_height kdt < height kdt'" by simp
+  have "min_height kdt + 1 = height kdt"
+    using min_height_le_height[of kdt] assms(1) False
+    by (simp add: complete_iff_height balanced_def)
+  with * show ?thesis by arith
+qed
+
+
+subsection \<open>Lemmas adapted from \<open>HOL-Library.Tree_Real\<close> to \<open>k\<close>-d Tree\<close>
+
+lemma size_height_log:
+  "log 2 (size_kdt kdt) \<le> height kdt"
+  by (simp add: log2_of_power_le size_height)
+
+lemma min_height_size_log:
+  "min_height kdt \<le> log 2 (size_kdt kdt)"
+  by (simp add: le_log2_of_power min_height_size)
+
+lemma size_log_if_complete:
+  "complete kdt \<Longrightarrow> height kdt = log 2 (size_kdt kdt)"
+  using complete_iff_size log2_of_power_eq by blast
+
+lemma min_height_size_log_if_incomplete:
+  "\<not> complete kdt \<Longrightarrow> min_height kdt < log 2 (size_kdt kdt)"
+  by (simp add: less_log2_of_power min_height_size_if_incomplete)
+
+lemma min_height_balanced:
+  assumes "balanced kdt"
+  shows "min_height kdt = nat(floor(log 2 (size_kdt kdt)))"
+proof cases
+  assume *: "complete kdt"
+  hence "size_kdt kdt = 2 ^ min_height kdt"
+    by (simp add: complete_iff_height size_if_complete)
+  from log2_of_power_eq[OF this] show ?thesis by linarith
+next
+  assume *: "\<not> complete kdt"
+  hence "height kdt = min_height kdt + 1"
+    using assms min_height_le_height[of kdt]
+    by(auto simp add: balanced_def complete_iff_height)
+  hence "size_kdt kdt < 2 ^ (min_height kdt + 1)"
+    by (metis * size_height_if_incomplete)
+  hence "log 2 (size_kdt kdt) < min_height kdt + 1"
+    using log2_of_power_less size_ge0 by blast
+  thus ?thesis using min_height_size_log[of kdt] by linarith
+qed
+
+lemma height_balanced:
+  assumes "balanced kdt"
+  shows "height kdt = nat(ceiling(log 2 (size_kdt kdt)))"
+proof cases
+  assume *: "complete kdt"
+  hence "size_kdt kdt = 2 ^ height kdt"
+    by (simp add: size_if_complete)
+  from log2_of_power_eq[OF this] show ?thesis
+    by linarith
+next
+  assume *: "\<not> complete kdt"
+  hence **: "height kdt = min_height kdt + 1"
+    using assms min_height_le_height[of kdt]
+    by(auto simp add: balanced_def complete_iff_height)
+  hence "size_kdt kdt \<le> 2 ^ (min_height kdt + 1)" by (metis size_height)
+  from  log2_of_power_le[OF this size_ge0] min_height_size_log_if_incomplete[OF *] **
+  show ?thesis by linarith
+qed
+
+lemma balanced_Node_if_wbal1:
+  assumes "balanced l" "balanced r" "size_kdt l = size_kdt r + 1"
+  shows "balanced (Node k v l r)"
+proof -
+  from assms(3) have [simp]: "size_kdt l = size_kdt r + 1" by simp
+  have "nat \<lceil>log 2 (1 + size_kdt r)\<rceil> \<ge> nat \<lceil>log 2 (size_kdt r)\<rceil>"
+    by(rule nat_mono[OF ceiling_mono]) simp
+  hence 1: "height(Node k v l r) = nat \<lceil>log 2 (1 + size_kdt r)\<rceil> + 1"
+    using height_balanced[OF assms(1)] height_balanced[OF assms(2)]
+    by (simp del: nat_ceiling_le_eq add: max_def)
+  have "nat \<lfloor>log 2 (1 + size_kdt r)\<rfloor> \<ge> nat \<lfloor>log 2 (size_kdt r)\<rfloor>"
+    by(rule nat_mono[OF floor_mono]) simp
+  hence 2: "min_height(Node k v l r) = nat \<lfloor>log 2 (size_kdt r)\<rfloor> + 1"
+    using min_height_balanced[OF assms(1)] min_height_balanced[OF assms(2)]
+    by (simp)
+  have "size_kdt r \<ge> 1" by (simp add: Suc_leI)
+  then obtain i where i: "2 ^ i \<le> size_kdt r" "size_kdt r < 2 ^ (i + 1)"
+    using ex_power_ivl1[of 2 "size_kdt r"] by auto
+  hence i1: "2 ^ i < size_kdt r + 1" "size_kdt r + 1 \<le> 2 ^ (i + 1)" by auto
+  from 1 2 floor_log_nat_eq_if[OF i] ceiling_log_nat_eq_if[OF i1]
+  show ?thesis by(simp add:balanced_def)
+qed
+
+lemma balanced_sym:
+  "balanced (Node k v l r) \<Longrightarrow> balanced (Node k' v' r l)"
+  by (auto simp: balanced_def)
+
+lemma balanced_Node_if_wbal2:
+  assumes "balanced l" "balanced r" "abs(int(size_kdt l) - int(size_kdt r)) \<le> 1"
+  shows "balanced (Node k v l r)"
+proof -
+  have "size_kdt l = size_kdt r \<or> (size_kdt l = size_kdt r + 1 \<or> size_kdt r = size_kdt l + 1)" (is "?A \<or> ?B")
+    using assms(3) by linarith
+  thus ?thesis
+  proof
+    assume "?A"
+    thus ?thesis using assms(1,2)
+      apply(simp add: balanced_def min_def max_def)
+      by (metis assms(1,2) balanced_optimal le_antisym le_less)
+  next
+    assume "?B"
+    thus ?thesis
+      by (meson assms(1,2) balanced_sym balanced_Node_if_wbal1)
+  qed
+qed
+
+end