diff --git a/src/HOL/Data_Structures/AVL_Map.thy b/src/HOL/Data_Structures/AVL_Map.thy --- a/src/HOL/Data_Structures/AVL_Map.thy +++ b/src/HOL/Data_Structures/AVL_Map.thy @@ -1,201 +1,186 @@ (* Author: Tobias Nipkow *) section "AVL Tree Implementation of Maps" theory AVL_Map imports AVL_Set Lookup2 begin fun update :: "'a::linorder \ 'b \ ('a*'b) avl_tree \ ('a*'b) avl_tree" where "update x y Leaf = Node Leaf ((x,y), 1) Leaf" | "update x y (Node l ((a,b), h) r) = (case cmp x a of EQ \ Node l ((x,y), h) r | LT \ balL (update x y l) (a,b) r | GT \ balR l (a,b) (update x y r))" fun delete :: "'a::linorder \ ('a*'b) avl_tree \ ('a*'b) avl_tree" where "delete _ Leaf = Leaf" | "delete x (Node l ((a,b), h) r) = (case cmp x a of EQ \ if l = Leaf then r else let (l', ab') = split_max l in balR l' ab' r | LT \ balR (delete x l) (a,b) r | GT \ balL l (a,b) (delete x r))" subsection \Functional Correctness\ theorem inorder_update: "sorted1(inorder t) \ inorder(update x y t) = upd_list x y (inorder t)" by (induct t) (auto simp: upd_list_simps inorder_balL inorder_balR) theorem inorder_delete: "sorted1(inorder t) \ inorder (delete x t) = del_list x (inorder t)" by(induction t) (auto simp: del_list_simps inorder_balL inorder_balR inorder_split_maxD split: prod.splits) subsection \AVL invariants\ subsubsection \Insertion maintains AVL balance\ theorem avl_update: assumes "avl t" shows "avl(update x y t)" "(height (update x y t) = height t \ height (update x y t) = height t + 1)" using assms proof (induction x y t rule: update.induct) case eq2: (2 x y l a b h r) case 1 show ?case proof(cases "x = a") case True with eq2 1 show ?thesis by simp next case False with eq2 1 show ?thesis proof(cases "xx\a\ show ?thesis by (auto simp add:avl_balR) + case False with eq2 1 \x\a\ show ?thesis by (auto intro!: avl_balR) qed qed case 2 show ?case proof(cases "x = a") case True with eq2 1 show ?thesis by simp next case False show ?thesis proof(cases "xx < a\ show ?thesis by (auto simp: height_balL2) next case True hence "(height (balL (update x y l) (a,b) r) = height r + 2) \ (height (balL (update x y l) (a,b) r) = height r + 3)" (is "?A \ ?B") - using eq2 2 \x by (intro height_balL) simp_all + using eq2 2 \x height_balL[OF _ _ True] by simp thus ?thesis proof assume ?A with 2 \x < a\ show ?thesis by (auto) next assume ?B with True 1 eq2(2) \x < a\ show ?thesis by (simp) arith qed qed next case False show ?thesis proof(cases "height (update x y r) = height l + 2") case False with eq2 2 \\x < a\ show ?thesis by (auto simp: height_balR2) next case True hence "(height (balR l (a,b) (update x y r)) = height l + 2) \ (height (balR l (a,b) (update x y r)) = height l + 3)" (is "?A \ ?B") - using eq2 2 \\x < a\ \x \ a\ by (intro height_balR) simp_all + using eq2 2 \\x < a\ \x \ a\ height_balR[OF _ _ True] by simp thus ?thesis proof assume ?A with 2 \\x < a\ show ?thesis by (auto) next assume ?B with True 1 eq2(4) \\x < a\ show ?thesis by (simp) arith qed qed qed qed qed simp_all subsubsection \Deletion maintains AVL balance\ theorem avl_delete: assumes "avl t" shows "avl(delete x t)" and "height t = (height (delete x t)) \ height t = height (delete x t) + 1" using assms proof (induct t rule: tree2_induct) - case (Node l n h r) - obtain a b where [simp]: "n = (a,b)" by fastforce + case (Node l ab h r) + obtain a b where [simp]: "ab = (a,b)" by fastforce case 1 show ?case proof(cases "x = a") case True with Node 1 show ?thesis - using avl_split_max[of l] by (auto simp: avl_balR split: prod.split) + using avl_split_max[of l] by (auto intro!: avl_balR split: prod.split) next case False show ?thesis proof(cases "xx\a\ show ?thesis by (auto simp add:avl_balL) + case False with Node 1 \x\a\ show ?thesis by (auto intro!: avl_balL) qed qed case 2 show ?case proof(cases "x = a") case True then show ?thesis using 1 avl_split_max[of l] by(auto simp: balR_def max_absorb2 split!: if_splits prod.split tree.split) next case False show ?thesis proof(cases "xx < a\ show ?thesis by(auto simp: balR_def) next case True - hence "(height (balR (delete x l) n r) = height (delete x l) + 2) \ - height (balR (delete x l) n r) = height (delete x l) + 3" (is "?A \ ?B") - using Node 2 by (intro height_balR) auto - thus ?thesis - proof - assume ?A with \x < a\ Node 2 show ?thesis by(auto simp: balR_def) - next - assume ?B with \x < a\ Node 2 show ?thesis by(auto simp: balR_def) - qed + thus ?thesis using height_balR[OF _ _ True, of ab] 2 Node(1,2) \x < a\ by simp linarith qed next case False show ?thesis proof(cases "height l = height (delete x r) + 2") case False with Node 1 \\x < a\ \x \ a\ show ?thesis by(auto simp: balL_def) next - case True - hence "(height (balL l n (delete x r)) = height (delete x r) + 2) \ - height (balL l n (delete x r)) = height (delete x r) + 3" (is "?A \ ?B") - using Node 2 by (intro height_balL) auto - thus ?thesis - proof - assume ?A with \\x < a\ \x \ a\ Node 2 show ?thesis by(auto simp: balL_def) - next - assume ?B with \\x < a\ \x \ a\ Node 2 show ?thesis by(auto simp: balL_def) - qed + case True + thus ?thesis + using height_balL[OF _ _ True, of ab] 2 Node(3,4) \\x < a\ \x \ a\ by auto qed qed qed qed simp_all interpretation M: Map_by_Ordered where empty = empty and lookup = lookup and update = update and delete = delete and inorder = inorder and inv = avl proof (standard, goal_cases) case 1 show ?case by (simp add: empty_def) next case 2 thus ?case by(simp add: lookup_map_of) next case 3 thus ?case by(simp add: inorder_update) next case 4 thus ?case by(simp add: inorder_delete) next case 5 show ?case by (simp add: empty_def) next case 6 thus ?case by(simp add: avl_update(1)) next case 7 thus ?case by(simp add: avl_delete(1)) qed end diff --git a/src/HOL/Data_Structures/AVL_Set.thy b/src/HOL/Data_Structures/AVL_Set.thy --- a/src/HOL/Data_Structures/AVL_Set.thy +++ b/src/HOL/Data_Structures/AVL_Set.thy @@ -1,352 +1,358 @@ -(* -Author: Tobias Nipkow -*) +(* Author: Tobias Nipkow *) -subsection \Invariant expressed via 3-way cases\ +subsection \Invariant\ theory AVL_Set imports AVL_Set_Code "HOL-Number_Theory.Fib" begin fun avl :: "'a avl_tree \ bool" where "avl Leaf = True" | "avl (Node l (a,n) r) = - ((height l = height r \ height l = height r + 1 \ height r = height l + 1) \ + (abs(int(height l) - int(height r)) \ 1 \ n = max (height l) (height r) + 1 \ avl l \ avl r)" - subsubsection \Insertion maintains AVL balance\ declare Let_def [simp] lemma ht_height[simp]: "avl t \ ht t = height t" by (cases t rule: tree2_cases) simp_all text \First, a fast but relatively manual proof with many lemmas:\ lemma height_balL: "\ avl l; avl r; height l = height r + 2 \ \ - height (balL l a r) = height r + 2 \ - height (balL l a r) = height r + 3" -by (cases l) (auto simp:node_def balL_def split:tree.split) - + height (balL l a r) \ {height r + 2, height r + 3}" +apply (cases l) + apply (auto simp:node_def balL_def split:tree.split) + by arith+ + lemma height_balR: "\ avl l; avl r; height r = height l + 2 \ \ - height (balR l a r) = height l + 2 \ - height (balR l a r) = height l + 3" -by (cases r) (auto simp add:node_def balR_def split:tree.split) + height (balR l a r) : {height l + 2, height l + 3}" +apply (cases r) + apply(auto simp add:node_def balR_def split:tree.split) + by arith+ lemma height_node[simp]: "height(node l a r) = max (height l) (height r) + 1" by (simp add: node_def) lemma height_balL2: "\ avl l; avl r; height l \ height r + 2 \ \ height (balL l a r) = 1 + max (height l) (height r)" by (cases l, cases r) (simp_all add: balL_def) lemma height_balR2: "\ avl l; avl r; height r \ height l + 2 \ \ height (balR l a r) = 1 + max (height l) (height r)" by (cases l, cases r) (simp_all add: balR_def) lemma avl_balL: "\ avl l; avl r; height r - 1 \ height l \ height l \ height r + 2 \ \ avl(balL l a r)" by(cases l, cases r) (auto simp: balL_def node_def split!: if_split tree.split) lemma avl_balR: "\ avl l; avl r; height l - 1 \ height r \ height r \ height l + 2 \ \ avl(balR l a r)" by(cases r, cases l) (auto simp: balR_def node_def split!: if_split tree.split) text\Insertion maintains the AVL property. Requires simultaneous proof.\ theorem avl_insert: - assumes "avl t" - shows "avl(insert x t)" - "(height (insert x t) = height t \ height (insert x t) = height t + 1)" -using assms + "avl t \ avl(insert x t)" + "avl t \ height (insert x t) \ {height t, height t + 1}" proof (induction t rule: tree2_induct) case (Node l a _ r) case 1 show ?case proof(cases "x = a") - case True with Node 1 show ?thesis by simp + case True with 1 show ?thesis by simp next case False show ?thesis proof(cases "xx\a\ show ?thesis by (auto simp add:avl_balR) + case False with 1 Node(3,4) \x\a\ show ?thesis by (auto intro!:avl_balR) qed qed case 2 show ?case proof(cases "x = a") - case True with Node 1 show ?thesis by simp + case True with 2 show ?thesis by simp next case False show ?thesis proof(cases "xx < a\ show ?thesis by (auto simp: height_balL2) + case False with 2 Node(1,2) \x < a\ show ?thesis by (auto simp: height_balL2) next case True hence "(height (balL (insert x l) a r) = height r + 2) \ (height (balL (insert x l) a r) = height r + 3)" (is "?A \ ?B") - using Node 2 by (intro height_balL) simp_all + using 2 Node(1,2) height_balL[OF _ _ True] by simp thus ?thesis proof assume ?A with 2 \x < a\ show ?thesis by (auto) next - assume ?B with True 1 Node(2) \x < a\ show ?thesis by (simp) arith + assume ?B with 2 Node(2) True \x < a\ show ?thesis by (simp) arith qed qed next case False - show ?thesis + show ?thesis proof(cases "height (insert x r) = height l + 2") - case False with Node 2 \\x < a\ show ?thesis by (auto simp: height_balR2) + case False with 2 Node(3,4) \\x < a\ show ?thesis by (auto simp: height_balR2) next case True hence "(height (balR l a (insert x r)) = height l + 2) \ (height (balR l a (insert x r)) = height l + 3)" (is "?A \ ?B") - using Node 2 by (intro height_balR) simp_all - thus ?thesis + using 2 Node(3) height_balR[OF _ _ True] by simp + thus ?thesis proof assume ?A with 2 \\x < a\ show ?thesis by (auto) next - assume ?B with True 1 Node(4) \\x < a\ show ?thesis by (simp) arith + assume ?B with 2 Node(4) True \\x < a\ show ?thesis by (simp) arith qed qed qed qed qed simp_all text \Now an automatic proof without lemmas:\ theorem avl_insert_auto: "avl t \ avl(insert x t) \ height (insert x t) \ {height t, height t + 1}" apply (induction t rule: tree2_induct) (* if you want to save a few secs: apply (auto split!: if_split) *) apply (auto simp: balL_def balR_def node_def max_absorb2 split!: if_split tree.split) done subsubsection \Deletion maintains AVL balance\ lemma avl_split_max: "\ avl t; t \ Leaf \ \ avl (fst (split_max t)) \ height t \ {height(fst (split_max t)), height(fst (split_max t)) + 1}" by(induct t rule: split_max_induct) (auto simp: balL_def node_def max_absorb2 split!: prod.split if_split tree.split) text\Deletion maintains the AVL property:\ theorem avl_delete: - assumes "avl t" - shows "avl(delete x t)" and "height t = (height (delete x t)) \ height t = height (delete x t) + 1" -using assms + "avl t \ avl(delete x t)" + "avl t \ height t \ {height (delete x t), height (delete x t) + 1}" proof (induct t rule: tree2_induct) case (Node l a n r) case 1 - thus ?case - using Node avl_split_max[of l] by (auto simp: avl_balL avl_balR split: prod.split) + show ?case + proof(cases "x = a") + case True thus ?thesis + using 1 avl_split_max[of l] by (auto intro!: avl_balR split: prod.split) + next + case False thus ?thesis + using Node 1 by (auto intro!: avl_balL avl_balR) + qed case 2 show ?case proof(cases "x = a") - case True then show ?thesis using 1 avl_split_max[of l] + case True thus ?thesis using 2 avl_split_max[of l] by(auto simp: balR_def max_absorb2 split!: if_splits prod.split tree.split) next case False - show ?thesis + show ?thesis proof(cases "xx < a\ show ?thesis by(auto simp: balR_def) + case False + thus ?thesis using 2 Node(1,2) \x < a\ by(auto simp: balR_def) next - case True - hence "(height (balR (delete x l) a r) = height (delete x l) + 2) \ - height (balR (delete x l) a r) = height (delete x l) + 3" (is "?A \ ?B") - using Node 2 by (intro height_balR) auto - thus ?thesis - proof - assume ?A with \x < a\ Node 2 show ?thesis by(auto simp: balR_def) - next - assume ?B with \x < a\ Node 2 show ?thesis by(auto simp: balR_def) - qed + case True + thus ?thesis using height_balR[OF _ _ True, of a] 2 Node(1,2) \x < a\ by simp linarith qed next case False show ?thesis proof(cases "height l = height (delete x r) + 2") - case False with Node 1 \\x < a\ \x \ a\ show ?thesis by(auto simp: balL_def) + case False + thus ?thesis using 2 Node(3,4) \\x < a\ \x \ a\ by(auto simp: balL_def) next - case True - hence "(height (balL l a (delete x r)) = height (delete x r) + 2) \ - height (balL l a (delete x r)) = height (delete x r) + 3" (is "?A \ ?B") - using Node 2 by (intro height_balL) auto - thus ?thesis - proof - assume ?A with \\x < a\ \x \ a\ Node 2 show ?thesis by(auto simp: balL_def) - next - assume ?B with \\x < a\ \x \ a\ Node 2 show ?thesis by(auto simp: balL_def) - qed + case True + thus ?thesis + using height_balL[OF _ _ True, of a] 2 Node(3,4) \\x < a\ \x \ a\ by simp linarith qed qed qed qed simp_all text \A more automatic proof. Complete automation as for insertion seems hard due to resource requirements.\ theorem avl_delete_auto: - assumes "avl t" - shows "avl(delete x t)" and "height t \ {height (delete x t), height (delete x t) + 1}" -using assms + "avl t \ avl(delete x t)" + "avl t \ height t \ {height (delete x t), height (delete x t) + 1}" proof (induct t rule: tree2_induct) case (Node l a n r) case 1 - show ?case - proof(cases "x = a") - case True thus ?thesis - using 1 avl_split_max[of l] by (auto simp: avl_balR split: prod.split) - next - case False thus ?thesis - using Node 1 by (auto simp: avl_balL avl_balR) - qed + thus ?case + using Node avl_split_max[of l] by (auto intro!: avl_balL avl_balR split: prod.split) case 2 show ?case proof(cases "x = a") case True thus ?thesis - using 1 avl_split_max[of l] + using 2 avl_split_max[of l] by(auto simp: balR_def max_absorb2 split!: if_splits prod.split tree.split) next case False thus ?thesis - using height_balL[of l "delete x r" a] height_balR[of "delete x l" r a] Node 1 + using height_balL[of l "delete x r" a] height_balR[of "delete x l" r a] 2 Node by(auto simp: balL_def balR_def split!: if_split) qed qed simp_all subsection "Overall correctness" interpretation S: Set_by_Ordered where empty = empty and isin = isin and insert = insert and delete = delete and inorder = inorder and inv = avl proof (standard, goal_cases) case 1 show ?case by (simp add: empty_def) next case 2 thus ?case by(simp add: isin_set_inorder) next case 3 thus ?case by(simp add: inorder_insert) next case 4 thus ?case by(simp add: inorder_delete) next case 5 thus ?case by (simp add: empty_def) next case 6 thus ?case by (simp add: avl_insert(1)) next case 7 thus ?case by (simp add: avl_delete(1)) qed subsection \Height-Size Relation\ -text \Based on theorems by Daniel St\"uwe, Manuel Eberl and Peter Lammich, much simplified.\ - text \Any AVL tree of height \n\ has at least \fib (n+2)\ leaves:\ -lemma avl_fib_bound: "avl t \ height t = n \ fib (n+2) \ size1 t" -proof (induction n arbitrary: t rule: fib.induct) - case 1 thus ?case by (simp) -next - case 2 thus ?case by (cases t) (auto) +theorem avl_fib_bound: + "avl t \ fib(height t + 2) \ size1 t" +proof (induction rule: tree2_induct) + case (Node l a h r) + have 1: "height l + 1 \ height r + 2" and 2: "height r + 1 \ height l + 2" + using Node.prems by auto + have "fib (max (height l) (height r) + 3) \ size1 l + size1 r" + proof cases + assume "height l \ height r" + hence "fib (max (height l) (height r) + 3) = fib (height l + 3)" + by(simp add: max_absorb1) + also have "\ = fib (height l + 2) + fib (height l + 1)" + by (simp add: numeral_eq_Suc) + also have "\ \ size1 l + fib (height l + 1)" + using Node by (simp) + also have "\ \ size1 r + size1 l" + using Node fib_mono[OF 1] by auto + also have "\ = size1 (Node l (a,h) r)" + by simp + finally show ?thesis + by (simp) + next + assume "\ height l \ height r" + hence "fib (max (height l) (height r) + 3) = fib (height r + 3)" + by(simp add: max_absorb1) + also have "\ = fib (height r + 2) + fib (height r + 1)" + by (simp add: numeral_eq_Suc) + also have "\ \ size1 r + fib (height r + 1)" + using Node by (simp) + also have "\ \ size1 r + size1 l" + using Node fib_mono[OF 2] by auto + also have "\ = size1 (Node l (a,h) r)" + by simp + finally show ?thesis + by (simp) + qed + also have "\ = size1 (Node l (a,h) r)" + by simp + finally show ?case by (simp del: fib.simps add: numeral_eq_Suc) +qed auto + +lemma avl_fib_bound_auto: "avl t \ fib (height t + 2) \ size1 t" +proof (induction t rule: tree2_induct) + case Leaf thus ?case by (simp) next - case (3 h) - from "3.prems" obtain l a r where - [simp]: "t = Node l (a,Suc(Suc h)) r" "avl l" "avl r" - and C: " - height r = Suc h \ height l = Suc h - \ height r = Suc h \ height l = h - \ height r = h \ height l = Suc h" (is "?C1 \ ?C2 \ ?C3") - by (cases t) (simp, fastforce) - { - assume ?C1 - with "3.IH"(1) - have "fib (h + 3) \ size1 l" "fib (h + 3) \ size1 r" - by (simp_all add: eval_nat_numeral) - hence ?case by (auto simp: eval_nat_numeral) - } moreover { - assume ?C2 - hence ?case using "3.IH"(1)[of r] "3.IH"(2)[of l] by auto - } moreover { - assume ?C3 - hence ?case using "3.IH"(1)[of l] "3.IH"(2)[of r] by auto - } ultimately show ?case using C by blast + case (Node l a h r) + have 1: "height l + 1 \ height r + 2" and 2: "height r + 1 \ height l + 2" + using Node.prems by auto + have left: "height l \ height r \ ?case" (is "?asm \ _") + using Node fib_mono[OF 1] by (simp add: max.absorb1) + have right: "height l \ height r \ ?case" + using Node fib_mono[OF 2] by (simp add: max.absorb2) + show ?case using left right using Node.prems by simp linarith qed text \An exponential lower bound for \<^const>\fib\:\ lemma fib_lowerbound: defines "\ \ (1 + sqrt 5) / 2" shows "real (fib(n+2)) \ \ ^ n" proof (induction n rule: fib.induct) case 1 then show ?case by simp next case 2 then show ?case by (simp add: \_def real_le_lsqrt) next case (3 n) term ?case have "\ ^ Suc (Suc n) = \ ^ 2 * \ ^ n" by (simp add: field_simps power2_eq_square) also have "\ = (\ + 1) * \ ^ n" by (simp_all add: \_def power2_eq_square field_simps) also have "\ = \ ^ Suc n + \ ^ n" by (simp add: field_simps) also have "\ \ real (fib (Suc n + 2)) + real (fib (n + 2))" by (intro add_mono "3.IH") finally show ?case by simp qed text \The size of an AVL tree is (at least) exponential in its height:\ lemma avl_size_lowerbound: defines "\ \ (1 + sqrt 5) / 2" assumes "avl t" shows "\ ^ (height t) \ size1 t" proof - have "\ ^ height t \ fib (height t + 2)" unfolding \_def by(rule fib_lowerbound) also have "\ \ size1 t" - using avl_fib_bound[of t "height t"] assms by simp + using avl_fib_bound[of t] assms by simp finally show ?thesis . qed text \The height of an AVL tree is most \<^term>\(1/log 2 \)\ \\ 1.44\ times worse than \<^term>\log 2 (size1 t)\:\ lemma avl_height_upperbound: defines "\ \ (1 + sqrt 5) / 2" assumes "avl t" shows "height t \ (1/log 2 \) * log 2 (size1 t)" proof - have "\ > 0" "\ > 1" by(auto simp: \_def pos_add_strict) hence "height t = log \ (\ ^ height t)" by(simp add: log_nat_power) also have "\ \ log \ (size1 t)" using avl_size_lowerbound[OF assms(2), folded \_def] \1 < \\ by (simp add: le_log_of_power) also have "\ = (1/log 2 \) * log 2 (size1 t)" by(simp add: log_base_change[of 2 \]) finally show ?thesis . qed end