diff --git a/src/HOL/Data_Structures/AVL_Set.thy b/src/HOL/Data_Structures/AVL_Set.thy --- a/src/HOL/Data_Structures/AVL_Set.thy +++ b/src/HOL/Data_Structures/AVL_Set.thy @@ -1,452 +1,353 @@ (* Author: Tobias Nipkow, Daniel Stüwe Based on the AFP entry AVL. *) -section "AVL Tree Implementation of Sets" +subsection \Invariant\ theory AVL_Set imports - Cmp - Isin2 + AVL_Set_Code "HOL-Number_Theory.Fib" begin -type_synonym 'a avl_tree = "('a*nat) tree" - -definition empty :: "'a avl_tree" where -"empty = Leaf" - -text \Invariant:\ - fun avl :: "'a avl_tree \ bool" where "avl Leaf = True" | "avl (Node l (a,n) r) = ((height l = height r \ height l = height r + 1 \ height r = height l + 1) \ n = max (height l) (height r) + 1 \ avl l \ avl r)" -fun ht :: "'a avl_tree \ nat" where -"ht Leaf = 0" | -"ht (Node l (a,n) r) = n" - -definition node :: "'a avl_tree \ 'a \ 'a avl_tree \ 'a avl_tree" where -"node l a r = Node l (a, max (ht l) (ht r) + 1) r" - -definition balL :: "'a avl_tree \ 'a \ 'a avl_tree \ 'a avl_tree" where -"balL AB b C = - (if ht AB = ht C + 2 then - case AB of - Node A (a, _) B \ - if ht A \ ht B then node A a (node B b C) - else - case B of - Node B\<^sub>1 (ab, _) B\<^sub>2 \ node (node A a B\<^sub>1) ab (node B\<^sub>2 b C) - else node AB b C)" - -definition balR :: "'a avl_tree \ 'a \ 'a avl_tree \ 'a avl_tree" where -"balR A a BC = - (if ht BC = ht A + 2 then - case BC of - Node B (b, _) C \ - if ht B \ ht C then node (node A a B) b C - else - case B of - Node B\<^sub>1 (ab, _) B\<^sub>2 \ node (node A a B\<^sub>1) ab (node B\<^sub>2 b C) - else node A a BC)" - -fun insert :: "'a::linorder \ 'a avl_tree \ 'a avl_tree" where -"insert x Leaf = Node Leaf (x, 1) Leaf" | -"insert x (Node l (a, n) r) = (case cmp x a of - EQ \ Node l (a, n) r | - LT \ balL (insert x l) a r | - GT \ balR l a (insert x r))" - -fun split_max :: "'a avl_tree \ 'a avl_tree * 'a" where -"split_max (Node l (a, _) r) = - (if r = Leaf then (l,a) else let (r',a') = split_max r in (balL l a r', a'))" - -lemmas split_max_induct = split_max.induct[case_names Node Leaf] - -fun delete :: "'a::linorder \ 'a avl_tree \ 'a avl_tree" where -"delete _ Leaf = Leaf" | -"delete x (Node l (a, n) r) = - (case cmp x a of - EQ \ if l = Leaf then r - else let (l', a') = split_max l in balR l' a' r | - LT \ balR (delete x l) a r | - GT \ balL l a (delete x r))" - - -subsection \Functional Correctness Proofs\ - -text\Very different from the AFP/AVL proofs\ - - -subsubsection "Proofs for insert" - -lemma inorder_balL: - "inorder (balL l a r) = inorder l @ a # inorder r" -by (auto simp: node_def balL_def split:tree.splits) - -lemma inorder_balR: - "inorder (balR l a r) = inorder l @ a # inorder r" -by (auto simp: node_def balR_def split:tree.splits) - -theorem inorder_insert: - "sorted(inorder t) \ inorder(insert x t) = ins_list x (inorder t)" -by (induct t) - (auto simp: ins_list_simps inorder_balL inorder_balR) - - -subsubsection "Proofs for delete" - -lemma inorder_split_maxD: - "\ split_max t = (t',a); t \ Leaf \ \ - inorder t' @ [a] = inorder t" -by(induction t arbitrary: t' rule: split_max.induct) - (auto simp: inorder_balL split: if_splits prod.splits tree.split) - -theorem inorder_delete: - "sorted(inorder t) \ inorder (delete x t) = del_list x (inorder t)" -by(induction t) - (auto simp: del_list_simps inorder_balL inorder_balR inorder_split_maxD split: prod.splits) - - -subsection \AVL invariants\ - -text\Essentially the AFP/AVL proofs\ - subsubsection \Insertion maintains AVL balance\ declare Let_def [simp] lemma ht_height[simp]: "avl t \ ht t = height t" by (cases t rule: tree2_cases) simp_all text \First, a fast but relatively manual proof with many lemmas:\ lemma height_balL: "\ avl l; avl r; height l = height r + 2 \ \ height (balL l a r) = height r + 2 \ height (balL l a r) = height r + 3" by (cases l) (auto simp:node_def balL_def split:tree.split) lemma height_balR: "\ avl l; avl r; height r = height l + 2 \ \ height (balR l a r) = height l + 2 \ height (balR l a r) = height l + 3" by (cases r) (auto simp add:node_def balR_def split:tree.split) lemma height_node[simp]: "height(node l a r) = max (height l) (height r) + 1" by (simp add: node_def) lemma height_balL2: "\ avl l; avl r; height l \ height r + 2 \ \ height (balL l a r) = 1 + max (height l) (height r)" by (cases l, cases r) (simp_all add: balL_def) lemma height_balR2: "\ avl l; avl r; height r \ height l + 2 \ \ height (balR l a r) = 1 + max (height l) (height r)" by (cases l, cases r) (simp_all add: balR_def) lemma avl_balL: "\ avl l; avl r; height r - 1 \ height l \ height l \ height r + 2 \ \ avl(balL l a r)" by(cases l, cases r) (auto simp: balL_def node_def split!: if_split tree.split) lemma avl_balR: "\ avl l; avl r; height l - 1 \ height r \ height r \ height l + 2 \ \ avl(balR l a r)" by(cases r, cases l) (auto simp: balR_def node_def split!: if_split tree.split) text\Insertion maintains the AVL property. Requires simultaneous proof.\ theorem avl_insert: assumes "avl t" shows "avl(insert x t)" "(height (insert x t) = height t \ height (insert x t) = height t + 1)" using assms proof (induction t rule: tree2_induct) case (Node l a _ r) case 1 show ?case proof(cases "x = a") case True with Node 1 show ?thesis by simp next case False show ?thesis proof(cases "xx\a\ show ?thesis by (auto simp add:avl_balR) qed qed case 2 show ?case proof(cases "x = a") case True with Node 1 show ?thesis by simp next case False show ?thesis proof(cases "xx < a\ show ?thesis by (auto simp: height_balL2) next case True hence "(height (balL (insert x l) a r) = height r + 2) \ (height (balL (insert x l) a r) = height r + 3)" (is "?A \ ?B") using Node 2 by (intro height_balL) simp_all thus ?thesis proof assume ?A with 2 \x < a\ show ?thesis by (auto) next assume ?B with True 1 Node(2) \x < a\ show ?thesis by (simp) arith qed qed next case False show ?thesis proof(cases "height (insert x r) = height l + 2") case False with Node 2 \\x < a\ show ?thesis by (auto simp: height_balR2) next case True hence "(height (balR l a (insert x r)) = height l + 2) \ (height (balR l a (insert x r)) = height l + 3)" (is "?A \ ?B") using Node 2 by (intro height_balR) simp_all thus ?thesis proof assume ?A with 2 \\x < a\ show ?thesis by (auto) next assume ?B with True 1 Node(4) \\x < a\ show ?thesis by (simp) arith qed qed qed qed qed simp_all text \Now an automatic proof without lemmas:\ theorem avl_insert_auto: "avl t \ avl(insert x t) \ height (insert x t) \ {height t, height t + 1}" apply (induction t rule: tree2_induct) (* if you want to save a few secs: apply (auto split!: if_split) *) apply (auto simp: balL_def balR_def node_def max_absorb2 split!: if_split tree.split) done subsubsection \Deletion maintains AVL balance\ lemma avl_split_max: "\ avl t; t \ Leaf \ \ avl (fst (split_max t)) \ height t \ {height(fst (split_max t)), height(fst (split_max t)) + 1}" by(induct t rule: split_max_induct) (auto simp: balL_def node_def max_absorb2 split!: prod.split if_split tree.split) text\Deletion maintains the AVL property:\ theorem avl_delete: assumes "avl t" shows "avl(delete x t)" and "height t = (height (delete x t)) \ height t = height (delete x t) + 1" using assms proof (induct t rule: tree2_induct) case (Node l a n r) case 1 thus ?case using Node avl_split_max[of l] by (auto simp: avl_balL avl_balR split: prod.split) case 2 show ?case proof(cases "x = a") case True then show ?thesis using 1 avl_split_max[of l] by(auto simp: balR_def max_absorb2 split!: if_splits prod.split tree.split) next case False show ?thesis proof(cases "xx < a\ show ?thesis by(auto simp: balR_def) next case True hence "(height (balR (delete x l) a r) = height (delete x l) + 2) \ height (balR (delete x l) a r) = height (delete x l) + 3" (is "?A \ ?B") using Node 2 by (intro height_balR) auto thus ?thesis proof assume ?A with \x < a\ Node 2 show ?thesis by(auto simp: balR_def) next assume ?B with \x < a\ Node 2 show ?thesis by(auto simp: balR_def) qed qed next case False show ?thesis proof(cases "height l = height (delete x r) + 2") case False with Node 1 \\x < a\ \x \ a\ show ?thesis by(auto simp: balL_def) next case True hence "(height (balL l a (delete x r)) = height (delete x r) + 2) \ height (balL l a (delete x r)) = height (delete x r) + 3" (is "?A \ ?B") using Node 2 by (intro height_balL) auto thus ?thesis proof assume ?A with \\x < a\ \x \ a\ Node 2 show ?thesis by(auto simp: balL_def) next assume ?B with \\x < a\ \x \ a\ Node 2 show ?thesis by(auto simp: balL_def) qed qed qed qed qed simp_all text \A more automatic proof. Complete automation as for insertion seems hard due to resource requirements.\ theorem avl_delete_auto: assumes "avl t" shows "avl(delete x t)" and "height t \ {height (delete x t), height (delete x t) + 1}" using assms proof (induct t rule: tree2_induct) case (Node l a n r) case 1 show ?case proof(cases "x = a") case True thus ?thesis using 1 avl_split_max[of l] by (auto simp: avl_balR split: prod.split) next case False thus ?thesis using Node 1 by (auto simp: avl_balL avl_balR) qed case 2 show ?case proof(cases "x = a") case True thus ?thesis using 1 avl_split_max[of l] by(auto simp: balR_def max_absorb2 split!: if_splits prod.split tree.split) next case False thus ?thesis using height_balL[of l "delete x r" a] height_balR[of "delete x l" r a] Node 1 by(auto simp: balL_def balR_def split!: if_split) qed qed simp_all subsection "Overall correctness" interpretation S: Set_by_Ordered where empty = empty and isin = isin and insert = insert and delete = delete and inorder = inorder and inv = avl proof (standard, goal_cases) case 1 show ?case by (simp add: empty_def) next case 2 thus ?case by(simp add: isin_set_inorder) next case 3 thus ?case by(simp add: inorder_insert) next case 4 thus ?case by(simp add: inorder_delete) next case 5 thus ?case by (simp add: empty_def) next case 6 thus ?case by (simp add: avl_insert(1)) next case 7 thus ?case by (simp add: avl_delete(1)) qed subsection \Height-Size Relation\ text \Based on theorems by Daniel St\"uwe, Manuel Eberl and Peter Lammich, much simplified.\ text \Any AVL tree of height \n\ has at least \fib (n+2)\ leaves:\ lemma avl_fib_bound: "avl t \ height t = n \ fib (n+2) \ size1 t" proof (induction n arbitrary: t rule: fib.induct) case 1 thus ?case by (simp) next case 2 thus ?case by (cases t) (auto) next case (3 h) from "3.prems" obtain l a r where [simp]: "t = Node l (a,Suc(Suc h)) r" "avl l" "avl r" and C: " height r = Suc h \ height l = Suc h \ height r = Suc h \ height l = h \ height r = h \ height l = Suc h" (is "?C1 \ ?C2 \ ?C3") by (cases t) (simp, fastforce) { assume ?C1 with "3.IH"(1) have "fib (h + 3) \ size1 l" "fib (h + 3) \ size1 r" by (simp_all add: eval_nat_numeral) hence ?case by (auto simp: eval_nat_numeral) } moreover { assume ?C2 hence ?case using "3.IH"(1)[of r] "3.IH"(2)[of l] by auto } moreover { assume ?C3 hence ?case using "3.IH"(1)[of l] "3.IH"(2)[of r] by auto } ultimately show ?case using C by blast qed text \An exponential lower bound for \<^const>\fib\:\ lemma fib_lowerbound: defines "\ \ (1 + sqrt 5) / 2" shows "real (fib(n+2)) \ \ ^ n" proof (induction n rule: fib.induct) case 1 then show ?case by simp next case 2 then show ?case by (simp add: \_def real_le_lsqrt) next case (3 n) term ?case have "\ ^ Suc (Suc n) = \ ^ 2 * \ ^ n" by (simp add: field_simps power2_eq_square) also have "\ = (\ + 1) * \ ^ n" by (simp_all add: \_def power2_eq_square field_simps) also have "\ = \ ^ Suc n + \ ^ n" by (simp add: field_simps) also have "\ \ real (fib (Suc n + 2)) + real (fib (n + 2))" by (intro add_mono "3.IH") finally show ?case by simp qed text \The size of an AVL tree is (at least) exponential in its height:\ lemma avl_size_lowerbound: defines "\ \ (1 + sqrt 5) / 2" assumes "avl t" shows "\ ^ (height t) \ size1 t" proof - have "\ ^ height t \ fib (height t + 2)" unfolding \_def by(rule fib_lowerbound) also have "\ \ size1 t" using avl_fib_bound[of t "height t"] assms by simp finally show ?thesis . qed text \The height of an AVL tree is most \<^term>\(1/log 2 \)\ \\ 1.44\ times worse than \<^term>\log 2 (size1 t)\:\ lemma avl_height_upperbound: defines "\ \ (1 + sqrt 5) / 2" assumes "avl t" shows "height t \ (1/log 2 \) * log 2 (size1 t)" proof - have "\ > 0" "\ > 1" by(auto simp: \_def pos_add_strict) hence "height t = log \ (\ ^ height t)" by(simp add: log_nat_power) also have "\ \ log \ (size1 t)" using avl_size_lowerbound[OF assms(2), folded \_def] \1 < \\ by (simp add: le_log_of_power) also have "\ = (1/log 2 \) * log 2 (size1 t)" by(simp add: log_base_change[of 2 \]) finally show ?thesis . qed end diff --git a/src/HOL/Data_Structures/AVL_Set_Code.thy b/src/HOL/Data_Structures/AVL_Set_Code.thy new file mode 100644 --- /dev/null +++ b/src/HOL/Data_Structures/AVL_Set_Code.thy @@ -0,0 +1,107 @@ +(* +Author: Tobias Nipkow, Daniel Stüwe +Based on the AFP entry AVL. +*) + +section "AVL Tree Implementation of Sets" + +theory AVL_Set_Code +imports + Cmp + Isin2 +begin + +subsection \Code\ + +type_synonym 'a avl_tree = "('a*nat) tree" + +definition empty :: "'a avl_tree" where +"empty = Leaf" + +fun ht :: "'a avl_tree \ nat" where +"ht Leaf = 0" | +"ht (Node l (a,n) r) = n" + +definition node :: "'a avl_tree \ 'a \ 'a avl_tree \ 'a avl_tree" where +"node l a r = Node l (a, max (ht l) (ht r) + 1) r" + +definition balL :: "'a avl_tree \ 'a \ 'a avl_tree \ 'a avl_tree" where +"balL AB b C = + (if ht AB = ht C + 2 then + case AB of + Node A (a, _) B \ + if ht A \ ht B then node A a (node B b C) + else + case B of + Node B\<^sub>1 (ab, _) B\<^sub>2 \ node (node A a B\<^sub>1) ab (node B\<^sub>2 b C) + else node AB b C)" + +definition balR :: "'a avl_tree \ 'a \ 'a avl_tree \ 'a avl_tree" where +"balR A a BC = + (if ht BC = ht A + 2 then + case BC of + Node B (b, _) C \ + if ht B \ ht C then node (node A a B) b C + else + case B of + Node B\<^sub>1 (ab, _) B\<^sub>2 \ node (node A a B\<^sub>1) ab (node B\<^sub>2 b C) + else node A a BC)" + +fun insert :: "'a::linorder \ 'a avl_tree \ 'a avl_tree" where +"insert x Leaf = Node Leaf (x, 1) Leaf" | +"insert x (Node l (a, n) r) = (case cmp x a of + EQ \ Node l (a, n) r | + LT \ balL (insert x l) a r | + GT \ balR l a (insert x r))" + +fun split_max :: "'a avl_tree \ 'a avl_tree * 'a" where +"split_max (Node l (a, _) r) = + (if r = Leaf then (l,a) else let (r',a') = split_max r in (balL l a r', a'))" + +lemmas split_max_induct = split_max.induct[case_names Node Leaf] + +fun delete :: "'a::linorder \ 'a avl_tree \ 'a avl_tree" where +"delete _ Leaf = Leaf" | +"delete x (Node l (a, n) r) = + (case cmp x a of + EQ \ if l = Leaf then r + else let (l', a') = split_max l in balR l' a' r | + LT \ balR (delete x l) a r | + GT \ balL l a (delete x r))" + + +subsection \Functional Correctness Proofs\ + +text\Very different from the AFP/AVL proofs\ + + +subsubsection "Proofs for insert" + +lemma inorder_balL: + "inorder (balL l a r) = inorder l @ a # inorder r" +by (auto simp: node_def balL_def split:tree.splits) + +lemma inorder_balR: + "inorder (balR l a r) = inorder l @ a # inorder r" +by (auto simp: node_def balR_def split:tree.splits) + +theorem inorder_insert: + "sorted(inorder t) \ inorder(insert x t) = ins_list x (inorder t)" +by (induct t) + (auto simp: ins_list_simps inorder_balL inorder_balR) + + +subsubsection "Proofs for delete" + +lemma inorder_split_maxD: + "\ split_max t = (t',a); t \ Leaf \ \ + inorder t' @ [a] = inorder t" +by(induction t arbitrary: t' rule: split_max.induct) + (auto simp: inorder_balL split: if_splits prod.splits tree.split) + +theorem inorder_delete: + "sorted(inorder t) \ inorder (delete x t) = del_list x (inorder t)" +by(induction t) + (auto simp: del_list_simps inorder_balL inorder_balR inorder_split_maxD split: prod.splits) + +end diff --git a/src/HOL/Data_Structures/Tree2.thy b/src/HOL/Data_Structures/Tree2.thy --- a/src/HOL/Data_Structures/Tree2.thy +++ b/src/HOL/Data_Structures/Tree2.thy @@ -1,37 +1,39 @@ +section \Augmented Tree (Tree2)\ + theory Tree2 imports "HOL-Library.Tree" begin text \This theory provides the basic infrastructure for the type @{typ \('a * 'b) tree\} of augmented trees where @{typ 'a} is the key and @{typ 'b} some additional information.\ text \IMPORTANT: Inductions and cases analyses on augmented trees need to use the following two rules explicitly. They generate nodes of the form @{term "Node l (a,b) r"} rather than @{term "Node l a r"} for trees of type @{typ "'a tree"}.\ lemmas tree2_induct = tree.induct[where 'a = "'a * 'b", split_format(complete)] lemmas tree2_cases = tree.exhaust[where 'a = "'a * 'b", split_format(complete)] fun inorder :: "('a*'b)tree \ 'a list" where "inorder Leaf = []" | "inorder (Node l (a,_) r) = inorder l @ a # inorder r" fun set_tree :: "('a*'b) tree \ 'a set" where "set_tree Leaf = {}" | "set_tree (Node l (a,_) r) = Set.insert a (set_tree l \ set_tree r)" fun bst :: "('a::linorder*'b) tree \ bool" where "bst Leaf = True" | "bst (Node l (a, _) r) = (bst l \ bst r \ (\x \ set_tree l. x < a) \ (\x \ set_tree r. a < x))" lemma finite_set_tree[simp]: "finite(set_tree t)" by(induction t) auto lemma eq_set_tree_empty[simp]: "set_tree t = {} \ t = Leaf" by (cases t) auto lemma set_inorder[simp]: "set (inorder t) = set_tree t" by (induction t) auto end