diff --git a/thys/KnuthMorrisPratt/KnuthMorrisPratt.thy b/thys/KnuthMorrisPratt/KnuthMorrisPratt.thy --- a/thys/KnuthMorrisPratt/KnuthMorrisPratt.thy +++ b/thys/KnuthMorrisPratt/KnuthMorrisPratt.thy @@ -1,676 +1,686 @@ section \Knuth-Morris-Pratt fast string search algorithm\ text \Development based on FilliĆ¢tre's verification using Why3\ -text \many thanks to Christian Zimmerer for expressing the algorithms -in the form of while loops\ +text \Many thanks to Christian Zimmerer for versions of the algorithms +as while loops\ theory KnuthMorrisPratt imports "Collections.Diff_Array" "HOL-Library.While_Combinator" begin subsection \General definitions\ abbreviation "array \ new_array" abbreviation length_array :: "'a array \ nat" ("\_\") where "length_array \ array_length" notation array_get (infixl "!!" 100) notation array_set ("_[_ ::= _]" [1000,0,0] 900) definition matches :: "'a array \ nat \ 'a array \ nat \ nat \ bool" where "matches a i b j n = (i+n \ \a\ \ j+n \ \b\ \ (\k i \ \a\ \ j \ \b\" by (simp add: matches_def) lemma matches_right_extension: "\matches a i b j n; Suc (i+n) \ \a\; Suc (j+n) \ \b\; a!!(i+n) = b!!(j+n)\ \ matches a i b j (Suc n)" by (auto simp: matches_def less_Suc_eq) lemma matches_contradiction_at_first: "\0 < n; a!!i \ b!!j\ \ \ matches a i b j n" by (auto simp: matches_def) lemma matches_contradiction_at_i: "\a!!(i+k) \ b!!(j+k); k < n\ \ \ matches a i b j n" by (auto simp: matches_def) lemma matches_right_weakening: "\matches a i b j n; n' \ n\ \ matches a i b j n'" by (auto simp: matches_def) lemma matches_left_weakening_add: assumes "matches a i b j n" "k\n" shows "matches a (i+k) b (j+k) (n-k)" using assms by (auto simp: matches_def less_diff_conv algebra_simps) lemma matches_left_weakening: assumes "matches a (i - (n - n')) b (j - (n - n')) n" and "n' \ n" and "n - n' \ i" and "n - n' \ j" shows "matches a i b j n'" by (metis assms diff_diff_cancel diff_le_self le_add_diff_inverse2 matches_left_weakening_add) lemma matches_sym: "matches a i b j n \ matches b j a i n" by (simp add: matches_def) lemma matches_trans: "\matches a i b j n; matches b j c k n\ \ matches a i c k n" by (simp add: matches_def) text \Denotes the maximal @{term "n < j"} such that the first @{term n} elements of @{term p} match the last @{term n} elements of @{term p}[0..@{term "j-1"}] The first @{term n} characters of the pattern have a copy starting at @{term "j-n"}.\ definition is_next :: "'a array \ nat \ nat \ bool" where "is_next p j n = (n < j \ matches p (j-n) p 0 n \ (\m. n < m \ m < j \ \ matches p (j-m) p 0 m))" lemma next_iteration: assumes "matches a (i-j) p 0 j" "is_next p j n" "j \ i" shows "matches a (i-n) p 0 n" proof - have "matches a (i-n) p (j-n) n" using assms by (auto simp: algebra_simps is_next_def intro: matches_left_weakening [where n=j]) moreover have "matches p (j-n) p 0 n" using is_next_def assms by blast ultimately show ?thesis using matches_trans by blast qed lemma next_is_maximal: assumes "matches a (i-j) p 0 j" "is_next p j n" and "j \ i" "n < m" "m < j" shows "\ matches a (i-m) p 0 m" proof - have "matches a (i-m) p (j-m) m" by (rule matches_left_weakening [where n=j]) (use assms in auto) with assms show ?thesis by (meson is_next_def matches_sym matches_trans) qed text \FilliĆ¢tre's version of the lemma above\ corollary next_is_maximal': assumes match: "matches a (i-j) p 0 j" "is_next p j n" and more: "j \ i" "i-j < k" "k < i-n" shows "\ matches a k p 0 \p\" proof - have "\ matches a k p 0 (i-k)" using next_is_maximal [OF match] more by (metis add.commute diff_diff_cancel diff_le_self le_trans less_diff_conv less_or_eq_imp_le) moreover have "i-k < \p\" using assms by (auto simp: matches_def) ultimately show ?thesis using matches_right_weakening nless_le by blast qed lemma next_1_0 [simp]: "is_next p 1 0 \ 1 \ \p\" by (auto simp add: is_next_def matches_def) subsection \The Build-table routine\ definition buildtab_step :: "'a array \ nat array \ nat \ nat \ nat array \ nat \ nat" where "buildtab_step p nxt i j = (if p!!i = p!!j then (nxt[Suc i::=Suc j], Suc i, Suc j) else if j=0 then (nxt[Suc i::=0], Suc i, j) else (nxt, i, nxt!!j))" text \The conjunction of the invariants given in the Why3 development\ definition buildtab_invariant :: "'a array \ nat array \ nat \ nat \ bool" where "buildtab_invariant p nxt i j = (\nxt\ = \p\ \ i \ \p\ \ j matches p (i-j) p 0 j \ (\k. 0 < k \ k \ i \ is_next p k (nxt!!k)) \ (\k. Suc j < k \ k < Suc i \ \ matches p (Suc i - k) p 0 k))" text \The invariant trivially holds upon initialisation\ lemma buildtab_invariant_init: "\p\ \ 2 \ buildtab_invariant p (array 0 \p\) 1 0" by (auto simp: buildtab_invariant_def is_next_def) subsubsection \The invariant holds after an iteration\ text \each conjunct is proved separately\ lemma length_invariant: shows "let (nxt',i',j') = buildtab_step p nxt i j in \nxt'\ = \nxt\" by (simp add: buildtab_step_def) lemma i_invariant: assumes "Suc i < m" shows "let (nxt',i',j') = buildtab_step p nxt i j in i' \ m" using assms by (simp add: buildtab_step_def) lemma ji_invariant: assumes "buildtab_invariant p nxt i j" shows "let (nxt',i',j') = buildtab_step p nxt i j in j' nxt !! j < j" using assms by (simp add: buildtab_invariant_def is_next_def) show ?thesis using assms by (auto simp add: buildtab_invariant_def buildtab_step_def intro: order.strict_trans j) qed lemma matches_invariant: assumes "buildtab_invariant p nxt i j" and "Suc i < \p\" shows "let (nxt',i',j') = buildtab_step p nxt i j in matches p (i' - j') p 0 j'" using assms by (auto simp: buildtab_invariant_def buildtab_step_def matches_right_extension intro: next_iteration) lemma is_next_invariant: assumes "buildtab_invariant p nxt i j" and "Suc i < \p\" shows "let (nxt',i',j') = buildtab_step p nxt i j in \k. 0 < k \ k \ i' \ is_next p k (nxt'!!k)" proof (cases "p!!i = p!!j") case True with assms have "matches p (i-j) p 0 (Suc j)" by (simp add: buildtab_invariant_def matches_right_extension) then have "is_next p (Suc i) (Suc j)" using assms by (auto simp: is_next_def buildtab_invariant_def) with True assms show ?thesis by (simp add: buildtab_invariant_def buildtab_step_def array_get_array_set_other le_Suc_eq) next case neq: False show ?thesis proof (cases "j=0") case True then have "\ matches p (i-j) p 0 (Suc j)" using matches_contradiction_at_first neq by fastforce with True assms have "is_next p (Suc i) 0" unfolding is_next_def buildtab_invariant_def by (metis Suc_leI diff_Suc_Suc diff_zero matches_empty nat_less_le zero_less_Suc) with assms neq show ?thesis by (simp add: buildtab_invariant_def buildtab_step_def array_get_array_set_other le_Suc_eq) next case False with assms neq show ?thesis by (simp add: buildtab_invariant_def buildtab_step_def) qed qed lemma non_matches_aux: assumes "Suc (Suc j) < k" "matches p (Suc (Suc i) - k) p 0 k" shows "matches p (Suc i - (k - Suc 0)) p 0 (k - Suc 0)" using matches_right_weakening assms by fastforce lemma non_matches_invariant: assumes bt: "buildtab_invariant p nxt i j" and "\p\ \ 2" "Suc i < \p\" shows "let (nxt',i',j') = buildtab_step p nxt i j in \k. Suc j' < k \ k < Suc i' \ \ matches p (Suc i' - k) p 0 k" proof (cases "p!!i = p!!j") case True with non_matches_aux bt show ?thesis by (fastforce simp add: Suc_less_eq2 buildtab_step_def buildtab_invariant_def) next case neq: False have "j < i" using bt by (auto simp: buildtab_invariant_def) then have no_match_Sj: "\ matches p (Suc i - Suc j) p 0 (Suc j)" using neq by (force simp: matches_def) show ?thesis proof (cases "j=0") case True have "\ matches p (Suc (Suc i) - k) p 0 k" if "Suc 0 < k" and "k < Suc (Suc i)" for k proof (cases "k = Suc (Suc 0)") case True with assms neq that show ?thesis by (auto simp add: matches_contradiction_at_first \j=0\) next case False then have "Suc 0 < k - Suc 0" using that by linarith with bt that have "\ matches p (Suc i - (k - Suc 0)) p 0 (k - Suc 0)" using True by (force simp add: buildtab_invariant_def) then show ?thesis by (metis False Suc_lessI non_matches_aux that(1)) qed with True show ?thesis by (auto simp: buildtab_invariant_def buildtab_step_def) next case False then have "0 < j" by auto have False if lessK: "Suc (nxt!!j) < k" and "k < Suc i" and contra: "matches p (Suc i - k) p 0 k" for k proof (cases "Suc j < k") case True then show ?thesis using bt that by (auto simp: buildtab_invariant_def) next case False then have "k \ j" using less_Suc_eq_le no_match_Sj contra by fastforce obtain k' where k': "k = Suc k'" "k' < i" using \k < Suc i\ lessK not0_implies_Suc by fastforce have "is_next p j (nxt!!j)" using bt that \j>0\ by (auto simp: buildtab_invariant_def) with no_match_Sj k' have "\ matches p (j - k') p 0 k'" by (metis Suc_less_eq \k \ j\ is_next_def lessK less_Suc_eq_le) moreover have "matches p 0 p (i-j) j" using bt buildtab_invariant_def by (metis matches_sym) then have "matches p (j-k') p (i - k') k'" using \j False k' matches_left_weakening by (smt (verit, best) Nat.diff_diff_eq Suc_leI Suc_le_lessD \k \ j\ diff_diff_cancel diff_is_0_eq lessI nat_less_le) moreover have "matches p (i - k') p 0 k'" using contra k' matches_right_weakening by fastforce ultimately show False using matches_trans by blast qed with assms neq False show ?thesis by (auto simp: buildtab_invariant_def buildtab_step_def) qed qed lemma buildtab_invariant: assumes ini: "buildtab_invariant p nxt i j" and "Suc i < \p\" "(nxt',i',j') = buildtab_step p nxt i j" shows "buildtab_invariant p nxt' i' j'" unfolding buildtab_invariant_def using assms i_invariant [of concl: p nxt i j] length_invariant [of p nxt i j] ji_invariant [OF ini] matches_invariant [OF ini] non_matches_invariant [OF ini] is_next_invariant [OF ini] by (simp add: buildtab_invariant_def split: prod.split_asm) subsubsection \The build-table loop and its correctness\ -definition buildtab:: "'a array \ nat array \ nat \ nat \ nat array option" where - "buildtab p nxt i j \ - map_option fst (while_option (\(_, i', _). Suc i' < \p\) - (\(nxt', i', j'). buildtab_step p nxt' i' j') - (nxt, i, j))" +text \Declaring a partial recursive function with the tailrec option +relaxes the need for a termination proof, because a tail-recursive recursion +equation can never cause inconsistency.\ -lemma buildtab_halts: - assumes "buildtab_invariant p nxt i j" - shows "\y. buildtab p nxt i j = Some y" -proof - - have "\y. (\p nxt i j. while_option (\(_, i', _). Suc i' < \p\) - (\(nxt', i', j'). buildtab_step p nxt' i' j') - (nxt, i, j)) p nxt i j = Some y" - proof (rule measure_while_option_Some[of "\(nxt, i, j). buildtab_invariant p nxt i j" _ _ - "(\p (nxt, i, j). 2 * \p\ - 2 * i + j) p"], - clarify, rule conjI, goal_cases) - case (2 nxt i j) - then show ?case by (auto simp: buildtab_step_def buildtab_invariant_def matches_def is_next_def) - qed (fastforce simp: assms buildtab_invariant)+ - then show ?thesis unfolding buildtab_def by blast -qed +subsubsection \The build-table loop and its correctness\ + +partial_function (tailrec) buildtab :: "'a array \ nat array \ nat \ nat \ nat array" where + "buildtab p nxt i j = + (if Suc i < \p\ + then let (nxt',i',j') = buildtab_step p nxt i j in buildtab p nxt' i' j' + else nxt)" +declare buildtab.simps[code] + +text \Nevertheless, termination must eventually be shown: + to use induction to reason about executions. We do so by defining +a well founded relation. Termination proofs are by well-founded induction.\ + +definition "rel_buildtab m = inv_image (lex_prod (measure (\i. m-i)) (measure id)) snd" + +lemma wf_rel_buildtab: "wf (rel_buildtab m)" + unfolding rel_buildtab_def + by (auto intro: wf_same_fst) lemma buildtab_correct: assumes k: "0 k < \p\" and ini: "buildtab_invariant p nxt i j" - shows "is_next p k (the (buildtab p nxt i j) !! k)" -proof - - obtain nxt' i' j' where \: - "while_option (\(_, i', _). Suc i' < \p\) (\(nxt', i', j'). buildtab_step p nxt' i' j') (nxt, i, j) = Some (nxt', i', j')" - using buildtab_halts[OF ini] unfolding buildtab_def by fast - from while_option_rule[OF _ \, of "\(nxt, i, j). buildtab_invariant p nxt i j"] - have "buildtab_invariant p nxt' i' j'" using buildtab_invariant ini by fastforce - with while_option_stop[OF \] \ show ?thesis - using assms k by (auto simp: is_next_def matches_def buildtab_invariant_def buildtab_def) + shows "is_next p k (buildtab p nxt i j !! k)" + using ini +proof (induction "(nxt,i,j)" arbitrary: nxt i j rule: wf_induct_rule[OF wf_rel_buildtab [of "\p\"]]) + case (1 nxt i j) + show ?case + proof (cases "Suc i < \p\") + case True + then obtain nxt' i' j' + where eq: "(nxt', i', j') = buildtab_step p nxt i j" and invar': "buildtab_invariant p nxt' i' j'" + using "1.prems" buildtab_invariant by (metis surj_pair) + then have "j>0 \ nxt'!!j < j" + using "1.prems" + by (auto simp: buildtab_invariant_def is_next_def buildtab_step_def split: if_split_asm) + then have decreasing: "((nxt', i', j'), nxt, i, j) \ rel_buildtab \p\" + using eq True by (auto simp: rel_buildtab_def buildtab_step_def split: if_split_asm) + show ?thesis + using "1.hyps" [OF decreasing invar'] "1.prems" eq True + by(auto simp add: buildtab.simps[of p nxt] split: prod.splits) + next + case False + with 1 k show ?thesis + by (auto simp: buildtab_invariant_def buildtab.simps) + qed qed text \Before building the table, check for the degenerate case\ definition table :: "'a array \ nat array" where - "table p = (if \p\ > 1 then the (buildtab p (array 0 \p\) 1 0) + "table p = (if \p\ > 1 then buildtab p (array 0 \p\) 1 0 else array 0 \p\)" declare table_def[code] lemma is_next_table: assumes "0 < j \ j < \p\" shows "is_next p j (table p !!j)" - using buildtab_correct[of _ p] buildtab_invariant_init[of p] assms by (simp cong: table_def) + using buildtab_correct[of _ p] buildtab_invariant_init[of p] assms by (simp add: table_def) -subsubsection \Linearity of @{term buildtab}\ - -definition T_buildtab :: "'a array \ nat array \ nat \ nat \ nat \ nat option" where - "T_buildtab p nxt i j t \ map_option (\(_, _, _, r). r) - (while_option (\(_, i, _, _). Suc i < \p\) - (\(nxt, i, j, t). let (nxt', i', j') = buildtab_step p nxt i j in (nxt', i', j', Suc t)) - (nxt, i, j, t))" +subsubsection \Linearity of @{term buildtabW}\ -lemma T_buildtab_halts: - assumes "buildtab_invariant p nxt i j" - shows "\y. T_buildtab p nxt i j t = Some y" -proof - - have "\y. (while_option (\(_, i, _, _). Suc i < \p\) - (\(nxt, i, j, t). let (nxt', i', j') = buildtab_step p nxt i j in (nxt', i', j', Suc t)) - (nxt, i, j, t)) = Some y" - proof (intro measure_while_option_Some[of "\(nxt, i, j, t). buildtab_invariant p nxt i j" _ _ - "(\p (nxt, i, j, t). 2 * \p\ - 2 * i + j) p"], clarify, rule conjI, goal_cases) - case (2 nxt i j t) - then show ?case by (auto simp: buildtab_step_def buildtab_invariant_def is_next_def) - qed (fastforce simp: assms buildtab_invariant split: prod.splits)+ - then show ?thesis unfolding T_buildtab_def by blast -qed +partial_function (tailrec) T_buildtab :: "'a array \ nat array \ nat \ nat \ nat \ nat" where + "T_buildtab p nxt i j t = + (if Suc i < \p\ + then let (nxt',i',j') = buildtab_step p nxt i j in T_buildtab p nxt' i' j' (Suc t) + else t)" lemma T_buildtab_correct: assumes ini: "buildtab_invariant p nxt i j" - shows "the (T_buildtab p nxt i j t) \ 2*\p\ - 2*i + j + t" -proof - - let ?b = "(\(nxt', i', j', t'). Suc i' < \p\)" - let ?c = "(\(nxt, i, j, t). let (nxt', i', j') = buildtab_step p nxt i j in (nxt', i', j', Suc t))" - let ?s = "(nxt, i, j, t)" - let ?P1 = "\(nxt', i', j', t'). buildtab_invariant p nxt' i' j' - \ (if Suc i' < \p\ then Suc t' else t') \ 2 * \p\ - (2 * i' - j') + t'" - let ?P2 = "\(nxt', i', j', t'). buildtab_invariant p nxt' i' j' - \ 2 * \p\ - 2 * i' + j' + t' \ 2 * \p\ - 2 * i + j + t" - - obtain nxt' i' j' t' where \: "(while_option ?b ?c ?s) = Some (nxt', i', j', t')" - using T_buildtab_halts[OF ini] unfolding T_buildtab_def by fast - have 1: "(\s. ?P1 s \ ?b s \ ?P1 (?c s))" proof (clarify, intro conjI, goal_cases) - case (2 nxt\<^sub>1 i\<^sub>1 j\<^sub>1 t\<^sub>1 nxt\<^sub>2 i\<^sub>2 j\<^sub>2 t\<^sub>2) - then show ?case - by (auto simp: buildtab_step_def split: if_split_asm) - qed (insert buildtab_invariant, fastforce split: prod.splits) - have P1: "?P1 ?s" using ini by auto - from while_option_rule[OF 1 \ P1] - have invar1: "buildtab_invariant p nxt' i' j'" and - invar2: "t' \ 2 * \p\ - (2 * i' - j') + t'" by blast (simp add: while_option_stop[OF \]) - have "?P2 (nxt', i', j', t')" proof (rule while_option_rule[OF _ \], clarify, intro conjI, goal_cases) - case (1 nxt\<^sub>1 i\<^sub>1 j\<^sub>1 t\<^sub>1 nxt\<^sub>2 i\<^sub>2 j\<^sub>2 t\<^sub>2) - with buildtab_invariant[OF 1(3)] show invar: ?case by (auto split: prod.splits) - next - case (2 nxt\<^sub>1 i\<^sub>1 j\<^sub>1 t\<^sub>1 nxt\<^sub>2 i\<^sub>2 j\<^sub>2 t\<^sub>2) - with 2(4) show ?case - by (auto 0 2 simp: buildtab_step_def buildtab_invariant_def is_next_def split: if_split_asm) - qed (use ini in simp) - with invar1 invar2 \ have "t' \ 2 * \p\ - 2 * i + j + t" by simp - with \ show ?thesis by (simp add: T_buildtab_def) + shows "T_buildtab p nxt i j t \ 2*\p\ - 2*i + j + t" + using ini +proof (induction "(nxt,i,j)" arbitrary: nxt i j t rule: wf_induct_rule[OF wf_rel_buildtab [of "\p\"]]) + case 1 + have *: "Suc (T_buildtab p nxt' i' j' t) \ 2*\p\ - 2*i + j + t" + if eq: "buildtab_step p nxt i j = (nxt', i', j')" and "Suc i < \p\" + for nxt' i' j' t + proof - + have invar': "buildtab_invariant p nxt' i' j'" + using "1.prems" buildtab_invariant that by fastforce + then have nextj: "j>0 \ nxt'!!j < j" + using eq "1.prems" + by (auto simp: buildtab_invariant_def is_next_def buildtab_step_def split: if_split_asm) + then have decreasing: "((nxt', i', j'), nxt, i, j) \ rel_buildtab \p\" + using that by (auto simp: rel_buildtab_def same_fst_def buildtab_step_def split: if_split_asm) + then have "T_buildtab p nxt' i' j' t \ 2 * \p\ - 2 * i' + j' + t" + using "1.hyps" invar' by blast + then show ?thesis + using "1.prems" that nextj + by (force simp: T_buildtab.simps [of p nxt' i' j'] buildtab_step_def split: if_split_asm) + qed + show ?case + using * [where t = "Suc t"] by (auto simp add: T_buildtab.simps split: prod.split) qed lemma T_buildtab_linear: - assumes "2 \ \p\" - shows "the (T_buildtab p (array 0 \p\) 1 0 0) \ 2*(\p\ - 1)" - using assms T_buildtab_correct [OF buildtab_invariant_init, of p 0] by linarith + assumes "2 \ \p\" + shows "T_buildtab p (array 0 \p\) 1 0 0 \ 2*(\p\ - 1)" + using assms T_buildtab_correct [OF buildtab_invariant_init, of p 0] by auto subsection \The actual string search algorithm\ definition "KMP_step p nxt a i j = (if a!!i = p!!j then (Suc i, Suc j) else if j=0 then (Suc i, 0) else (i, nxt!!j))" text \The conjunction of the invariants given in the Why3 development\ definition KMP_invariant :: "'a array \ 'a array \ nat \ nat \ bool" where "KMP_invariant p a i j = (j \ \p\ \ j\i \ i \ \a\ \ matches a (i-j) p 0 j \ (\k < i-j. \ matches a k p 0 \p\))" text \The invariant trivially holds upon initialisation\ lemma KMP_invariant_init: "KMP_invariant p a 0 0" by (auto simp: KMP_invariant_def) text \The invariant holds after an iteration\ lemma KMP_invariant: assumes ini: "KMP_invariant p a i j" and j: "j < \p\" and i: "i < \a\" shows "let (i',j') = KMP_step p (table p) a i j in KMP_invariant p a i' j'" proof (cases "a!!i = p!!j") case True then show ?thesis using assms by (simp add: KMP_invariant_def KMP_step_def matches_right_extension) next case neq: False show ?thesis proof (cases "j=0") case True with neq assms show ?thesis by (simp add: matches_contradiction_at_first KMP_invariant_def KMP_step_def less_Suc_eq) next case False then have is_nxt: "is_next p j (table p !!j)" using assms is_next_table j by blast then have "table p !! j \ j" by (simp add: is_next_def) moreover have "matches a (i - table p !! j) p 0 (table p !! j)" by (meson is_nxt KMP_invariant_def ini next_iteration) moreover have False if k: "k < i - table p !! j" and ma: "matches a k p 0 \p\" for k proof - have "k \ i-j" by (metis KMP_invariant_def add_0 ini j le_add_diff_inverse2 ma matches_contradiction_at_i neq) then show False by (meson KMP_invariant_def ini is_nxt k linorder_cases ma next_is_maximal') qed ultimately show ?thesis using neq assms False by (auto simp: KMP_invariant_def KMP_step_def) qed qed text \The first three arguments are precomputed so that they are not part of the inner loop.\ -definition search :: "nat \ nat \ nat array \ 'a array \ 'a array \ nat \ nat \ (nat * nat) option" where - "search m n nxt p a i j = while_option (\(i, j). j < m \ i < n) (\(i,j). KMP_step p nxt a i j) (i,j)" +partial_function (tailrec) search :: "nat \ nat \ nat array \ 'a array \ 'a array \ nat \ nat \ nat * nat" where + "search m n nxt p a i j = + (if j < m \ i < n then let (i',j') = KMP_step p nxt a i j in search m n nxt p a i' j' + else (i,j))" +declare search.simps[code] -lemma search_halts: - assumes "KMP_invariant p a i j" - shows "\y. search \p\ \a\ (table p) p a i j = Some y" - unfolding search_def -proof ((intro measure_while_option_Some[of "\(i,j). KMP_invariant p a i j" _ _ - "\(i, j). 2 * \a\ - 2 * i + j"], rule conjI; clarify), goal_cases) - case (2 i j) - moreover obtain i' j' where \: "(i', j') = KMP_step p (table p) a i j" by (metis surj_pair) - moreover have "KMP_invariant p a i' j'" using \ KMP_invariant[OF 2(1) 2(2) 2(3)] by auto - ultimately have "2 * \a\ - 2 * i' + j' < 2 * \a\ - 2 * i + j" - using is_next_table[of j p] - by (auto simp: KMP_invariant_def KMP_step_def matches_def is_next_def split: if_split_asm) - then show ?case using \ by (auto split: prod.splits) -qed (use KMP_invariant assms in fastforce)+ +definition "rel_KMP n = lex_prod (measure (\i. n-i)) (measure id)" +lemma wf_rel_KMP: "wf (rel_KMP n)" + unfolding rel_KMP_def by (auto intro: wf_same_fst) text \Also expresses the absence of a match, when @{term "r = \a\"}\ definition first_occur :: "'a array \ 'a array \ nat \ bool" where "first_occur p a r = ((r < \a\ \ matches a r p 0 \p\) \ (\k matches a k p 0 \p\))" lemma KMP_correct: - assumes ini: "KMP_invariant p a i j" - defines [simp]: "nxt \ table p" - shows "let (i',j') = the (search \p\ \a\ nxt p a i j) in first_occur p a (if j' = \p\ then i' - \p\ else i')" -proof - - obtain i' j' where \: "while_option (\(i, j). j < \p\ \ i < \a\) (\(i,j). KMP_step p nxt a i j) (i,j) = Some (i', j')" - using search_halts[OF ini] by (auto simp: search_def) - have "KMP_invariant p a i' j'" - using while_option_rule[OF _ \, of "\(i', j'). KMP_invariant p a i' j'"] ini KMP_invariant by fastforce - with \ while_option_stop[OF \] show ?thesis - by (auto simp: search_def KMP_invariant_def first_occur_def matches_def) + assumes ini: "KMP_invariant p a i j" + defines [simp]: "nxt \ table p" + shows "let (i',j') = search \p\ \a\ nxt p a i j in first_occur p a (if j' = \p\ then i' - \p\ else i')" + using ini +proof (induction "(i,j)" arbitrary: i j rule: wf_induct_rule[OF wf_rel_KMP [of "\a\"]]) + case (1 i j) + then have ij: "j \ \p\" "j \ i" "i \ \a\" + and match: "matches a (i - j) p 0 j" + and nomatch: "(\k matches a k p 0 \p\)" + by (auto simp: KMP_invariant_def) + show ?case + proof (cases "j < \p\ \ i < \a\") + case True + have "first_occur p a (if j'' = \p\ then i'' - \p\ else i'')" + if eq: "KMP_step p (table p) a i j = (i', j')" and eq': "search \p\ \a\ nxt p a i' j' = (i'',j'')" + for i' j' i'' j'' + proof - + have decreasing: "((i',j'), i, j) \ rel_KMP \a\" + using that is_next_table [of j] True + by (auto simp: rel_KMP_def KMP_step_def is_next_def split: if_split_asm) + show ?thesis + using "1.hyps" [OF decreasing] "1.prems" KMP_invariant that True by fastforce + qed + with True show ?thesis + by (smt (verit, best) case_prodI2 nxt_def prod.case_distrib search.simps) + next + case False + have "False" if "matches a k p 0 \p\" "j < \p\" "i = \a\" for k + proof - + have "\p\+k \ i" + using that by (simp add: matches_def) + with that nomatch show False by auto + qed + with False ij show ?thesis + apply (simp add: first_occur_def split: prod.split) + by (metis le_less_Suc_eq match nomatch not_less_eq prod.inject search.simps) + qed qed - definition KMP_search :: "'a array \ 'a array \ nat \ nat" where - "KMP_search p a = the (search \p\ \a\ (table p) p a 0 0)" + "KMP_search p a = search \p\ \a\ (table p) p a 0 0" declare KMP_search_def[code] lemma KMP_search: "(i,j) = KMP_search p a \ first_occur p a (if j = \p\ then i - \p\ else i)" unfolding KMP_search_def using KMP_correct[OF KMP_invariant_init[of p a]] by auto - subsection \Examples\ +text \Building the table, examples from the KMP paper and from Cormen et al.\ definition "Knuth_pattern = array_of_list [1,2,3,1,2,3,1,3,1,2::nat]" value "list_of_array (table Knuth_pattern)" definition "CLR_pattern = array_of_list [1,2,1,2,1,2,1,2,3,1::nat]" value "list_of_array (table CLR_pattern)" +text \Worst-case string searches\ + definition bad_list :: "nat \ nat list" where "bad_list n = replicate n 0 @ [Suc 0]" definition "bad_pattern = array_of_list (bad_list 1000)" definition "bad_string = array_of_list (bad_list 2000000)" definition "worse_string = array_of_list (replicate 2000000 (0::nat))" definition "lousy_string = array_of_list (concat (replicate 2002 (bad_list 999)))" value "list_of_array (table bad_pattern)" text \A successful search\ value "KMP_search bad_pattern bad_string" +text \The search above from the specification alone, i.e. brute-force\ lemma "matches bad_string (2000001-1001) bad_pattern 0 1001" by eval text \Unsuccessful searches\ value "KMP_search bad_pattern worse_string" +text \The search above from the specification alone, i.e. brute-force\ lemma "\k<2000000. \ matches worse_string k bad_pattern 0 1001" by eval value "KMP_search lousy_string bad_string" lemma "\k < \lousy_string\. \ matches lousy_string k bad_pattern 0 1001" by eval -subsection \Alternative approach, by declaring tail-recursive functions\ - -text \Declaring a partial recursive function with the tailrec option -relaxes the need for a termination proof, because a tail-recursive recursion -equation can never cause inconsistency.\ - -subsubsection \The build-table loop and its correctness\ - -partial_function (tailrec) buildtab' :: "'a array \ nat array \ nat \ nat \ nat array" where - "buildtab' p nxt i j = - (if Suc i < \p\ - then let (nxt',i',j') = buildtab_step p nxt i j in buildtab' p nxt' i' j' - else nxt)" -declare buildtab'.simps[code] - -text \Nevertheless, termination must eventually be shown: - to use induction to reason about executions. We do so by defining -a well founded relation. Termination proofs are by well-founded induction.\ - -definition "rel_buildtab m = inv_image (lex_prod (measure (\i. m-i)) (measure id)) snd" - -lemma wf_rel_buildtab: "wf (rel_buildtab m)" - unfolding rel_buildtab_def - by (auto intro: wf_same_fst) +subsection \Alternative approach, expressing the algorithms as while loops\ -lemma buildtab_correct': - assumes k: "0 k < \p\" and ini: "buildtab_invariant p nxt i j" - shows "is_next p k (buildtab' p nxt i j !! k)" - using ini -proof (induction "(nxt,i,j)" arbitrary: nxt i j rule: wf_induct_rule[OF wf_rel_buildtab [of "\p\"]]) - case (1 nxt i j) - show ?case - proof (cases "Suc i < \p\") - case True - then obtain nxt' i' j' - where eq: "(nxt', i', j') = buildtab_step p nxt i j" and invar': "buildtab_invariant p nxt' i' j'" - using "1.prems" buildtab_invariant by (metis surj_pair) - then have "j>0 \ nxt'!!j < j" - using "1.prems" - by (auto simp: buildtab_invariant_def is_next_def buildtab_step_def split: if_split_asm) - then have decreasing: "((nxt', i', j'), nxt, i, j) \ rel_buildtab \p\" - using eq True by (auto simp: rel_buildtab_def buildtab_step_def split: if_split_asm) - show ?thesis - using "1.hyps" [OF decreasing invar'] "1.prems" eq True - by(auto simp add: buildtab'.simps[of p nxt] split: prod.splits) - next - case False - with 1 k show ?thesis - by (auto simp: buildtab_invariant_def buildtab'.simps) - qed +definition buildtabW:: "'a array \ nat array \ nat \ nat \ nat array option" where + "buildtabW p nxt i j \ + map_option fst (while_option (\(_, i', _). Suc i' < \p\) + (\(nxt', i', j'). buildtab_step p nxt' i' j') + (nxt, i, j))" + +lemma buildtabW_halts: + assumes "buildtab_invariant p nxt i j" + shows "\y. buildtabW p nxt i j = Some y" +proof - + have "\y. (\p nxt i j. while_option (\(_, i', _). Suc i' < \p\) + (\(nxt', i', j'). buildtab_step p nxt' i' j') + (nxt, i, j)) p nxt i j = Some y" + proof (rule measure_while_option_Some[of "\(nxt, i, j). buildtab_invariant p nxt i j" _ _ + "(\p (nxt, i, j). 2 * \p\ - 2 * i + j) p"], + clarify, rule conjI, goal_cases) + case (2 nxt i j) + then show ?case by (auto simp: buildtab_step_def buildtab_invariant_def matches_def is_next_def) + qed (fastforce simp: assms buildtab_invariant)+ + then show ?thesis unfolding buildtabW_def by blast qed -subsubsection \Linearity of @{term buildtab}\ - -partial_function (tailrec) T_buildtab' :: "'a array \ nat array \ nat \ nat \ nat \ nat" where - "T_buildtab' p nxt i j t = - (if Suc i < \p\ - then let (nxt',i',j') = buildtab_step p nxt i j in T_buildtab' p nxt' i' j' (Suc t) - else t)" - -lemma T_buildtab_correct': - assumes ini: "buildtab_invariant p nxt i j" - shows "T_buildtab' p nxt i j t \ 2*\p\ - 2*i + j + t" - using ini -proof (induction "(nxt,i,j)" arbitrary: nxt i j t rule: wf_induct_rule[OF wf_rel_buildtab [of "\p\"]]) - case 1 - have *: "Suc (T_buildtab' p nxt' i' j' t) \ 2*\p\ - 2*i + j + t" - if eq: "buildtab_step p nxt i j = (nxt', i', j')" and "Suc i < \p\" - for nxt' i' j' t - proof - - have invar': "buildtab_invariant p nxt' i' j'" - using "1.prems" buildtab_invariant that by fastforce - then have nextj: "j>0 \ nxt'!!j < j" - using eq "1.prems" - by (auto simp: buildtab_invariant_def is_next_def buildtab_step_def split: if_split_asm) - then have decreasing: "((nxt', i', j'), nxt, i, j) \ rel_buildtab \p\" - using that by (auto simp: rel_buildtab_def same_fst_def buildtab_step_def split: if_split_asm) - then have "T_buildtab' p nxt' i' j' t \ 2 * \p\ - 2 * i' + j' + t" - using "1.hyps" invar' by blast - then show ?thesis - using "1.prems" that nextj - by (force simp: T_buildtab'.simps [of p nxt' i' j'] buildtab_step_def split: if_split_asm) - qed - show ?case - using * [where t = "Suc t"] by (auto simp add: T_buildtab'.simps split: prod.split) +lemma buildtabW_correct: + assumes k: "0 k < \p\" and ini: "buildtab_invariant p nxt i j" + shows "is_next p k (the (buildtabW p nxt i j) !! k)" +proof - + obtain nxt' i' j' where \: + "while_option (\(_, i', _). Suc i' < \p\) (\(nxt', i', j'). buildtab_step p nxt' i' j') (nxt, i, j) = Some (nxt', i', j')" + using buildtabW_halts[OF ini] unfolding buildtabW_def by fast + from while_option_rule[OF _ \, of "\(nxt, i, j). buildtab_invariant p nxt i j"] + have "buildtab_invariant p nxt' i' j'" using buildtab_invariant ini by fastforce + with while_option_stop[OF \] \ show ?thesis + using assms k by (auto simp: is_next_def matches_def buildtab_invariant_def buildtabW_def) qed -lemma T_buildtab_linear': - assumes "2 \ \p\" - shows "T_buildtab' p (array 0 \p\) 1 0 0 \ 2*(\p\ - 1)" - using assms T_buildtab_correct' [OF buildtab_invariant_init, of p 0] by auto - -subsection \The actual string search algorithm\ - -partial_function (tailrec) search' :: "nat \ nat \ nat array \ 'a array \ 'a array \ nat \ nat \ nat * nat" where - "search' m n nxt p a i j = - (if j < m \ i < n then let (i',j') = KMP_step p nxt a i j in search' m n nxt p a i' j' - else (i,j))" -declare search'.simps[code] - -definition "rel_KMP n = lex_prod (measure (\i. n-i)) (measure id)" - -lemma wf_rel_KMP: "wf (rel_KMP n)" - unfolding rel_KMP_def by (auto intro: wf_same_fst) +subsubsection \Linearity of @{term buildtabW}\ -lemma KMP_correct': - assumes ini: "KMP_invariant p a i j" - defines [simp]: "nxt \ table p" - shows "let (i',j') = search' \p\ \a\ nxt p a i j in first_occur p a (if j' = \p\ then i' - \p\ else i')" - using ini -proof (induction "(i,j)" arbitrary: i j rule: wf_induct_rule[OF wf_rel_KMP [of "\a\"]]) - case (1 i j) - then have ij: "j \ \p\" "j \ i" "i \ \a\" - and match: "matches a (i - j) p 0 j" - and nomatch: "(\k matches a k p 0 \p\)" - by (auto simp: KMP_invariant_def) - show ?case - proof (cases "j < \p\ \ i < \a\") - case True - have "first_occur p a (if j'' = \p\ then i'' - \p\ else i'')" - if eq: "KMP_step p (table p) a i j = (i', j')" and eq': "search' \p\ \a\ nxt p a i' j' = (i'',j'')" - for i' j' i'' j'' - proof - - have decreasing: "((i',j'), i, j) \ rel_KMP \a\" - using that is_next_table [of j] True - by (auto simp: rel_KMP_def KMP_step_def is_next_def split: if_split_asm) - show ?thesis - using "1.hyps" [OF decreasing] "1.prems" KMP_invariant that True by fastforce - qed - with True show ?thesis - by (smt (verit, best) case_prodI2 nxt_def prod.case_distrib search'.simps) - next - case False - have "False" if "matches a k p 0 \p\" "j < \p\" "i = \a\" for k - proof - - have "\p\+k \ i" - using that by (simp add: matches_def) - with that nomatch show False by auto - qed - with False ij show ?thesis - apply (simp add: first_occur_def split: prod.split) - by (metis le_less_Suc_eq match nomatch not_less_eq prod.inject search'.simps) - qed +definition T_buildtabW :: "'a array \ nat array \ nat \ nat \ nat \ nat option" where + "T_buildtabW p nxt i j t \ map_option (\(_, _, _, r). r) + (while_option (\(_, i, _, _). Suc i < \p\) + (\(nxt, i, j, t). let (nxt', i', j') = buildtab_step p nxt i j in (nxt', i', j', Suc t)) + (nxt, i, j, t))" + +lemma T_buildtabW_halts: + assumes "buildtab_invariant p nxt i j" + shows "\y. T_buildtabW p nxt i j t = Some y" +proof - + have "\y. (while_option (\(_, i, _, _). Suc i < \p\) + (\(nxt, i, j, t). let (nxt', i', j') = buildtab_step p nxt i j in (nxt', i', j', Suc t)) + (nxt, i, j, t)) = Some y" + proof (intro measure_while_option_Some[of "\(nxt, i, j, t). buildtab_invariant p nxt i j" _ _ + "(\p (nxt, i, j, t). 2 * \p\ - 2 * i + j) p"], clarify, rule conjI, goal_cases) + case (2 nxt i j t) + then show ?case by (auto simp: buildtab_step_def buildtab_invariant_def is_next_def) + qed (fastforce simp: assms buildtab_invariant split: prod.splits)+ + then show ?thesis unfolding T_buildtabW_def by blast qed +lemma T_buildtabW_correct: + assumes ini: "buildtab_invariant p nxt i j" + shows "the (T_buildtabW p nxt i j t) \ 2*\p\ - 2*i + j + t" +proof - + let ?b = "(\(nxt', i', j', t'). Suc i' < \p\)" + let ?c = "(\(nxt, i, j, t). let (nxt', i', j') = buildtab_step p nxt i j in (nxt', i', j', Suc t))" + let ?s = "(nxt, i, j, t)" + let ?P1 = "\(nxt', i', j', t'). buildtab_invariant p nxt' i' j' + \ (if Suc i' < \p\ then Suc t' else t') \ 2 * \p\ - (2 * i' - j') + t'" + let ?P2 = "\(nxt', i', j', t'). buildtab_invariant p nxt' i' j' + \ 2 * \p\ - 2 * i' + j' + t' \ 2 * \p\ - 2 * i + j + t" + + obtain nxt' i' j' t' where \: "(while_option ?b ?c ?s) = Some (nxt', i', j', t')" + using T_buildtabW_halts[OF ini] unfolding T_buildtabW_def by fast + have 1: "(\s. ?P1 s \ ?b s \ ?P1 (?c s))" proof (clarify, intro conjI, goal_cases) + case (2 nxt\<^sub>1 i\<^sub>1 j\<^sub>1 t\<^sub>1 nxt\<^sub>2 i\<^sub>2 j\<^sub>2 t\<^sub>2) + then show ?case + by (auto simp: buildtab_step_def split: if_split_asm) + qed (insert buildtab_invariant, fastforce split: prod.splits) + have P1: "?P1 ?s" using ini by auto + from while_option_rule[OF 1 \ P1] + have invar1: "buildtab_invariant p nxt' i' j'" and + invar2: "t' \ 2 * \p\ - (2 * i' - j') + t'" by blast (simp add: while_option_stop[OF \]) + have "?P2 (nxt', i', j', t')" proof (rule while_option_rule[OF _ \], clarify, intro conjI, goal_cases) + case (1 nxt\<^sub>1 i\<^sub>1 j\<^sub>1 t\<^sub>1 nxt\<^sub>2 i\<^sub>2 j\<^sub>2 t\<^sub>2) + with buildtab_invariant[OF 1(3)] show invar: ?case by (auto split: prod.splits) + next + case (2 nxt\<^sub>1 i\<^sub>1 j\<^sub>1 t\<^sub>1 nxt\<^sub>2 i\<^sub>2 j\<^sub>2 t\<^sub>2) + with 2(4) show ?case + by (auto 0 2 simp: buildtab_step_def buildtab_invariant_def is_next_def split: if_split_asm) + qed (use ini in simp) + with invar1 invar2 \ have "t' \ 2 * \p\ - 2 * i + j + t" by simp + with \ show ?thesis by (simp add: T_buildtabW_def) +qed + +lemma T_buildtabW_linear: + assumes "2 \ \p\" + shows "the (T_buildtabW p (array 0 \p\) 1 0 0) \ 2*(\p\ - 1)" + using assms T_buildtabW_correct [OF buildtab_invariant_init, of p 0] by linarith + +subsubsection \The actual string search algorithm\ +definition searchW :: "nat \ nat \ nat array \ 'a array \ 'a array \ nat \ nat \ (nat * nat) option" where + "searchW m n nxt p a i j = while_option (\(i, j). j < m \ i < n) (\(i,j). KMP_step p nxt a i j) (i,j)" + +lemma searchW_halts: + assumes "KMP_invariant p a i j" + shows "\y. searchW \p\ \a\ (table p) p a i j = Some y" + unfolding searchW_def +proof ((intro measure_while_option_Some[of "\(i,j). KMP_invariant p a i j" _ _ + "\(i, j). 2 * \a\ - 2 * i + j"], rule conjI; clarify), goal_cases) + case (2 i j) + moreover obtain i' j' where \: "(i', j') = KMP_step p (table p) a i j" by (metis surj_pair) + moreover have "KMP_invariant p a i' j'" using \ KMP_invariant[OF 2(1) 2(2) 2(3)] by auto + ultimately have "2 * \a\ - 2 * i' + j' < 2 * \a\ - 2 * i + j" + using is_next_table[of j p] + by (auto simp: KMP_invariant_def KMP_step_def matches_def is_next_def split: if_split_asm) + then show ?case using \ by (auto split: prod.splits) +qed (use KMP_invariant assms in fastforce)+ + +lemma KMP_correctW: + assumes ini: "KMP_invariant p a i j" + defines [simp]: "nxt \ table p" + shows "let (i',j') = the (searchW \p\ \a\ nxt p a i j) in first_occur p a (if j' = \p\ then i' - \p\ else i')" +proof - + obtain i' j' where \: "while_option (\(i, j). j < \p\ \ i < \a\) (\(i,j). KMP_step p nxt a i j) (i,j) = Some (i', j')" + using searchW_halts[OF ini] by (auto simp: searchW_def) + have "KMP_invariant p a i' j'" + using while_option_rule[OF _ \, of "\(i', j'). KMP_invariant p a i' j'"] ini KMP_invariant by fastforce + with \ while_option_stop[OF \] show ?thesis + by (auto simp: searchW_def KMP_invariant_def first_occur_def matches_def) +qed + +definition KMP_searchW :: "'a array \ 'a array \ nat \ nat" where + "KMP_searchW p a = the (searchW \p\ \a\ (table p) p a 0 0)" +declare KMP_searchW_def[code] + +lemma KMP_searchW: + "(i,j) = KMP_searchW p a \ first_occur p a (if j = \p\ then i - \p\ else i)" +unfolding KMP_searchW_def +using KMP_correctW[OF KMP_invariant_init[of p a]] by auto + end