diff --git a/thys/KnuthMorrisPratt/KnuthMorrisPratt.thy b/thys/KnuthMorrisPratt/KnuthMorrisPratt.thy --- a/thys/KnuthMorrisPratt/KnuthMorrisPratt.thy +++ b/thys/KnuthMorrisPratt/KnuthMorrisPratt.thy @@ -1,528 +1,676 @@ section \Knuth-Morris-Pratt fast string search algorithm\ text \Development based on Filliâtre's verification using Why3\ -theory KnuthMorrisPratt imports "Collections.Diff_Array" +text \many thanks to Christian Zimmerer for expressing the algorithms +in the form of while loops\ + +theory KnuthMorrisPratt imports "Collections.Diff_Array" "HOL-Library.While_Combinator" begin subsection \General definitions\ abbreviation "array \ new_array" abbreviation length_array :: "'a array \ nat" ("\_\") where "length_array \ array_length" notation array_get (infixl "!!" 100) notation array_set ("_[_ ::= _]" [1000,0,0] 900) definition matches :: "'a array \ nat \ 'a array \ nat \ nat \ bool" where "matches a i b j n = (i+n \ \a\ \ j+n \ \b\ \ (\k i \ \a\ \ j \ \b\" by (simp add: matches_def) lemma matches_right_extension: "\matches a i b j n; Suc (i+n) \ \a\; Suc (j+n) \ \b\; a!!(i+n) = b!!(j+n)\ \ matches a i b j (Suc n)" by (auto simp: matches_def less_Suc_eq) lemma matches_contradiction_at_first: "\0 < n; a!!i \ b!!j\ \ \ matches a i b j n" by (auto simp: matches_def) lemma matches_contradiction_at_i: "\a!!(i+k) \ b!!(j+k); k < n\ \ \ matches a i b j n" by (auto simp: matches_def) lemma matches_right_weakening: "\matches a i b j n; n' \ n\ \ matches a i b j n'" by (auto simp: matches_def) lemma matches_left_weakening_add: assumes "matches a i b j n" "k\n" shows "matches a (i+k) b (j+k) (n-k)" using assms by (auto simp: matches_def less_diff_conv algebra_simps) lemma matches_left_weakening: assumes "matches a (i - (n - n')) b (j - (n - n')) n" and "n' \ n" and "n - n' \ i" and "n - n' \ j" shows "matches a i b j n'" by (metis assms diff_diff_cancel diff_le_self le_add_diff_inverse2 matches_left_weakening_add) lemma matches_sym: "matches a i b j n \ matches b j a i n" by (simp add: matches_def) lemma matches_trans: "\matches a i b j n; matches b j c k n\ \ matches a i c k n" by (simp add: matches_def) text \Denotes the maximal @{term "n < j"} such that the first @{term n} elements of @{term p} match the last @{term n} elements of @{term p}[0..@{term "j-1"}] The first @{term n} characters of the pattern have a copy starting at @{term "j-n"}.\ definition is_next :: "'a array \ nat \ nat \ bool" where "is_next p j n = (n < j \ matches p (j-n) p 0 n \ (\m. n < m \ m < j \ \ matches p (j-m) p 0 m))" lemma next_iteration: assumes "matches a (i-j) p 0 j" "is_next p j n" "j \ i" shows "matches a (i-n) p 0 n" proof - have "matches a (i-n) p (j-n) n" using assms by (auto simp: algebra_simps is_next_def intro: matches_left_weakening [where n=j]) moreover have "matches p (j-n) p 0 n" using is_next_def assms by blast ultimately show ?thesis using matches_trans by blast qed lemma next_is_maximal: assumes "matches a (i-j) p 0 j" "is_next p j n" and "j \ i" "n < m" "m < j" shows "\ matches a (i-m) p 0 m" proof - have "matches a (i-m) p (j-m) m" by (rule matches_left_weakening [where n=j]) (use assms in auto) with assms show ?thesis by (meson is_next_def matches_sym matches_trans) qed text \Filliâtre's version of the lemma above\ corollary next_is_maximal': assumes match: "matches a (i-j) p 0 j" "is_next p j n" and more: "j \ i" "i-j < k" "k < i-n" shows "\ matches a k p 0 \p\" proof - have "\ matches a k p 0 (i-k)" using next_is_maximal [OF match] more by (metis add.commute diff_diff_cancel diff_le_self le_trans less_diff_conv less_or_eq_imp_le) moreover have "i-k < \p\" using assms by (auto simp: matches_def) ultimately show ?thesis using matches_right_weakening nless_le by blast qed lemma next_1_0 [simp]: "is_next p 1 0 \ 1 \ \p\" by (auto simp add: is_next_def matches_def) subsection \The Build-table routine\ definition buildtab_step :: "'a array \ nat array \ nat \ nat \ nat array \ nat \ nat" where "buildtab_step p nxt i j = (if p!!i = p!!j then (nxt[Suc i::=Suc j], Suc i, Suc j) else if j=0 then (nxt[Suc i::=0], Suc i, j) else (nxt, i, nxt!!j))" text \The conjunction of the invariants given in the Why3 development\ definition buildtab_invariant :: "'a array \ nat array \ nat \ nat \ bool" where "buildtab_invariant p nxt i j = (\nxt\ = \p\ \ i \ \p\ \ j matches p (i-j) p 0 j \ (\k. 0 < k \ k \ i \ is_next p k (nxt!!k)) \ (\k. Suc j < k \ k < Suc i \ \ matches p (Suc i - k) p 0 k))" text \The invariant trivially holds upon initialisation\ lemma buildtab_invariant_init: "\p\ \ 2 \ buildtab_invariant p (array 0 \p\) 1 0" by (auto simp: buildtab_invariant_def is_next_def) subsubsection \The invariant holds after an iteration\ text \each conjunct is proved separately\ lemma length_invariant: shows "let (nxt',i',j') = buildtab_step p nxt i j in \nxt'\ = \nxt\" by (simp add: buildtab_step_def) lemma i_invariant: assumes "Suc i < m" shows "let (nxt',i',j') = buildtab_step p nxt i j in i' \ m" using assms by (simp add: buildtab_step_def) lemma ji_invariant: assumes "buildtab_invariant p nxt i j" shows "let (nxt',i',j') = buildtab_step p nxt i j in j' nxt !! j < j" using assms by (simp add: buildtab_invariant_def is_next_def) show ?thesis using assms by (auto simp add: buildtab_invariant_def buildtab_step_def intro: order.strict_trans j) qed lemma matches_invariant: assumes "buildtab_invariant p nxt i j" and "Suc i < \p\" shows "let (nxt',i',j') = buildtab_step p nxt i j in matches p (i' - j') p 0 j'" using assms by (auto simp: buildtab_invariant_def buildtab_step_def matches_right_extension intro: next_iteration) lemma is_next_invariant: assumes "buildtab_invariant p nxt i j" and "Suc i < \p\" shows "let (nxt',i',j') = buildtab_step p nxt i j in \k. 0 < k \ k \ i' \ is_next p k (nxt'!!k)" proof (cases "p!!i = p!!j") case True with assms have "matches p (i-j) p 0 (Suc j)" by (simp add: buildtab_invariant_def matches_right_extension) then have "is_next p (Suc i) (Suc j)" using assms by (auto simp: is_next_def buildtab_invariant_def) with True assms show ?thesis by (simp add: buildtab_invariant_def buildtab_step_def array_get_array_set_other le_Suc_eq) next case neq: False show ?thesis proof (cases "j=0") case True then have "\ matches p (i-j) p 0 (Suc j)" using matches_contradiction_at_first neq by fastforce with True assms have "is_next p (Suc i) 0" unfolding is_next_def buildtab_invariant_def by (metis Suc_leI diff_Suc_Suc diff_zero matches_empty nat_less_le zero_less_Suc) with assms neq show ?thesis by (simp add: buildtab_invariant_def buildtab_step_def array_get_array_set_other le_Suc_eq) next case False with assms neq show ?thesis by (simp add: buildtab_invariant_def buildtab_step_def) qed qed lemma non_matches_aux: assumes "Suc (Suc j) < k" "matches p (Suc (Suc i) - k) p 0 k" shows "matches p (Suc i - (k - Suc 0)) p 0 (k - Suc 0)" using matches_right_weakening assms by fastforce lemma non_matches_invariant: assumes bt: "buildtab_invariant p nxt i j" and "\p\ \ 2" "Suc i < \p\" shows "let (nxt',i',j') = buildtab_step p nxt i j in \k. Suc j' < k \ k < Suc i' \ \ matches p (Suc i' - k) p 0 k" proof (cases "p!!i = p!!j") case True with non_matches_aux bt show ?thesis by (fastforce simp add: Suc_less_eq2 buildtab_step_def buildtab_invariant_def) next case neq: False have "j < i" using bt by (auto simp: buildtab_invariant_def) then have no_match_Sj: "\ matches p (Suc i - Suc j) p 0 (Suc j)" using neq by (force simp: matches_def) show ?thesis proof (cases "j=0") case True have "\ matches p (Suc (Suc i) - k) p 0 k" if "Suc 0 < k" and "k < Suc (Suc i)" for k proof (cases "k = Suc (Suc 0)") case True with assms neq that show ?thesis by (auto simp add: matches_contradiction_at_first \j=0\) next case False then have "Suc 0 < k - Suc 0" using that by linarith with bt that have "\ matches p (Suc i - (k - Suc 0)) p 0 (k - Suc 0)" using True by (force simp add: buildtab_invariant_def) then show ?thesis by (metis False Suc_lessI non_matches_aux that(1)) qed with True show ?thesis by (auto simp: buildtab_invariant_def buildtab_step_def) next case False then have "0 < j" by auto have False if lessK: "Suc (nxt!!j) < k" and "k < Suc i" and contra: "matches p (Suc i - k) p 0 k" for k proof (cases "Suc j < k") case True then show ?thesis using bt that by (auto simp: buildtab_invariant_def) next case False then have "k \ j" using less_Suc_eq_le no_match_Sj contra by fastforce obtain k' where k': "k = Suc k'" "k' < i" using \k < Suc i\ lessK not0_implies_Suc by fastforce have "is_next p j (nxt!!j)" using bt that \j>0\ by (auto simp: buildtab_invariant_def) with no_match_Sj k' have "\ matches p (j - k') p 0 k'" by (metis Suc_less_eq \k \ j\ is_next_def lessK less_Suc_eq_le) moreover have "matches p 0 p (i-j) j" using bt buildtab_invariant_def by (metis matches_sym) then have "matches p (j-k') p (i - k') k'" using \j False k' matches_left_weakening by (smt (verit, best) Nat.diff_diff_eq Suc_leI Suc_le_lessD \k \ j\ diff_diff_cancel diff_is_0_eq lessI nat_less_le) moreover have "matches p (i - k') p 0 k'" using contra k' matches_right_weakening by fastforce ultimately show False using matches_trans by blast qed with assms neq False show ?thesis by (auto simp: buildtab_invariant_def buildtab_step_def) qed qed lemma buildtab_invariant: assumes ini: "buildtab_invariant p nxt i j" and "Suc i < \p\" "(nxt',i',j') = buildtab_step p nxt i j" shows "buildtab_invariant p nxt' i' j'" unfolding buildtab_invariant_def using assms i_invariant [of concl: p nxt i j] length_invariant [of p nxt i j] ji_invariant [OF ini] matches_invariant [OF ini] non_matches_invariant [OF ini] is_next_invariant [OF ini] by (simp add: buildtab_invariant_def split: prod.split_asm) subsubsection \The build-table loop and its correctness\ -partial_function (tailrec) buildtab :: "'a array \ nat array \ nat \ nat \ nat array" where - "buildtab p nxt i j = - (if Suc i < \p\ - then let (nxt',i',j') = buildtab_step p nxt i j in buildtab p nxt' i' j' - else nxt)" -declare buildtab.simps[code] +definition buildtab:: "'a array \ nat array \ nat \ nat \ nat array option" where + "buildtab p nxt i j \ + map_option fst (while_option (\(_, i', _). Suc i' < \p\) + (\(nxt', i', j'). buildtab_step p nxt' i' j') + (nxt, i, j))" -definition "rel_buildtab m = inv_image (lex_prod (measure (\i. m-i)) (measure id)) snd" - -lemma wf_rel_buildtab: "wf (rel_buildtab m)" - unfolding rel_buildtab_def - by (auto intro: wf_same_fst) +lemma buildtab_halts: + assumes "buildtab_invariant p nxt i j" + shows "\y. buildtab p nxt i j = Some y" +proof - + have "\y. (\p nxt i j. while_option (\(_, i', _). Suc i' < \p\) + (\(nxt', i', j'). buildtab_step p nxt' i' j') + (nxt, i, j)) p nxt i j = Some y" + proof (rule measure_while_option_Some[of "\(nxt, i, j). buildtab_invariant p nxt i j" _ _ + "(\p (nxt, i, j). 2 * \p\ - 2 * i + j) p"], + clarify, rule conjI, goal_cases) + case (2 nxt i j) + then show ?case by (auto simp: buildtab_step_def buildtab_invariant_def matches_def is_next_def) + qed (fastforce simp: assms buildtab_invariant)+ + then show ?thesis unfolding buildtab_def by blast +qed lemma buildtab_correct: assumes k: "0 k < \p\" and ini: "buildtab_invariant p nxt i j" - shows "is_next p k (buildtab p nxt i j !! k)" - using ini -proof (induction "(nxt,i,j)" arbitrary: nxt i j rule: wf_induct_rule[OF wf_rel_buildtab [of "\p\"]]) - case (1 nxt i j) - show ?case - proof (cases "Suc i < \p\") - case True - then obtain nxt' i' j' - where eq: "(nxt', i', j') = buildtab_step p nxt i j" and invar': "buildtab_invariant p nxt' i' j'" - using "1.prems" buildtab_invariant by (metis surj_pair) - then have "j>0 \ nxt'!!j < j" - using "1.prems" - by (auto simp: buildtab_invariant_def is_next_def buildtab_step_def split: if_split_asm) - then have decreasing: "((nxt', i', j'), nxt, i, j) \ rel_buildtab \p\" - using eq True by (auto simp: rel_buildtab_def buildtab_step_def split: if_split_asm) - show ?thesis - using "1.hyps" [OF decreasing invar'] "1.prems" eq True - by(auto simp add: buildtab.simps[of p nxt] split: prod.splits) - next - case False - with 1 k show ?thesis - by (auto simp: buildtab_invariant_def buildtab.simps) - qed + shows "is_next p k (the (buildtab p nxt i j) !! k)" +proof - + obtain nxt' i' j' where \: + "while_option (\(_, i', _). Suc i' < \p\) (\(nxt', i', j'). buildtab_step p nxt' i' j') (nxt, i, j) = Some (nxt', i', j')" + using buildtab_halts[OF ini] unfolding buildtab_def by fast + from while_option_rule[OF _ \, of "\(nxt, i, j). buildtab_invariant p nxt i j"] + have "buildtab_invariant p nxt' i' j'" using buildtab_invariant ini by fastforce + with while_option_stop[OF \] \ show ?thesis + using assms k by (auto simp: is_next_def matches_def buildtab_invariant_def buildtab_def) qed text \Before building the table, check for the degenerate case\ definition table :: "'a array \ nat array" where - "table p = (if \p\ > 1 then buildtab p (array 0 \p\) 1 0 + "table p = (if \p\ > 1 then the (buildtab p (array 0 \p\) 1 0) else array 0 \p\)" declare table_def[code] lemma is_next_table: assumes "0 < j \ j < \p\" shows "is_next p j (table p !!j)" - using buildtab_correct[of _ p] buildtab_invariant_init[of p] assms by (simp add: table_def) + using buildtab_correct[of _ p] buildtab_invariant_init[of p] assms by (simp cong: table_def) subsubsection \Linearity of @{term buildtab}\ -partial_function (tailrec) T_buildtab :: "'a array \ nat array \ nat \ nat \ nat \ nat" where - "T_buildtab p nxt i j t = - (if Suc i < \p\ - then let (nxt',i',j') = buildtab_step p nxt i j in T_buildtab p nxt' i' j' (Suc t) - else t)" +definition T_buildtab :: "'a array \ nat array \ nat \ nat \ nat \ nat option" where + "T_buildtab p nxt i j t \ map_option (\(_, _, _, r). r) + (while_option (\(_, i, _, _). Suc i < \p\) + (\(nxt, i, j, t). let (nxt', i', j') = buildtab_step p nxt i j in (nxt', i', j', Suc t)) + (nxt, i, j, t))" + +lemma T_buildtab_halts: + assumes "buildtab_invariant p nxt i j" + shows "\y. T_buildtab p nxt i j t = Some y" +proof - + have "\y. (while_option (\(_, i, _, _). Suc i < \p\) + (\(nxt, i, j, t). let (nxt', i', j') = buildtab_step p nxt i j in (nxt', i', j', Suc t)) + (nxt, i, j, t)) = Some y" + proof (intro measure_while_option_Some[of "\(nxt, i, j, t). buildtab_invariant p nxt i j" _ _ + "(\p (nxt, i, j, t). 2 * \p\ - 2 * i + j) p"], clarify, rule conjI, goal_cases) + case (2 nxt i j t) + then show ?case by (auto simp: buildtab_step_def buildtab_invariant_def is_next_def) + qed (fastforce simp: assms buildtab_invariant split: prod.splits)+ + then show ?thesis unfolding T_buildtab_def by blast +qed lemma T_buildtab_correct: assumes ini: "buildtab_invariant p nxt i j" - shows "T_buildtab p nxt i j t \ 2*\p\ - 2*i + j + t" - using ini -proof (induction "(nxt,i,j)" arbitrary: nxt i j t rule: wf_induct_rule[OF wf_rel_buildtab [of "\p\"]]) - case 1 - have *: "Suc (T_buildtab p nxt' i' j' t) \ 2*\p\ - 2*i + j + t" - if eq: "buildtab_step p nxt i j = (nxt', i', j')" and "Suc i < \p\" - for nxt' i' j' t - proof - - have invar': "buildtab_invariant p nxt' i' j'" - using "1.prems" buildtab_invariant that by fastforce - then have nextj: "j>0 \ nxt'!!j < j" - using eq "1.prems" - by (auto simp: buildtab_invariant_def is_next_def buildtab_step_def split: if_split_asm) - then have decreasing: "((nxt', i', j'), nxt, i, j) \ rel_buildtab \p\" - using that by (auto simp: rel_buildtab_def same_fst_def buildtab_step_def split: if_split_asm) - then have "T_buildtab p nxt' i' j' t \ 2 * \p\ - 2 * i' + j' + t" - using "1.hyps" invar' by blast - then show ?thesis - using "1.prems" that nextj - by (force simp: T_buildtab.simps [of p nxt' i' j'] buildtab_step_def split: if_split_asm) - qed - show ?case - using * [where t = "Suc t"] by (auto simp add: T_buildtab.simps split: prod.split) + shows "the (T_buildtab p nxt i j t) \ 2*\p\ - 2*i + j + t" +proof - + let ?b = "(\(nxt', i', j', t'). Suc i' < \p\)" + let ?c = "(\(nxt, i, j, t). let (nxt', i', j') = buildtab_step p nxt i j in (nxt', i', j', Suc t))" + let ?s = "(nxt, i, j, t)" + let ?P1 = "\(nxt', i', j', t'). buildtab_invariant p nxt' i' j' + \ (if Suc i' < \p\ then Suc t' else t') \ 2 * \p\ - (2 * i' - j') + t'" + let ?P2 = "\(nxt', i', j', t'). buildtab_invariant p nxt' i' j' + \ 2 * \p\ - 2 * i' + j' + t' \ 2 * \p\ - 2 * i + j + t" + + obtain nxt' i' j' t' where \: "(while_option ?b ?c ?s) = Some (nxt', i', j', t')" + using T_buildtab_halts[OF ini] unfolding T_buildtab_def by fast + have 1: "(\s. ?P1 s \ ?b s \ ?P1 (?c s))" proof (clarify, intro conjI, goal_cases) + case (2 nxt\<^sub>1 i\<^sub>1 j\<^sub>1 t\<^sub>1 nxt\<^sub>2 i\<^sub>2 j\<^sub>2 t\<^sub>2) + then show ?case + by (auto simp: buildtab_step_def split: if_split_asm) + qed (insert buildtab_invariant, fastforce split: prod.splits) + have P1: "?P1 ?s" using ini by auto + from while_option_rule[OF 1 \ P1] + have invar1: "buildtab_invariant p nxt' i' j'" and + invar2: "t' \ 2 * \p\ - (2 * i' - j') + t'" by blast (simp add: while_option_stop[OF \]) + have "?P2 (nxt', i', j', t')" proof (rule while_option_rule[OF _ \], clarify, intro conjI, goal_cases) + case (1 nxt\<^sub>1 i\<^sub>1 j\<^sub>1 t\<^sub>1 nxt\<^sub>2 i\<^sub>2 j\<^sub>2 t\<^sub>2) + with buildtab_invariant[OF 1(3)] show invar: ?case by (auto split: prod.splits) + next + case (2 nxt\<^sub>1 i\<^sub>1 j\<^sub>1 t\<^sub>1 nxt\<^sub>2 i\<^sub>2 j\<^sub>2 t\<^sub>2) + with 2(4) show ?case + by (auto 0 2 simp: buildtab_step_def buildtab_invariant_def is_next_def split: if_split_asm) + qed (use ini in simp) + with invar1 invar2 \ have "t' \ 2 * \p\ - 2 * i + j + t" by simp + with \ show ?thesis by (simp add: T_buildtab_def) qed lemma T_buildtab_linear: - assumes "2 \ \p\" - shows "T_buildtab p (array 0 \p\) 1 0 0 \ 2*(\p\ - 1)" - using assms T_buildtab_correct [OF buildtab_invariant_init, of p 0] by auto + assumes "2 \ \p\" + shows "the (T_buildtab p (array 0 \p\) 1 0 0) \ 2*(\p\ - 1)" + using assms T_buildtab_correct [OF buildtab_invariant_init, of p 0] by linarith subsection \The actual string search algorithm\ definition "KMP_step p nxt a i j = (if a!!i = p!!j then (Suc i, Suc j) else if j=0 then (Suc i, 0) else (i, nxt!!j))" text \The conjunction of the invariants given in the Why3 development\ definition KMP_invariant :: "'a array \ 'a array \ nat \ nat \ bool" where "KMP_invariant p a i j = (j \ \p\ \ j\i \ i \ \a\ \ matches a (i-j) p 0 j \ (\k < i-j. \ matches a k p 0 \p\))" text \The invariant trivially holds upon initialisation\ lemma KMP_invariant_init: "KMP_invariant p a 0 0" by (auto simp: KMP_invariant_def) text \The invariant holds after an iteration\ lemma KMP_invariant: assumes ini: "KMP_invariant p a i j" and j: "j < \p\" and i: "i < \a\" shows "let (i',j') = KMP_step p (table p) a i j in KMP_invariant p a i' j'" proof (cases "a!!i = p!!j") case True then show ?thesis using assms by (simp add: KMP_invariant_def KMP_step_def matches_right_extension) next case neq: False show ?thesis proof (cases "j=0") case True with neq assms show ?thesis by (simp add: matches_contradiction_at_first KMP_invariant_def KMP_step_def less_Suc_eq) next case False then have is_nxt: "is_next p j (table p !!j)" using assms is_next_table j by blast then have "table p !! j \ j" by (simp add: is_next_def) moreover have "matches a (i - table p !! j) p 0 (table p !! j)" by (meson is_nxt KMP_invariant_def ini next_iteration) moreover have False if k: "k < i - table p !! j" and ma: "matches a k p 0 \p\" for k proof - have "k \ i-j" by (metis KMP_invariant_def add_0 ini j le_add_diff_inverse2 ma matches_contradiction_at_i neq) then show False by (meson KMP_invariant_def ini is_nxt k linorder_cases ma next_is_maximal') qed ultimately show ?thesis using neq assms False by (auto simp: KMP_invariant_def KMP_step_def) qed qed text \The first three arguments are precomputed so that they are not part of the inner loop.\ -partial_function (tailrec) search :: "nat \ nat \ nat array \ 'a array \ 'a array \ nat \ nat \ nat * nat" where - "search m n nxt p a i j = - (if j < m \ i < n then let (i',j') = KMP_step p nxt a i j in search m n nxt p a i' j' - else (i,j))" -declare search.simps[code] +definition search :: "nat \ nat \ nat array \ 'a array \ 'a array \ nat \ nat \ (nat * nat) option" where + "search m n nxt p a i j = while_option (\(i, j). j < m \ i < n) (\(i,j). KMP_step p nxt a i j) (i,j)" -definition "rel_KMP n = lex_prod (measure (\i. n-i)) (measure id)" +lemma search_halts: + assumes "KMP_invariant p a i j" + shows "\y. search \p\ \a\ (table p) p a i j = Some y" + unfolding search_def +proof ((intro measure_while_option_Some[of "\(i,j). KMP_invariant p a i j" _ _ + "\(i, j). 2 * \a\ - 2 * i + j"], rule conjI; clarify), goal_cases) + case (2 i j) + moreover obtain i' j' where \: "(i', j') = KMP_step p (table p) a i j" by (metis surj_pair) + moreover have "KMP_invariant p a i' j'" using \ KMP_invariant[OF 2(1) 2(2) 2(3)] by auto + ultimately have "2 * \a\ - 2 * i' + j' < 2 * \a\ - 2 * i + j" + using is_next_table[of j p] + by (auto simp: KMP_invariant_def KMP_step_def matches_def is_next_def split: if_split_asm) + then show ?case using \ by (auto split: prod.splits) +qed (use KMP_invariant assms in fastforce)+ -lemma wf_rel_KMP: "wf (rel_KMP n)" - unfolding rel_KMP_def by (auto intro: wf_same_fst) text \Also expresses the absence of a match, when @{term "r = \a\"}\ definition first_occur :: "'a array \ 'a array \ nat \ bool" where "first_occur p a r = ((r < \a\ \ matches a r p 0 \p\) \ (\k matches a k p 0 \p\))" lemma KMP_correct: - assumes ini: "KMP_invariant p a i j" - defines [simp]: "nxt \ table p" - shows "let (i',j') = search \p\ \a\ nxt p a i j in first_occur p a (if j' = \p\ then i' - \p\ else i')" - using ini -proof (induction "(i,j)" arbitrary: i j rule: wf_induct_rule[OF wf_rel_KMP [of "\a\"]]) - case (1 i j) - then have ij: "j \ \p\" "j \ i" "i \ \a\" - and match: "matches a (i - j) p 0 j" - and nomatch: "(\k matches a k p 0 \p\)" - by (auto simp: KMP_invariant_def) - show ?case - proof (cases "j < \p\ \ i < \a\") - case True - have "first_occur p a (if j'' = \p\ then i'' - \p\ else i'')" - if eq: "KMP_step p (table p) a i j = (i', j')" and eq': "search \p\ \a\ nxt p a i' j' = (i'',j'')" - for i' j' i'' j'' - proof - - have decreasing: "((i',j'), i, j) \ rel_KMP \a\" - using that is_next_table [of j] True - by (auto simp: rel_KMP_def KMP_step_def is_next_def split: if_split_asm) - show ?thesis - using "1.hyps" [OF decreasing] "1.prems" KMP_invariant that True by fastforce - qed - with True show ?thesis - by (smt (verit, best) case_prodI2 nxt_def prod.case_distrib search.simps) - next - case False - have "False" if "matches a k p 0 \p\" "j < \p\" "i = \a\" for k - proof - - have "\p\+k \ i" - using that by (simp add: matches_def) - with that nomatch show False by auto - qed - with False ij show ?thesis - apply (simp add: first_occur_def split: prod.split) - by (metis le_less_Suc_eq match nomatch not_less_eq prod.inject search.simps) - qed + assumes ini: "KMP_invariant p a i j" + defines [simp]: "nxt \ table p" + shows "let (i',j') = the (search \p\ \a\ nxt p a i j) in first_occur p a (if j' = \p\ then i' - \p\ else i')" +proof - + obtain i' j' where \: "while_option (\(i, j). j < \p\ \ i < \a\) (\(i,j). KMP_step p nxt a i j) (i,j) = Some (i', j')" + using search_halts[OF ini] by (auto simp: search_def) + have "KMP_invariant p a i' j'" + using while_option_rule[OF _ \, of "\(i', j'). KMP_invariant p a i' j'"] ini KMP_invariant by fastforce + with \ while_option_stop[OF \] show ?thesis + by (auto simp: search_def KMP_invariant_def first_occur_def matches_def) qed + definition KMP_search :: "'a array \ 'a array \ nat \ nat" where - "KMP_search p a = search \p\ \a\ (table p) p a 0 0" + "KMP_search p a = the (search \p\ \a\ (table p) p a 0 0)" declare KMP_search_def[code] lemma KMP_search: "(i,j) = KMP_search p a \ first_occur p a (if j = \p\ then i - \p\ else i)" unfolding KMP_search_def using KMP_correct[OF KMP_invariant_init[of p a]] by auto subsection \Examples\ definition "Knuth_pattern = array_of_list [1,2,3,1,2,3,1,3,1,2::nat]" value "list_of_array (table Knuth_pattern)" definition "CLR_pattern = array_of_list [1,2,1,2,1,2,1,2,3,1::nat]" value "list_of_array (table CLR_pattern)" definition bad_list :: "nat \ nat list" where "bad_list n = replicate n 0 @ [Suc 0]" definition "bad_pattern = array_of_list (bad_list 1000)" definition "bad_string = array_of_list (bad_list 2000000)" definition "worse_string = array_of_list (replicate 2000000 (0::nat))" definition "lousy_string = array_of_list (concat (replicate 2002 (bad_list 999)))" value "list_of_array (table bad_pattern)" text \A successful search\ value "KMP_search bad_pattern bad_string" lemma "matches bad_string (2000001-1001) bad_pattern 0 1001" by eval text \Unsuccessful searches\ value "KMP_search bad_pattern worse_string" lemma "\k<2000000. \ matches worse_string k bad_pattern 0 1001" by eval value "KMP_search lousy_string bad_string" lemma "\k < \lousy_string\. \ matches lousy_string k bad_pattern 0 1001" by eval +subsection \Alternative approach, by declaring tail-recursive functions\ + +text \Declaring a partial recursive function with the tailrec option +relaxes the need for a termination proof, because a tail-recursive recursion +equation can never cause inconsistency.\ + +subsubsection \The build-table loop and its correctness\ + +partial_function (tailrec) buildtab' :: "'a array \ nat array \ nat \ nat \ nat array" where + "buildtab' p nxt i j = + (if Suc i < \p\ + then let (nxt',i',j') = buildtab_step p nxt i j in buildtab' p nxt' i' j' + else nxt)" +declare buildtab'.simps[code] + +text \Nevertheless, termination must eventually be shown: + to use induction to reason about executions. We do so by defining +a well founded relation. Termination proofs are by well-founded induction.\ + +definition "rel_buildtab m = inv_image (lex_prod (measure (\i. m-i)) (measure id)) snd" + +lemma wf_rel_buildtab: "wf (rel_buildtab m)" + unfolding rel_buildtab_def + by (auto intro: wf_same_fst) + +lemma buildtab_correct': + assumes k: "0 k < \p\" and ini: "buildtab_invariant p nxt i j" + shows "is_next p k (buildtab' p nxt i j !! k)" + using ini +proof (induction "(nxt,i,j)" arbitrary: nxt i j rule: wf_induct_rule[OF wf_rel_buildtab [of "\p\"]]) + case (1 nxt i j) + show ?case + proof (cases "Suc i < \p\") + case True + then obtain nxt' i' j' + where eq: "(nxt', i', j') = buildtab_step p nxt i j" and invar': "buildtab_invariant p nxt' i' j'" + using "1.prems" buildtab_invariant by (metis surj_pair) + then have "j>0 \ nxt'!!j < j" + using "1.prems" + by (auto simp: buildtab_invariant_def is_next_def buildtab_step_def split: if_split_asm) + then have decreasing: "((nxt', i', j'), nxt, i, j) \ rel_buildtab \p\" + using eq True by (auto simp: rel_buildtab_def buildtab_step_def split: if_split_asm) + show ?thesis + using "1.hyps" [OF decreasing invar'] "1.prems" eq True + by(auto simp add: buildtab'.simps[of p nxt] split: prod.splits) + next + case False + with 1 k show ?thesis + by (auto simp: buildtab_invariant_def buildtab'.simps) + qed +qed + +subsubsection \Linearity of @{term buildtab}\ + +partial_function (tailrec) T_buildtab' :: "'a array \ nat array \ nat \ nat \ nat \ nat" where + "T_buildtab' p nxt i j t = + (if Suc i < \p\ + then let (nxt',i',j') = buildtab_step p nxt i j in T_buildtab' p nxt' i' j' (Suc t) + else t)" + +lemma T_buildtab_correct': + assumes ini: "buildtab_invariant p nxt i j" + shows "T_buildtab' p nxt i j t \ 2*\p\ - 2*i + j + t" + using ini +proof (induction "(nxt,i,j)" arbitrary: nxt i j t rule: wf_induct_rule[OF wf_rel_buildtab [of "\p\"]]) + case 1 + have *: "Suc (T_buildtab' p nxt' i' j' t) \ 2*\p\ - 2*i + j + t" + if eq: "buildtab_step p nxt i j = (nxt', i', j')" and "Suc i < \p\" + for nxt' i' j' t + proof - + have invar': "buildtab_invariant p nxt' i' j'" + using "1.prems" buildtab_invariant that by fastforce + then have nextj: "j>0 \ nxt'!!j < j" + using eq "1.prems" + by (auto simp: buildtab_invariant_def is_next_def buildtab_step_def split: if_split_asm) + then have decreasing: "((nxt', i', j'), nxt, i, j) \ rel_buildtab \p\" + using that by (auto simp: rel_buildtab_def same_fst_def buildtab_step_def split: if_split_asm) + then have "T_buildtab' p nxt' i' j' t \ 2 * \p\ - 2 * i' + j' + t" + using "1.hyps" invar' by blast + then show ?thesis + using "1.prems" that nextj + by (force simp: T_buildtab'.simps [of p nxt' i' j'] buildtab_step_def split: if_split_asm) + qed + show ?case + using * [where t = "Suc t"] by (auto simp add: T_buildtab'.simps split: prod.split) +qed + +lemma T_buildtab_linear': + assumes "2 \ \p\" + shows "T_buildtab' p (array 0 \p\) 1 0 0 \ 2*(\p\ - 1)" + using assms T_buildtab_correct' [OF buildtab_invariant_init, of p 0] by auto + +subsection \The actual string search algorithm\ + +partial_function (tailrec) search' :: "nat \ nat \ nat array \ 'a array \ 'a array \ nat \ nat \ nat * nat" where + "search' m n nxt p a i j = + (if j < m \ i < n then let (i',j') = KMP_step p nxt a i j in search' m n nxt p a i' j' + else (i,j))" +declare search'.simps[code] + +definition "rel_KMP n = lex_prod (measure (\i. n-i)) (measure id)" + +lemma wf_rel_KMP: "wf (rel_KMP n)" + unfolding rel_KMP_def by (auto intro: wf_same_fst) + +lemma KMP_correct': + assumes ini: "KMP_invariant p a i j" + defines [simp]: "nxt \ table p" + shows "let (i',j') = search' \p\ \a\ nxt p a i j in first_occur p a (if j' = \p\ then i' - \p\ else i')" + using ini +proof (induction "(i,j)" arbitrary: i j rule: wf_induct_rule[OF wf_rel_KMP [of "\a\"]]) + case (1 i j) + then have ij: "j \ \p\" "j \ i" "i \ \a\" + and match: "matches a (i - j) p 0 j" + and nomatch: "(\k matches a k p 0 \p\)" + by (auto simp: KMP_invariant_def) + show ?case + proof (cases "j < \p\ \ i < \a\") + case True + have "first_occur p a (if j'' = \p\ then i'' - \p\ else i'')" + if eq: "KMP_step p (table p) a i j = (i', j')" and eq': "search' \p\ \a\ nxt p a i' j' = (i'',j'')" + for i' j' i'' j'' + proof - + have decreasing: "((i',j'), i, j) \ rel_KMP \a\" + using that is_next_table [of j] True + by (auto simp: rel_KMP_def KMP_step_def is_next_def split: if_split_asm) + show ?thesis + using "1.hyps" [OF decreasing] "1.prems" KMP_invariant that True by fastforce + qed + with True show ?thesis + by (smt (verit, best) case_prodI2 nxt_def prod.case_distrib search'.simps) + next + case False + have "False" if "matches a k p 0 \p\" "j < \p\" "i = \a\" for k + proof - + have "\p\+k \ i" + using that by (simp add: matches_def) + with that nomatch show False by auto + qed + with False ij show ?thesis + apply (simp add: first_occur_def split: prod.split) + by (metis le_less_Suc_eq match nomatch not_less_eq prod.inject search'.simps) + qed +qed + end diff --git a/thys/KnuthMorrisPratt/document/root.tex b/thys/KnuthMorrisPratt/document/root.tex --- a/thys/KnuthMorrisPratt/document/root.tex +++ b/thys/KnuthMorrisPratt/document/root.tex @@ -1,51 +1,52 @@ \documentclass[11pt,a4paper]{article} \usepackage[T1]{fontenc} \usepackage{isabelle,isabellesym} \usepackage{amssymb} % this should be the last package used \usepackage{pdfsetup} % urls in roman style, theory text in math-similar italics \urlstyle{rm} \isabellestyle{it} \newcommand{\lenarray}[1]{|#1|} \begin{document} \title{Knuth--Morris--Pratt String Search} \author{Lawrence C. Paulson} \maketitle \begin{abstract} The naive algorithm to search for a pattern $p$ within a string $a$ compares corresponding characters from left to right, and in case of a mismatch, shifts one position along $a$ and starts again. The worst-case time is $O(\lenarray{p}\lenarray{a})$. Knuth--Morris--Pratt~\cite{knuth-fast-pattern} exploits the knowledge gained from the partial match, never re-comparing characters that matched and thereby achieving linear time. At the first mismatched character, it shifts $p$ as far to the right as safely possible. To do so, it consults a precomputed table, based on the pattern~$p$. The KMP algorithm is proved correct. \end{abstract} \newpage \tableofcontents \paragraph*{Acknowledgements} This development closely follows a formal verification of the Knuth--Morris--Pratt algorithm by Jean-Christophe Filliâtre using Why3. +Christian Zimmerer reworked the algorithms and termination proofs to use while loops. Tobias Nipkow made helpful suggestions. \newpage % include generated text of all theories \input{session} \bibliographystyle{abbrv} \bibliography{root} \end{document}