Movatterモバイル変換


[0]ホーム

URL:


Source filesrc/strings/search.go

     1  // Copyright 2012 The Go Authors. All rights reserved.     2  // Use of this source code is governed by a BSD-style     3  // license that can be found in the LICENSE file.     4       5  package strings     6       7  // stringFinder efficiently finds strings in a source text. It's implemented     8  // using the Boyer-Moore string search algorithm:     9  // https://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm    10  // https://www.cs.utexas.edu/~moore/publications/fstrpos.pdf (note: this aged    11  // document uses 1-based indexing)    12  type stringFinder struct {    13  // pattern is the string that we are searching for in the text.    14  pattern string    15      16  // badCharSkip[b] contains the distance between the last byte of pattern    17  // and the rightmost occurrence of b in pattern. If b is not in pattern,    18  // badCharSkip[b] is len(pattern).    19  //    20  // Whenever a mismatch is found with byte b in the text, we can safely    21  // shift the matching frame at least badCharSkip[b] until the next time    22  // the matching char could be in alignment.    23  badCharSkip [256]int    24      25  // goodSuffixSkip[i] defines how far we can shift the matching frame given    26  // that the suffix pattern[i+1:] matches, but the byte pattern[i] does    27  // not. There are two cases to consider:    28  //    29  // 1. The matched suffix occurs elsewhere in pattern (with a different    30  // byte preceding it that we might possibly match). In this case, we can    31  // shift the matching frame to align with the next suffix chunk. For    32  // example, the pattern "mississi" has the suffix "issi" next occurring    33  // (in right-to-left order) at index 1, so goodSuffixSkip[3] ==    34  // shift+len(suffix) == 3+4 == 7.    35  //    36  // 2. If the matched suffix does not occur elsewhere in pattern, then the    37  // matching frame may share part of its prefix with the end of the    38  // matching suffix. In this case, goodSuffixSkip[i] will contain how far    39  // to shift the frame to align this portion of the prefix to the    40  // suffix. For example, in the pattern "abcxxxabc", when the first    41  // mismatch from the back is found to be in position 3, the matching    42  // suffix "xxabc" is not found elsewhere in the pattern. However, its    43  // rightmost "abc" (at position 6) is a prefix of the whole pattern, so    44  // goodSuffixSkip[3] == shift+len(suffix) == 6+5 == 11.    45  goodSuffixSkip []int    46  }    47      48  func makeStringFinder(pattern string) *stringFinder {    49  f := &stringFinder{    50  pattern:        pattern,    51  goodSuffixSkip: make([]int, len(pattern)),    52  }    53  // last is the index of the last character in the pattern.    54  last := len(pattern) - 1    55      56  // Build bad character table.    57  // Bytes not in the pattern can skip one pattern's length.    58  for i := range f.badCharSkip {    59  f.badCharSkip[i] = len(pattern)    60  }    61  // The loop condition is < instead of <= so that the last byte does not    62  // have a zero distance to itself. Finding this byte out of place implies    63  // that it is not in the last position.    64  for i := 0; i < last; i++ {    65  f.badCharSkip[pattern[i]] = last - i    66  }    67      68  // Build good suffix table.    69  // First pass: set each value to the next index which starts a prefix of    70  // pattern.    71  lastPrefix := last    72  for i := last; i >= 0; i-- {    73  if HasPrefix(pattern, pattern[i+1:]) {    74  lastPrefix = i + 1    75  }    76  // lastPrefix is the shift, and (last-i) is len(suffix).    77  f.goodSuffixSkip[i] = lastPrefix + last - i    78  }    79  // Second pass: find repeats of pattern's suffix starting from the front.    80  for i := 0; i < last; i++ {    81  lenSuffix := longestCommonSuffix(pattern, pattern[1:i+1])    82  if pattern[i-lenSuffix] != pattern[last-lenSuffix] {    83  // (last-i) is the shift, and lenSuffix is len(suffix).    84  f.goodSuffixSkip[last-lenSuffix] = lenSuffix + last - i    85  }    86  }    87      88  return f    89  }    90      91  func longestCommonSuffix(a, b string) (i int) {    92  for ; i < len(a) && i < len(b); i++ {    93  if a[len(a)-1-i] != b[len(b)-1-i] {    94  break    95  }    96  }    97  return    98  }    99     100  // next returns the index in text of the first occurrence of the pattern. If   101  // the pattern is not found, it returns -1.   102  func (f *stringFinder) next(text string) int {   103  i := len(f.pattern) - 1   104  for i < len(text) {   105  // Compare backwards from the end until the first unmatching character.   106  j := len(f.pattern) - 1   107  for j >= 0 && text[i] == f.pattern[j] {   108  i--   109  j--   110  }   111  if j < 0 {   112  return i + 1// match   113  }   114  i += max(f.badCharSkip[text[i]], f.goodSuffixSkip[j])   115  }   116  return -1   117  }   118  

View as plain text

go.dev uses cookies from Google to deliver and enhance the quality of its services and to analyze traffic.Learn more.

[8]ページ先頭

©2009-2025 Movatter.jp