@@ -1702,28 +1702,73 @@ ve.dm.Document.prototype.findText = function ( query, options = {} ) {
17021702return [ ] ;
17031703}
17041704
1705+ let normalizedQuery = query ;
17051706if ( ! options . caseSensitiveString ) {
1706- query = new Set ( Array . from ( query ) . map ( ( s ) => s . toLocaleLowerCase ( this . lang ) ) ) ;
1707+ normalizedQuery = new Set ( Array . from ( query ) . map ( ( s ) => s . toLocaleLowerCase ( this . lang ) ) ) ;
17071708}
17081709
17091710let minLen = Infinity ,
17101711maxLen = 0 ;
1711- query . forEach ( ( s ) => {
1712+ normalizedQuery . forEach ( ( s ) => {
17121713minLen = Math . min ( minLen , s . length ) ;
17131714maxLen = Math . max ( maxLen , s . length ) ;
17141715} ) ;
17151716
/**
 * Map an offset in the lowercased string back to the corresponding offset
 * in the original string.
 *
 * Lowercasing can change string length. For example, if s = '\u0130', then
 * s.length === 1 but s.toLocaleLowerCase( 'en' ).length === 2.
 *
 * This is an arrow function on purpose: it reads `this.lang`, and it is
 * called as a bare `fixOffset( ... )`, so a plain function expression would
 * not see the same `this` as the enclosing findText method.
 *
 * @param {string} s Original (not lowercased) string
 * @param {number} offsetLower Offset in the lowercased form of s
 * @return {number} Corresponding offset in the original string. If no offset
 *  in the original maps exactly onto offsetLower, the first offset whose
 *  lowercased prefix is at least offsetLower long (capped at s.length).
 */
const fixOffset = ( s, offsetLower ) => {
	// Length of the lowercased form of the first `end` code units of s
	const loweredPrefixLength = ( end ) => s.slice( 0, end ).toLocaleLowerCase( this.lang ).length;

	// Start by guessing that lowercasing didn't change the offset,
	// except when the offset is out of bounds in the original string
	let guess = Math.min( offsetLower, s.length );

	let diff = loweredPrefixLength( guess ) - offsetLower;
	if ( diff === 0 ) {
		// Optimization note: this will almost always be true.
		// Only rare characters change the length of a substring when lowercased.
		return guess;
	}

	while ( diff > 0 && guess > 0 ) {
		// The lowercased prefix is too long: shrink the original prefix
		guess--;
		diff = loweredPrefixLength( guess ) - offsetLower;
	}

	// Bounded by s.length: once the whole string is consumed the prefix cannot
	// grow any further, so an out-of-range offsetLower must not loop forever.
	while ( diff < 0 && guess < s.length ) {
		// The lowercased prefix is too short: grow the original prefix
		guess++;
		diff = loweredPrefixLength( guess ) - offsetLower;
	}

	// In some rare situations the diff might be positive now (which would
	// correspond to no offset in the original string mapping to the desired
	// offset); the closest offset at or after it is returned.
	return guess;
};
1753+
17161754data . forEachRunOfContent ( searchRange , ( off , line ) => {
1755+ let normalizedLine = line ;
17171756if ( ! options . caseSensitiveString ) {
1718- line = line . toLocaleLowerCase ( this . lang ) ;
1757+ normalizedLine = line . toLocaleLowerCase ( this . lang ) ;
17191758}
17201759
17211760// For each possible length, do a sliding window search on the normalized line
17221761for ( let len = minLen ; len <= maxLen ; len ++ ) {
1723- for ( let i = 0 ; i <= line . length - len ; i ++ ) {
1724- const substr = line . slice ( i , i + len ) ;
1725- if ( query . has ( substr ) ) {
1726- ranges . push ( new ve . Range ( off + i , off + i + len ) ) ;
1762+ for ( let i = 0 ; i <= normalizedLine . length - len ; i ++ ) {
1763+ const substr = normalizedLine . slice ( i , i + len ) ;
1764+ if ( normalizedQuery . has ( substr ) ) {
1765+ let start = i ;
1766+ let end = i + len ;
1767+ if ( ! options . caseSensitiveString ) {
1768+ start = fixOffset ( line , start ) ;
1769+ end = fixOffset ( line , end ) ;
1770+ }
1771+ ranges . push ( new ve . Range ( off + start , off + end ) ) ;
17271772if ( options . noOverlaps ) {
17281773i += len - 1 ;
17291774}