35private $DTopen =
false;
39private $lastParagraph =
'';
45 # State constants for the definition list colon extraction 46privateconst COLON_STATE_TEXT = 0;
47privateconst COLON_STATE_TAG = 1;
48privateconst COLON_STATE_TAGSTART = 2;
49privateconst COLON_STATE_CLOSETAG = 3;
50privateconst COLON_STATE_TAGSLASH = 4;
51privateconst COLON_STATE_COMMENT = 5;
52privateconst COLON_STATE_COMMENTDASH = 6;
53privateconst COLON_STATE_COMMENTDASHDASH = 7;
54privateconst COLON_STATE_LC = 8;
65 $pass =
newself( $text, $lineStart );
66return $pass->execute();
73privatefunction __construct( $text, $lineStart ) {
75 $this->lineStart = $lineStart;
81privatefunction hasOpenParagraph() {
82return $this->lastParagraph !==
'';
91privatefunction closeParagraph( $atTheEnd =
false ) {
93if ( $this->hasOpenParagraph() ) {
94 $result =
'</' . $this->lastParagraph .
'>';
100 $this->lastParagraph =
'';
113privatefunction getCommon( $st1, $st2 ) {
114 $shorter = min( strlen( $st1 ), strlen( $st2 ) );
116for ( $i = 0; $i < $shorter; ++$i ) {
117if ( $st1[$i] !== $st2[$i] ) {
131privatefunction openList( $char ) {
132 $result = $this->closeParagraph();
135 $result .=
"<ul><li>";
136 } elseif ( $char ===
'#' ) {
137 $result .=
"<ol><li>";
138 } elseif ( $char ===
':' ) {
139 $result .=
"<dl><dd>";
140 } elseif ( $char ===
';' ) {
141 $result .=
"<dl><dt>";
144 $result =
'<!-- ERR 1 -->';
156privatefunction nextItem( $char ) {
157if ( $char ===
'*' || $char ===
'#' ) {
159 } elseif ( $char ===
':' || $char ===
';' ) {
161if ( $this->DTopen ) {
166return $close .
'<dt>';
168 $this->DTopen =
false;
169return $close .
'<dd>';
172return'<!-- ERR 2 -->';
181privatefunction closeList( $char ) {
184 } elseif ( $char ===
'#' ) {
186 } elseif ( $char ===
':' ) {
187if ( $this->DTopen ) {
188 $this->DTopen =
false;
194return'<!-- ERR 3 -->';
203privatefunction execute() {
205 # Parsing through the text line by line. The main thing 206 # happening here is handling of block-level elements p, pre, 207 # and making lists from lines starting with * # : etc. 208 $textLines = StringUtils::explode(
"\n", $text );
210 $lastPrefix = $output =
'';
211 $this->DTopen = $inBlockElem =
false;
214 $inBlockquote =
false;
216for ( $textLines->rewind(); $textLines->valid(); ) {
217 $inputLine = $textLines->current();
219 $notLastLine = $textLines->valid();
222if ( !$this->lineStart ) {
223 $output .= $inputLine;
224 $this->lineStart =
true;
232 $lastPrefixLength = strlen( $lastPrefix );
233 $preCloseMatch = preg_match(
'/<\\/pre/i', $inputLine );
234 $preOpenMatch = preg_match(
'/<pre/i', $inputLine );
235 # If not in a <pre> element, scan for and figure out what prefixes are there. 236if ( !$this->inPre ) {
237 # Multiple prefixes may abut each other for nested lists. 238 $prefixLength = strspn( $inputLine,
'*#:;' );
239 $prefix = substr( $inputLine, 0, $prefixLength );
242 # ; and : are both from definition-lists, so they're equivalent 243 # for the purposes of determining whether or not we need to open/close 245 $prefix2 = str_replace(
';',
':', $prefix );
246$t = substr( $inputLine, $prefixLength );
247 $this->inPre = (bool)$preOpenMatch;
249 # Don't interpret any other prefixes in preformatted text 251 $prefix = $prefix2 =
'';
256if ( $prefixLength && $lastPrefix === $prefix2 ) {
257 # Same as the last item, so no need to deal with nesting or opening stuff 258 $output .= $this->nextItem( substr( $prefix, -1 ) );
261if ( substr( $prefix, -1 ) ===
';' ) {
262 # The one nasty exception: definition lists work like this: 263 # ; title : definition text 264 # So we check for : in the remainder text to split up the 265 # title and definition, without b0rking links. 267if ( $this->findColonNoLinks( $t, $term, $t2 ) !==
false ) {
269// Trim whitespace in list items 270 $output .= trim( $term ) . $this->nextItem(
':' );
273 } elseif ( $prefixLength || $lastPrefixLength ) {
274 # We need to open or close prefixes, or both. 276 # Either open or close a level... 277 $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
280 # Close all the prefixes which aren't shared. 281while ( $commonPrefixLength < $lastPrefixLength ) {
282// @phan-suppress-next-line PhanTypeInvalidDimOffset 283 $output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] );
287 # Continue the current prefix if appropriate. 288if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
289 $output .= $this->nextItem( $prefix[$commonPrefixLength - 1] );
292 # Close an open <dt> if we have a <dd> (":") starting on this line 293if ( $this->DTopen && $commonPrefixLength > 0 && $prefix[$commonPrefixLength - 1] ===
':' ) {
294 $output .= $this->nextItem(
':' );
297 # Open prefixes where appropriate. 298if ( $lastPrefix && $prefixLength > $commonPrefixLength ) {
301while ( $prefixLength > $commonPrefixLength ) {
302 $char = $prefix[$commonPrefixLength];
303 $output .= $this->openList( $char );
306 # @todo FIXME: This is dupe of code above 307if ( $this->findColonNoLinks( $t, $term, $t2 ) !==
false ) {
309// Trim whitespace in list items 310 $output .= trim( $term ) . $this->nextItem(
':' );
313 ++$commonPrefixLength;
315if ( !$prefixLength && $lastPrefix ) {
318 $lastPrefix = $prefix2;
321 # If we have no prefixes, go to paragraph mode. 322if ( $prefixLength == 0 ) {
323 # No prefix (not in list)--go to paragraph mode 324 # @todo consider using a stack for nestable elements like span, table and div 326// P-wrapping and indent-pre are suppressed inside, not outside 327 $blockElems =
'table|h1|h2|h3|h4|h5|h6|pre|p|ul|ol|dl';
328// P-wrapping and indent-pre are suppressed outside, not inside 329 $antiBlockElems =
'td|th';
331 $openMatch = preg_match(
333 .
"({$blockElems})|\\/({$antiBlockElems})|" 335 .
'\\/?(tr|caption|dt|dd|li)' 339 $closeMatch = preg_match(
341 .
"\\/({$blockElems})|({$antiBlockElems})|" 343 .
'\\/?(center|blockquote|div|hr|mw:|aside|figure)|' 344// Used as Parser::TOC_PLACEHOLDER 345 .
'meta property="mw:' 350// Any match closes the paragraph, but only when `!$closeMatch` 351// do we enter block mode. The oddities with table rows and 352// cells are to avoid paragraph wrapping in interstitial spaces 353// leading to fostered content. 355if ( $openMatch || $closeMatch ) {
357// Only close the paragraph if we're not inside a <pre> tag, or if 358// that <pre> tag has just been opened 359if ( !$this->inPre || $preOpenMatch ) {
360// @todo T7718: paragraph closed 361 $output .= $this->closeParagraph();
363if ( $preOpenMatch && !$preCloseMatch ) {
367while ( preg_match(
'/<(\\/?)blockquote[\s>]/i', $t,
368 $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset )
370 $inBlockquote = !$bqMatch[1][0];
// is this a close tag? 371 $bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] );
373 $inBlockElem = !$closeMatch;
374 } elseif ( !$inBlockElem && !$this->inPre ) {
375if ( substr( $t, 0, 1 ) ==
' ' 376 && ( $this->lastParagraph ===
'pre' || trim( $t ) !=
'' )
380if ( $this->lastParagraph !==
'pre' ) {
382 $output .= $this->closeParagraph() .
'<pre>';
383 $this->lastParagraph =
'pre';
386 } elseif ( preg_match(
'/^(?:<style\\b[^>]*>.*?<\\/style>\s*|<link\\b[^>]*>\s*)+$/iS', $t ) ) {
387 # T186965: <style> or <link> by itself on a line shouldn't open or close paragraphs. 388 # But it should clear $pendingPTag. 390 $output .= $this->closeParagraph();
395if ( trim( $t ) ===
'' ) {
397 $output .= $pendingPTag .
'<br />';
399 $this->lastParagraph =
'p';
400 } elseif ( $this->lastParagraph !==
'p' ) {
401 $output .= $this->closeParagraph();
404 $pendingPTag =
'</p><p>';
406 } elseif ( $pendingPTag ) {
407 $output .= $pendingPTag;
409 $this->lastParagraph =
'p';
410 } elseif ( $this->lastParagraph !==
'p' ) {
411 $output .= $this->closeParagraph() .
'<p>';
412 $this->lastParagraph =
'p';
417 # somewhere above we forget to get out of pre block (T2785) 418if ( $preCloseMatch && $this->inPre ) {
421if ( $pendingPTag ===
false ) {
422if ( $prefixLength === 0 ) {
424// Add a newline if there's an open paragraph 425// or we've yet to reach the last line. 426if ( $notLastLine || $this->hasOpenParagraph() ) {
430// Trim whitespace in list items 431 $output .= trim( $t );
435while ( $prefixLength ) {
436// @phan-suppress-next-line PhanTypeArraySuspicious $prefix set if $prefixLength is set 437 $output .= $this->closeList( $prefix2[$prefixLength - 1] );
439// Note that a paragraph is only ever opened when `prefixLength` 440// is zero, but we'll choose to be overly cautious. 441if ( !$prefixLength && $this->hasOpenParagraph() ) {
445 $output .= $this->closeParagraph(
true );
458privatefunction findColonNoLinks( $str, &$before, &$after ) {
459if ( !preg_match(
'/:|<|-\{/', $str, $m, PREG_OFFSET_CAPTURE ) ) {
464if ( $m[0][0] ===
':' ) {
465 # Easy; no tag nesting to worry about 466 $colonPos = $m[0][1];
467 $before = substr( $str, 0, $colonPos );
468 $after = substr( $str, $colonPos + 1 );
472 # Ugly state machine to walk through avoiding tags. 473 $state = self::COLON_STATE_TEXT;
478 $len = strlen( $str );
479for ( $i = $m[0][1]; $i < $len; $i++ ) {
483case self::COLON_STATE_TEXT:
486 # Could be either a <start> tag or an </end> tag 487 $state = self::COLON_STATE_TAGSTART;
492if ( $ltLevel === 0 ) {
494 $before = substr( $str, 0, $i );
495 $after = substr( $str, $i + 1 );
498 # Embedded in a tag; don't break it. 501 # Skip ahead looking for something interesting 502if ( !preg_match(
'/:|<|-\{/', $str, $m, PREG_OFFSET_CAPTURE, $i ) ) {
503 # Nothing else interesting 506if ( $m[0][0] ===
'-{' ) {
507 $state = self::COLON_STATE_LC;
511 # Skip ahead to next interesting character. 517case self::COLON_STATE_LC:
518 # In language converter markup -{ ... }- 519if ( !preg_match(
'/-\{|\}-/', $str, $m, PREG_OFFSET_CAPTURE, $i ) ) {
520 # Nothing else interesting to find; abort! 521 # We're nested in language converter markup, but there 522 # are no close tags left. Abort! 525if ( $m[0][0] ===
'-{' ) {
528 } elseif ( $m[0][0] ===
'}-' ) {
531if ( $lcLevel === 0 ) {
532 $state = self::COLON_STATE_TEXT;
536case self::COLON_STATE_TAG:
543if ( !isset( HTMLData::TAGS[
'void'][strtolower( $tagName )] ) ) {
546 $state = self::COLON_STATE_TEXT;
549 # Slash may be followed by >? 550 $state = self::COLON_STATE_TAGSLASH;
559case self::COLON_STATE_TAGSTART:
562 $state = self::COLON_STATE_CLOSETAG;
565 $state = self::COLON_STATE_COMMENT;
568 # Illegal early close? This shouldn't happen D: 569 $state = self::COLON_STATE_TEXT;
575 $state = self::COLON_STATE_TAG;
578case self::COLON_STATE_CLOSETAG:
584 # ignore the excess close tag, but keep looking for 585 # colons. (This matches Parsoid behavior.) 586wfDebug( __METHOD__ .
": Invalid input; too many close tags" );
588 $state = self::COLON_STATE_TEXT;
591case self::COLON_STATE_TAGSLASH:
593 # Yes, a self-closed tag <blah/> 594 $state = self::COLON_STATE_TEXT;
596 # Probably we're jumping the gun, and this is an attribute 597 $state = self::COLON_STATE_TAG;
600case self::COLON_STATE_COMMENT:
602 $state = self::COLON_STATE_COMMENTDASH;
605case self::COLON_STATE_COMMENTDASH:
607 $state = self::COLON_STATE_COMMENTDASHDASH;
609 $state = self::COLON_STATE_COMMENT;
612case self::COLON_STATE_COMMENTDASHDASH:
614 $state = self::COLON_STATE_TEXT;
616 $state = self::COLON_STATE_COMMENT;
620thrownew LogicException(
"State machine error in " . __METHOD__ );
623if ( $ltLevel > 0 || $lcLevel > 0 ) {
625 __METHOD__ .
": Invalid input; not enough close tags " .
626"(level $ltLevel/$lcLevel, state $state)"