Movatterモバイル変換


[0]ホーム

URL:


MediaWiki master
BlockLevelPass.php
Go to the documentation of this file.
1<?php
2
27namespaceMediaWiki\Parser;
28
29use LogicException;
30use Wikimedia\RemexHtml\HTMLData;
31useWikimedia\StringUtils\StringUtils;
32
33classBlockLevelPass {
/** @var bool True while the most recently opened definition-list item is a <dt> */
private $DTopen = false;

/** @var bool True while the lines being processed are inside a <pre> element */
private $inPre = false;

/** @var string Open paragraph-level element: 'p', 'pre', or '' when none is open */
private $lastParagraph = '';

/** @var bool When falsy, the first input line is copied to the output unprocessed */
private $lineStart;

/** @var string The text this pass operates on */
private $text;

# State constants for the definition list colon extraction

/** Plain text, outside of any tag, comment or language converter markup */
private const COLON_STATE_TEXT = 0;
/** Inside a tag, after the first character of its name */
private const COLON_STATE_TAG = 1;
/** Immediately after a "<" */
private const COLON_STATE_TAGSTART = 2;
/** Inside a closing tag, after "</" */
private const COLON_STATE_CLOSETAG = 3;
/** Inside a tag, immediately after a "/" */
private const COLON_STATE_TAGSLASH = 4;
/** Inside a "<!" construct, no pending dash */
private const COLON_STATE_COMMENT = 5;
/** Inside a comment, one "-" seen */
private const COLON_STATE_COMMENTDASH = 6;
/** Inside a comment, "--" seen; a ">" ends the comment */
private const COLON_STATE_COMMENTDASHDASH = 7;
/** Inside language converter markup -{ ... }- */
private const COLON_STATE_LC = 8;
55
/**
 * Make lists from lines starting with ':', '*', '#', etc.
 *
 * Convenience entry point: builds a pass object for the given text and
 * runs it once.
 *
 * @param string $text Text to process
 * @param bool $lineStart When falsy, the first line of $text is copied to
 *   the output without any block-level processing
 * @return string The processed text
 */
public static function doBlockLevels( $text, $lineStart ) {
    return ( new self( $text, $lineStart ) )->execute();
}
68
/**
 * Private so that instances are only created through doBlockLevels().
 *
 * @param string $text Text to process
 * @param bool $lineStart Whether the text begins at the start of a line
 */
private function __construct( $text, $lineStart ) {
    $this->lineStart = $lineStart;
    $this->text = $text;
}
77
/**
 * Tell whether a paragraph-level element ('p' or 'pre') is currently open.
 *
 * @return bool
 */
private function hasOpenParagraph() {
    return !( $this->lastParagraph === '' );
}
84
/**
 * Close the currently open paragraph-level element, if any, and reset the
 * paragraph state. The <pre> flag is cleared even when nothing was open.
 *
 * @param bool $atTheEnd When true, no newline is appended after the
 *   closing tag
 * @return string The closing tag (plus newline unless $atTheEnd), or ''
 *   when no paragraph was open
 */
private function closeParagraph( $atTheEnd = false ) {
    $openElement = $this->lastParagraph;

    // The state reset is unconditional.
    $this->lastParagraph = '';
    $this->inPre = false;

    if ( $openElement === '' ) {
        return '';
    }
    return '</' . $openElement . '>' . ( $atTheEnd ? '' : "\n" );
}
103
/**
 * Length, in bytes, of the longest prefix shared by two strings.
 *
 * @param string $st1
 * @param string $st2
 * @return int Number of leading bytes that are identical in both strings
 */
private function getCommon( $st1, $st2 ) {
    $limit = min( strlen( $st1 ), strlen( $st2 ) );

    $matched = 0;
    while ( $matched < $limit && $st1[$matched] === $st2[$matched] ) {
        ++$matched;
    }
    return $matched;
}
123
/**
 * Open a new list for the given prefix character, closing any open
 * paragraph first.
 *
 * Opening a ';' list also records that a <dt> is now open.
 *
 * @param string $char One of '*', '#', ':' or ';'
 * @return string Markup closing the open paragraph (if any) followed by the
 *   list-opening tags, or by an error comment for an unknown character
 */
private function openList( $char ) {
    // closeParagraph() resets the paragraph state, so whatever it returns
    // must always be emitted, including on the error path below.
    $result = $this->closeParagraph();

    $openers = [
        '*' => '<ul><li>',
        '#' => '<ol><li>',
        ':' => '<dl><dd>',
        ';' => '<dl><dt>',
    ];

    if ( !is_string( $char ) || !isset( $openers[$char] ) ) {
        return $result . '<!-- ERR 1 -->';
    }

    if ( $char === ';' ) {
        $this->DTopen = true;
    }
    return $result . $openers[$char];
}
149
/**
 * Close the current list item and open the next one at the same level.
 *
 * For definition lists the closing tag depends on whether a <dt> is open,
 * and the open-<dt> flag is updated to match the item being opened.
 *
 * @param string $char Prefix character of the new item: '*', '#', ':' or ';'
 * @return string Closing tag, newline and opening tag, or an error comment
 *   for an unknown character
 */
private function nextItem( $char ) {
    if ( $char === '*' || $char === '#' ) {
        return "</li>\n<li>";
    }
    if ( $char !== ':' && $char !== ';' ) {
        return '<!-- ERR 2 -->';
    }

    // Close whichever definition-list element is currently open.
    $closing = $this->DTopen ? "</dt>\n" : "</dd>\n";

    // ';' starts a term (<dt>), ':' starts a definition (<dd>).
    $this->DTopen = ( $char === ';' );
    return $closing . ( $this->DTopen ? '<dt>' : '<dd>' );
}
174
/**
 * Close the innermost list of the given type.
 *
 * Only '*', '#' and ':' are recognised; ';' is not handled here and
 * yields the error comment.
 *
 * @param string $char Normalised prefix character: '*', '#' or ':'
 * @return string Closing tags for the item and the list, or an error
 *   comment for an unknown character
 */
private function closeList( $char ) {
    if ( $char === '*' ) {
        return "</li></ul>";
    }
    if ( $char === '#' ) {
        return "</li></ol>";
    }
    if ( $char !== ':' ) {
        return '<!-- ERR 3 -->';
    }

    if ( !$this->DTopen ) {
        return "</dd></dl>";
    }
    // A term was still open: close it instead of a definition.
    $this->DTopen = false;
    return "</dt></dl>";
}
198
/**
 * Run the block-level pass over the stored text.
 *
 * The text is processed line by line: leading runs of '*', '#', ':' and ';'
 * are turned into (nested) lists, and lines without such a prefix are
 * wrapped into <p> or indent-<pre> elements unless block-level markup on
 * the line suppresses that.
 *
 * @return string The processed text
 */
private function execute() {
    $text = $this->text;
    # Parsing through the text line by line. The main thing
    # happening here is handling of block-level elements p, pre,
    # and making lists from lines starting with * # : etc.
    $textLines = StringUtils::explode( "\n", $text );

    $lastPrefix = $output = '';
    $this->DTopen = $inBlockElem = false;
    $prefixLength = 0;
    $pendingPTag = false;
    $inBlockquote = false;

    # The element lists and the patterns built from them do not depend on
    # the line being processed, so build them once.
    # @todo consider using a stack for nestable elements like span, table and div

    // P-wrapping and indent-pre are suppressed inside, not outside
    $blockElems = 'table|h1|h2|h3|h4|h5|h6|pre|p|ul|ol|dl';
    // P-wrapping and indent-pre are suppressed outside, not inside
    $antiBlockElems = 'td|th';

    $openRegex = '/<('
        . "({$blockElems})|\\/({$antiBlockElems})|"
        // Always suppresses
        . '\\/?(tr|caption|dt|dd|li)'
        . ')\\b/iS';
    $closeRegex = '/<('
        . "\\/({$blockElems})|({$antiBlockElems})|"
        // Never suppresses
        . '\\/?(center|blockquote|div|hr|mw:|aside|figure)|'
        // Used as Parser::TOC_PLACEHOLDER
        . 'meta property="mw:'
        . ')\\b/iS';

    for ( $textLines->rewind(); $textLines->valid(); ) {
        $inputLine = $textLines->current();
        // Advance now so that we know whether this is the last line.
        $textLines->next();
        $notLastLine = $textLines->valid();

        # Fix up $lineStart
        if ( !$this->lineStart ) {
            $output .= $inputLine;
            $this->lineStart = true;
            continue;
        }
        # * = ul
        # # = ol
        # ; = dt
        # : = dd

        $lastPrefixLength = strlen( $lastPrefix );
        $preCloseMatch = preg_match( '/<\\/pre/i', $inputLine );
        $preOpenMatch = preg_match( '/<pre/i', $inputLine );
        # If not in a <pre> element, scan for and figure out what prefixes are there.
        if ( !$this->inPre ) {
            # Multiple prefixes may abut each other for nested lists.
            $prefixLength = strspn( $inputLine, '*#:;' );
            $prefix = substr( $inputLine, 0, $prefixLength );

            # ; and : are both from definition-lists, so they're equivalent
            # for the purposes of determining whether or not we need to open/close
            # elements.
            $prefix2 = str_replace( ';', ':', $prefix );
            $t = substr( $inputLine, $prefixLength );
            $this->inPre = (bool)$preOpenMatch;
        } else {
            # Don't interpret any other prefixes in preformatted text
            $prefixLength = 0;
            $prefix = $prefix2 = '';
            $t = $inputLine;
        }

        # List generation
        if ( $prefixLength && $lastPrefix === $prefix2 ) {
            # Same as the last item, so no need to deal with nesting or opening stuff
            $output .= $this->nextItem( substr( $prefix, -1 ) );
            $pendingPTag = false;

            if ( substr( $prefix, -1 ) === ';' ) {
                # The one nasty exception: definition lists work like this:
                # ; title : definition text
                [ $termHtml, $t ] = $this->splitDefinitionTerm( $t );
                $output .= $termHtml;
            }
        } elseif ( $prefixLength || $lastPrefixLength ) {
            # We need to open or close prefixes, or both.

            # Either open or close a level...
            $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
            $pendingPTag = false;

            # Close all the prefixes which aren't shared.
            while ( $commonPrefixLength < $lastPrefixLength ) {
                // @phan-suppress-next-line PhanTypeInvalidDimOffset
                $output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] );
                --$lastPrefixLength;
            }

            # Continue the current prefix if appropriate.
            if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
                $output .= $this->nextItem( $prefix[$commonPrefixLength - 1] );
            }

            # Close an open <dt> if we have a <dd> (":") starting on this line
            if ( $this->DTopen && $commonPrefixLength > 0 && $prefix[$commonPrefixLength - 1] === ':' ) {
                $output .= $this->nextItem( ':' );
            }

            # Open prefixes where appropriate.
            if ( $lastPrefix && $prefixLength > $commonPrefixLength ) {
                $output .= "\n";
            }
            while ( $prefixLength > $commonPrefixLength ) {
                $char = $prefix[$commonPrefixLength];
                $output .= $this->openList( $char );

                if ( $char === ';' ) {
                    [ $termHtml, $t ] = $this->splitDefinitionTerm( $t );
                    $output .= $termHtml;
                }
                ++$commonPrefixLength;
            }
            if ( !$prefixLength && $lastPrefix ) {
                $output .= "\n";
            }
            $lastPrefix = $prefix2;
        }

        # If we have no prefixes, go to paragraph mode.
        if ( $prefixLength === 0 ) {
            # No prefix (not in list)--go to paragraph mode
            $openMatch = preg_match( $openRegex, $t );
            $closeMatch = preg_match( $closeRegex, $t );

            // Any match closes the paragraph, but only when `!$closeMatch`
            // do we enter block mode. The oddities with table rows and
            // cells are to avoid paragraph wrapping in interstitial spaces
            // leading to fostered content.

            if ( $openMatch || $closeMatch ) {
                $pendingPTag = false;
                // Only close the paragraph if we're not inside a <pre> tag, or if
                // that <pre> tag has just been opened
                if ( !$this->inPre || $preOpenMatch ) {
                    // @todo T7718: paragraph closed
                    $output .= $this->closeParagraph();
                }
                // closeParagraph() clears inPre, so re-establish it for a
                // <pre> that opens on this line without closing.
                if ( $preOpenMatch && !$preCloseMatch ) {
                    $this->inPre = true;
                }
                // Walk every <blockquote> / </blockquote> on the line; the
                // last one seen decides the state.
                $bqOffset = 0;
                while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t,
                    $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset )
                ) {
                    $inBlockquote = !$bqMatch[1][0]; // is this a close tag?
                    $bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] );
                }
                $inBlockElem = !$closeMatch;
            } elseif ( !$inBlockElem && !$this->inPre ) {
                if ( substr( $t, 0, 1 ) === ' '
                    && ( $this->lastParagraph === 'pre' || trim( $t ) !== '' )
                    && !$inBlockquote
                ) {
                    # pre
                    if ( $this->lastParagraph !== 'pre' ) {
                        $pendingPTag = false;
                        $output .= $this->closeParagraph() . '<pre>';
                        $this->lastParagraph = 'pre';
                    }
                    // Drop the leading space that marks the line as preformatted.
                    $t = substr( $t, 1 );
                } elseif ( preg_match( '/^(?:<style\\b[^>]*>.*?<\\/style>\s*|<link\\b[^>]*>\s*)+$/iS', $t ) ) {
                    # T186965: <style> or <link> by itself on a line shouldn't open or close paragraphs.
                    # But it should clear $pendingPTag.
                    if ( $pendingPTag ) {
                        $output .= $this->closeParagraph();
                        $pendingPTag = false;
                    }
                } else {
                    # paragraph
                    if ( trim( $t ) === '' ) {
                        if ( $pendingPTag ) {
                            $output .= $pendingPTag . '<br />';
                            $pendingPTag = false;
                            $this->lastParagraph = 'p';
                        } elseif ( $this->lastParagraph !== 'p' ) {
                            $output .= $this->closeParagraph();
                            $pendingPTag = '<p>';
                        } else {
                            $pendingPTag = '</p><p>';
                        }
                    } elseif ( $pendingPTag ) {
                        $output .= $pendingPTag;
                        $pendingPTag = false;
                        $this->lastParagraph = 'p';
                    } elseif ( $this->lastParagraph !== 'p' ) {
                        $output .= $this->closeParagraph() . '<p>';
                        $this->lastParagraph = 'p';
                    }
                }
            }
        }
        # somewhere above we forget to get out of pre block (T2785)
        if ( $preCloseMatch && $this->inPre ) {
            $this->inPre = false;
        }
        // While a paragraph tag is pending, the (blank) line itself is not emitted.
        if ( $pendingPTag === false ) {
            if ( $prefixLength === 0 ) {
                $output .= $t;
                // Add a newline if there's an open paragraph
                // or we've yet to reach the last line.
                if ( $notLastLine || $this->hasOpenParagraph() ) {
                    $output .= "\n";
                }
            } else {
                // Trim whitespace in list items
                $output .= trim( $t );
            }
        }
    }
    // Close whatever lists the last processed line left open.
    while ( $prefixLength ) {
        // @phan-suppress-next-line PhanTypeArraySuspicious $prefix set if $prefixLength is set
        $output .= $this->closeList( $prefix2[$prefixLength - 1] );
        --$prefixLength;
        // Note that a paragraph is only ever opened when `prefixLength`
        // is zero, but we'll choose to be overly cautious.
        if ( !$prefixLength && $this->hasOpenParagraph() ) {
            $output .= "\n";
        }
    }
    $output .= $this->closeParagraph( true );
    return $output;
}

/**
 * Split the content of a ';' line of the form "title : definition text".
 *
 * When a top-level colon is found, the trimmed title is returned followed
 * by the markup that switches from the term to the definition, and the
 * remainder after the colon becomes the new line content. Otherwise the
 * line is returned untouched with no markup.
 *
 * @param string $line Line content following the list prefix
 * @return string[] Two elements: markup to append to the output, and the
 *   line content that is still to be emitted
 */
private function splitDefinitionTerm( $line ) {
    $term = $definition = '';
    if ( $this->findColonNoLinks( $line, $term, $definition ) === false ) {
        return [ '', $line ];
    }
    // Trim whitespace in list items
    return [ trim( $term ) . $this->nextItem( ':' ), $definition ];
}
448
/**
 * Split a string at the first colon that is not nested inside an HTML
 * element, an HTML comment, or language converter markup (-{ ... }-).
 *
 * $before and $after are only written when a colon is found.
 *
 * @param string $str The string to split
 * @param string &$before Set to everything before the colon
 * @param string &$after Set to everything after the colon
 * @return int|false Byte offset of the colon, or false if none was found
 */
private function findColonNoLinks( $str, &$before, &$after ) {
    if ( !preg_match( '/:|<|-\{/', $str, $m, PREG_OFFSET_CAPTURE ) ) {
        # Nothing to find!
        return false;
    }

    if ( $m[0][0] === ':' ) {
        # Easy; no tag nesting to worry about
        $colonPos = $m[0][1];
        $before = substr( $str, 0, $colonPos );
        $after = substr( $str, $colonPos + 1 );
        return $colonPos;
    }

    # Ugly state machine to walk through avoiding tags.
    $state = self::COLON_STATE_TEXT;
    // Nesting depth of open non-void elements
    $ltLevel = 0;
    // Nesting depth of language converter markup
    $lcLevel = 0;
    // Whether characters are still being appended to $tagName
    $captureName = false;
    $tagName = '';
    $len = strlen( $str );
    for ( $i = $m[0][1]; $i < $len; $i++ ) {
        $c = $str[$i];

        switch ( $state ) {
            case self::COLON_STATE_TEXT:
                switch ( $c ) {
                    case "<":
                        # Could be either a <start> tag or an </end> tag
                        $state = self::COLON_STATE_TAGSTART;
                        $captureName = true;
                        $tagName = '';
                        break;
                    case ":":
                        if ( $ltLevel === 0 ) {
                            # We found it!
                            $before = substr( $str, 0, $i );
                            $after = substr( $str, $i + 1 );
                            return $i;
                        }
                        # Embedded in a tag; don't break it.
                        break;
                    default:
                        # Skip ahead looking for something interesting
                        if ( !preg_match( '/:|<|-\{/', $str, $m, PREG_OFFSET_CAPTURE, $i ) ) {
                            # Nothing else interesting
                            return false;
                        }
                        if ( $m[0][0] === '-{' ) {
                            $state = self::COLON_STATE_LC;
                            $lcLevel++;
                            # Land on the "{"; the loop increment steps past it.
                            $i = $m[0][1] + 1;
                        } else {
                            # Skip ahead to next interesting character.
                            $i = $m[0][1] - 1;
                        }
                        break;
                }
                break;
            case self::COLON_STATE_LC:
                # In language converter markup -{ ... }-
                if ( !preg_match( '/-\{|\}-/', $str, $m, PREG_OFFSET_CAPTURE, $i ) ) {
                    # Nothing else interesting to find; abort!
                    # We're nested in language converter markup, but there
                    # are no close tags left. Abort!
                    break 2;
                }
                if ( $m[0][0] === '-{' ) {
                    $i = $m[0][1] + 1;
                    $lcLevel++;
                } elseif ( $m[0][0] === '}-' ) {
                    $i = $m[0][1] + 1;
                    $lcLevel--;
                    if ( $lcLevel === 0 ) {
                        $state = self::COLON_STATE_TEXT;
                    }
                }
                break;
            case self::COLON_STATE_TAG:
                # In a <tag>
                switch ( $c ) {
                    case " ":
                    case "\t":
                    case "\n":
                    case "\r":
                    case "\f":
                        # Any ASCII whitespace ends the tag name; otherwise
                        # e.g. "<br\t>" would not be recognised as void.
                        $captureName = false;
                        break;
                    case ">":
                        # Void elements have no close tag, so they don't nest.
                        if ( !isset( HTMLData::TAGS['void'][strtolower( $tagName )] ) ) {
                            $ltLevel++;
                        }
                        $state = self::COLON_STATE_TEXT;
                        break;
                    case "/":
                        # Slash may be followed by >?
                        # Either way the tag name is complete.
                        $captureName = false;
                        $state = self::COLON_STATE_TAGSLASH;
                        break;
                    default:
                        if ( $captureName ) {
                            $tagName .= $c;
                        }
                        # ignore
                }
                break;
            case self::COLON_STATE_TAGSTART:
                switch ( $c ) {
                    case "/":
                        $state = self::COLON_STATE_CLOSETAG;
                        break;
                    case "!":
                        $state = self::COLON_STATE_COMMENT;
                        break;
                    case ">":
                        # Illegal early close? This shouldn't happen D:
                        $state = self::COLON_STATE_TEXT;
                        break;
                    default:
                        if ( $captureName ) {
                            $tagName .= $c;
                        }
                        $state = self::COLON_STATE_TAG;
                }
                break;
            case self::COLON_STATE_CLOSETAG:
                # In a </tag>
                if ( $c === ">" ) {
                    if ( $ltLevel > 0 ) {
                        $ltLevel--;
                    } else {
                        # ignore the excess close tag, but keep looking for
                        # colons. (This matches Parsoid behavior.)
                        wfDebug( __METHOD__ . ": Invalid input; too many close tags" );
                    }
                    $state = self::COLON_STATE_TEXT;
                }
                break;
            case self::COLON_STATE_TAGSLASH:
                if ( $c === ">" ) {
                    # Yes, a self-closed tag <blah/>
                    $state = self::COLON_STATE_TEXT;
                } else {
                    # Probably we're jumping the gun, and this is an attribute
                    $state = self::COLON_STATE_TAG;
                }
                break;
            case self::COLON_STATE_COMMENT:
                if ( $c === "-" ) {
                    $state = self::COLON_STATE_COMMENTDASH;
                }
                break;
            case self::COLON_STATE_COMMENTDASH:
                if ( $c === "-" ) {
                    $state = self::COLON_STATE_COMMENTDASHDASH;
                } else {
                    $state = self::COLON_STATE_COMMENT;
                }
                break;
            case self::COLON_STATE_COMMENTDASHDASH:
                if ( $c === ">" ) {
                    $state = self::COLON_STATE_TEXT;
                } else {
                    $state = self::COLON_STATE_COMMENT;
                }
                break;
            default:
                throw new LogicException( "State machine error in " . __METHOD__ );
        }
    }
    if ( $ltLevel > 0 || $lcLevel > 0 ) {
        wfDebug(
            __METHOD__ . ": Invalid input; not enough close tags " .
            "(level $ltLevel/$lcLevel, state $state)"
        );
    }
    return false;
}
631}
632
// Register the un-namespaced name 'BlockLevelPass' as an alias of this class.
\class_alias( BlockLevelPass::class, 'BlockLevelPass' );
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:671
MediaWiki\Parser\BlockLevelPass
Definition: BlockLevelPass.php:33
MediaWiki\Parser\BlockLevelPass\doBlockLevels
static doBlockLevels( $text, $lineStart)
Make lists from lines starting with ':', '*', '#', etc.
Definition: BlockLevelPass.php:64
Wikimedia\StringUtils\StringUtils
A collection of static methods to play with strings.
Definition: StringUtils.php:35
MediaWiki\Parser\CoreParserFunctions\null\$t
$t
Definition: CoreParserFunctions.php:713
MediaWiki\Parser
Definition: BlockLevelPass.php:27

[8]ページ先頭

©2009-2025 Movatter.jp