Movatterモバイル変換

BlockLevelPass.php

Go to the documentation of this file.

1<?php

27namespaceMediaWiki\Parser;

29use LogicException;

30use Wikimedia\RemexHtml\HTMLData;

31useWikimedia\StringUtils\StringUtils;

33classBlockLevelPass {

/** @var bool True while the most recently opened definition-list item is a <dt> rather than a <dd> */
private $DTopen = false;

/** @var bool True while inside a literal <pre> element; list prefixes are not interpreted there */
private $inPre = false;

/** @var string Tag name of the paragraph-like element currently open ('p' or 'pre'), or '' if none */
private $lastParagraph = '';

/** @var bool When false, the first input line is emitted verbatim and the flag is then set to true */
private $lineStart;

/** @var string The text this pass operates on */
private $text;

# State constants for the definition list colon extraction
# (the state machine in findColonNoLinks()).

/** Plain text, outside any tag, comment or -{ }- markup */
private const COLON_STATE_TEXT = 0;
/** Inside a <tag ...>, after the first character of its name */
private const COLON_STATE_TAG = 1;
/** Just saw "<"; not yet known whether this is a start tag, end tag or comment */
private const COLON_STATE_TAGSTART = 2;
/** Inside a </tag> */
private const COLON_STATE_CLOSETAG = 3;
/** Saw "/" inside a tag; a following ">" makes it self-closed */
private const COLON_STATE_TAGSLASH = 4;
/** Inside a comment (entered on "<!") */
private const COLON_STATE_COMMENT = 5;
/** Inside a comment, just saw one "-" */
private const COLON_STATE_COMMENTDASH = 6;
/** Inside a comment, just saw "--" */
private const COLON_STATE_COMMENTDASHDASH = 7;
/** Inside language converter markup -{ ... }- */
private const COLON_STATE_LC = 8;

/**
 * Make lists from lines starting with ':', '*', '#', etc.
 *
 * Static entry point: builds a one-shot pass object and runs it.
 *
 * @param string $text Text to process; it is split on "\n" and handled line by line
 * @param bool $lineStart Whether $text begins at the start of a line. When false,
 *   the first line is copied to the output without any block-level processing.
 * @return string The processed text
 */
public static function doBlockLevels( $text, $lineStart ) {
    $pass = new self( $text, $lineStart );
    return $pass->execute();
}

/**
 * Private so that instances are only created through doBlockLevels().
 *
 * @param string $text Text to process
 * @param bool $lineStart Whether $text begins at the start of a line
 */
private function __construct( $text, $lineStart ) {
    $this->text = $text;
    $this->lineStart = $lineStart;
}

/**
 * Whether a paragraph-like element (recorded in $this->lastParagraph) is currently open.
 *
 * @return bool
 */
private function hasOpenParagraph() {
    return $this->lastParagraph !== '';
}

/**
 * Close the currently open paragraph element, if there is one, and reset
 * the paragraph state.
 *
 * $this->inPre and $this->lastParagraph are reset unconditionally, even when
 * nothing was open.
 *
 * @param bool $atTheEnd When true, no newline is appended after the closing tag
 * @return string The closing tag (followed by "\n" unless $atTheEnd), or '' if
 *   no paragraph was open
 */
private function closeParagraph( $atTheEnd = false ) {
    $result = '';
    if ( $this->hasOpenParagraph() ) {
        $result = '</' . $this->lastParagraph . '>';
        if ( !$atTheEnd ) {
            $result .= "\n";
        }
    }
    $this->inPre = false;
    $this->lastParagraph = '';
    return $result;
}

103

/**
 * Return the length of the longest common prefix of two strings,
 * compared byte by byte.
 *
 * @param string $st1
 * @param string $st2
 * @return int Number of leading bytes the two strings share (0 if either is empty)
 */
private function getCommon( $st1, $st2 ) {
    $shorter = min( strlen( $st1 ), strlen( $st2 ) );

    // $i is deliberately read after the loop: it ends up at the index of the
    // first mismatch, or at $shorter when one string is a prefix of the other.
    for ( $i = 0; $i < $shorter; ++$i ) {
        if ( $st1[$i] !== $st2[$i] ) {
            break;
        }
    }
    return $i;
}

123

/**
 * Close any open paragraph and open a new list level for the given prefix
 * character, including its first item.
 *
 * Opening a ';' level sets $this->DTopen.
 *
 * @param string $char One of '*', '#', ':' or ';'
 * @return string The paragraph-closing markup followed by the list-opening
 *   markup; '' for any other character
 */
private function openList( $char ) {
    $result = $this->closeParagraph();

    if ( $char === '*' ) {
        $result .= "<ul><li>";
    } elseif ( $char === '#' ) {
        $result .= "<ol><li>";
    } elseif ( $char === ':' ) {
        $result .= "<dl><dd>";
    } elseif ( $char === ';' ) {
        $result .= "<dl><dt>";
        $this->DTopen = true;
    } else {
        // Unknown prefix character: emit nothing. Note that this also drops
        // the markup returned by closeParagraph() above, although the
        // paragraph state has already been reset by that call.
        $result = '';
    }

    return $result;
}

149

/**
 * Produce the markup that ends the current list item and starts the next
 * one at the same nesting level.
 *
 * For definition lists, the closing tag depends on whether a <dt> is
 * currently open ($this->DTopen), and the flag is updated to reflect the
 * item being opened.
 *
 * @param string $char Prefix character of the list: '*', '#', ':' or ';'
 * @return string Closing tag, newline and opening tag; '' for any other character
 */
private function nextItem( $char ) {
    if ( $char === '*' || $char === '#' ) {
        return "</li>\n<li>";
    } elseif ( $char === ':' || $char === ';' ) {
        $close = "</dd>\n";
        if ( $this->DTopen ) {
            $close = "</dt>\n";
        }
        if ( $char === ';' ) {
            $this->DTopen = true;
            return $close . '<dt>';
        } else {
            $this->DTopen = false;
            return $close . '<dd>';
        }
    }
    return '';
}

174

/**
 * Produce the markup that closes the last item of a list level and the
 * list itself.
 *
 * There is no ';' branch: execute() only passes characters taken from
 * prefixes in which ';' has already been replaced by ':'. Whether the item
 * being closed is a <dt> or a <dd> is decided from $this->DTopen instead,
 * which is cleared when a <dt> is closed here.
 *
 * @param string $char Prefix character of the level: '*', '#' or ':'
 * @return string The closing markup; '' for any other character
 */
private function closeList( $char ) {
    if ( $char === '*' ) {
        $text = "</li></ul>";
    } elseif ( $char === '#' ) {
        $text = "</li></ol>";
    } elseif ( $char === ':' ) {
        if ( $this->DTopen ) {
            $this->DTopen = false;
            $text = "</dt></dl>";
        } else {
            $text = "</dd></dl>";
        }
    } else {
        return '';
    }
    return $text;
}

198

/**
 * Run the block-level pass over $this->text.
 *
 * The text is walked line by line. Leading runs of '*', '#', ':' and ';'
 * are turned into (possibly nested) lists; lines without such a prefix go
 * through paragraph handling, which opens and closes <p> and indent-<pre>
 * elements and suppresses that wrapping around block-level HTML.
 *
 * @return string The processed text
 */
private function execute() {
    $text = $this->text;
    # Parsing through the text line by line. The main thing
    # happening here is handling of block-level elements p, pre,
    # and making lists from lines starting with * # : etc.
    $textLines = StringUtils::explode( "\n", $text );

    $lastPrefix = $output = '';
    $this->DTopen = $inBlockElem = false;
    $prefixLength = 0;
    # Either false, or the paragraph markup ('<p>' or '</p><p>') that a blank
    # line has queued up and that is emitted once the next line is known.
    $pendingPTag = false;
    $inBlockquote = false;

    for ( $textLines->rewind(); $textLines->valid(); ) {
        $inputLine = $textLines->current();
        $textLines->next();
        $notLastLine = $textLines->valid();

        # Fix up $lineStart
        if ( !$this->lineStart ) {
            # The text did not begin at the start of a line, so the first
            # line is passed through untouched.
            $output .= $inputLine;
            $this->lineStart = true;
            continue;
        }
        # * = ul
        # # = ol
        # ; = dt
        # : = dd

        $lastPrefixLength = strlen( $lastPrefix );
        $preCloseMatch = preg_match( '/<\\/pre/i', $inputLine );
        $preOpenMatch = preg_match( '/<pre/i', $inputLine );
        # If not in a <pre> element, scan for and figure out what prefixes are there.
        if ( !$this->inPre ) {
            # Multiple prefixes may abut each other for nested lists.
            $prefixLength = strspn( $inputLine, '*#:;' );
            $prefix = substr( $inputLine, 0, $prefixLength );

            # ; and : are both from definition-lists, so they're equivalent
            # for the purposes of determining whether or not we need to open/close
            # elements.
            $prefix2 = str_replace( ';', ':', $prefix );
            $t = substr( $inputLine, $prefixLength );
            $this->inPre = (bool)$preOpenMatch;
        } else {
            # Don't interpret any other prefixes in preformatted text
            $prefixLength = 0;
            $prefix = $prefix2 = '';
            $t = $inputLine;
        }

        # List generation
        if ( $prefixLength && $lastPrefix === $prefix2 ) {
            # Same as the last item, so no need to deal with nesting or opening stuff
            $output .= $this->nextItem( substr( $prefix, -1 ) );
            $pendingPTag = false;

            if ( substr( $prefix, -1 ) === ';' ) {
                # The one nasty exception: definition lists work like this:
                # ; title : definition text
                # So we check for : in the remainder text to split up the
                # title and definition, without b0rking links.
                $term = $t2 = '';
                if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
                    $t = $t2;
                    // Trim whitespace in list items
                    $output .= trim( $term ) . $this->nextItem( ':' );
                }
            }
        } elseif ( $prefixLength || $lastPrefixLength ) {
            # We need to open or close prefixes, or both.

            # Either open or close a level...
            $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
            $pendingPTag = false;

            # Close all the prefixes which aren't shared.
            while ( $commonPrefixLength < $lastPrefixLength ) {
                // @phan-suppress-next-line PhanTypeInvalidDimOffset
                $output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] );
                --$lastPrefixLength;
            }

            # Continue the current prefix if appropriate.
            if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
                $output .= $this->nextItem( $prefix[$commonPrefixLength - 1] );
            }

            # Close an open <dt> if we have a <dd> (":") starting on this line
            if ( $this->DTopen && $commonPrefixLength > 0 && $prefix[$commonPrefixLength - 1] === ':' ) {
                $output .= $this->nextItem( ':' );
            }

            # Open prefixes where appropriate.
            if ( $lastPrefix && $prefixLength > $commonPrefixLength ) {
                $output .= "\n";
            }
            while ( $prefixLength > $commonPrefixLength ) {
                $char = $prefix[$commonPrefixLength];
                $output .= $this->openList( $char );

                if ( $char === ';' ) {
                    # @todo FIXME: This is dupe of code above
                    # $term and $t2 are by-reference outputs of findColonNoLinks().
                    if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
                        $t = $t2;
                        // Trim whitespace in list items
                        $output .= trim( $term ) . $this->nextItem( ':' );
                    }
                }
                ++$commonPrefixLength;
            }
            if ( !$prefixLength && $lastPrefix ) {
                $output .= "\n";
            }
            # Stored in normalized form (';' replaced by ':').
            $lastPrefix = $prefix2;
        }

        # If we have no prefixes, go to paragraph mode.
        if ( $prefixLength == 0 ) {
            # No prefix (not in list)--go to paragraph mode
            # @todo consider using a stack for nestable elements like span, table and div

            // P-wrapping and indent-pre are suppressed inside, not outside
            $blockElems = 'table|h1|h2|h3|h4|h5|h6|pre|p|ul|ol|dl';
            // P-wrapping and indent-pre are suppressed outside, not inside
            $antiBlockElems = 'td|th';

            $openMatch = preg_match(
                '/<('
                    . "({$blockElems})|\\/({$antiBlockElems})|"
                    // Always suppresses
                    . '\\/?(tr|caption|dt|dd|li)'
                    . ')\\b/iS',
                $t
            );
            $closeMatch = preg_match(
                '/<('
                    . "\\/({$blockElems})|({$antiBlockElems})|"
                    // Never suppresses
                    . '\\/?(center|blockquote|div|hr|mw:|aside|figure)|'
                    // Used as Parser::TOC_PLACEHOLDER
                    . 'meta property="mw:'
                    . ')\\b/iS',
                $t
            );

            // Any match closes the paragraph, but only when `!$closeMatch`
            // do we enter block mode. The oddities with table rows and
            // cells are to avoid paragraph wrapping in interstitial spaces
            // leading to fostered content.

            if ( $openMatch || $closeMatch ) {
                $pendingPTag = false;
                // Only close the paragraph if we're not inside a <pre> tag, or if
                // that <pre> tag has just been opened
                if ( !$this->inPre || $preOpenMatch ) {
                    // @todo T7718: paragraph closed
                    $output .= $this->closeParagraph();
                }
                if ( $preOpenMatch && !$preCloseMatch ) {
                    $this->inPre = true;
                }
                // Walk every <blockquote>/</blockquote> tag on the line; the
                // last one seen decides whether we are inside a blockquote.
                $bqOffset = 0;
                while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t,
                    $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset )
                ) {
                    $inBlockquote = !$bqMatch[1][0]; // is this a close tag?
                    $bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] );
                }
                $inBlockElem = !$closeMatch;
            } elseif ( !$inBlockElem && !$this->inPre ) {
                if ( substr( $t, 0, 1 ) == ' '
                    && ( $this->lastParagraph === 'pre' || trim( $t ) != '' )
                    && !$inBlockquote
                ) {
                    # pre
                    if ( $this->lastParagraph !== 'pre' ) {
                        $pendingPTag = false;
                        $output .= $this->closeParagraph() . '<pre>';
                        $this->lastParagraph = 'pre';
                    }
                    # Drop the leading space that marked the line as preformatted.
                    $t = substr( $t, 1 );
                } elseif ( preg_match( '/^(?:<style\\b[^>]*>.*?<\\/style>\s*|<link\\b[^>]*>\s*)+$/iS', $t ) ) {
                    # T186965: <style> or <link> by itself on a line shouldn't open or close paragraphs.
                    # But it should clear $pendingPTag.
                    if ( $pendingPTag ) {
                        $output .= $this->closeParagraph();
                        $pendingPTag = false;
                    }
                } else {
                    # paragraph
                    if ( trim( $t ) === '' ) {
                        if ( $pendingPTag ) {
                            $output .= $pendingPTag . '<br />';
                            $pendingPTag = false;
                            $this->lastParagraph = 'p';
                        } elseif ( $this->lastParagraph !== 'p' ) {
                            $output .= $this->closeParagraph();
                            $pendingPTag = '<p>';
                        } else {
                            $pendingPTag = '</p><p>';
                        }
                    } elseif ( $pendingPTag ) {
                        $output .= $pendingPTag;
                        $pendingPTag = false;
                        $this->lastParagraph = 'p';
                    } elseif ( $this->lastParagraph !== 'p' ) {
                        $output .= $this->closeParagraph() . '<p>';
                        $this->lastParagraph = 'p';
                    }
                }
            }
        }
        # somewhere above we forget to get out of pre block (T2785)
        if ( $preCloseMatch && $this->inPre ) {
            $this->inPre = false;
        }
        # While a paragraph tag is pending, the (blank) line itself is not emitted.
        if ( $pendingPTag === false ) {
            if ( $prefixLength === 0 ) {
                $output .= $t;
                // Add a newline if there's an open paragraph
                // or we've yet to reach the last line.
                if ( $notLastLine || $this->hasOpenParagraph() ) {
                    $output .= "\n";
                }
            } else {
                // Trim whitespace in list items
                $output .= trim( $t );
            }
        }
    }
    # Close whatever list levels are still open after the last line.
    while ( $prefixLength ) {
        // @phan-suppress-next-line PhanTypeArraySuspicious $prefix set if $prefixLength is set
        $output .= $this->closeList( $prefix2[$prefixLength - 1] );
        --$prefixLength;
        // Note that a paragraph is only ever opened when `prefixLength`
        // is zero, but we'll choose to be overly cautious.
        if ( !$prefixLength && $this->hasOpenParagraph() ) {
            $output .= "\n";
        }
    }
    $output .= $this->closeParagraph( true );
    return $output;
}

448

/**
 * Split a string at the first ':' that is not inside an HTML-style tag,
 * a comment, an unclosed element, or language converter markup (-{ ... }-).
 *
 * Used for "; term : definition" lines, so that a colon inside markup does
 * not split the term.
 *
 * @param string $str The string to split
 * @param string &$before Set to everything before the ':' (only when one is found)
 * @param string &$after Set to everything after the ':' (only when one is found)
 * @return int|false Byte offset of the ':' in $str, or false if no usable ':' was found
 * @throws LogicException If the state machine reaches an unknown state
 */
private function findColonNoLinks( $str, &$before, &$after ) {
    if ( !preg_match( '/:|<|-\{/', $str, $m, PREG_OFFSET_CAPTURE ) ) {
        # Nothing to find!
        return false;
    }

    if ( $m[0][0] === ':' ) {
        # Easy; no tag nesting to worry about
        $colonPos = $m[0][1];
        $before = substr( $str, 0, $colonPos );
        $after = substr( $str, $colonPos + 1 );
        return $colonPos;
    }

    # Ugly state machine to walk through avoiding tags.
    $state = self::COLON_STATE_TEXT;
    # Number of currently open non-void elements
    $ltLevel = 0;
    # Nesting depth of -{ ... }- markup
    $lcLevel = 0;
    $captureName = false;
    $tagName = '';
    $len = strlen( $str );
    for ( $i = $m[0][1]; $i < $len; $i++ ) {
        $c = $str[$i];

        switch ( $state ) {
            case self::COLON_STATE_TEXT:
                switch ( $c ) {
                    case "<":
                        # Could be either a <start> tag or an </end> tag
                        $state = self::COLON_STATE_TAGSTART;
                        $captureName = true;
                        $tagName = '';
                        break;
                    case ":":
                        if ( $ltLevel === 0 ) {
                            # We found it!
                            $before = substr( $str, 0, $i );
                            $after = substr( $str, $i + 1 );
                            return $i;
                        }
                        # Embedded in a tag; don't break it.
                        break;
                    default:
                        # Skip ahead looking for something interesting
                        if ( !preg_match( '/:|<|-\{/', $str, $m, PREG_OFFSET_CAPTURE, $i ) ) {
                            # Nothing else interesting
                            return false;
                        }
                        if ( $m[0][0] === '-{' ) {
                            $state = self::COLON_STATE_LC;
                            $lcLevel++;
                            # Land on the '{'; the loop increment then moves past it.
                            $i = $m[0][1] + 1;
                        } else {
                            # Skip ahead to next interesting character.
                            # (One before it, because the loop increments $i.)
                            $i = $m[0][1] - 1;
                        }
                        break;
                }
                break;
            case self::COLON_STATE_LC:
                # In language converter markup -{ ... }-
                if ( !preg_match( '/-\{|\}-/', $str, $m, PREG_OFFSET_CAPTURE, $i ) ) {
                    # Nothing else interesting to find; abort!
                    # We're nested in language converter markup, but there
                    # are no close tags left. Abort!
                    # (break 2 leaves both the switch and the for loop.)
                    break 2;
                }
                if ( $m[0][0] === '-{' ) {
                    $i = $m[0][1] + 1;
                    $lcLevel++;
                } elseif ( $m[0][0] === '}-' ) {
                    $i = $m[0][1] + 1;
                    $lcLevel--;
                    if ( $lcLevel === 0 ) {
                        $state = self::COLON_STATE_TEXT;
                    }
                }
                break;
            case self::COLON_STATE_TAG:
                # In a <tag>
                switch ( $c ) {
                    case " ":
                        # End of the tag name; what follows are attributes.
                        # NOTE(review): only a space ends the name, so other
                        # whitespace (e.g. a tab) would be appended to
                        # $tagName - confirm whether that can occur here.
                        $captureName = false;
                        break;
                    case ">":
                        # Void elements never get a close tag, so they don't nest.
                        if ( !isset( HTMLData::TAGS['void'][strtolower( $tagName )] ) ) {
                            $ltLevel++;
                        }
                        $state = self::COLON_STATE_TEXT;
                        break;
                    case "/":
                        # Slash may be followed by >?
                        $state = self::COLON_STATE_TAGSLASH;
                        break;
                    default:
                        if ( $captureName ) {
                            $tagName .= $c;
                        }
                        # ignore
                }
                break;
            case self::COLON_STATE_TAGSTART:
                switch ( $c ) {
                    case "/":
                        $state = self::COLON_STATE_CLOSETAG;
                        break;
                    case "!":
                        $state = self::COLON_STATE_COMMENT;
                        break;
                    case ">":
                        # Illegal early close? This shouldn't happen D:
                        $state = self::COLON_STATE_TEXT;
                        break;
                    default:
                        if ( $captureName ) {
                            $tagName .= $c;
                        }
                        $state = self::COLON_STATE_TAG;
                }
                break;
            case self::COLON_STATE_CLOSETAG:
                # In a </tag>
                if ( $c === ">" ) {
                    if ( $ltLevel > 0 ) {
                        $ltLevel--;
                    } else {
                        # ignore the excess close tag, but keep looking for
                        # colons. (This matches Parsoid behavior.)
                        wfDebug( __METHOD__ . ": Invalid input; too many close tags" );
                    }
                    $state = self::COLON_STATE_TEXT;
                }
                break;
            case self::COLON_STATE_TAGSLASH:
                if ( $c === ">" ) {
                    # Yes, a self-closed tag <blah/>
                    $state = self::COLON_STATE_TEXT;
                } else {
                    # Probably we're jumping the gun, and this is an attribute
                    $state = self::COLON_STATE_TAG;
                }
                break;
            case self::COLON_STATE_COMMENT:
                if ( $c === "-" ) {
                    $state = self::COLON_STATE_COMMENTDASH;
                }
                break;
            case self::COLON_STATE_COMMENTDASH:
                if ( $c === "-" ) {
                    $state = self::COLON_STATE_COMMENTDASHDASH;
                } else {
                    $state = self::COLON_STATE_COMMENT;
                }
                break;
            case self::COLON_STATE_COMMENTDASHDASH:
                if ( $c === ">" ) {
                    $state = self::COLON_STATE_TEXT;
                } else {
                    # NOTE(review): a third "-" also lands here, so a comment
                    # ending in "--->" is not treated as closed - confirm
                    # whether that is intended.
                    $state = self::COLON_STATE_COMMENT;
                }
                break;
            default:
                throw new LogicException( "State machine error in " . __METHOD__ );
        }
    }
    if ( $ltLevel > 0 || $lcLevel > 0 ) {
        wfDebug(
            __METHOD__ . ": Invalid input; not enough close tags " .
            "(level $ltLevel/$lcLevel, state $state)"
        );
    }
    return false;
}

631}

632

// Register the un-namespaced name 'BlockLevelPass' as an alias of this class.
class_alias( BlockLevelPass::class, 'BlockLevelPass' );

wfDebug

wfDebug( $text, $dest='all', array $context=[])

Sends a line to the debug log if enabled or, optionally, to a comment in output.

Definition GlobalFunctions.php:671

MediaWiki\Parser\BlockLevelPass

Definition BlockLevelPass.php:33

MediaWiki\Parser\BlockLevelPass\doBlockLevels

static doBlockLevels( $text, $lineStart)

Make lists from lines starting with ':', '*', '#', etc.

Definition BlockLevelPass.php:64

Wikimedia\StringUtils\StringUtils

A collection of static methods to play with strings.

Definition StringUtils.php:35

MediaWiki\Parser\CoreParserFunctions\null\$t

Definition CoreParserFunctions.php:713

MediaWiki\Parser

Definition BlockLevelPass.php:27