/**
 * Add a magic word by name.
 *
 * The name is appended to the list of known names, and every cached
 * structure derived from that list (the lookup hash, the base regex and
 * the compiled regexes) is discarded so it is rebuilt on next use.
 *
 * @param mixed $name Name of the magic word; declared untyped, presumably
 *   a string ID accepted by the magic word factory (confirm against callers).
 */
public function add( $name ): void {
	$this->names[] = $name;

	// Every derived cache is stale once the name list has changed.
	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;
}
75if ( $this->hash === null ) {
76 $this->hash = [ 0 => [], 1 => [] ];
77foreach ( $this->names as $name ) {
78 $magic = $this->factory->get( $name );
79 $case = intval( $magic->isCaseSensitive() );
80foreach ( $magic->getSynonyms() as $syn ) {
82 $syn = $this->factory->getContentLanguage()->lc( $syn );
84 $this->hash[$case][$syn] = $name;
102publicfunctiongetBaseRegex(
bool $capture =
true,
string $delimiter =
'/' ): array {
103if ( $capture && $delimiter ===
'/' && $this->baseRegex !== null ) {
104return $this->baseRegex;
106 $regex = [ 0 => [], 1 => [] ];
107foreach ( $this->names as $name ) {
108 $magic = $this->factory->get( $name );
109 $case = $magic->isCaseSensitive() ? 1 : 0;
110foreach ( $magic->getSynonyms() as $i => $syn ) {
112// Group name must start with a non-digit in PCRE 8.34+ 113 $it = strtr( $i,
'0123456789',
'abcdefghij' );
114 $groupName = $it .
'_' . $name;
115 $group =
'(?P<' . $groupName .
'>' . preg_quote( $syn, $delimiter ) .
')';
116 $regex[$case][] = $group;
118 $regex[$case][] = preg_quote( $syn, $delimiter );
122'@phan-var array<int,string[]> $regex';
123foreach ( $regex as $case => &$re ) {
124 $re = count( $re ) ? implode(
'|', $re ) :
'(?!)';
129'@phan-var array<int,string> $regex';
131if ( $capture && $delimiter ===
'/' ) {
132 $this->baseRegex = $regex;
142privatefunction getRegex(): array {
143if ( $this->regex === null ) {
145 $base = $this->getBaseRegex(
true,
'/' );
146foreach ( $base as $case => $re ) {
147 $this->regex[$case] =
"/$re/JS";
149// As a performance optimization, turn on unicode mode only for 150// case-insensitive matching. 151 $this->regex[0] .=
'u';
161privatefunction getRegexStart(): array {
163 $base = $this->getBaseRegex(
true,
'/' );
164foreach ( $base as $case => $re ) {
165 $newRegex[$case] =
"/^(?:$re)/JS";
167// As a performance optimization, turn on unicode mode only for 168// case-insensitive matching. 178privatefunction getVariableStartToEndRegex(): array {
180 $base = $this->getBaseRegex(
true,
'/' );
181foreach ( $base as $case => $re ) {
182 $newRegex[$case] = str_replace(
'\$1',
'(.*?)',
"/^(?:$re)$/JS" );
184// As a performance optimization, turn on unicode mode only for 185// case-insensitive matching. 205privatefunction parseMatch( array
$matches ): array {
207foreach (
$matches as $key => $match ) {
208if ( $magicName !==
null ) {
209// The structure we found at this point is [ …, 210// 'a_magicWordName' => 'matchedSynonym', 211// n => 'matchedSynonym (again)', 212// n + 1 => 'parameterValue', 214return [ $magicName,
$matches[$key + 1] ?? false ];
216// Skip the initial full match and any non-matching group 217if ( $match !==
'' && $key !== 0 ) {
218 $parts = explode(
'_', $key, 2 );
219if ( !isset( $parts[1] ) ) {
220thrownew LogicException(
'Unexpected group name' );
222 $magicName = $parts[1];
225thrownew LogicException(
'Unexpected $m array with no match' );
236 $regexes = $this->getVariableStartToEndRegex();
237foreach ( $regexes as $regex ) {
239if ( preg_match( $regex, $text, $m ) ) {
240return $this->parseMatch( $m );
243return [
false, false ];
254 $hash = $this->getHash();
255if ( isset( $hash[1][$text] ) ) {
256return $hash[1][$text];
258 $lc = $this->factory->getContentLanguage()->lc( $text );
259return $hash[0][$lc] ??
false;
274 $regexes = $this->getRegex();
275 $res = preg_replace_callback( $regexes,
function ( $m ) use ( &$found ) {
276 [ $name, $param ] = $this->parseMatch( $m );
277 $found[$name] = $param;
280// T321234: Don't try to fix old revisions with broken UTF-8, just return $text as is 282 $error = preg_last_error();
283 $errorText = preg_last_error_msg();
284 LoggerFactory::getInstance(
'parser' )->warning(
'preg_match_all error: {code} {errorText}', [
288'errorText' => $errorText
290if ( $error !== PREG_BAD_UTF8_ERROR ) {
291thrownew LogicException(
"preg_match_all error $error: $errorText" );
310 $regexes = $this->getRegexStart();
311foreach ( $regexes as $regex ) {
312if ( preg_match( $regex, $text, $m ) ) {
313 [ $id, ] = $this->parseMatch( $m );
314if ( strlen( $m[0] ) >= strlen( $text ) ) {
317 $text = substr( $text, strlen( $m[0] ) );
if(!defined('MW_SETUP_CALLBACK'))
Create PSR-3 logger objects.
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
Class for handling an array of magic words.
matchVariableStartToEnd( $text)
Match some text, with parameter capture.
__construct( $names=[], ?MagicWordFactory $factory=null)
matchAndRemove(&$text)
Return an associative array for all items that match.
matchStartAndRemove(&$text)
Return the ID of the magic word at the start of $text, and remove the prefix from $text.
getBaseRegex(bool $capture=true, string $delimiter='/')
Get the base regex.
matchStartToEnd( $text)
Match some text, without parameter capture.
add( $name)
Add a magic word by name.
getHash()
Get a 2-d hashtable for this array.
Store information about magic words, and create/cache MagicWord objects.