Module:Wikitext Parsing

Edit links

require("strict")

--Helper functions

--- Report whether `text` begins with `subtext`.
-- @param text string to inspect
-- @param subtext prefix to look for
-- @return boolean
local function startswith(text, subtext)
	return string.sub(text, 1, #subtext) == subtext
end

--- Report whether `text` ends with `subtext`.
-- @param text string to inspect
-- @param subtext suffix to look for
-- @return boolean
local function endswith(text, subtext)
	--An empty suffix needs its own case: -#subtext would be 0, and
	--string.sub(text, 0, -1) yields the whole string rather than ""
	if subtext == "" then
		return true
	end
	return string.sub(text, -#subtext, -1) == subtext
end

--- Build a Lua pattern that matches `s` regardless of letter case.
-- Every letter becomes a two-character set, e.g. "pre" -> "[Pp][Rr][Ee]".
-- Non-letters are left untouched.
-- @param s string made of characters that are safe to use in a pattern
-- @return the pattern string (only the string; gsub's count is dropped)
local function allcases(s)
	--Parentheses truncate gsub's (string, count) pair to just the string
	return (s:gsub("%a", function(c)
		return "[" .. c:upper() .. c:lower() .. "]"
	end))
end

--Results of cheaptrim, keyed by the untrimmed input string
local trimcache = {}
--Characters that cheaptrim strips from either end of a string
local whitespace = {[" "]=1, ["\n"]=1, ["\t"]=1, ["\r"]=1}

--- Strip leading and trailing space, newline, tab and carriage return.
-- mw.text.trim is surprisingly expensive, so here's an alternative approach.
-- Results are memoised in trimcache.
-- @param str string to trim
-- @return the trimmed string ("" when str is empty or all whitespace)
local function cheaptrim(str)
	local quick = trimcache[str]
	if quick then
		return quick
	end
	-- local out = string.gsub(str, "^%s*(.-)%s*$", "%1")
	local strlen = #str
	--Find the first non-whitespace character
	local lowEnd
	for i = 1, strlen do
		if not whitespace[string.sub(str, i, i)] then
			lowEnd = i
			break
		end
	end
	if not lowEnd then
		--Nothing but whitespace (or an empty string)
		trimcache[str] = ""
		return ""
	end
	--Find the last non-whitespace character; lowEnd guarantees one exists
	for i = strlen, 1, -1 do
		if not whitespace[string.sub(str, i, i)] then
			local out = string.sub(str, lowEnd, i)
			trimcache[str] = out
			return out
		end
	end
end

--[=[ Implementation notes
---- NORMAL HTML TAGS ----
Tags are very strict on how they want to start, but loose on how they end.
The start must strictly follow <[tAgNaMe](%s|>) with no room for whitespace in
the tag's name, but may then flow as they want afterwards, making
<div\nclass\n=\n"\nerror\n"\n> valid

There's no sense of escaping < or >
E.g.
 <div class="error\>"> will end at \> despite it being inside a quote
 <div class="<span class="error">error</span>"> will not process the larger div

If a tag has no end, it will consume all text instead of not processing

---- NOPROCESSING TAGS (nowiki, pre, syntaxhighlight, source, etc.) ----
(In most comments, <source> will not be mentioned. This is because it is the
deprecated version of <syntaxhighlight>)

No-Processing tags have some interesting differences to the above rules.
For example, their syntax is a lot stricter. While an opening tag appears to
follow the same set of rules, A closing tag can't have any sort of extra
formatting period. While </div a/a> is valid, </nowiki a/a> isn't - only
newlines and spaces/tabs are allowed in closing tags.
Note that, even though <pre> tags cause a visual change when the ending tag has
extra formatting, it won't cause the no-processing effects. For some reason, the
format must be strict for that to apply.

Both the content inside the tag pair and the content inside each side of the
pair is not processed. E.g. <nowiki |}}>|}}</nowiki> would have both of the |}}
escaped in practice.

When something in the code is referenced to as a "Nowiki Tag", it means a tag
which causes wiki text to not be processed, which includes <nowiki>, <pre>,
and <syntaxhighlight>

Since we only care about these tags, we can ignore the idea of an intercepting
tag preventing processing, and just go straight for the first ending we can find
If there is no ending to find, the tag will NOT consume the rest of the text in
terms of processing behaviour (though <pre> will appear to have an effect).
Even if there is no end of the tag, the content inside the opening half will
still be unprocessed, meaning {{X20|<nowiki }}>}} wouldn't end at the first }}
despite there being no ending to the tag.

Note that there are some tags, like <math>, which also function like <nowiki>
which are included in this as well. Some other tags, like <ref>, have far too
unpredictable behaviour to be handled currently (they'd have to be split and
processed as something separate - it's complicated, but maybe not impossible.)
I suspect that every tag listed in [[Special:Version]] may behave somewhat like
this, but that's far too many cases worth checking for rarely used tags that may
not even have a good reason to contain {{ or }} anyways, so we leave them alone.

---- HTML COMMENTS AND INCLUDEONLY ----
HTML Comments are about as basic as it could get for this
Start at <!--, end at -->, no extra conditions. Simple enough
If a comment has no end, it will eat all text instead of not being processed

includeonly tags function mostly like a regular nowiki tag, with the exception
that the tag will actually consume all future text if not given an ending as
opposed to simply giving up and not changing anything. Due to complications and
the fact that this is far less likely to be present on a page, as well as being
something that may not want to be escaped, includeonly tags are ignored during
our processing
--]=]

--Lower-case names of the tags treated as "Nowiki Tags"
local validtags = {nowiki=1, pre=1, syntaxhighlight=1, source=1, math=1}

--- Try to read a no-processing tag that begins at `scanPosition`.
-- This function expects the string to start with the tag (i.e. the character
-- at scanPosition is the "<" of the candidate tag).
-- @param text the full text being scanned
-- @param scanPosition 1-based index of the candidate tag's "<"
-- @return nil when there is no valid no-processing tag at that position,
--   otherwise a table with:
--     Tag     - lower-cased tag name
--     Start   - the opening tag exactly as written
--     Content - text between the opening and closing tag ("" if none)
--     End     - the closing tag exactly as written ("" if none)
--     Length  - #Start + #Content + #End
local function TestForNowikiTag(text, scanPosition)
	local tagName = (string.match(text, "^<([^\n />]+)", scanPosition) or ""):lower()
	if not validtags[tagName] then
		return nil
	end

	local nextOpener = string.find(text, "<", scanPosition + 1) or -1
	local nextCloser = string.find(text, ">", scanPosition + 1) or -1
	--The opening tag only counts if it reaches its ">" before another "<"
	if nextCloser == -1 or (nextOpener ~= -1 and nextOpener < nextCloser) then
		return nil
	end

	--We have our starting tag (E.g. '<pre style="color:red">')
	local startingTag = string.sub(text, scanPosition, nextCloser)

	--Now find our ending...
	if endswith(startingTag, "/>") then
		--self-closing tag (we are our own ending)
		return {
			Tag = tagName,
			Start = startingTag,
			Content = "", End = "",
			Length = #startingTag
		}
	end

	--Closing tags may only carry spaces, tabs and newlines before the ">".
	--The space belongs in this set: "</nowiki >" is a valid closing tag.
	local endingTagStart, endingTagEnd = string.find(
		text, "</" .. allcases(tagName) .. "[ \t\n]*>", scanPosition
	)
	if endingTagStart then
		--Regular tag formation
		local endingTag = string.sub(text, endingTagStart, endingTagEnd)
		local tagContent = string.sub(text, nextCloser + 1, endingTagStart - 1)
		return {
			Tag = tagName,
			Start = startingTag,
			Content = tagContent,
			End = endingTag,
			Length = #startingTag + #tagContent + #endingTag
		}
	end

	--No closing tag: only the opening half is unprocessed
	--Content inside still needs escaping (also linter error!)
	return {
		Tag = tagName,
		Start = startingTag,
		Content = "", End = "",
		Length = #startingTag
	}
end

--[===[ NOTE(review): the rest of the module is damaged in this copy.
The text below starts at TestForComment and then jumps straight into the tail
of the module's console-test comment; everything in between is absent, and the
whitespace between tokens has been stripped. It is preserved verbatim here
rather than reconstructed - restore it from the original module source.

localfunctionTestForComment(text,scanPosition)--Like TestForNowikiTag but for ifstring.match(text,"^<!%-%-",scanPosition)thenlocalcommentEnd=string.find(text,"-->",scanPosition+4,true)ifcommentEndthenreturn{Start="",Content=string.sub(text,scanPosition+4,commentEnd-1),Length=commentEnd-scanPosition+3}else--Consumes all text if not given an endingreturn{Start="Should see|Shouldn't see}}]=]local out = p.PrepareText(s)mw.logObject(out)local s = [=[BA]=]local out = p.TestForComment(s, 2)mw.logObject(out); mw.log(string.sub(s, 2, out.Length))local a = p.ParseTemplates([=[{{User:Aidan9382/templates/dummy|A|B|C {{{A|B}}} { } } {|<nowiki>D</nowiki>|<pre>E|F</pre>|G|=|a=|A = [[{{PAGENAME}}|A=B]]{{Text|1==<nowiki>}}</nowiki>}}|A B=Success}}]=])mw.logObject(a)]==]
]===]

Movatterモバイル変換

PrepareText

ParseTemplates