@@ -124,11 +124,10 @@ class BaseParser
124124}
125125
126126module Private
127- INSTRUCTION_END = /#{ NAME } (\s +.*?)?\? >/um
128127TAG_PATTERN = /((?>#{ QNAME_STR } ))\s */um
129128CLOSE_PATTERN = /(#{ QNAME_STR } )\s *>/um
130129ATTLISTDECL_END = /\s +#{ NAME } (?:#{ ATTDEF } )*\s *>/um
131- NAME_PATTERN = /\s * #{ NAME } /um
130+ NAME_PATTERN = /#{ NAME } /um
132131GEDECL_PATTERN = "\\ s+#{ NAME } \\ s+#{ ENTITYDEF } \\ s*>"
133132PEDECL_PATTERN = "\\ s+(%)\\ s+#{ NAME } \\ s+#{ PEDEF } \\ s*>"
134133ENTITYDECL_PATTERN = /(?:#{ GEDECL_PATTERN } )|(?:#{ PEDECL_PATTERN } )/um
@@ -242,7 +241,7 @@ def pull_event
242241if @document_status ==nil
243242start_position = @source . position
244243if @source . match ( "<?" , true )
245- return process_instruction ( start_position )
244+ return process_instruction
246245elsif @source . match ( "<!" , true )
247246if @source . match ( "--" , true )
248247md = @source . match ( /(.*?)-->/um , true )
@@ -442,7 +441,7 @@ def pull_event
442441raise REXML ::ParseException . new ( "Declarations can only occur " +
443442"in the doctype declaration." , @source )
444443elsif @source . match ( "?" , true )
445- return process_instruction ( start_position )
444+ return process_instruction
446445else
447446# Get the next tag
448447md = @source . match ( Private ::TAG_PATTERN , true )
@@ -588,14 +587,14 @@ def need_source_encoding_update?(xml_declaration_encoding)
# Matches Private::NAME_PATTERN against the current source and returns the
# text of the whole match.
#
# base_error_message:: prefix for the ParseException message raised when
#                      no name can be matched.
#
# Raises REXML::ParseException with "<prefix>: invalid name" when the
# source still matches /\S/, and with "<prefix>: name is missing" otherwise.
def parse_name(base_error_message)
  # NOTE(review): the `true` second argument presumably tells the source to
  # consume the matched text -- confirm against the source's #match.
  md = @source.match(Private::NAME_PATTERN, true)
  return md[0] if md

  # This second match is made without the extra argument; it is only used
  # to choose between the two error messages.
  if @source.match(/\S/um)
    message = "#{base_error_message}: invalid name"
  else
    message = "#{base_error_message}: name is missing"
  end
  raise REXML::ParseException.new(message, @source)
end
600599
601600def parse_id ( base_error_message ,
@@ -664,18 +663,24 @@ def parse_id_invalid_details(accept_external_id:,
664663end
665664end
666665
667- def process_instruction ( start_position )
668- match_data = @source . match ( Private ::INSTRUCTION_END , true )
669- unless match_data
670- message = "Invalid processing instruction node"
671- @source . position = start_position
672- raise REXML ::ParseException . new ( message , @source )
666+ def process_instruction
667+ name = parse_name ( "Malformed XML: Invalid processing instruction node" )
668+ if @source . match ( /\s +/um , true )
669+ match_data = @source . match ( /(.*?)\? >/um , true )
670+ unless match_data
671+ raise ParseException . new ( "Malformed XML: Unclosed processing instruction" , @source )
672+ end
673+ content = match_data [ 1 ]
674+ else
675+ content = nil
676+ unless @source . match ( "?>" , true )
677+ raise ParseException . new ( "Malformed XML: Unclosed processing instruction" , @source )
678+ end
673679end
674- if match_data [ 1 ] =="xml"
680+ if name =="xml"
675681if @document_status
676682raise ParseException . new ( "Malformed XML: XML declaration is not at the start" , @source )
677683end
678- content = match_data [ 2 ]
679684version = VERSION . match ( content )
680685version = version [ 1 ] unless version . nil?
681686encoding = ENCODING . match ( content )
@@ -690,7 +695,7 @@ def process_instruction(start_position)
690695standalone = standalone [ 1 ] unless standalone . nil?
691696return [ :xmldecl , version , encoding , standalone ]
692697end
693- [ :processing_instruction , match_data [ 1 ] , match_data [ 2 ] ]
698+ [ :processing_instruction , name , content ]
694699end
695700
696701def parse_attributes ( prefixes , curr_ns )