@@ -71,20 +71,20 @@ void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, enum html5_
7171
7272switch (flags ) {
7373case DATA_STATE :
74- hs -> state = h5_state_data ;
75- break ;
74+ hs -> state = h5_state_data ;
75+ break ;
7676case VALUE_NO_QUOTE :
77- hs -> state = h5_state_before_attribute_name ;
78- break ;
77+ hs -> state = h5_state_before_attribute_name ;
78+ break ;
7979case VALUE_SINGLE_QUOTE :
80- hs -> state = h5_state_attribute_value_single_quote ;
81- break ;
80+ hs -> state = h5_state_attribute_value_single_quote ;
81+ break ;
8282case VALUE_DOUBLE_QUOTE :
83- hs -> state = h5_state_attribute_value_double_quote ;
84- break ;
83+ hs -> state = h5_state_attribute_value_double_quote ;
84+ break ;
8585case VALUE_BACK_QUOTE :
86- hs -> state = h5_state_attribute_value_back_quote ;
87- break ;
86+ hs -> state = h5_state_attribute_value_back_quote ;
87+ break ;
8888 }
8989}
9090
@@ -100,10 +100,18 @@ int libinjection_h5_next(h5_state_t* hs)
100100/**
101101 * Everything below here is private
102102 *
103- */
103+ */
104+
104105
/**
 * Tell whether a byte is treated as whitespace by the tokenizer.
 *
 * Accepted bytes:
 *   0x20 = space
 *   0x09 = \t, horizontal tab
 *   0x0A = \n, newline
 *   0x0B = \v, vertical tab
 *   0x0C = \f, form feed
 *   0x0D = \r, carriage return
 *   0x00 = NUL
 *
 * NUL is listed on purpose: this predicate has always answered "yes"
 * for it, and callers may depend on that -- keep it unless every call
 * site has been checked.
 *
 * @param ch  byte to classify
 * @return 1 when ch is one of the bytes above, 0 otherwise
 */
static int h5_is_white(char ch)
{
    switch (ch) {
    case '\0':
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
        return 1;
    default:
        return 0;
    }
}
109117
@@ -112,19 +120,19 @@ static int h5_skip_white(h5_state_t* hs)
112120char ch ;
113121while (hs -> pos < hs -> len ) {
114122ch = hs -> s [hs -> pos ];
115- switch (ch ) {
116- case 0x00 :/* IE only */
117- case 0x20 :
118- case 0x09 :
119- case 0x0A :
120- case 0x0B :/* IE only */
121- case 0x0C :
123+ switch (ch ) {
124+ case 0x00 :/* IE only */
125+ case 0x20 :
126+ case 0x09 :
127+ case 0x0A :
128+ case 0x0B :/* IE only */
129+ case 0x0C :
122130case 0x0D :/* IE only */
123131hs -> pos += 1 ;
124- break ;
125- default :
132+ break ;
133+ default :
126134return ch ;
127- }
135+ }
128136 }
129137return CHAR_EOF ;
130138}
@@ -259,12 +267,12 @@ static int h5_state_tag_name(h5_state_t* hs)
259267pos = hs -> pos ;
260268while (pos < hs -> len ) {
261269ch = hs -> s [pos ];
262- if (ch == 0 ) {
263- /* special non-standard case */
264- /* allow nulls in tag name */
265- /* some old browsers apparently allow and ignore them */
266- pos += 1 ;
267- }else if (h5_is_white (ch )) {
270+ if (ch == 0 ) {
271+ /* special non-standard case */
272+ /* allow nulls in tag name */
273+ /* some old browsers apparently allow and ignore them */
274+ pos += 1 ;
275+ }else if (h5_is_white (ch )) {
268276hs -> token_start = hs -> s + hs -> pos ;
269277hs -> token_len = pos - hs -> pos ;
270278hs -> token_type = TAG_NAME_OPEN ;
@@ -332,7 +340,7 @@ static int h5_state_before_attribute_name(h5_state_t* hs)
332340default : {
333341return h5_state_attribute_name (hs );
334342 }
335- }
343+ }
336344}
337345
338346static int h5_state_attribute_name (h5_state_t * hs )
@@ -450,12 +458,12 @@ static int h5_state_attribute_value_quote(h5_state_t* hs, char qchar)
450458TRACE ();
451459
452460/* skip initial quote in normal case.
453- *dont do thisis pos == 0 since it means we have started
461+ *don't do this"if ( pos == 0)" since it means we have started
454462 * in a non-data state. given an input of '><foo
455463 * we want to make 0-length attribute name
456464 */
457465if (hs -> pos > 0 ) {
458- hs -> pos += 1 ;
466+ hs -> pos += 1 ;
459467 }
460468
461469
@@ -705,10 +713,13 @@ static int h5_state_comment(h5_state_t* hs)
705713char ch ;
706714const char * idx ;
707715size_t pos ;
716+ size_t offset ;
717+ const char * end = hs -> s + hs -> len ;
708718
709719TRACE ();
710720pos = hs -> pos ;
711721while (1 ) {
722+
712723idx = (const char * )memchr (hs -> s + pos ,CHAR_DASH ,hs -> len - pos );
713724
714725/* did not find anything or has less than 3 chars left */
@@ -719,21 +730,62 @@ static int h5_state_comment(h5_state_t* hs)
719730hs -> token_type = TAG_COMMENT ;
720731return 1 ;
721732 }
722- ch = * (idx + 1 );
733+ offset = 1 ;
734+
735+ /* skip all nulls */
736+ while (idx + offset < end && * (idx + offset )== 0 ) {
737+ offset += 1 ;
738+ }
739+ if (idx + offset == end ) {
740+ hs -> state = h5_state_eof ;
741+ hs -> token_start = hs -> s + hs -> pos ;
742+ hs -> token_len = hs -> len - hs -> pos ;
743+ hs -> token_type = TAG_COMMENT ;
744+ return 1 ;
745+ }
746+
747+ ch = * (idx + offset );
723748if (ch != CHAR_DASH && ch != CHAR_BANG ) {
724749pos = (size_t )(idx - hs -> s )+ 1 ;
725750continue ;
726751 }
727- ch = * (idx + 2 );
752+
753+ /* need to test */
754+ #if 0
755+ /* skip all nulls */
756+ while (idx + offset < end && * (idx + offset )== 0 ) {
757+ offset += 1 ;
758+ }
759+ if (idx + offset == end ) {
760+ hs -> state = h5_state_eof ;
761+ hs -> token_start = hs -> s + hs -> pos ;
762+ hs -> token_len = hs -> len - hs -> pos ;
763+ hs -> token_type = TAG_COMMENT ;
764+ return 1 ;
765+ }
766+ #endif
767+
768+ offset += 1 ;
769+ if (idx + offset == end ) {
770+ hs -> state = h5_state_eof ;
771+ hs -> token_start = hs -> s + hs -> pos ;
772+ hs -> token_len = hs -> len - hs -> pos ;
773+ hs -> token_type = TAG_COMMENT ;
774+ return 1 ;
775+ }
776+
777+
778+ ch = * (idx + offset );
728779if (ch != CHAR_GT ) {
729780pos = (size_t )(idx - hs -> s )+ 1 ;
730781continue ;
731782 }
783+ offset += 1 ;
732784
733785/* ends in --> or -!> */
734786hs -> token_start = hs -> s + hs -> pos ;
735787hs -> token_len = (size_t )(idx - hs -> s )- hs -> pos ;
736- hs -> pos = (size_t )(idx - hs -> s )+ 3 ;
788+ hs -> pos = (size_t )(idx + offset - hs -> s );
737789hs -> state = h5_state_data ;
738790hs -> token_type = TAG_COMMENT ;
739791return 1 ;