1212
1313
1414#define CHAR_EOF -1 /* not a byte value; NOTE(review): presumably the end-of-input sentinel -- confirm */
15- #define CHAR_NULL 0 /* ASCII 0: NUL byte */
1615#define CHAR_BANG 33 /* ASCII 33: exclamation mark */
1716#define CHAR_DOUBLE 34 /* ASCII 34: double quote */
1817#define CHAR_PERCENT 37 /* ASCII 37: percent sign */
2423#define CHAR_GT 62 /* ASCII 62: greater-than sign */
2524#define CHAR_QUESTION 63 /* ASCII 63: question mark */
2625#define CHAR_RIGHTB 93 /* ASCII 93: right square bracket */
27- #define CHAR_TICK 96 /* ASCII 96: backtick (grave accent) */
2826
2927/* prototypes */
3028
@@ -43,7 +41,6 @@ static int h5_state_before_attribute_name(h5_state_t* hs);
4341static int h5_state_before_attribute_value (h5_state_t * hs );
4442static int h5_state_attribute_value_double_quote (h5_state_t * hs );
4543static int h5_state_attribute_value_single_quote (h5_state_t * hs );
46- static int h5_state_attribute_value_back_quote (h5_state_t * hs );
4744static int h5_state_attribute_value_no_quote (h5_state_t * hs );
4845static int h5_state_after_attribute_value_quoted_state (h5_state_t * hs );
4946static int h5_state_comment (h5_state_t * hs );
@@ -63,28 +60,16 @@ static int h5_state_doctype(h5_state_t* hs);
6360/**
6461 * public function
 *
 * Clears *hs with memset, stores the input pointer s and its length len,
 * and picks the state function the tokenizer starts in.
 *
 * This span is a rendered diff, so two variants of the function are
 * interleaved:
 *   - lines marked with a minus sign: flags is an enum html5_flags and a
 *     switch maps DATA_STATE, VALUE_NO_QUOTE, VALUE_SINGLE_QUOTE,
 *     VALUE_DOUBLE_QUOTE and VALUE_BACK_QUOTE to a starting state function.
 *     No default label is visible, so for any other value hs->state keeps
 *     the zero bytes written by memset.
 *   - lines marked with a plus sign: flags is a plain int; 0 selects
 *     h5_state_data and every other value reaches assert(0).
 *
 * NOTE(review): on the plus side hs->state is already set to h5_state_data
 * before the if, so the assignment inside the flags == 0 branch is
 * redundant.
 * NOTE(review): assert() expands to nothing when NDEBUG is defined, so in
 * such a build a nonzero flags value would silently start in h5_state_data
 * -- confirm that this is intended.
6562 */
66- void libinjection_h5_init (h5_state_t * hs ,const char * s ,size_t len ,enum html5_flags flags )
63+ void libinjection_h5_init (h5_state_t * hs ,const char * s ,size_t len ,int flags )
6764{
6865memset (hs ,0 ,sizeof (h5_state_t ));
6966hs -> s = s ;
7067hs -> len = len ;
71-
72- switch (flags ) {
73- case DATA_STATE :
68+ hs -> state = h5_state_data ;
69+ if (flags == 0 ) {
7470hs -> state = h5_state_data ;
75- break ;
76- case VALUE_NO_QUOTE :
77- hs -> state = h5_state_before_attribute_name ;
78- break ;
79- case VALUE_SINGLE_QUOTE :
80- hs -> state = h5_state_attribute_value_single_quote ;
81- break ;
82- case VALUE_DOUBLE_QUOTE :
83- hs -> state = h5_state_attribute_value_double_quote ;
84- break ;
85- case VALUE_BACK_QUOTE :
86- hs -> state = h5_state_attribute_value_back_quote ;
87- break ;
71+ }else {
72+ assert (0 );
8873 }
8974}
9075
@@ -100,18 +85,10 @@ int libinjection_h5_next(h5_state_t* hs)
10085/**
10186 * Everything below here is private
10287 *
103- */
104-
88+ */
10589
/*
 * Returns nonzero when ch is one of the six characters in the strchr search
 * string (space, horizontal tab, newline, vertical tab, form feed, carriage
 * return) and 0 otherwise.
 *
 * NOTE(review): strchr treats the terminating NUL as part of the searched
 * string, so ch == 0 also yields nonzero -- confirm that callers expect a
 * NUL byte to count as whitespace.
 */
10690static int h5_is_white (char ch )
10791{
108- /*
109- * \t = horizontal tab = 0x09
110- * \n = newline = 0x0A
111- * \v = vertical tab = 0x0B
112- * \f = form feed = 0x0C
113- * \r = cr = 0x0D
114- */
11592return strchr (" \t\n\v\f\r" ,ch )!= NULL ;
11693}
11794
@@ -120,17 +97,9 @@ static int h5_skip_white(h5_state_t* hs)
12097char ch ;
12198while (hs -> pos < hs -> len ) {
12299ch = hs -> s [hs -> pos ];
123- switch (ch ) {
124- case 0x00 :/* IE only */
125- case 0x20 :
126- case 0x09 :
127- case 0x0A :
128- case 0x0B :/* IE only */
129- case 0x0C :
130- case 0x0D :/* IE only */
100+ if (ch == ' ' ) {
131101hs -> pos += 1 ;
132- break ;
133- default :
102+ }else {
134103return ch ;
135104 }
136105 }
@@ -198,9 +167,6 @@ static int h5_state_tag_open(h5_state_t* hs)
198167return h5_state_bogus_comment2 (hs );
199168 }else if ((ch >='a' && ch <='z' )|| (ch >='A' && ch <='Z' )) {
200169return h5_state_tag_name (hs );
201- }else if (ch == CHAR_NULL ) {
202- /* IE-ism NULL characters are ignored */
203- return h5_state_tag_name (hs );
204170 }else {
205171/* user input mistake in configuring state */
206172if (hs -> pos == 0 ) {
@@ -231,9 +197,7 @@ static int h5_state_end_tag_open(h5_state_t* hs)
231197 }else if ((ch >='a' && ch <='z' )|| (ch >='A' && ch <='Z' )) {
232198return h5_state_tag_name (hs );
233199 }
234-
235- hs -> is_close = 0 ;
236- return h5_state_bogus_comment (hs );
200+ return h5_state_data (hs );
237201}
238202/*
239203 *
@@ -267,12 +231,7 @@ static int h5_state_tag_name(h5_state_t* hs)
267231pos = hs -> pos ;
268232while (pos < hs -> len ) {
269233ch = hs -> s [pos ];
270- if (ch == 0 ) {
271- /* special non-standard case */
272- /* allow nulls in tag name */
273- /* some old browsers apparently allow and ignore them */
274- pos += 1 ;
275- }else if (h5_is_white (ch )) {
234+ if (h5_is_white (ch )) {
276235hs -> token_start = hs -> s + hs -> pos ;
277236hs -> token_len = pos - hs -> pos ;
278237hs -> token_type = TAG_NAME_OPEN ;
@@ -340,7 +299,7 @@ static int h5_state_before_attribute_name(h5_state_t* hs)
340299default : {
341300return h5_state_attribute_name (hs );
342301 }
343- }
302+ }
344303}
345304
346305static int h5_state_attribute_name (h5_state_t * hs )
@@ -349,7 +308,7 @@ static int h5_state_attribute_name(h5_state_t* hs)
349308size_t pos ;
350309
351310TRACE ();
352- pos = hs -> pos + 1 ;
311+ pos = hs -> pos ;
353312while (pos < hs -> len ) {
354313ch = hs -> s [pos ];
355314if (h5_is_white (ch )) {
@@ -399,19 +358,21 @@ static int h5_state_attribute_name(h5_state_t* hs)
399358static int h5_state_after_attribute_name (h5_state_t * hs )
400359{
401360int c ;
361+ size_t pos ;
402362
403363TRACE ();
364+ pos = hs -> pos ;
404365c = h5_skip_white (hs );
405366switch (c ) {
406367case CHAR_EOF : {
407368return 0 ;
408369 }
409370case CHAR_SLASH : {
410- hs -> pos += 1 ;
371+ hs -> pos = pos + 1 ;
411372return h5_state_self_closing_start_tag (hs );
412373 }
413374case CHAR_EQUALS : {
414- hs -> pos += 1 ;
375+ hs -> pos = pos + 1 ;
415376return h5_state_before_attribute_value (hs );
416377 }
417378case CHAR_GT : {
@@ -442,9 +403,6 @@ static int h5_state_before_attribute_value(h5_state_t* hs)
442403return h5_state_attribute_value_double_quote (hs );
443404 }else if (c == CHAR_SINGLE ) {
444405return h5_state_attribute_value_single_quote (hs );
445- }else if (c == CHAR_TICK ) {
446- /* NON STANDARD IE */
447- return h5_state_attribute_value_back_quote (hs );
448406 }else {
449407return h5_state_attribute_value_no_quote (hs );
450408 }
@@ -457,16 +415,8 @@ static int h5_state_attribute_value_quote(h5_state_t* hs, char qchar)
457415
458416TRACE ();
459417
460- /* skip initial quote in normal case.
461- * don't do this "if (pos == 0)" since it means we have started
462- * in a non-data state. given an input of '><foo
463- * we want to make 0-length attribute name
464- */
465- if (hs -> pos > 0 ) {
466- hs -> pos += 1 ;
467- }
468-
469-
418+ /* skip quote */
419+ hs -> pos += 1 ;
470420idx = (const char * )memchr (hs -> s + hs -> pos ,qchar ,hs -> len - hs -> pos );
471421if (idx == NULL ) {
472422hs -> token_start = hs -> s + hs -> pos ;
@@ -497,13 +447,6 @@ int h5_state_attribute_value_single_quote(h5_state_t* hs)
497447return h5_state_attribute_value_quote (hs ,CHAR_SINGLE );
498448}
499449
500- static
501- int h5_state_attribute_value_back_quote (h5_state_t * hs )
502- {
503- TRACE ();
504- return h5_state_attribute_value_quote (hs ,CHAR_TICK );
505- }
506-
507450static int h5_state_attribute_value_no_quote (h5_state_t * hs )
508451{
509452char ch ;
@@ -713,13 +656,10 @@ static int h5_state_comment(h5_state_t* hs)
713656char ch ;
714657const char * idx ;
715658size_t pos ;
716- size_t offset ;
717- const char * end = hs -> s + hs -> len ;
718659
719660TRACE ();
720661pos = hs -> pos ;
721662while (1 ) {
722-
723663idx = (const char * )memchr (hs -> s + pos ,CHAR_DASH ,hs -> len - pos );
724664
725665/* did not find anything or has less than 3 chars left */
@@ -730,62 +670,21 @@ static int h5_state_comment(h5_state_t* hs)
730670hs -> token_type = TAG_COMMENT ;
731671return 1 ;
732672 }
733- offset = 1 ;
734-
735- /* skip all nulls */
736- while (idx + offset < end && * (idx + offset )== 0 ) {
737- offset += 1 ;
738- }
739- if (idx + offset == end ) {
740- hs -> state = h5_state_eof ;
741- hs -> token_start = hs -> s + hs -> pos ;
742- hs -> token_len = hs -> len - hs -> pos ;
743- hs -> token_type = TAG_COMMENT ;
744- return 1 ;
745- }
746-
747- ch = * (idx + offset );
673+ ch = * (idx + 1 );
748674if (ch != CHAR_DASH && ch != CHAR_BANG ) {
749675pos = (size_t )(idx - hs -> s )+ 1 ;
750676continue ;
751677 }
752-
753- /* need to test */
754- #if 0
755- /* skip all nulls */
756- while (idx + offset < end && * (idx + offset )== 0 ) {
757- offset += 1 ;
758- }
759- if (idx + offset == end ) {
760- hs -> state = h5_state_eof ;
761- hs -> token_start = hs -> s + hs -> pos ;
762- hs -> token_len = hs -> len - hs -> pos ;
763- hs -> token_type = TAG_COMMENT ;
764- return 1 ;
765- }
766- #endif
767-
768- offset += 1 ;
769- if (idx + offset == end ) {
770- hs -> state = h5_state_eof ;
771- hs -> token_start = hs -> s + hs -> pos ;
772- hs -> token_len = hs -> len - hs -> pos ;
773- hs -> token_type = TAG_COMMENT ;
774- return 1 ;
775- }
776-
777-
778- ch = * (idx + offset );
678+ ch = * (idx + 2 );
779679if (ch != CHAR_GT ) {
780680pos = (size_t )(idx - hs -> s )+ 1 ;
781681continue ;
782682 }
783- offset += 1 ;
784683
785684/* ends in --> or -!> */
786685hs -> token_start = hs -> s + hs -> pos ;
787686hs -> token_len = (size_t )(idx - hs -> s )- hs -> pos ;
788- hs -> pos = (size_t )(idx + offset - hs -> s );
687+ hs -> pos = (size_t )(idx - hs -> s )+ 3 ;
789688hs -> state = h5_state_data ;
790689hs -> token_type = TAG_COMMENT ;
791690return 1 ;