7171WEND ,
7272BOL ,
7373EOL ,
74+ // Other (special) instructions
75+ SAVE ,
7476// Instructions which take relative offset as arg
7577JMP ,
7678SPLIT ,
7779RSPLIT ,
78- // Other (special) instructions
79- SAVE ,
8080};
8181
8282// Return codes for re_sizecode() and re_comp()
@@ -129,15 +129,11 @@ void re_dumpcode(rcode *prog)
129129printf ("%4d: " ,pc );i ++ ;
130130switch (code [pc ++ ]) {
131131default :
132- pc = prog -> unilen ;
133- break ;
134- case SPLIT :
135- printf ("split %d (%d) #%d\n" ,pc + code [pc ]+ 2 ,code [pc ],code [pc + 1 ]);
136- pc += 2 ;
137- break ;
138- case RSPLIT :
139- printf ("rsplit %d (%d) #%d\n" ,pc + code [pc ]+ 2 ,code [pc ],code [pc + 1 ]);
140- pc += 2 ;
132+ if (code [pc - 1 ]< 0 )
133+ printf ("rsplit %d (%d) #%d\n" ,pc + code [pc ]+ 1 ,code [pc ],code [pc - 1 ]);
134+ else
135+ printf ("split %d (%d) #%d\n" ,pc + code [pc ]+ 1 ,code [pc ],code [pc - 1 ]);
136+ pc ++ ;
141137break ;
142138case JMP :
143139printf ("jmp %d (%d)\n" ,pc + code [pc ]+ 1 ,code [pc ]);
@@ -278,8 +274,8 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
278274re ++ ;
279275if (* re == '}' ) {
280276EMIT (PC ,RSPLIT );
281- EMIT (PC + 1 ,REL (PC ,PC - size - 1 ));
282- PC += 3 ;
277+ EMIT (PC + 1 ,REL (PC ,PC - size ));
278+ PC += 2 ;
283279maxcnt = mincnt ;
284280}
285281while (isdigit ((unsignedchar )* re ))
@@ -293,27 +289,26 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
293289}
294290for (i = maxcnt - mincnt ;i > 0 ;i -- ) {
295291EMIT (PC ++ ,SPLIT );
296- EMIT (PC ++ ,REL (PC - 1 ,PC + ((size + 3 )* i )));
297- PC ++ ;
292+ EMIT (PC ++ ,REL (PC ,PC + ((size + 2 )* i )));
298293if (code )
299294memcpy (& code [PC ],& code [term ],size * sizeof (int ));
300295PC += size ;
301296}
302297break ;
303298case '?' :
304299if (PC == term ) gotosyntax_error ;
305- INSERT_CODE (term ,3 ,PC );
300+ INSERT_CODE (term ,2 ,PC );
306301if (re [1 ]== '?' ) {
307302EMIT (term ,RSPLIT );
308303re ++ ;
309304}else
310305EMIT (term ,SPLIT );
311- EMIT (term + 1 ,REL (term ,PC - 1 ));
306+ EMIT (term + 1 ,REL (term ,PC ));
312307term = PC ;
313308break ;
314309case '*' :
315310if (PC == term ) gotosyntax_error ;
316- INSERT_CODE (term ,3 ,PC );
311+ INSERT_CODE (term ,2 ,PC );
317312EMIT (PC ,JMP );
318313EMIT (PC + 1 ,REL (PC ,term ));
319314PC += 2 ;
@@ -322,7 +317,7 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
322317re ++ ;
323318}else
324319EMIT (term ,SPLIT );
325- EMIT (term + 1 ,REL (term ,PC - 1 ));
320+ EMIT (term + 1 ,REL (term ,PC ));
326321term = PC ;
327322break ;
328323case '+' :
@@ -332,18 +327,18 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
332327re ++ ;
333328}else
334329EMIT (PC ,RSPLIT );
335- EMIT (PC + 1 ,REL (PC - 1 ,term ));
336- PC += 3 ;
330+ EMIT (PC + 1 ,REL (PC ,term ));
331+ PC += 2 ;
337332term = PC ;
338333break ;
339334case '|' :
340335if (alt_label )
341336alt_stack [altc ++ ]= alt_label ;
342- INSERT_CODE (start ,3 ,PC );
337+ INSERT_CODE (start ,2 ,PC );
343338EMIT (PC ++ ,JMP );
344339alt_label = PC ++ ;
345340EMIT (start ,SPLIT );
346- EMIT (start + 1 ,REL (start ,PC - 1 ));
341+ EMIT (start + 1 ,REL (start ,PC ));
347342term = PC ;
348343break ;
349344case '^' :
@@ -360,7 +355,7 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
360355if (code && alt_label ) {
361356EMIT (alt_label ,REL (alt_label ,PC )+ 1 );
362357for (int alts = altc ;altc ;altc -- ) {
363- int at = alt_stack [alts - altc ]+ altc * 3 ;
358+ int at = alt_stack [alts - altc ]+ altc * 2 ;
364359EMIT (at ,REL (at ,PC )+ 1 );
365360}
366361}
@@ -397,17 +392,19 @@ int re_comp(rcode *prog, const char *re, int nsubs)
397392if (res < 0 )return res ;
398393// If unparsed chars left
399394if (* re )return RE_SYNTAX_ERROR ;
400- int icnt = 0 ,scnt = 0 ;
395+ int icnt = 0 ,scnt = SPLIT ;
401396for (int i = 0 ;i < prog -> unilen ;i ++ )
402397switch (prog -> insts [i ]) {
403398case CLASS :
404399i += prog -> insts [i + 2 ]* 2 + 2 ;
405400icnt ++ ;
406401break ;
407402case SPLIT :
403+ prog -> insts [i ++ ]= scnt ++ ;
404+ icnt ++ ;
405+ break ;
408406case RSPLIT :
409- prog -> insts [i + 2 ]= scnt ++ ;
410- i ++ ;
407+ prog -> insts [i ]= - scnt ++ ;
411408case JMP :
412409case SAVE :
413410case CHAR :
@@ -440,11 +437,11 @@ if (--csub->ref == 0) { \
440437
441438#define onclist (nn )
442439#define onnlist (nn ) \
443- if (sparse[npc[2] ] < sparsesz) \
444- if (sdense[sparse[npc[2]]] ==npc[2] ) \
440+ if (sparse[spc ] < sparsesz) \
441+ if (sdense[sparse[spc]] ==spc ) \
445442deccheck(nn) \
446- sdense[sparsesz] =npc[2] ; \
447- sparse[npc[2] ] = sparsesz++; \
443+ sdense[sparsesz] =spc ; \
444+ sparse[spc ] = sparsesz++; \
448445
449446#define fastrec (nn ,list ,listidx ) \
450447nsub->ref++; \
@@ -486,7 +483,7 @@ else if (spc == JMP) { \
486483si = 0; \
487484rec##nn: \
488485spc = *npc; \
489- if (spc < WBEG) { \
486+ if ((unsigned int) spc < WBEG) { \
490487list[listidx].sub = nsub; \
491488list[listidx++].pc = npc; \
492489rec_check##nn: \
@@ -498,10 +495,10 @@ if (spc < WBEG) { \
498495continue; \
499496} \
500497next##nn: \
501- if (spc== SPLIT ) { \
498+ if (spc> JMP ) { \
502499on##list(nn) \
503- npc +=3 ; \
504- pcs[si] = npc + npc[-2 ]; \
500+ npc +=2 ; \
501+ pcs[si] = npc + npc[-1 ]; \
505502fastrec(nn, list, listidx) \
506503} else if (spc == SAVE) { \
507504if (nsub->ref > 1) { \
@@ -518,11 +515,12 @@ if (spc == SPLIT) { \
518515|| !isword(_sp)) \
519516deccheck(nn) \
520517npc++; goto rec##nn; \
521- } else if (spc == RSPLIT) { \
518+ } else if (spc < 0) { \
519+ spc = -spc; \
522520on##list(nn) \
523- npc +=3 ; \
521+ npc +=2 ; \
524522pcs[si] = npc; \
525- npc += npc[-2 ]; \
523+ npc += npc[-1 ]; \
526524fastrec(nn, list, listidx) \
527525} else if (spc == WEND) { \
528526if (isword(_sp)) \
@@ -553,7 +551,7 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
553551int * pcs [prog -> splits ];
554552unsignedint sdense [prog -> splits ],sparse [prog -> splits ];
555553rsub * subs [prog -> splits ];
556- char nsubs [rsubsize * (prog -> len - prog -> splits + 3 )];
554+ char nsubs [rsubsize * (prog -> len - prog -> splits + 14 )];
557555rsub * nsub ,* s1 ,* matched = NULL ,* freesub = NULL ;
558556rthread _clist [prog -> len + 1 ],_nlist [prog -> len + 1 ];
559557_clist [0 ].pc = insts ,_nlist [0 ].pc = insts ;