diff options
-rw-r--r-- | otherlibs/str/str.ml | 10 | ||||
-rw-r--r-- | otherlibs/str/strstubs.c | 34 |
2 files changed, 37 insertions, 7 deletions
diff --git a/otherlibs/str/str.ml b/otherlibs/str/str.ml index 71bccc71e..864452474 100644 --- a/otherlibs/str/str.ml +++ b/otherlibs/str/str.ml @@ -137,6 +137,8 @@ let op_SIMPLESTAR = 13 let op_SIMPLEPLUS = 14 let op_GOTO = 15 let op_PUSHBACK = 16 +let op_GOTO_STAR = 17 +let op_GOTO_PLUS = 18 (* Encoding of bytecode instructions *) @@ -279,25 +281,25 @@ let compile fold_case re = (* Implement longest match semantics for compatibility with old Str *) (* lbl1: PUSHBACK lbl2 <match r> - GOTO lbl1 + GOTO_STAR lbl1 lbl2: *) let lbl1 = emit_hole() in emit_code r; - emit_instr op_GOTO (displ lbl1 !progpos); + emit_instr op_GOTO_STAR (displ lbl1 !progpos); let lbl2 = !progpos in patch_instr lbl1 op_PUSHBACK lbl2 | Plus r -> (* Implement longest match semantics for compatibility with old Str *) (* lbl1: <match r> PUSHBACK lbl2 - GOTO lbl1 + GOTO_PLUS lbl1 lbl2: *) let lbl1 = !progpos in emit_code r; let pos_pushback = emit_hole() in - emit_instr op_GOTO (displ lbl1 !progpos); + emit_instr op_GOTO_PLUS (displ lbl1 !progpos); let lbl2 = !progpos in patch_instr pos_pushback op_PUSHBACK lbl2 | Option r -> diff --git a/otherlibs/str/strstubs.c b/otherlibs/str/strstubs.c index f64080f9b..820da980c 100644 --- a/otherlibs/str/strstubs.c +++ b/otherlibs/str/strstubs.c @@ -24,7 +24,7 @@ /* The backtracking NFA interpreter */ struct backtrack_point { - char * txt; + unsigned char * txt; value * pc; int mask; }; @@ -57,8 +57,12 @@ enum { SIMPLESTAR, /* match a character class 0, 1 or several times */ SIMPLEPLUS, /* match a character class 1 or several times */ GOTO, /* unconditional branch */ - PUSHBACK /* record a backtrack point -- + PUSHBACK, /* record a backtrack point -- where to jump in case of failure */ + GOTO_STAR, /* like goto, except that we backtrack if no + characters were consumed since last PUSHBACK */ + GOTO_PLUS, /* like goto, except that we backtrack if no + characters were consumed since penultimate PUSHBACK */ }; /* Accessors in a compiled regexp */ @@ -102,8 +106,20 @@ static unsigned char re_word_letters[32] = { }; #define Is_word_letter(c) ((re_word_letters[(c) >> 3] >> ((c) & 7)) & 1) -/* The bytecode interpreter for the NFA */ +/* Return the n-th previous backtrack point. + Must have n < BACKTRACK_STACK_BLOCK_SIZE. */ +static struct backtrack_point * +re_previous_backtrack_point(struct backtrack_point * sp, + struct backtrack_stack * stack, + int n) +{ + if (sp >= stack->point + n) return sp - n; + stack = stack->previous; + if (stack == NULL) return NULL; + return stack->point + BACKTRACK_STACK_BLOCK_SIZE - n; +} +/* The bytecode interpreter for the NFA */ static int re_match(value re, unsigned char * starttxt, register unsigned char * txt, @@ -251,6 +267,18 @@ static int re_match(value re, sp->mask = re_mask; sp++; break; + case GOTO_STAR: { + struct backtrack_point * p = re_previous_backtrack_point(sp, stack, 1); + if (p != NULL && txt == p->txt) goto backtrack; + pc = pc + SignedArg(instr); + break; + } + case GOTO_PLUS: { + struct backtrack_point * p = re_previous_backtrack_point(sp, stack, 2); + if (p != NULL && txt == p->txt) goto backtrack; + pc = pc + SignedArg(instr); + break; + } default: assert(0); } |