diff options
author | Xavier Leroy <xavier.leroy@inria.fr> | 2002-12-13 17:06:19 +0000 |
---|---|---|
committer | Xavier Leroy <xavier.leroy@inria.fr> | 2002-12-13 17:06:19 +0000 |
commit | 5e32950066a2da33f1b9a095a7cb91a06befaef3 (patch) | |
tree | 57aa4d2d94e364bab1e1c180b12799e2f86863d5 | |
parent | 1ceb86a2eac58b21c0ff8c3daeff87425cbdf56f (diff) |
S'assurer que l'on progresse lors du 'greedy matching' de * et +
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5341 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
-rw-r--r-- | otherlibs/str/str.ml | 10 | ||||
-rw-r--r-- | otherlibs/str/strstubs.c | 34 |
2 files changed, 37 insertions, 7 deletions
diff --git a/otherlibs/str/str.ml b/otherlibs/str/str.ml index 71bccc71e..864452474 100644 --- a/otherlibs/str/str.ml +++ b/otherlibs/str/str.ml @@ -137,6 +137,8 @@ let op_SIMPLESTAR = 13 let op_SIMPLEPLUS = 14 let op_GOTO = 15 let op_PUSHBACK = 16 +let op_GOTO_STAR = 17 +let op_GOTO_PLUS = 18 (* Encoding of bytecode instructions *) @@ -279,25 +281,25 @@ let compile fold_case re = (* Implement longest match semantics for compatibility with old Str *) (* lbl1: PUSHBACK lbl2 <match r> - GOTO lbl1 + GOTO_STAR lbl1 lbl2: *) let lbl1 = emit_hole() in emit_code r; - emit_instr op_GOTO (displ lbl1 !progpos); + emit_instr op_GOTO_STAR (displ lbl1 !progpos); let lbl2 = !progpos in patch_instr lbl1 op_PUSHBACK lbl2 | Plus r -> (* Implement longest match semantics for compatibility with old Str *) (* lbl1: <match r> PUSHBACK lbl2 - GOTO lbl1 + GOTO_PLUS lbl1 lbl2: *) let lbl1 = !progpos in emit_code r; let pos_pushback = emit_hole() in - emit_instr op_GOTO (displ lbl1 !progpos); + emit_instr op_GOTO_PLUS (displ lbl1 !progpos); let lbl2 = !progpos in patch_instr pos_pushback op_PUSHBACK lbl2 | Option r -> diff --git a/otherlibs/str/strstubs.c b/otherlibs/str/strstubs.c index f64080f9b..820da980c 100644 --- a/otherlibs/str/strstubs.c +++ b/otherlibs/str/strstubs.c @@ -24,7 +24,7 @@ /* The backtracking NFA interpreter */ struct backtrack_point { - char * txt; + unsigned char * txt; value * pc; int mask; }; @@ -57,8 +57,12 @@ enum { SIMPLESTAR, /* match a character class 0, 1 or several times */ SIMPLEPLUS, /* match a character class 1 or several times */ GOTO, /* unconditional branch */ - PUSHBACK /* record a backtrack point -- + PUSHBACK, /* record a backtrack point -- where to jump in case of failure */ + GOTO_STAR, /* like goto, except that we backtrack if no + characters were consumed since last PUSHBACK */ + GOTO_PLUS, /* like goto, except that we backtrack if no + characters were consumed since penultimate PUSHBACK */ }; /* Accessors in a compiled regexp */ @@ -102,8 +106,20 @@ static unsigned char re_word_letters[32] = { }; #define Is_word_letter(c) ((re_word_letters[(c) >> 3] >> ((c) & 7)) & 1) -/* The bytecode interpreter for the NFA */ +/* Return the n-th previous backtrack point. + Must have n < BACKTRACK_STACK_BLOCK_SIZE. */ +static struct backtrack_point * +re_previous_backtrack_point(struct backtrack_point * sp, + struct backtrack_stack * stack, + int n) +{ + if (sp >= stack->point + n) return sp - n; + stack = stack->previous; + if (stack == NULL) return NULL; + return stack->point + BACKTRACK_STACK_BLOCK_SIZE - n; +} +/* The bytecode interpreter for the NFA */ static int re_match(value re, unsigned char * starttxt, register unsigned char * txt, @@ -251,6 +267,18 @@ static int re_match(value re, sp->mask = re_mask; sp++; break; + case GOTO_STAR: { + struct backtrack_point * p = re_previous_backtrack_point(sp, stack, 1); + if (p != NULL && txt == p->txt) goto backtrack; + pc = pc + SignedArg(instr); + break; + } + case GOTO_PLUS: { + struct backtrack_point * p = re_previous_backtrack_point(sp, stack, 2); + if (p != NULL && txt == p->txt) goto backtrack; + pc = pc + SignedArg(instr); + break; + } default: assert(0); } |