summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorXavier Leroy <xavier.leroy@inria.fr>2002-12-13 17:06:19 +0000
committerXavier Leroy <xavier.leroy@inria.fr>2002-12-13 17:06:19 +0000
commit5e32950066a2da33f1b9a095a7cb91a06befaef3 (patch)
tree57aa4d2d94e364bab1e1c180b12799e2f86863d5
parent1ceb86a2eac58b21c0ff8c3daeff87425cbdf56f (diff)
S'assurer que l'on progresse lors du 'greedy matching' de * et +
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5341 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
-rw-r--r--otherlibs/str/str.ml10
-rw-r--r--otherlibs/str/strstubs.c34
2 files changed, 37 insertions, 7 deletions
diff --git a/otherlibs/str/str.ml b/otherlibs/str/str.ml
index 71bccc71e..864452474 100644
--- a/otherlibs/str/str.ml
+++ b/otherlibs/str/str.ml
@@ -137,6 +137,8 @@ let op_SIMPLESTAR = 13
let op_SIMPLEPLUS = 14
let op_GOTO = 15
let op_PUSHBACK = 16
+let op_GOTO_STAR = 17
+let op_GOTO_PLUS = 18
(* Encoding of bytecode instructions *)
@@ -279,25 +281,25 @@ let compile fold_case re =
(* Implement longest match semantics for compatibility with old Str *)
(* lbl1: PUSHBACK lbl2
<match r>
- GOTO lbl1
+ GOTO_STAR lbl1
lbl2:
*)
let lbl1 = emit_hole() in
emit_code r;
- emit_instr op_GOTO (displ lbl1 !progpos);
+ emit_instr op_GOTO_STAR (displ lbl1 !progpos);
let lbl2 = !progpos in
patch_instr lbl1 op_PUSHBACK lbl2
| Plus r ->
(* Implement longest match semantics for compatibility with old Str *)
(* lbl1: <match r>
PUSHBACK lbl2
- GOTO lbl1
+ GOTO_PLUS lbl1
lbl2:
*)
let lbl1 = !progpos in
emit_code r;
let pos_pushback = emit_hole() in
- emit_instr op_GOTO (displ lbl1 !progpos);
+ emit_instr op_GOTO_PLUS (displ lbl1 !progpos);
let lbl2 = !progpos in
patch_instr pos_pushback op_PUSHBACK lbl2
| Option r ->
diff --git a/otherlibs/str/strstubs.c b/otherlibs/str/strstubs.c
index f64080f9b..820da980c 100644
--- a/otherlibs/str/strstubs.c
+++ b/otherlibs/str/strstubs.c
@@ -24,7 +24,7 @@
/* The backtracking NFA interpreter */
struct backtrack_point {
- char * txt;
+ unsigned char * txt;
value * pc;
int mask;
};
@@ -57,8 +57,12 @@ enum {
SIMPLESTAR, /* match a character class 0, 1 or several times */
SIMPLEPLUS, /* match a character class 1 or several times */
GOTO, /* unconditional branch */
- PUSHBACK /* record a backtrack point --
+ PUSHBACK, /* record a backtrack point --
where to jump in case of failure */
+ GOTO_STAR, /* like goto, except that we backtrack if no
+ characters were consumed since last PUSHBACK */
+ GOTO_PLUS, /* like goto, except that we backtrack if no
+ characters were consumed since penultimate PUSHBACK */
};
/* Accessors in a compiled regexp */
@@ -102,8 +106,20 @@ static unsigned char re_word_letters[32] = {
};
#define Is_word_letter(c) ((re_word_letters[(c) >> 3] >> ((c) & 7)) & 1)
-/* The bytecode interpreter for the NFA */
+/* Return the n-th previous backtrack point.
+ Must have n < BACKTRACK_STACK_BLOCK_SIZE. */
+static struct backtrack_point *
+re_previous_backtrack_point(struct backtrack_point * sp,
+ struct backtrack_stack * stack,
+ int n)
+{
+ if (sp >= stack->point + n) return sp - n;
+ stack = stack->previous;
+ if (stack == NULL) return NULL;
+ return stack->point + BACKTRACK_STACK_BLOCK_SIZE - n;
+}
+/* The bytecode interpreter for the NFA */
static int re_match(value re,
unsigned char * starttxt,
register unsigned char * txt,
@@ -251,6 +267,18 @@ static int re_match(value re,
sp->mask = re_mask;
sp++;
break;
+ case GOTO_STAR: {
+ struct backtrack_point * p = re_previous_backtrack_point(sp, stack, 1);
+ if (p != NULL && txt == p->txt) goto backtrack;
+ pc = pc + SignedArg(instr);
+ break;
+ }
+ case GOTO_PLUS: {
+ struct backtrack_point * p = re_previous_backtrack_point(sp, stack, 2);
+ if (p != NULL && txt == p->txt) goto backtrack;
+ pc = pc + SignedArg(instr);
+ break;
+ }
default:
assert(0);
}