summaryrefslogtreecommitdiffstats
path: root/byterun/lexing.c
blob: 86387a911830c4d571576680192fd433bd399d3e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
/***********************************************************************/
/*                                                                     */
/*                           Objective Caml                            */
/*                                                                     */
/*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
/*                                                                     */
/*  Copyright 1996 Institut National de Recherche en Informatique et   */
/*  Automatique.  Distributed only by permission.                      */
/*                                                                     */
/***********************************************************************/

/* $Id$ */

/* The table-driven automaton for lexers generated by camllex. */

#include "fail.h"
#include "mlvalues.h"
#include "stacks.h"
#include "str.h"

struct lexer_buffer {
  value refill_buff;
  value lex_buffer;
  value lex_buffer_len;
  value lex_abs_pos;
  value lex_start_pos;
  value lex_curr_pos;
  value lex_last_pos;
  value lex_saved_state;
  value lex_last_action;
};

struct lexing_table {
  value lex_base;
  value lex_backtrk;
  value lex_default;
  value lex_trans;
  value lex_check;
};

#ifdef BIG_ENDIAN
#define Short(tbl,n) \
  (*((unsigned char *)((tbl) + (n) * sizeof(short))) + \
          (*((schar *)((tbl) + (n) * sizeof(short) + 1)) << 8))
#else
#define Short(tbl,n) (((short *)(tbl))[n])
#endif

value lex_engine(tbl, start_state, lexbuf)     /* ML */
     struct lexing_table * tbl;
     value start_state;
     struct lexer_buffer * lexbuf;
{
  int state, base, backtrk, c;
  
  if (Int_val(lexbuf->lex_saved_state) >= 0) {
    state = Int_val(lexbuf->lex_saved_state);
    lexbuf->lex_saved_state = Val_int(-1);
  } else {
    state = Int_val(start_state);
    lexbuf->lex_last_pos = lexbuf->lex_start_pos = lexbuf->lex_curr_pos;
    lexbuf->lex_last_action = Val_int(-1);
  }
  while(1) {
    /* Lookup base address or action number for current state */
    base = Short(tbl->lex_base, state);
    if (base < 0) return Val_int(-base-1);
    /* See if it's a backtrack point */
    backtrk = Short(tbl->lex_backtrk, state);
    if (backtrk >= 0) {
      lexbuf->lex_last_pos = lexbuf->lex_curr_pos;
      lexbuf->lex_last_action = Val_int(backtrk);
    }
    /* See if we need a refill */
    if (lexbuf->lex_curr_pos >= lexbuf->lex_buffer_len) {
      lexbuf->lex_saved_state = Val_int(state);
      return (-1);
    }
    /* Read next input char */
    c = Byte_u(lexbuf->lex_buffer, Long_val(lexbuf->lex_curr_pos));
    lexbuf->lex_curr_pos += 2;
    /* Determine next state */
    if (Short(tbl->lex_check, base + c) == state)
      state = Short(tbl->lex_trans, base + c);
    else
      state = Short(tbl->lex_default, state);
    /* If no transition on this char, return to last backtrack point */
    if (state < 0) {
      lexbuf->lex_curr_pos = lexbuf->lex_last_pos;
      if (lexbuf->lex_last_action == Val_int(-1)) {
        failwith("lexing: empty token");
      } else {
        return lexbuf->lex_last_action;
      }
    }
  }
}