summaryrefslogtreecommitdiffstats
path: root/stdlib/genlex.ml
blob: 276db124d357b1177cf14e2db7301edcd368eafe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
(***********************************************************************)
(*                                                                     *)
(*                         Caml Special Light                          *)
(*                                                                     *)
(*              Xavier Leroy, projet Cristal, INRIA Rocquencourt       *)
(*                                                                     *)
(*  Copyright 1995 Institut National de Recherche en Informatique et   *)
(*  Automatique.  Distributed only by permission.                      *)
(*                                                                     *)
(***********************************************************************)

(* $Id$ *)

type token =
    Kwd of string
  | Ident of string
  | Int of int
  | Float of float
  | String of string
  | Char of char


(* The string buffering machinery *)

let initial_buffer = String.create 32

let buffer = ref initial_buffer
let bufpos = ref 0

let reset_buffer () =
  buffer := initial_buffer;
  bufpos := 0

let store c =
  if !bufpos >= String.length !buffer then begin
    let newbuffer = String.create (2 * !bufpos) in
    String.blit !buffer 0 newbuffer 0 !bufpos;
    buffer := newbuffer
  end;
  String.set !buffer !bufpos c;
  incr bufpos

let get_string () =
  let s = String.sub !buffer 0 !bufpos in buffer := initial_buffer; s

(* The lexer *)

let make_lexer keywords =

  let kwd_table = Hashtbl.create 17 in
  List.iter (fun s -> Hashtbl.add kwd_table s (Kwd s)) keywords;

  let ident_or_keyword id =
    try Hashtbl.find kwd_table id with Not_found -> Ident id

  and keyword_or_error c =
    let s = String.make 1 c in
      try Hashtbl.find kwd_table s
      with Not_found -> raise(Stream.Parse_error("Illegal character " ^ s)) in

  let rec next_token = parser
    [< '  ' '|'\010'|'\013'|'\009'|'\026'|'\012'; s >] ->
      next_token s
  | [< '  'A'..'Z'|'a'..'z'|'\192'..'\255' as c; s>] ->
      reset_buffer(); store c; ident s
  | [< '  '!'|'%'|'&'|'$'|'#'|'+'|'/'|':'|'<'|'='|'>'|'?'|'@'|'\\'|
              '~'|'^'|'|'|'*' as c; s >] ->
      reset_buffer(); store c; ident2 s
  | [< '  '0'..'9' as c; s>] ->
      reset_buffer(); store c; number s
  | [< '  '\''; c = char; '  '\'' >] ->
      Some(Char c)
  | [< '  '"' (* '"' *); s >] ->
      reset_buffer(); Some(String(string s))
  | [< '  '-'; s >] ->
      neg_number s
  | [< '  '('; s >] ->
      maybe_comment s
  | [< ' c >] ->
      Some(keyword_or_error c)
  | [< >] ->
      None

  and ident = parser
    [< '  'A'..'Z'|'a'..'z'|'\192'..'\255'|'0'..'9'|'_'|'\'' as c; s>] ->
      store c; ident s
  | [< >] ->
      Some(ident_or_keyword(get_string()))

  and ident2 = parser
    [< '  '!'|'%'|'&'|'$'|'#'|'+'|'-'|'/'|':'|'<'|'='|'>'|'?'|'@'|'\\'|
              '~'|'^'|'|'|'*' as c; s >] ->
      store c; ident2 s
  | [< >] ->
      Some(ident_or_keyword(get_string()))

  and neg_number = parser
    [< '  '0'..'9' as c; s >] ->
      reset_buffer(); store '-'; store c; number s
  | [< s >] ->
      reset_buffer(); store '-'; ident2 s
    
  and number = parser
    [< '  '0'..'9' as c; s >] ->
      store c; number s
  | [< '  '.'; s >] ->
      store '.'; decimal_part s
  | [< '  'e'|'E'; s >] ->
      store 'E'; exponent_part s
  | [< >] ->
      Some(Int(int_of_string(get_string())))

  and decimal_part = parser
    [< '  '0'..'9' as c; s >] ->
      store c; decimal_part s
  | [< '  'e'|'E'; s >] ->
      store 'E'; exponent_part s
  | [< >] ->
      Some(Float(float_of_string(get_string())))

  and exponent_part = parser
    [< '  '+'|'-' as c; s >] ->
      store c; end_exponent_part s
  | [< s >] ->
      end_exponent_part s

  and end_exponent_part = parser
    [< '  '0'..'9' as c; s >] ->
      store c; end_exponent_part s
  | [< >] ->
      Some(Float(float_of_string(get_string())))

  and string = parser
    [< '  '"' (* '"' *)  >] -> get_string()
  | [< '  '\\'; c = escape; s >] -> store c; string s
  | [< ' c; s >] -> store c; string s

  and char = parser
    [< '  '\\'; c = escape >] -> c
  | [< ' c >] -> c

  and escape = parser
    [< '  'n' >] -> '\n'
  | [< '  'r' >] -> '\r'
  | [< '  't' >] -> '\t'
  | [< '  '0'..'9' as c1; '  '0'..'9' as c2; '  '0'..'9' as c3 >] ->
      Char.chr((Char.code c1 - 48) * 100 +
               (Char.code c2 - 48) * 10 + (Char.code c3))
  | [< ' c >] -> c

  and maybe_comment = parser
    [< '  '*'; s >] -> comment s; next_token s
  | [< >] -> Some(keyword_or_error '(')

  and comment = parser
    [< '  '('; s >] -> maybe_nested_comment s
  | [< '  '*'; s >] -> maybe_end_comment s
  | [< ' c; s >] -> comment s

  and maybe_nested_comment = parser
    [< '  '*'; s >] -> comment s; comment s
  | [< ' c; s >] -> comment s

  and maybe_end_comment = parser
    [< '  ')' >] -> ()
  | [< ' c; s >] -> comment s

  in fun input -> Stream.from (fun count -> next_token input)