summaryrefslogtreecommitdiffstats
path: root/ocamlbuild/glob_lexer.mll
diff options
context:
space:
mode:
Diffstat (limited to 'ocamlbuild/glob_lexer.mll')
-rw-r--r--ocamlbuild/glob_lexer.mll114
1 files changed, 114 insertions, 0 deletions
diff --git a/ocamlbuild/glob_lexer.mll b/ocamlbuild/glob_lexer.mll
new file mode 100644
index 000000000..8bf49900d
--- /dev/null
+++ b/ocamlbuild/glob_lexer.mll
@@ -0,0 +1,114 @@
+(***********************************************************************)
+(* ocamlbuild *)
+(* *)
+(* Nicolas Pouillard, Berke Durak, projet Gallium, INRIA Rocquencourt *)
+(* *)
+(* Copyright 2007 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the Q Public License version 1.0. *)
+(* *)
+(***********************************************************************)
+
+(* $Id$ *)
+(* Original author: Berke Durak *)
+(* Glob *)
+{
+open Bool;;
+open Glob_ast;;
+
+type token =
+| ATOM of pattern atom
+| AND
+| OR
+| NOT
+| LPAR
+| RPAR
+| TRUE
+| FALSE
+| EOF
+;;
+
+let sf = Printf.sprintf;;
+
+let concat_patterns p1 p2 =
+ match (p1,p2) with
+ | (Epsilon,_) -> p2
+ | (_,Epsilon) -> p1
+ | (_,_) -> Concat(p1,p2)
+;;
+
+let slash = Class(Atom('/','/'));;
+let not_slash = Class(Not(Atom('/','/')));;
+let any = Class True;;
+}
+
+let pattern_chars = ['a'-'z']|['A'-'Z']|'_'|'-'|['0'-'9']|'.'
+let space_chars = [' ' '\t' '\n' '\r' '\012']
+
+rule token = parse
+| '<' { ATOM(Pattern(let (p,_) = parse_pattern ['>'] Epsilon lexbuf in p)) }
+| '"' { ATOM(Constant(parse_string (Buffer.create 32) lexbuf)) }
+| "and"|"AND"|"&" { AND }
+| "or"|"OR"|"|" { OR }
+| "not"|"NOT"|"~" { NOT }
+| "true"|"1" { TRUE }
+| "false"|"0" { FALSE }
+| "(" { LPAR }
+| ")" { RPAR }
+| space_chars+ { token lexbuf }
+| eof { EOF }
+
+and parse_pattern eof_chars p = parse
+| (pattern_chars+ as u) { parse_pattern eof_chars (concat_patterns p (Word u)) lexbuf }
+| '{'
+ {
+ let rec loop pl =
+ let (p',c) = parse_pattern ['}';','] Epsilon lexbuf in
+ let pl = p' :: pl in
+ if c = ',' then
+ loop pl
+ else
+ parse_pattern eof_chars (concat_patterns p (Union pl)) lexbuf
+ in
+ loop []
+ }
+| "[^"
+ {
+ let cl = Not(Or(parse_class [] lexbuf)) in
+ parse_pattern eof_chars (concat_patterns p (Class cl)) lexbuf
+ }
+| '['
+ {
+ let cl = Or(parse_class [] lexbuf) in
+ parse_pattern eof_chars (concat_patterns p (Class cl)) lexbuf
+ }
+| "/**/" (* / | /\Sigma^*/ *)
+ { let q = Union[slash; Concat(slash, Concat(Star any, slash)) ] in
+ parse_pattern eof_chars (concat_patterns p q) lexbuf }
+| "/**" (* \varepsilon | /\Sigma^* *)
+ { let q = Union[Epsilon; Concat(slash, Star any)] in
+ parse_pattern eof_chars (concat_patterns p q) lexbuf }
+| "**/" (* \varepsilon | \Sigma^*/ *)
+ { let q = Union[Epsilon; Concat(Star any, slash)] in
+ parse_pattern eof_chars (concat_patterns p q) lexbuf }
+| "**" { raise (Parse_error("Ambiguous ** pattern not allowed unless surrounded by one or more slashes")) }
+| '*' { parse_pattern eof_chars (concat_patterns p (Star not_slash)) lexbuf }
+| '/' { parse_pattern eof_chars (concat_patterns p slash) lexbuf }
+| '?' { parse_pattern eof_chars (concat_patterns p (Class True)) lexbuf }
+| _ as c
+ { if List.mem c eof_chars then
+ (p,c)
+ else
+ raise (Parse_error(sf "Unexpected character %C in glob pattern" c))
+ }
+
+and parse_string b = parse
+| "\"" { Buffer.contents b }
+| "\\\"" { Buffer.add_char b '"'; parse_string b lexbuf }
+| [^'"' '\\']+ as u { Buffer.add_string b u; parse_string b lexbuf }
+
+and parse_class cl = parse
+| ']' { cl }
+| "-]" { ((Atom('-','-'))::cl) }
+| (_ as c1) '-' (_ as c2) { parse_class ((Atom(c1,c2))::cl) lexbuf }
+| _ as c { parse_class ((Atom(c,c))::cl) lexbuf }