summaryrefslogtreecommitdiffstats
path: root/ocamlbuild/glob.ml
diff options
context:
space:
mode:
Diffstat (limited to 'ocamlbuild/glob.ml')
-rw-r--r--ocamlbuild/glob.ml398
1 files changed, 398 insertions, 0 deletions
diff --git a/ocamlbuild/glob.ml b/ocamlbuild/glob.ml
new file mode 100644
index 000000000..8003dbbb8
--- /dev/null
+++ b/ocamlbuild/glob.ml
@@ -0,0 +1,398 @@
+(***********************************************************************)
+(* ocamlbuild *)
+(* *)
+(* Nicolas Pouillard, Berke Durak, projet Gallium, INRIA Rocquencourt *)
+(* *)
+(* Copyright 2007 Institut National de Recherche en Informatique et *)
+(* en Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the Q Public License version 1.0. *)
+(* *)
+(***********************************************************************)
+
+(* $Id$ *)
+(* Original author: Berke Durak *)
+(* Glob *)
+open My_std;;
+open Bool;;
+include Glob_ast;;
+open Glob_lexer;;
+
+let sf = Printf.sprintf;;
+
+let brute_limit = 10;;
+
+(*** string_of_token *)
+let string_of_token = function
+| ATOM _ -> "ATOM"
+| AND -> "AND"
+| OR -> "OR"
+| NOT -> "NOT"
+| LPAR -> "LPAR"
+| RPAR -> "RPAR"
+| TRUE -> "TRUE"
+| FALSE -> "FALSE"
+| EOF -> "EOF"
+;;
+(* ***)
+(*** match_character_class *)
+let match_character_class cl c =
+ Bool.eval
+ begin function (c1,c2) ->
+ c1 <= c && c <= c2
+ end
+ cl
+;;
+(* ***)
+(*** NFA *)
+module NFA =
+ struct
+ type transition =
+ | QCLASS of character_class
+ | QEPSILON
+ ;;
+
+ module IS = Set.Make(struct type t = int let compare = compare let print = Format.pp_print_int end);;
+ module ISM = Map.Make(struct type t = IS.t let compare = IS.compare let print = IS.print end);;
+
+ type machine = {
+ mc_qi : IS.t;
+ mc_table : (character_class * IS.t) list array;
+ mc_qf : int;
+ mc_power_table : (char, IS.t ISM.t) Hashtbl.t
+ }
+
+ (*** build' *)
+ let build' p =
+ let count = ref 0 in
+ let transitions = ref [] in
+ let epsilons : (int * int) list ref = ref [] in
+ let state () = let id = !count in incr count; id in
+ let ( --> ) q1 t q2 =
+ match t with
+ | QEPSILON -> epsilons := (q1,q2) :: !epsilons; q1
+ | QCLASS cl -> transitions := (q1,cl,q2) :: !transitions; q1
+ in
+ (* Construit les transitions correspondant au motif donné et arrivant
+ * sur l'état qf. Retourne l'état d'origine. *)
+ let rec loop qf = function
+ | Epsilon -> qf
+ | Word u ->
+ let m = String.length u in
+ let q0 = state () in
+ let rec loop q i =
+ if i = m then
+ q0
+ else
+ begin
+ let q' =
+ if i = m - 1 then
+ qf
+ else
+ state ()
+ in
+ let _ = (q --> QCLASS(Atom(u.[i], u.[i]))) q' in
+ loop q' (i + 1)
+ end
+ in
+ loop q0 0
+ | Class cl ->
+ let q1 = state () in
+ (q1 --> QCLASS cl) qf
+ | Star p ->
+ (* The fucking Kleene star *)
+ let q2 = state () in
+ let q1 = loop q2 p in (* q1 -{p}-> q2 *)
+ let _ = (q1 --> QEPSILON) qf in
+ let _ = (q2 --> QEPSILON) q1 in
+ let _ = (q2 --> QEPSILON) q1 in
+ q1
+ | Concat(p1,p2) ->
+ let q12 = state () in
+ let q1 = loop q12 p1 in (* q1 -{p1}-> q12 *)
+ let q2 = loop qf p2 in (* q2 -{p2}-> qf *)
+ let _ = (q12 --> QEPSILON) q2 in
+ q1
+ | Union pl ->
+ let qi = state () in
+ List.iter
+ begin fun p ->
+ let q = loop qf p in (* q -{p2}-> qf *)
+ let _ = (qi --> QEPSILON) q in (* qi -{}---> q *)
+ ()
+ end
+ pl;
+ qi
+ in
+ let qf = state () in
+ let qi = loop qf p in
+ let m = !count in
+
+ (* Compute epsilon closure *)
+ let graph = Array.make m IS.empty in
+ List.iter
+ begin fun (q,q') ->
+ graph.(q) <- IS.add q' graph.(q)
+ end
+ !epsilons;
+
+ let closure = Array.make m IS.empty in
+ let rec transitive past = function
+ | [] -> past
+ | q :: future ->
+ let past' = IS.add q past in
+ let future' =
+ IS.fold
+ begin fun q' future' ->
+ (* q -{}--> q' *)
+ if IS.mem q' past' then
+ future'
+ else
+ q' :: future'
+ end
+ graph.(q)
+ future
+ in
+ transitive past' future'
+ in
+ for i = 0 to m - 1 do
+ closure.(i) <- transitive IS.empty [i] (* O(n^2), I know *)
+ done;
+
+ (* Finally, build the table *)
+ let table = Array.make m [] in
+ List.iter
+ begin fun (q,t,q') ->
+ table.(q) <- (t, closure.(q')) :: table.(q)
+ end
+ !transitions;
+
+ (graph, closure,
+ { mc_qi = closure.(qi);
+ mc_table = table;
+ mc_qf = qf;
+ mc_power_table = Hashtbl.create 37 })
+ ;;
+ let build x = let (_,_, machine) = build' x in machine;;
+ (* ***)
+ (*** run *)
+ let run ?(trace=false) machine u =
+ let m = String.length u in
+ let apply qs c =
+ try
+ let t = Hashtbl.find machine.mc_power_table c in
+ ISM.find qs t
+ with
+ | Not_found ->
+ let qs' =
+ IS.fold
+ begin fun q qs' ->
+ List.fold_left
+ begin fun qs' (cl,qs'') ->
+ if match_character_class cl c then
+ IS.union qs' qs''
+ else
+ qs'
+ end
+ qs'
+ machine.mc_table.(q)
+ end
+ qs
+ IS.empty
+ in
+ let t =
+ try
+ Hashtbl.find machine.mc_power_table c
+ with
+ | Not_found -> ISM.empty
+ in
+ Hashtbl.replace machine.mc_power_table c (ISM.add qs qs' t);
+ qs'
+ in
+ let rec loop qs i =
+ if IS.is_empty qs then
+ false
+ else
+ begin
+ if i = m then
+ IS.mem machine.mc_qf qs
+ else
+ begin
+ let c = u.[i] in
+ if trace then
+ begin
+ Printf.printf "%d %C {" i c;
+ IS.iter (fun q -> Printf.printf " %d" q) qs;
+ Printf.printf " }\n%!"
+ end;
+ let qs' = apply qs c in
+ loop qs' (i + 1)
+ end
+ end
+ in
+ loop machine.mc_qi 0
+ ;;
+ (* ***)
+ end
+;;
+(* ***)
+(*** Brute *)
+module Brute =
+ struct
+ exception Succeed;;
+ exception Fail;;
+ exception Too_hard;;
+
+ (*** match_pattern *)
+ let match_pattern counter p u =
+ let m = String.length u in
+ (** [loop i n p] returns [true] iff the word [u.(i .. i + n - 1)] is in the
+ ** language generated by the pattern [p].
+ ** We must have 0 <= i and i + n <= m *)
+ let rec loop (i,n,p) =
+ assert (0 <= i && 0 <= n && i + n <= m);
+ incr counter;
+ if !counter >= brute_limit then raise Too_hard;
+ match p with
+ | Word v ->
+ String.length v = n &&
+ begin
+ let rec check j = j = n or (v.[j] = u.[i + j] && check (j + 1))
+ in
+ check 0
+ end
+ | Epsilon -> n = 0
+ | Star(Class True) -> true
+ | Star(Class cl) ->
+ let rec check k =
+ if k = n then
+ true
+ else
+ (match_character_class cl u.[i + k]) && check (k + 1)
+ in
+ check 0
+ | Star p -> raise Too_hard
+ | Class cl -> n = 1 && match_character_class cl u.[i]
+ | Concat(p1,p2) ->
+ let rec scan j =
+ j <= n && ((loop (i,j,p1) && loop (i+j, n - j,p2)) || scan (j + 1))
+ in
+ scan 0
+ | Union pl -> List.exists (fun p' -> loop (i,n,p')) pl
+ in
+ loop (0,m,p)
+ ;;
+ (* ***)
+end
+;;
+(* ***)
+(*** fast_pattern, globber *)
+type fast_pattern =
+| Brute of int ref * pattern
+| Machine of NFA.machine
+;;
+
+type globber = fast_pattern ref atom Bool.boolean;;
+(* ***)
+(*** add_dir *)
+let add_dir dir x =
+ match dir with
+ | None -> x
+ | Some(dir) ->
+ match x with
+ | Constant(s) ->
+ Constant(My_std.filename_concat dir s)
+ | Pattern(p) ->
+ Pattern(Concat(Word(My_std.filename_concat dir ""), p))
+;;
+(* ***)
+(*** parse *)
+let parse ?dir u =
+ let l = Lexing.from_string u in
+ let tok = ref None in
+ let f =
+ fun () ->
+ match !tok with
+ | None -> token l
+ | Some x ->
+ tok := None;
+ x
+ in
+ let g t =
+ match !tok with
+ | None -> tok := Some t
+ | Some t' ->
+ raise (Parse_error(sf "Trying to unput token %s while %s is active" (string_of_token t) (string_of_token t')))
+ in
+ let read x =
+ let y = f () in
+ if x = y then
+ ()
+ else
+ raise (Parse_error(sf "Unexpected token, expecting %s, got %s" (string_of_token x) (string_of_token y)))
+ in
+ let rec atomizer continuation = match f () with
+ | NOT -> atomizer (fun x -> continuation (Not x))
+ | ATOM x ->
+ begin
+ let a =
+ match add_dir dir x with
+ | Constant u -> Constant u
+ | Pattern p -> Pattern(ref (Brute(ref 0, p)))
+ in
+ continuation (Atom a)
+ end
+ | TRUE -> continuation True
+ | FALSE -> continuation False
+ | LPAR ->
+ let y = parse_s () in
+ read RPAR;
+ continuation y
+ | t -> raise (Parse_error(sf "Unexpected token %s in atomizer" (string_of_token t)))
+ and parse_s1 x = match f () with
+ | OR -> let y = parse_s () in Or[x; y]
+ | AND -> parse_t x
+ | t -> g t; x
+ and parse_t1 x y = match f () with
+ | OR -> let z = parse_s () in Or[And[x;y]; z]
+ | AND -> parse_t (And[x;y])
+ | t -> g t; And[x;y]
+ and parse_s () = atomizer parse_s1
+ and parse_t x = atomizer (parse_t1 x)
+ in
+ let x = parse_s () in
+ read EOF;
+ x
+;;
+(* ***)
+(*** eval *)
+let eval g u =
+ Bool.eval
+ begin function
+ | Constant v -> u = v
+ | Pattern kind ->
+ match !kind with
+ | Brute(count, p) ->
+ begin
+ let do_nfa () =
+ let m = NFA.build p in
+ kind := Machine m;
+ NFA.run m u
+ in
+ if !count >= brute_limit then
+ do_nfa ()
+ else
+ try
+ Brute.match_pattern count p u
+ with
+ | Brute.Too_hard -> do_nfa ()
+ end
+ | Machine m -> NFA.run m u
+ end
+ g
+(* ***)
+(*** Debug *)
+(*let (Atom(Pattern x)) = parse "<{a,b}>";;
+#install_printer IS.print;;
+#install_printer ISM.print;;
+let (graph, closure, machine) = build' x;;*)
+(* ***)