diff options
Diffstat (limited to 'ocamlbuild/glob.ml')
-rw-r--r-- | ocamlbuild/glob.ml | 398 |
1 files changed, 398 insertions, 0 deletions
diff --git a/ocamlbuild/glob.ml b/ocamlbuild/glob.ml new file mode 100644 index 000000000..8003dbbb8 --- /dev/null +++ b/ocamlbuild/glob.ml @@ -0,0 +1,398 @@ +(***********************************************************************) +(* ocamlbuild *) +(* *) +(* Nicolas Pouillard, Berke Durak, projet Gallium, INRIA Rocquencourt *) +(* *) +(* Copyright 2007 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* $Id$ *) +(* Original author: Berke Durak *) +(* Glob *) +open My_std;; +open Bool;; +include Glob_ast;; +open Glob_lexer;; + +let sf = Printf.sprintf;; + +let brute_limit = 10;; + +(*** string_of_token *) +let string_of_token = function +| ATOM _ -> "ATOM" +| AND -> "AND" +| OR -> "OR" +| NOT -> "NOT" +| LPAR -> "LPAR" +| RPAR -> "RPAR" +| TRUE -> "TRUE" +| FALSE -> "FALSE" +| EOF -> "EOF" +;; +(* ***) +(*** match_character_class *) +let match_character_class cl c = + Bool.eval + begin function (c1,c2) -> + c1 <= c && c <= c2 + end + cl +;; +(* ***) +(*** NFA *) +module NFA = + struct + type transition = + | QCLASS of character_class + | QEPSILON + ;; + + module IS = Set.Make(struct type t = int let compare = compare let print = Format.pp_print_int end);; + module ISM = Map.Make(struct type t = IS.t let compare = IS.compare let print = IS.print end);; + + type machine = { + mc_qi : IS.t; + mc_table : (character_class * IS.t) list array; + mc_qf : int; + mc_power_table : (char, IS.t ISM.t) Hashtbl.t + } + + (*** build' *) + let build' p = + let count = ref 0 in + let transitions = ref [] in + let epsilons : (int * int) list ref = ref [] in + let state () = let id = !count in incr count; id in + let ( --> ) q1 t q2 = + match t with + | QEPSILON -> epsilons := (q1,q2) :: !epsilons; q1 + | QCLASS cl -> transitions := (q1,cl,q2) :: !transitions; q1 + in + (* Construit les transitions correspondant au motif donné et arrivant + * sur l'état qf. Retourne l'état d'origine. *) + let rec loop qf = function + | Epsilon -> qf + | Word u -> + let m = String.length u in + let q0 = state () in + let rec loop q i = + if i = m then + q0 + else + begin + let q' = + if i = m - 1 then + qf + else + state () + in + let _ = (q --> QCLASS(Atom(u.[i], u.[i]))) q' in + loop q' (i + 1) + end + in + loop q0 0 + | Class cl -> + let q1 = state () in + (q1 --> QCLASS cl) qf + | Star p -> + (* The fucking Kleene star *) + let q2 = state () in + let q1 = loop q2 p in (* q1 -{p}-> q2 *) + let _ = (q1 --> QEPSILON) qf in + let _ = (q2 --> QEPSILON) q1 in + let _ = (q2 --> QEPSILON) q1 in + q1 + | Concat(p1,p2) -> + let q12 = state () in + let q1 = loop q12 p1 in (* q1 -{p1}-> q12 *) + let q2 = loop qf p2 in (* q2 -{p2}-> qf *) + let _ = (q12 --> QEPSILON) q2 in + q1 + | Union pl -> + let qi = state () in + List.iter + begin fun p -> + let q = loop qf p in (* q -{p2}-> qf *) + let _ = (qi --> QEPSILON) q in (* qi -{}---> q *) + () + end + pl; + qi + in + let qf = state () in + let qi = loop qf p in + let m = !count in + + (* Compute epsilon closure *) + let graph = Array.make m IS.empty in + List.iter + begin fun (q,q') -> + graph.(q) <- IS.add q' graph.(q) + end + !epsilons; + + let closure = Array.make m IS.empty in + let rec transitive past = function + | [] -> past + | q :: future -> + let past' = IS.add q past in + let future' = + IS.fold + begin fun q' future' -> + (* q -{}--> q' *) + if IS.mem q' past' then + future' + else + q' :: future' + end + graph.(q) + future + in + transitive past' future' + in + for i = 0 to m - 1 do + closure.(i) <- transitive IS.empty [i] (* O(n^2), I know *) + done; + + (* Finally, build the table *) + let table = Array.make m [] in + List.iter + begin fun (q,t,q') -> + table.(q) <- (t, closure.(q')) :: table.(q) + end + !transitions; + + (graph, closure, + { mc_qi = closure.(qi); + mc_table = table; + mc_qf = qf; + mc_power_table = Hashtbl.create 37 }) + ;; + let build x = let (_,_, machine) = build' x in machine;; + (* ***) + (*** run *) + let run ?(trace=false) machine u = + let m = String.length u in + let apply qs c = + try + let t = Hashtbl.find machine.mc_power_table c in + ISM.find qs t + with + | Not_found -> + let qs' = + IS.fold + begin fun q qs' -> + List.fold_left + begin fun qs' (cl,qs'') -> + if match_character_class cl c then + IS.union qs' qs'' + else + qs' + end + qs' + machine.mc_table.(q) + end + qs + IS.empty + in + let t = + try + Hashtbl.find machine.mc_power_table c + with + | Not_found -> ISM.empty + in + Hashtbl.replace machine.mc_power_table c (ISM.add qs qs' t); + qs' + in + let rec loop qs i = + if IS.is_empty qs then + false + else + begin + if i = m then + IS.mem machine.mc_qf qs + else + begin + let c = u.[i] in + if trace then + begin + Printf.printf "%d %C {" i c; + IS.iter (fun q -> Printf.printf " %d" q) qs; + Printf.printf " }\n%!" + end; + let qs' = apply qs c in + loop qs' (i + 1) + end + end + in + loop machine.mc_qi 0 + ;; + (* ***) + end +;; +(* ***) +(*** Brute *) +module Brute = + struct + exception Succeed;; + exception Fail;; + exception Too_hard;; + + (*** match_pattern *) + let match_pattern counter p u = + let m = String.length u in + (** [loop i n p] returns [true] iff the word [u.(i .. i + n - 1)] is in the + ** language generated by the pattern [p]. + ** We must have 0 <= i and i + n <= m *) + let rec loop (i,n,p) = + assert (0 <= i && 0 <= n && i + n <= m); + incr counter; + if !counter >= brute_limit then raise Too_hard; + match p with + | Word v -> + String.length v = n && + begin + let rec check j = j = n or (v.[j] = u.[i + j] && check (j + 1)) + in + check 0 + end + | Epsilon -> n = 0 + | Star(Class True) -> true + | Star(Class cl) -> + let rec check k = + if k = n then + true + else + (match_character_class cl u.[i + k]) && check (k + 1) + in + check 0 + | Star p -> raise Too_hard + | Class cl -> n = 1 && match_character_class cl u.[i] + | Concat(p1,p2) -> + let rec scan j = + j <= n && ((loop (i,j,p1) && loop (i+j, n - j,p2)) || scan (j + 1)) + in + scan 0 + | Union pl -> List.exists (fun p' -> loop (i,n,p')) pl + in + loop (0,m,p) + ;; + (* ***) +end +;; +(* ***) +(*** fast_pattern, globber *) +type fast_pattern = +| Brute of int ref * pattern +| Machine of NFA.machine +;; + +type globber = fast_pattern ref atom Bool.boolean;; +(* ***) +(*** add_dir *) +let add_dir dir x = + match dir with + | None -> x + | Some(dir) -> + match x with + | Constant(s) -> + Constant(My_std.filename_concat dir s) + | Pattern(p) -> + Pattern(Concat(Word(My_std.filename_concat dir ""), p)) +;; +(* ***) +(*** parse *) +let parse ?dir u = + let l = Lexing.from_string u in + let tok = ref None in + let f = + fun () -> + match !tok with + | None -> token l + | Some x -> + tok := None; + x + in + let g t = + match !tok with + | None -> tok := Some t + | Some t' -> + raise (Parse_error(sf "Trying to unput token %s while %s is active" (string_of_token t) (string_of_token t'))) + in + let read x = + let y = f () in + if x = y then + () + else + raise (Parse_error(sf "Unexpected token, expecting %s, got %s" (string_of_token x) (string_of_token y))) + in + let rec atomizer continuation = match f () with + | NOT -> atomizer (fun x -> continuation (Not x)) + | ATOM x -> + begin + let a = + match add_dir dir x with + | Constant u -> Constant u + | Pattern p -> Pattern(ref (Brute(ref 0, p))) + in + continuation (Atom a) + end + | TRUE -> continuation True + | FALSE -> continuation False + | LPAR -> + let y = parse_s () in + read RPAR; + continuation y + | t -> raise (Parse_error(sf "Unexpected token %s in atomizer" (string_of_token t))) + and parse_s1 x = match f () with + | OR -> let y = parse_s () in Or[x; y] + | AND -> parse_t x + | t -> g t; x + and parse_t1 x y = match f () with + | OR -> let z = parse_s () in Or[And[x;y]; z] + | AND -> parse_t (And[x;y]) + | t -> g t; And[x;y] + and parse_s () = atomizer parse_s1 + and parse_t x = atomizer (parse_t1 x) + in + let x = parse_s () in + read EOF; + x +;; +(* ***) +(*** eval *) +let eval g u = + Bool.eval + begin function + | Constant v -> u = v + | Pattern kind -> + match !kind with + | Brute(count, p) -> + begin + let do_nfa () = + let m = NFA.build p in + kind := Machine m; + NFA.run m u + in + if !count >= brute_limit then + do_nfa () + else + try + Brute.match_pattern count p u + with + | Brute.Too_hard -> do_nfa () + end + | Machine m -> NFA.run m u + end + g +(* ***) +(*** Debug *) +(*let (Atom(Pattern x)) = parse "<{a,b}>";; +#install_printer IS.print;; +#install_printer ISM.print;; +let (graph, closure, machine) = build' x;;*) +(* ***) |