diff options
Diffstat (limited to 'stdlib/string.ml')
-rw-r--r-- | stdlib/string.ml | 223 |
1 files changed, 72 insertions, 151 deletions
diff --git a/stdlib/string.ml b/stdlib/string.ml index fda40b527..93880af26 100644 --- a/stdlib/string.ml +++ b/stdlib/string.ml @@ -2,72 +2,53 @@ (* *) (* OCaml *) (* *) -(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* Damien Doligez, projet Gallium, INRIA Rocquencourt *) (* *) -(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) (* en Automatique. All rights reserved. This file is distributed *) (* under the terms of the GNU Library General Public License, with *) (* the special exception on linking described in file ../LICENSE. *) (* *) (***********************************************************************) -(* String operations *) +(* String operations, based on byte sequence operations *) external length : string -> int = "%string_length" external get : string -> int -> char = "%string_safe_get" -external set : string -> int -> char -> unit = "%string_safe_set" -external create : int -> string = "caml_create_string" +external set : bytes -> int -> char -> unit = "%string_safe_set" +external create : int -> bytes = "caml_create_string" external unsafe_get : string -> int -> char = "%string_unsafe_get" -external unsafe_set : string -> int -> char -> unit = "%string_unsafe_set" -external unsafe_blit : string -> int -> string -> int -> int -> unit +external unsafe_set : bytes -> int -> char -> unit = "%string_unsafe_set" +external unsafe_blit : string -> int -> bytes -> int -> int -> unit = "caml_blit_string" "noalloc" -external unsafe_fill : string -> int -> int -> char -> unit +external unsafe_fill : bytes -> int -> int -> char -> unit = "caml_fill_string" "noalloc" -let make n c = - let s = create n in - unsafe_fill s 0 n c; - s +module B = Bytes -let copy s = - let len = length s in - let r = create len in - unsafe_blit s 0 r 0 len; - r +let bts = B.unsafe_to_string +let bos = B.unsafe_of_string +let make n c = + B.make n c |> bts +let init n f = + B.init n f |> bts +let copy s = + B.copy (bos s) |> bts let sub s ofs len = - if ofs < 0 || len < 0 || ofs > length s - len - then invalid_arg "String.sub" - else begin - let r = create len in - unsafe_blit s ofs r 0 len; - r - end - -let fill s ofs len c = - if ofs < 0 || len < 0 || ofs > length s - len - then invalid_arg "String.fill" - else unsafe_fill s ofs len c - -let blit s1 ofs1 s2 ofs2 len = - if len < 0 || ofs1 < 0 || ofs1 > length s1 - len - || ofs2 < 0 || ofs2 > length s2 - len - then invalid_arg "String.blit" - else unsafe_blit s1 ofs1 s2 ofs2 len - -let iter f a = - for i = 0 to length a - 1 do f(unsafe_get a i) done - -let iteri f a = - for i = 0 to length a - 1 do f i (unsafe_get a i) done + B.sub (bos s) ofs len |> bts +let fill = + B.fill +let blit = + B.blit_string let concat sep l = match l with - [] -> "" + | [] -> "" | hd :: tl -> let num = ref 0 and len = ref 0 in List.iter (fun s -> incr num; len := !len + length s) l; - let r = create (!len + length sep * (!num - 1)) in + let r = B.create (!len + length sep * (!num - 1)) in unsafe_blit hd 0 r 0 (length hd); let pos = ref(length hd) in List.iter @@ -77,128 +58,68 @@ let concat sep l = unsafe_blit s 0 r !pos (length s); pos := !pos + length s) tl; - r + Bytes.unsafe_to_string r + +let iter f s = + B.iter f (bos s) +let iteri f s = + B.iteri f (bos s) +let map f s = + B.map f (bos s) |> bts +let mapi f s = + B.mapi f (bos s) |> bts + +(* Beware: we cannot use B.trim or B.escape because they always make a + copy, but String.mli spells out some cases where we are not allowed + to make a copy. *) external is_printable: char -> bool = "caml_is_printable" -external char_code: char -> int = "%identity" -external char_chr: int -> char = "%identity" let is_space = function | ' ' | '\012' | '\n' | '\r' | '\t' -> true | _ -> false let trim s = - let len = length s in - let i = ref 0 in - while !i < len && is_space (unsafe_get s !i) do - incr i - done; - let j = ref (len - 1) in - while !j >= !i && is_space (unsafe_get s !j) do - decr j - done; - if !i = 0 && !j = len - 1 then - s - else if !j >= !i then - sub s !i (!j - !i + 1) - else - "" + if s = "" then s + else if is_space (unsafe_get s 0) || is_space (unsafe_get s (length s - 1)) + then bts (B.trim (bos s)) + else s let escaped s = - let n = ref 0 in - for i = 0 to length s - 1 do - n := !n + - (match unsafe_get s i with - | '"' | '\\' | '\n' | '\t' | '\r' | '\b' -> 2 - | c -> if is_printable c then 1 else 4) - done; - if !n = length s then s else begin - let s' = create !n in - n := 0; - for i = 0 to length s - 1 do - begin - match unsafe_get s i with - | ('"' | '\\') as c -> - unsafe_set s' !n '\\'; incr n; unsafe_set s' !n c - | '\n' -> - unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'n' - | '\t' -> - unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 't' - | '\r' -> - unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'r' - | '\b' -> - unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'b' - | c -> - if is_printable c then - unsafe_set s' !n c - else begin - let a = char_code c in - unsafe_set s' !n '\\'; - incr n; - unsafe_set s' !n (char_chr (48 + a / 100)); - incr n; - unsafe_set s' !n (char_chr (48 + (a / 10) mod 10)); - incr n; - unsafe_set s' !n (char_chr (48 + a mod 10)) - end - end; - incr n - done; - s' - end - -let map f s = - let l = length s in - if l = 0 then s else begin - let r = create l in - for i = 0 to l - 1 do unsafe_set r i (f(unsafe_get s i)) done; - r - end - -let uppercase s = map Char.uppercase s -let lowercase s = map Char.lowercase s - -let apply1 f s = - if length s = 0 then s else begin - let r = copy s in - unsafe_set r 0 (f(unsafe_get s 0)); - r - end - -let capitalize s = apply1 Char.uppercase s -let uncapitalize s = apply1 Char.lowercase s - -let rec index_rec s lim i c = - if i >= lim then raise Not_found else - if unsafe_get s i = c then i else index_rec s lim (i + 1) c;; - -let index s c = index_rec s (length s) 0 c;; - -let index_from s i c = - let l = length s in - if i < 0 || i > l then invalid_arg "String.index_from" else - index_rec s l i c;; - -let rec rindex_rec s i c = - if i < 0 then raise Not_found else - if unsafe_get s i = c then i else rindex_rec s (i - 1) c;; - -let rindex s c = rindex_rec s (length s - 1) c;; + let rec needs_escape i = + if i >= length s then false else + match unsafe_get s i with + | '"' | '\\' | '\n' | '\t' | '\r' | '\b' -> true + | c when is_printable c -> needs_escape (i+1) + | _ -> true + in + if needs_escape 0 then + bts (B.escaped (bos s)) + else + s +let index s c = + B.index (bos s) c +let rindex s c = + B.rindex (bos s) c +let index_from s i c= + B.index_from (bos s) i c let rindex_from s i c = - if i < -1 || i >= length s then invalid_arg "String.rindex_from" else - rindex_rec s i c;; - + B.rindex_from (bos s) i c +let contains s c = + B.contains (bos s) c let contains_from s i c = - let l = length s in - if i < 0 || i > l then invalid_arg "String.contains_from" else - try ignore (index_rec s l i c); true with Not_found -> false;; - -let contains s c = contains_from s 0 c;; - + B.contains_from (bos s) i c let rcontains_from s i c = - if i < 0 || i >= length s then invalid_arg "String.rcontains_from" else - try ignore (rindex_rec s i c); true with Not_found -> false;; + B.rcontains_from (bos s) i c +let uppercase s = + B.uppercase (bos s) |> bts +let lowercase s = + B.lowercase (bos s) |> bts +let capitalize s = + B.capitalize (bos s) |> bts +let uncapitalize s = + B.uncapitalize (bos s) |> bts type t = string |