summaryrefslogtreecommitdiffstats
path: root/stdlib/string.ml
diff options
context:
space:
mode:
Diffstat (limited to 'stdlib/string.ml')
-rw-r--r--stdlib/string.ml223
1 files changed, 72 insertions, 151 deletions
diff --git a/stdlib/string.ml b/stdlib/string.ml
index fda40b527..93880af26 100644
--- a/stdlib/string.ml
+++ b/stdlib/string.ml
@@ -2,72 +2,53 @@
(* *)
(* OCaml *)
(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* Damien Doligez, projet Gallium, INRIA Rocquencourt *)
(* *)
-(* Copyright 1996 Institut National de Recherche en Informatique et *)
+(* Copyright 2014 Institut National de Recherche en Informatique et *)
(* en Automatique. All rights reserved. This file is distributed *)
(* under the terms of the GNU Library General Public License, with *)
(* the special exception on linking described in file ../LICENSE. *)
(* *)
(***********************************************************************)
-(* String operations *)
+(* String operations, based on byte sequence operations *)
external length : string -> int = "%string_length"
external get : string -> int -> char = "%string_safe_get"
-external set : string -> int -> char -> unit = "%string_safe_set"
-external create : int -> string = "caml_create_string"
+external set : bytes -> int -> char -> unit = "%string_safe_set"
+external create : int -> bytes = "caml_create_string"
external unsafe_get : string -> int -> char = "%string_unsafe_get"
-external unsafe_set : string -> int -> char -> unit = "%string_unsafe_set"
-external unsafe_blit : string -> int -> string -> int -> int -> unit
+external unsafe_set : bytes -> int -> char -> unit = "%string_unsafe_set"
+external unsafe_blit : string -> int -> bytes -> int -> int -> unit
= "caml_blit_string" "noalloc"
-external unsafe_fill : string -> int -> int -> char -> unit
+external unsafe_fill : bytes -> int -> int -> char -> unit
= "caml_fill_string" "noalloc"
-let make n c =
- let s = create n in
- unsafe_fill s 0 n c;
- s
+module B = Bytes
-let copy s =
- let len = length s in
- let r = create len in
- unsafe_blit s 0 r 0 len;
- r
+let bts = B.unsafe_to_string
+let bos = B.unsafe_of_string
+let make n c =
+ B.make n c |> bts
+let init n f =
+ B.init n f |> bts
+let copy s =
+ B.copy (bos s) |> bts
let sub s ofs len =
- if ofs < 0 || len < 0 || ofs > length s - len
- then invalid_arg "String.sub"
- else begin
- let r = create len in
- unsafe_blit s ofs r 0 len;
- r
- end
-
-let fill s ofs len c =
- if ofs < 0 || len < 0 || ofs > length s - len
- then invalid_arg "String.fill"
- else unsafe_fill s ofs len c
-
-let blit s1 ofs1 s2 ofs2 len =
- if len < 0 || ofs1 < 0 || ofs1 > length s1 - len
- || ofs2 < 0 || ofs2 > length s2 - len
- then invalid_arg "String.blit"
- else unsafe_blit s1 ofs1 s2 ofs2 len
-
-let iter f a =
- for i = 0 to length a - 1 do f(unsafe_get a i) done
-
-let iteri f a =
- for i = 0 to length a - 1 do f i (unsafe_get a i) done
+ B.sub (bos s) ofs len |> bts
+let fill =
+ B.fill
+let blit =
+ B.blit_string
let concat sep l =
match l with
- [] -> ""
+ | [] -> ""
| hd :: tl ->
let num = ref 0 and len = ref 0 in
List.iter (fun s -> incr num; len := !len + length s) l;
- let r = create (!len + length sep * (!num - 1)) in
+ let r = B.create (!len + length sep * (!num - 1)) in
unsafe_blit hd 0 r 0 (length hd);
let pos = ref(length hd) in
List.iter
@@ -77,128 +58,68 @@ let concat sep l =
unsafe_blit s 0 r !pos (length s);
pos := !pos + length s)
tl;
- r
+ Bytes.unsafe_to_string r
+
+let iter f s =
+ B.iter f (bos s)
+let iteri f s =
+ B.iteri f (bos s)
+let map f s =
+ B.map f (bos s) |> bts
+let mapi f s =
+ B.mapi f (bos s) |> bts
+
+(* Beware: we cannot use B.trim or B.escape because they always make a
+ copy, but String.mli spells out some cases where we are not allowed
+ to make a copy. *)
external is_printable: char -> bool = "caml_is_printable"
-external char_code: char -> int = "%identity"
-external char_chr: int -> char = "%identity"
let is_space = function
| ' ' | '\012' | '\n' | '\r' | '\t' -> true
| _ -> false
let trim s =
- let len = length s in
- let i = ref 0 in
- while !i < len && is_space (unsafe_get s !i) do
- incr i
- done;
- let j = ref (len - 1) in
- while !j >= !i && is_space (unsafe_get s !j) do
- decr j
- done;
- if !i = 0 && !j = len - 1 then
- s
- else if !j >= !i then
- sub s !i (!j - !i + 1)
- else
- ""
+ if s = "" then s
+ else if is_space (unsafe_get s 0) || is_space (unsafe_get s (length s - 1))
+ then bts (B.trim (bos s))
+ else s
let escaped s =
- let n = ref 0 in
- for i = 0 to length s - 1 do
- n := !n +
- (match unsafe_get s i with
- | '"' | '\\' | '\n' | '\t' | '\r' | '\b' -> 2
- | c -> if is_printable c then 1 else 4)
- done;
- if !n = length s then s else begin
- let s' = create !n in
- n := 0;
- for i = 0 to length s - 1 do
- begin
- match unsafe_get s i with
- | ('"' | '\\') as c ->
- unsafe_set s' !n '\\'; incr n; unsafe_set s' !n c
- | '\n' ->
- unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'n'
- | '\t' ->
- unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 't'
- | '\r' ->
- unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'r'
- | '\b' ->
- unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'b'
- | c ->
- if is_printable c then
- unsafe_set s' !n c
- else begin
- let a = char_code c in
- unsafe_set s' !n '\\';
- incr n;
- unsafe_set s' !n (char_chr (48 + a / 100));
- incr n;
- unsafe_set s' !n (char_chr (48 + (a / 10) mod 10));
- incr n;
- unsafe_set s' !n (char_chr (48 + a mod 10))
- end
- end;
- incr n
- done;
- s'
- end
-
-let map f s =
- let l = length s in
- if l = 0 then s else begin
- let r = create l in
- for i = 0 to l - 1 do unsafe_set r i (f(unsafe_get s i)) done;
- r
- end
-
-let uppercase s = map Char.uppercase s
-let lowercase s = map Char.lowercase s
-
-let apply1 f s =
- if length s = 0 then s else begin
- let r = copy s in
- unsafe_set r 0 (f(unsafe_get s 0));
- r
- end
-
-let capitalize s = apply1 Char.uppercase s
-let uncapitalize s = apply1 Char.lowercase s
-
-let rec index_rec s lim i c =
- if i >= lim then raise Not_found else
- if unsafe_get s i = c then i else index_rec s lim (i + 1) c;;
-
-let index s c = index_rec s (length s) 0 c;;
-
-let index_from s i c =
- let l = length s in
- if i < 0 || i > l then invalid_arg "String.index_from" else
- index_rec s l i c;;
-
-let rec rindex_rec s i c =
- if i < 0 then raise Not_found else
- if unsafe_get s i = c then i else rindex_rec s (i - 1) c;;
-
-let rindex s c = rindex_rec s (length s - 1) c;;
+ let rec needs_escape i =
+ if i >= length s then false else
+ match unsafe_get s i with
+ | '"' | '\\' | '\n' | '\t' | '\r' | '\b' -> true
+ | c when is_printable c -> needs_escape (i+1)
+ | _ -> true
+ in
+ if needs_escape 0 then
+ bts (B.escaped (bos s))
+ else
+ s
+let index s c =
+ B.index (bos s) c
+let rindex s c =
+ B.rindex (bos s) c
+let index_from s i c=
+ B.index_from (bos s) i c
let rindex_from s i c =
- if i < -1 || i >= length s then invalid_arg "String.rindex_from" else
- rindex_rec s i c;;
-
+ B.rindex_from (bos s) i c
+let contains s c =
+ B.contains (bos s) c
let contains_from s i c =
- let l = length s in
- if i < 0 || i > l then invalid_arg "String.contains_from" else
- try ignore (index_rec s l i c); true with Not_found -> false;;
-
-let contains s c = contains_from s 0 c;;
-
+ B.contains_from (bos s) i c
let rcontains_from s i c =
- if i < 0 || i >= length s then invalid_arg "String.rcontains_from" else
- try ignore (rindex_rec s i c); true with Not_found -> false;;
+ B.rcontains_from (bos s) i c
+let uppercase s =
+ B.uppercase (bos s) |> bts
+let lowercase s =
+ B.lowercase (bos s) |> bts
+let capitalize s =
+ B.capitalize (bos s) |> bts
+let uncapitalize s =
+ B.uncapitalize (bos s) |> bts
type t = string