1 files changed, 398 insertions, 0 deletions
diff --git a/stdlib/bytes.mli b/stdlib/bytes.mli
new file mode 100644
index 000000000..82b28a28c
--- /dev/null
+++ b/stdlib/bytes.mli
@@ -0,0 +1,398 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
+(*                                                                     *)
+(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
+(*  under the terms of the GNU Library General Public License, with    *)
+(*  the special exception on linking described in file ../LICENSE.     *)
+(*                                                                     *)
+(***********************************************************************)
+
+(** Byte sequence operations.
+
+   A byte sequence is a mutable data structure that contains a
+   fixed-length sequence of bytes. Each byte can be indexed in
+   constant time for reading or writing.
+
+   Given a byte sequence [s] of length [l], we can access each of the
+   [l] bytes of [s] via its index in the sequence. Indexes start at
+   [0], and we will call an index valid in [s] if it falls within the
+   range [[0...l-1]] (inclusive). A position is the point between two
+   bytes or at the beginning or end of the sequence.  We call a
+   position valid in [s] if it falls within the range [[0...l]]
+   (inclusive). Note that the byte at index [n] is between positions
+   [n] and [n+1].
+
+   Two parameters [start] and [len] are said to designate a valid
+   range of [s] if [len >= 0] and [start] and [start+len] are valid
+   positions in [s].
+
+   Byte sequences can be modified in place, for instance via the [set]
+   and [blit] functions described below.  See also strings (module
+   {!String}), which are almost the same data structure, but cannot be
+   modified in place.
+
+   Bytes are represented by the OCaml type [char].
+
+   @since 4.02.0
+ *)
+
+external length : bytes -> int = "%string_length"
+(** Return the length (number of bytes) of the argument. *)
+
+external get : bytes -> int -> char = "%string_safe_get"
+(** [get s n] returns the byte at index [n] in argument [s].
+
+    Raise [Invalid_argument] if [n] is not a valid index in [s]. *)
+
+external set : bytes -> int -> char -> unit = "%string_safe_set"
+(** [set s n c] modifies [s] in place, replacing the byte at index [n]
+    with [c].
+
+    Raise [Invalid_argument] if [n] is not a valid index in [s]. *)
+
+external create : int -> bytes = "caml_create_string"
+(** [create n] returns a new byte sequence of length [n]. The
+    sequence is uninitialized and contains arbitrary bytes.
+
+    Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *)
+
+val make : int -> char -> bytes
+(** [make n c] returns a new byte sequence of length [n], filled with
+    the byte [c].
+
+    Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *)
+
+val init : int -> (int -> char) -> bytes
+(** [Bytes.init n f] returns a fresh byte sequence of length [n], with
+    the byte at index [i] initialized to the result of [f i] (in
+    increasing index order).
+
+    Raise [Invalid_argument] if [n < 0] or [n > ]{!Sys.max_string_length}. *)
+
+val empty : bytes
+(** A byte sequence of size 0. *)
+
+val copy : bytes -> bytes
+(** Return a new byte sequence that contains the same bytes as the
+    argument. *)
+
+val of_string : string -> bytes
+(** Return a new byte sequence that contains the same bytes as the
+    given string. *)
+
+val to_string : bytes -> string
+(** Return a new string that contains the same bytes as the given byte
+    sequence. *)
+
+val sub : bytes -> int -> int -> bytes
+(** [sub s start len] returns a new byte sequence of length [len],
+    containing the subsequence of [s] that starts at position [start]
+    and has length [len].
+
+    Raise [Invalid_argument] if [start] and [len] do not designate a
+    valid range of [s]. *)
+
+val sub_string : bytes -> int -> int -> string
+(** [sub_string s start len] is the same as [sub s start len] but
+    returns a string instead of a byte sequence.
+
+    Raise [Invalid_argument] if [start] and [len] do not designate a
+    valid range of [s]. *)
+
+val extend : bytes -> int -> int -> bytes
+(** [extend s left right] returns a new byte sequence that contains
+    the bytes of [s], with [left] uninitialized bytes prepended and
+    [right] uninitialized bytes appended to it. If [left] or [right]
+    is negative, then bytes are removed (instead of appended) from
+    the corresponding side of [s].
+
+    Raise [Invalid_argument] if the result length is negative or
+    longer than {!Sys.max_string_length} bytes. *)
+
+val fill : bytes -> int -> int -> char -> unit
+(** [fill s start len c] modifies [s] in place, replacing [len]
+    bytes with [c], starting at index [start].
+
+    Raise [Invalid_argument] if [start] and [len] do not designate a
+    valid range of [s]. *)
+
+val blit : bytes -> int -> bytes -> int -> int -> unit
+(** [blit src srcoff dst dstoff len] copies [len] bytes from sequence
+    [src], starting at index [srcoff], to sequence [dst], starting at
+    index [dstoff]. It works correctly even if [src] and [dst] are the
+    same byte sequence, and the source and destination intervals
+    overlap.
+
+    Raise [Invalid_argument] if [srcoff] and [len] do not
+    designate a valid range of [src], or if [dstoff] and [len]
+    do not designate a valid range of [dst]. *)
+
+val blit_string : string -> int -> bytes -> int -> int -> unit
+(** [blit_string src srcoff dst dstoff len] copies [len] bytes from
+    string [src], starting at index [srcoff], to byte sequence [dst],
+    starting at index [dstoff].
+
+    Raise [Invalid_argument] if [srcoff] and [len] do not
+    designate a valid range of [src], or if [dstoff] and [len]
+    do not designate a valid range of [dst]. *)
+
+val concat : bytes -> bytes list -> bytes
+(** [concat sep sl] concatenates the list of byte sequences [sl],
+    inserting the separator byte sequence [sep] between each, and
+    returns the result as a new byte sequence.
+
+    Raise [Invalid_argument] if the result is longer than
+    {!Sys.max_string_length} bytes. *)
+
+val cat : bytes -> bytes -> bytes
+(** [cat s1 s2] concatenates [s1] and [s2] and returns the result
+    as a new byte sequence.
+
+    Raise [Invalid_argument] if the result is longer than
+    {!Sys.max_string_length} bytes. *)
+
+val iter : (char -> unit) -> bytes -> unit
+(** [iter f s] applies function [f] in turn to all the bytes of [s].
+    It is equivalent to [f (get s 0); f (get s 1); ...; f (get s
+    (length s - 1)); ()]. *)
+
+val iteri : (int -> char -> unit) -> bytes -> unit
+(** Same as {!Bytes.iter}, but the function is applied to the index of
+    the byte as first argument and the byte itself as second
+    argument. *)
+
+val map : (char -> char) -> bytes -> bytes
+(** [map f s] applies function [f] in turn to all the bytes of [s]
+    (in increasing index order) and stores the resulting bytes in
+    a new sequence that is returned as the result. *)
+
+val mapi : (int -> char -> char) -> bytes -> bytes
+(** [mapi f s] calls [f] with each byte of [s] and its
+    index (in increasing index order) and stores the resulting bytes
+    in a new sequence that is returned as the result. *)
+
+val trim : bytes -> bytes
+(** Return a copy of the argument, without leading and trailing
+    whitespace. The bytes regarded as whitespace are the ASCII
+    characters [' '], ['\012'], ['\n'], ['\r'], and ['\t']. *)
+
+val escaped : bytes -> bytes
+(** Return a copy of the argument, with special characters represented
+    by escape sequences, following the lexical conventions of OCaml.
+
+    Raise [Invalid_argument] if the result is longer than
+    {!Sys.max_string_length} bytes. *)
+
+val index : bytes -> char -> int
+(** [index s c] returns the index of the first occurrence of byte [c]
+    in [s].
+
+    Raise [Not_found] if [c] does not occur in [s]. *)
+
+val rindex : bytes -> char -> int
+(** [rindex s c] returns the index of the last occurrence of byte [c]
+    in [s].
+
+    Raise [Not_found] if [c] does not occur in [s]. *)
+
+val index_from : bytes -> int -> char -> int
+(** [index_from s i c] returns the index of the first occurrence of
+    byte [c] in [s] after position [i].  [Bytes.index s c] is
+    equivalent to [Bytes.index_from s 0 c].
+
+    Raise [Invalid_argument] if [i] is not a valid position in [s].
+    Raise [Not_found] if [c] does not occur in [s] after position [i]. *)
+
+val rindex_from : bytes -> int -> char -> int
+(** [rindex_from s i c] returns the index of the last occurrence of
+    byte [c] in [s] before position [i+1].  [rindex s c] is equivalent
+    to [rindex_from s (Bytes.length s - 1) c].
+
+    Raise [Invalid_argument] if [i+1] is not a valid position in [s].
+    Raise [Not_found] if [c] does not occur in [s] before position [i+1]. *)
+
+val contains : bytes -> char -> bool
+(** [contains s c] tests if byte [c] appears in [s]. *)
+
+val contains_from : bytes -> int -> char -> bool
+(** [contains_from s start c] tests if byte [c] appears in [s] after
+    position [start].  [contains s c] is equivalent to [contains_from
+    s 0 c].
+
+    Raise [Invalid_argument] if [start] is not a valid position in [s]. *)
+
+val rcontains_from : bytes -> int -> char -> bool
+(** [rcontains_from s stop c] tests if byte [c] appears in [s] before
+    position [stop+1].
+
+    Raise [Invalid_argument] if [stop < 0] or [stop+1] is not a valid
+    position in [s]. *)
+
+val uppercase : bytes -> bytes
+(** Return a copy of the argument, with all lowercase letters
+    translated to uppercase, including accented letters of the ISO
+    Latin-1 (8859-1) character set. *)
+
+val lowercase : bytes -> bytes
+(** Return a copy of the argument, with all uppercase letters
+    translated to lowercase, including accented letters of the ISO
+    Latin-1 (8859-1) character set. *)
+
+val capitalize : bytes -> bytes
+(** Return a copy of the argument, with the first byte set to
+    uppercase. *)
+
+val uncapitalize : bytes -> bytes
+(** Return a copy of the argument, with the first byte set to
+    lowercase. *)
+
+type t = bytes
+(** An alias for the type of byte sequences. *)
+
+val compare: t -> t -> int
+(** The comparison function for byte sequences, with the same
+    specification as {!Pervasives.compare}.  Along with the type [t],
+    this function [compare] allows the module [Bytes] to be passed as
+    argument to the functors {!Set.Make} and {!Map.Make}. *)
+
+
+(** {4 Unsafe conversions (for advanced users)}
+
+    This section describes unsafe, low-level conversion functions
+    between [bytes] and [string]. They do not copy the internal data;
+    used improperly, they can break the immutability invariant on
+    strings provided by the [-safe-string] option. They are available for
+    expert library authors, but for most purposes you should use the
+    always-correct {!Bytes.to_string} and {!Bytes.of_string} instead.
+*)
+
+val unsafe_to_string : bytes -> string
+(** Unsafely convert a byte sequence into a string.
+
+    To reason about the use of [unsafe_to_string], it is convenient to
+    consider an "ownership" discipline. A piece of code that
+    manipulates some data "owns" it; there are several disjoint ownership
+    modes, including:
+    - Unique ownership: the data may be accessed and mutated
+    - Shared ownership: the data has several owners, that may only
+      access it, not mutate it.
+
+    Unique ownership is linear: passing the data to another piece of
+    code means giving up ownership (we cannot write the
+    data again). A unique owner may decide to make the data shared
+    (giving up mutation rights on it), but shared data may not become
+    uniquely-owned again.
+
+   [unsafe_to_string s] can only be used when the caller owns the byte
+   sequence [s] -- either uniquely or as shared immutable data. The
+   caller gives up ownership of [s], and gains ownership of the
+   returned string.
+
+   There are two valid use-cases that respect this ownership
+   discipline:
+
+   1. Creating a string by initializing and mutating a byte sequence
+   that is never changed after initialization is performed.
+
+   {[
+let string_init len f : string =
+  let s = Bytes.create len in
+  for i = 0 to len - 1 do Bytes.set s i (f i) done;
+  Bytes.unsafe_to_string s
+   ]}
+
+   This function is safe because the byte sequence [s] will never be
+   accessed or mutated after [unsafe_to_string] is called. The
+   [string_init] code gives up ownership of [s], and returns the
+   ownership of the resulting string to its caller.
+
+   Note that it would be unsafe if [s] was passed as an additional
+   parameter to the function [f] as it could escape this way and be
+   mutated in the future -- [string_init] would give up ownership of
+   [s] to pass it to [f], and could not call [unsafe_to_string]
+   safely.
+
+   We have provided the {!String.init}, {!String.map} and
+   {!String.mapi} functions to cover most cases of building
+   new strings. You should prefer those over [to_string] or
+   [unsafe_to_string] whenever applicable.
+
+   2. Temporarily giving ownership of a byte sequence to a function
+   that expects a uniquely owned string and returns ownership back, so
+   that we can mutate the sequence again after the call ended.
+
+   {[
+let bytes_length (s : bytes) =
+  String.length (Bytes.unsafe_to_string s)
+   ]}
+
+   In this use-case, we do not promise that [s] will never be mutated
+   after the call to [bytes_length s]. The {!String.length} function
+   temporarily borrows unique ownership of the byte sequence
+   (and sees it as a [string]), but returns this ownership back to
+   the caller, which may assume that [s] is still a valid byte
+   sequence after the call. Note that this is only correct because we
+   know that {!String.length} does not capture its argument -- a
+   function that did capture it could let it escape through a
+   side-channel such as a memoization combinator.
+
+   The caller may not mutate [s] while the string is borrowed (it has
+   temporarily given up ownership). This affects concurrent programs,
+   but also higher-order functions: if [String.length] returned
+   a closure to be called later, [s] should not be mutated until this
+   closure is fully applied and returns ownership.
+*)
+
+val unsafe_of_string : string -> bytes
+(** Unsafely convert a shared string to a byte sequence that should
+    not be mutated.
+
+    The same ownership discipline that makes [unsafe_to_string]
+    correct applies to [unsafe_of_string]: you may use it if you were
+    the owner of the [string] value, and you will own the returned
+    [bytes] in the same mode.
+
+    In practice, unique ownership of string values is extremely
+    difficult to reason about correctly. You should always assume
+    strings are shared, never uniquely owned.
+
+    For example, string literals are implicitly shared by the
+    compiler, so you never uniquely own them.
+
+    {[
+let incorrect = Bytes.unsafe_of_string "hello"
+let s = Bytes.of_string "hello"
+    ]}
+
+    The first declaration is incorrect, because the string literal
+    ["hello"] could be shared by the compiler with other parts of the
+    program, and mutating [incorrect] is a bug. You must always use
+    the second version, which performs a copy and is thus correct.
+
+    Assuming unique ownership of strings that are not string
+    literals, but are (partly) built from string literals, is also
+    incorrect. For example, mutating [unsafe_of_string ("foo" ^ s)]
+    could mutate the shared string ["foo"] -- assuming a rope-like
+    representation of strings. More generally, functions operating on
+    strings will assume shared ownership; they do not preserve unique
+    ownership. It is thus incorrect to assume unique ownership of the
+    result of [unsafe_of_string].
+
+    The only case we are reasonably confident is safe is when the
+    produced [bytes] is shared -- used as an immutable byte
+    sequence. This is possibly useful for incremental migration of
+    low-level programs that manipulate immutable sequences of bytes
+    (for example {!Marshal.from_bytes}) and previously used the
+    [string] type for this purpose.
+*)
+
+(**/**)
+
+(* The following is for system use only. Do not call directly. *)
+
+external unsafe_get : bytes -> int -> char = "%string_unsafe_get"
+external unsafe_set : bytes -> int -> char -> unit = "%string_unsafe_set"
+external unsafe_blit :
+  bytes -> int -> bytes -> int -> int -> unit
+  = "caml_blit_string" "noalloc"
+external unsafe_fill :
+  bytes -> int -> int -> char -> unit = "caml_fill_string" "noalloc"