summaryrefslogtreecommitdiffstats
path: root/stdlib/bytes.mli
diff options
context:
space:
mode:
Diffstat (limited to 'stdlib/bytes.mli')
-rw-r--r--stdlib/bytes.mli168
1 files changed, 164 insertions, 4 deletions
diff --git a/stdlib/bytes.mli b/stdlib/bytes.mli
index d9c104674..82b28a28c 100644
--- a/stdlib/bytes.mli
+++ b/stdlib/bytes.mli
@@ -99,6 +99,16 @@ val sub : bytes -> int -> int -> bytes
val sub_string : bytes -> int -> int -> string
(** Same as [sub] but return a string instead of a byte sequence. *)
+val extend : bytes -> int -> int -> bytes
+(** [extend s left right] returns a new byte sequence that contains
+ the bytes of [s], with [left] uninitialized bytes prepended and
+ [right] uninitialized bytes appended to it. If [left] or [right]
+ is negative, then bytes are removed (instead of appended) from
+ the corresponding side of [s].
+
+ Raise [Invalid_argument] if the result length is negative or
+ longer than {!Sys.max_string_length} bytes. *)
+
val fill : bytes -> int -> int -> char -> unit
(** [fill s start len c] modifies [s] in place, replacing [len]
characters with [c], starting at [start].
@@ -117,10 +127,29 @@ val blit : bytes -> int -> bytes -> int -> int -> unit
designate a valid range of [src], or if [dstoff] and [len]
do not designate a valid range of [dst]. *)
+val blit_string : string -> int -> bytes -> int -> int -> unit
+(** [blit src srcoff dst dstoff len] copies [len] bytes from string
+ [src], starting at index [srcoff], to byte sequence [dst],
+ starting at index [dstoff].
+
+ Raise [Invalid_argument] if [srcoff] and [len] do not
+ designate a valid range of [src], or if [dstoff] and [len]
+ do not designate a valid range of [dst]. *)
+
val concat : bytes -> bytes list -> bytes
(** [concat sep sl] concatenates the list of byte sequences [sl],
inserting the separator byte sequence [sep] between each, and
- returns the result as a new byte sequence. *)
+ returns the result as a new byte sequence.
+
+ Raise [Invalid_argument] if the result is longer than
+ {!Sys.max_string_length} bytes. *)
+
+val cat : bytes -> bytes -> bytes
+(** [cat s1 s2] concatenates [s1] and [s2] and returns the result
+ as new byte sequence.
+
+ Raise [Invalid_argument] if the result is longer than
+ {!Sys.max_string_length} bytes. *)
val iter : (char -> unit) -> bytes -> unit
(** [iter f s] applies function [f] in turn to all the bytes of [s].
@@ -149,7 +178,10 @@ val trim : bytes -> bytes
val escaped : bytes -> bytes
(** Return a copy of the argument, with special characters represented
- by escape sequences, following the lexical conventions of OCaml. *)
+ by escape sequences, following the lexical conventions of OCaml.
+
+ Raise [Invalid_argument] if the result is longer than
+ {!Sys.max_string_length} bytes. *)
val index : bytes -> char -> int
(** [index s c] returns the index of the first occurrence of byte [c]
@@ -223,6 +255,136 @@ val compare: t -> t -> int
this function [compare] allows the module [Bytes] to be passed as
argument to the functors {!Set.Make} and {!Map.Make}. *)
+
+(** {4 Unsafe conversions (for advanced users)}
+
+ This section describes unsafe, low-level conversion functions
+ between [bytes] and [string]. They do not copy the internal data;
+ used improperly, they can break the immutability invariant on
+ strings provided by the [-safe-string] option. They are available for
+ expert library authors, but for most purposes you should use the
+ always-correct {!Bytes.to_string} and {!Bytes.of_string} instead.
+*)
+
+val unsafe_to_string : bytes -> string
+(** Unsafely convert a byte sequence into a string.
+
+ To reason about the use of [unsafe_to_string], it is convenient to
+ consider an "ownership" discipline. A piece of code that
+ manipulates some data "owns" it; there are several disjoint ownership
+ modes, including:
+ - Unique ownership: the data may be accessed and mutated
+ - Shared ownership: the data has several owners, that may only
+ access it, not mutate it.
+
+ Unique ownership is linear: passing the data to another piece of
+ code means giving up ownership (we cannot write the
+ data again). A unique owner may decide to make the data shared
+ (giving up mutation rights on it), but shared data may not become
+ uniquely-owned again.
+
+ [unsafe_to_string s] can only be used when the caller owns the byte
+ sequence [s] -- either uniquely or as shared immutable data. The
+ caller gives up ownership of [s], and gains ownership of the
+ returned string.
+
+ There are two valid use-cases that respect this ownership
+ discipline:
+
+ 1. Creating a string by initializing and mutating a byte sequence
+ that is never changed after initialization is performed.
+
+ {[
+let string_init len f : string =
+ let s = Bytes.create len in
+ for i = 0 to len - 1 do Bytes.set s i (f i) done;
+ Bytes.unsafe_to_string s
+ ]}
+
+ This function is safe because the byte sequence [s] will never be
+ accessed or mutated after [unsafe_to_string] is called. The
+ [string_init] code gives up ownership of [s], and returns the
+ ownership of the resulting string to its caller.
+
+ Note that it would be unsafe if [s] was passed as an additional
+ parameter to the function [f] as it could escape this way and be
+ mutated in the future -- [string_init] would give up ownership of
+ [s] to pass it to [f], and could not call [unsafe_to_string]
+ safely.
+
+ We have provided the {!String.init}, {!String.map} and
+ {!String.mapi} functions to cover most cases of building
+ new strings. You should prefer those over [to_string] or
+ [unsafe_to_string] whenever applicable.
+
+ 2. Temporarily giving ownership of a byte sequence to a function
+ that expects a uniquely owned string and returns ownership back, so
+ that we can mutate the sequence again after the call ended.
+
+ {[
+let bytes_length (s : bytes) =
+ String.length (Bytes.unsafe_to_string s)
+ ]}
+
+ In this use-case, we do not promise that [s] will never be mutated
+ after the call to [bytes_length s]. The {!String.length} function
+ temporarily borrows unique ownership of the byte sequence
+ (and sees it as a [string]), but returns this ownership back to
+ the caller, which may assume that [s] is still a valid byte
+ sequence after the call. Note that this is only correct because we
+ know that {!String.length} does not capture its argument -- it could
+ escape by a side-channel such as a memoization combinator.
+
+ The caller may not mutate [s] while the string is borrowed (it has
+ temporarily given up ownership). This affects concurrent programs,
+ but also higher-order functions: if [String.length] returned
+ a closure to be called later, [s] should not be mutated until this
+ closure is fully applied and returns ownership.
+*)
+
+val unsafe_of_string : string -> bytes
+(** Unsafely convert a shared string to a byte sequence that should
+ not be mutated.
+
+ The same ownership discipline that makes [unsafe_to_string]
+ correct applies to [unsafe_of_string]: you may use it if you were
+ the owner of the [string] value, and you will own the return
+ [bytes] in the same mode.
+
+ In practice, unique ownership of string values is extremely
+ difficult to reason about correctly. You should always assume
+ strings are shared, never uniquely owned.
+
+ For example, string literals are implicitly shared by the
+ compiler, so you never uniquely own them.
+
+ {[
+let incorrect = Bytes.unsafe_of_string "hello"
+let s = Bytes.of_string "hello"
+ ]}
+
+ The first declaration is incorrect, because the string literal
+ ["hello"] could be shared by the compiler with other parts of the
+ program, and mutating [incorrect] is a bug. You must always use
+ the second version, which performs a copy and is thus correct.
+
+ Assuming unique ownership of strings that are not string
+ literals, but are (partly) built from string literals, is also
+ incorrect. For example, mutating [unsafe_of_string ("foo" ^ s)]
+ could mutate the shared string ["foo"] -- assuming a rope-like
+ representation of strings. More generally, functions operating on
+ strings will assume shared ownership, they do not preserve unique
+ ownership. It is thus incorrect to assume unique ownership of the
+ result of [unsafe_of_string].
+
+ The only case we have reasonable confidence is safe is if the
+ produced [bytes] is shared -- used as an immutable byte
+ sequence. This is possibly useful for incremental migration of
+ low-level programs that manipulate immutable sequences of bytes
+ (for example {!Marshal.from_bytes}) and previously used the
+ [string] type for this purpose.
+*)
+
(**/**)
(* The following is for system use only. Do not call directly. *)
@@ -234,5 +396,3 @@ external unsafe_blit :
= "caml_blit_string" "noalloc"
external unsafe_fill :
bytes -> int -> int -> char -> unit = "caml_fill_string" "noalloc"
-external unsafe_to_string : bytes -> string = "%identity"
-external unsafe_of_string : string -> bytes = "%identity"