diff options
Diffstat (limited to 'stdlib')
-rw-r--r-- | stdlib/.depend | 2 | ||||
-rw-r--r-- | stdlib/Makefile | 2 | ||||
-rw-r--r-- | stdlib/marshal.ml | 48 | ||||
-rw-r--r-- | stdlib/marshal.mli | 125 | ||||
-rw-r--r-- | stdlib/obj.ml | 2 | ||||
-rw-r--r-- | stdlib/obj.mli | 2 | ||||
-rw-r--r-- | stdlib/pervasives.ml | 5 | ||||
-rw-r--r-- | stdlib/pervasives.mli | 16 |
8 files changed, 186 insertions, 16 deletions
diff --git a/stdlib/.depend b/stdlib/.depend index b68e07b60..4619dc539 100644 --- a/stdlib/.depend +++ b/stdlib/.depend @@ -26,6 +26,8 @@ list.cmo: list.cmi list.cmx: list.cmi map.cmo: map.cmi map.cmx: map.cmi +marshal.cmo: string.cmi marshal.cmi +marshal.cmx: string.cmx marshal.cmi obj.cmo: obj.cmi obj.cmx: obj.cmi oo.cmo: array.cmi hashtbl.cmi list.cmi map.cmi obj.cmi random.cmi sort.cmi \ diff --git a/stdlib/Makefile b/stdlib/Makefile index 217c987a0..fe75a8924 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -13,7 +13,7 @@ OBJS=pervasives.cmo list.cmo char.cmo string.cmo array.cmo sys.cmo \ hashtbl.cmo sort.cmo filename.cmo obj.cmo lexing.cmo parsing.cmo \ set.cmo map.cmo stack.cmo queue.cmo stream.cmo \ printf.cmo format.cmo arg.cmo printexc.cmo gc.cmo \ - digest.cmo random.cmo oo.cmo genlex.cmo callback.cmo weak.cmo + digest.cmo random.cmo oo.cmo genlex.cmo callback.cmo weak.cmo marshal.cmo all: stdlib.cma std_exit.cmo camlheader diff --git a/stdlib/marshal.ml b/stdlib/marshal.ml new file mode 100644 index 000000000..163acb9f0 --- /dev/null +++ b/stdlib/marshal.ml @@ -0,0 +1,48 @@ +(***********************************************************************) +(* *) +(* Objective Caml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* Automatique. Distributed only by permission. 
*) +(* *) +(***********************************************************************) + +(* $Id$ *) + +type extern_flags = + No_sharing + | Closures + +external to_channel: out_channel -> 'a -> extern_flags list -> unit + = "output_value" +external to_string: 'a -> extern_flags list -> string = "output_value_to_string" +external to_buffer_unsafe: + string -> int -> int -> 'a -> extern_flags list -> unit + = "output_value_to_buffer" + +let to_buffer buff ofs len v flags = + if ofs < 0 || len < 0 || ofs + len > String.length buff + then invalid_arg "Marshal.to_buffer: substring out of bounds" + else to_buffer_unsafe buff ofs len v flags + +external from_channel: in_channel -> 'a = "input_value" +external from_string_unsafe: string -> int -> 'a = "input_value_from_string" +external data_size_unsafe: string -> int -> int = "marshal_data_size" + +let header_size = 20 +let data_size buff ofs = + if ofs < 0 || ofs + header_size > String.length buff + then invalid_arg "Marshal.data_size" + else data_size_unsafe buff ofs + +let from_string buff ofs = + if ofs < 0 || ofs + header_size > String.length buff + then invalid_arg "Marshal.from_string" + else begin + let len = data_size_unsafe buff ofs in + if ofs + header_size + len > String.length buff + then invalid_arg "Marshal.from_string" + else from_string_unsafe buff ofs + end diff --git a/stdlib/marshal.mli b/stdlib/marshal.mli new file mode 100644 index 000000000..2049569d5 --- /dev/null +++ b/stdlib/marshal.mli @@ -0,0 +1,125 @@ +(***********************************************************************) +(* *) +(* Objective Caml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* Automatique. Distributed only by permission. 
*) +(* *) +(***********************************************************************) + +(* $Id$ *) + +(* Module [Marshal]: marshaling of data structures *) + +(* This module provides functions to encode arbitrary data structures + as sequences of bytes, which can then be written on a file or + sent over a pipe or network connection. The bytes can then + be read back later, possibly in another process, and decoded back + into a data structure. The format for the byte sequences + is compatible across all machines for a given version of Objective Caml. + + Warning: marshaling is currently not type-safe. The type + of marshaled data is not transmitted along the value of the data, + making it impossible to check that the data read back possesses the + type expected by the context. In particular, the result type of + the [Marshal.from_*] functions is given as ['a], but this is + misleading: the returned Caml value does not possess type ['a] + for all ['a]; it has one, unique type which cannot be determined + at compile-time. The programmer should explicitly give the expected + type of the returned value, using the following syntax: + [(Marshal.from_channel chan : type)]. + The behavior is unspecified if the object in the file does not + belong to the given type. + + The representation of marshaled values is not human-readable, + and uses bytes that are not printable characters. Therefore, + input and output channels used in conjunction with [Marshal.to_channel] + and [Marshal.from_channel] must be opened in binary mode, using e.g. + [open_out_bin] or [open_in_bin]; channels opened in text mode will + cause unmarshaling errors on platforms where text channels behave + differently than binary channels, e.g. Windows. *) + +type extern_flags = + No_sharing (* Don't preserve sharing *) + | Closures (* Send function closures *) + (* The flags to the [Marshal.to_*] functions below. 
*) + +external to_channel: out_channel -> 'a -> extern_flags list -> unit + = "output_value" + (* [Marshal.to_channel chan v flags] writes the representation + of [v] on channel [chan]. The [flags] argument is a + possibly empty list of flags that governs the marshaling + behavior with respect to sharing and functional values. + + If [flags] does not contain [Marshal.No_sharing], circularities + and sharing inside the value [v] are detected and preserved + in the sequence of bytes produced. In particular, this + guarantees that marshaling always terminates. Sharing + between values marshaled by successive calls to + [Marshal.to_channel] is not detected, though. + If [flags] contains [Marshal.No_sharing], sharing is ignored. + This results in faster marshaling if [v] contains no shared + substructures, but may cause slower marshaling and larger + byte representations if [v] actually contains sharing, + or even non-termination if [v] contains cycles. + + If [flags] does not contain [Marshal.Closures], + marshaling fails when it encounters a functional value + inside [v]: only ``pure'' data structures, containing neither + functions nor objects, can safely be transmitted between + different programs. If [flags] contains [Marshal.Closures], + functional values will be marshaled as a position in the code + of the program. In this case, the output of marshaling can + only be read back in processes that run exactly the same program, + with exactly the same compiled code. (This is checked + at un-marshaling time, using an MD5 digest of the code + transmitted along with the code position.) *) + +external to_string: 'a -> extern_flags list -> string + = "output_value_to_string" + (* [Marshal.to_string v flags] returns a string containing + the representation of [v] as a sequence of bytes. + The [flags] argument has the same meaning as for + [Marshal.to_channel]. 
*) + +val to_buffer: string -> int -> int -> 'a -> extern_flags list -> unit + (* [Marshal.to_buffer buff ofs len v flags] marshals the value [v], + storing its byte representation in the string [buff], + starting at character number [ofs], and writing at most + [len] characters. If the byte representation of [v] + does not fit in [len] characters, the exception [Failure] + is raised. *) + +external from_channel: in_channel -> 'a = "input_value" + (* [Marshal.from_channel chan] reads from channel [chan] the + byte representation of a structured value, as produced by + one of the [Marshal.to_*] functions, and reconstructs and + returns the corresponding value. *) + +val from_string: string -> int -> 'a + (* [Marshal.from_string buff ofs] unmarshals a structured value + like [Marshal.from_channel] does, except that the byte + representation is not read from a channel, but taken from + the string [buff], starting at position [ofs]. *) + +val header_size : int +val data_size : string -> int -> int + (* The bytes representing a marshaled value are composed of + a fixed-size header and a variable-sized data part, + whose size can be determined from the header. + [Marshal.header_size] is the size, in characters, of the header. + [Marshal.data_size buff ofs] is the size, in characters, + of the data part, assuming a valid header is stored in + [buff] starting at position [ofs]. It raises [Failure] + if [buff], [ofs] does not contain a valid header. + + To read the byte representation of a marshaled value into + a string buffer, one needs to read first [Marshal.header_size] + characters into the buffer, then determine the length of the + remainder of the representation using [Marshal.data_size], + make sure the buffer is large enough to hold the variable + size, then read it, and finally call [Marshal.from_string] + to unmarshal the value. 
*) + diff --git a/stdlib/obj.ml b/stdlib/obj.ml index ec1abeb59..13f16db40 100644 --- a/stdlib/obj.ml +++ b/stdlib/obj.ml @@ -24,5 +24,3 @@ external size : t -> int = "%obj_size" external field : t -> int -> t = "%obj_field" external set_field : t -> int -> t -> unit = "%obj_set_field" external new_block : int -> int -> t = "obj_block" -external marshal : t -> string = "output_value_to_string" -external unmarshal : string -> int -> t * int = "input_value_from_string" diff --git a/stdlib/obj.mli b/stdlib/obj.mli index aee5777ac..d70a86415 100644 --- a/stdlib/obj.mli +++ b/stdlib/obj.mli @@ -26,5 +26,3 @@ external size : t -> int = "%obj_size" external field : t -> int -> t = "%obj_field" external set_field : t -> int -> t -> unit = "%obj_set_field" external new_block : int -> int -> t = "obj_block" -external marshal : t -> string = "output_value_to_string" -external unmarshal : string -> int -> t * int = "input_value_from_string" diff --git a/stdlib/pervasives.ml b/stdlib/pervasives.ml index 1904da847..cf7cd1d15 100644 --- a/stdlib/pervasives.ml +++ b/stdlib/pervasives.ml @@ -195,7 +195,10 @@ let output oc s ofs len = external output_byte : out_channel -> int -> unit = "output_char" external output_binary_int : out_channel -> int -> unit = "output_int" -external output_value : out_channel -> 'a -> unit = "output_value" + +external marshal_to_channel : out_channel -> 'a -> unit list -> unit + = "output_value" +let output_value chan v = marshal_to_channel chan v [] external seek_out : out_channel -> int -> unit = "seek_out" external pos_out : out_channel -> int = "pos_out" diff --git a/stdlib/pervasives.mli b/stdlib/pervasives.mli index 43e99ca6b..9b707b654 100644 --- a/stdlib/pervasives.mli +++ b/stdlib/pervasives.mli @@ -437,8 +437,9 @@ val output_value : out_channel -> 'a -> unit (* Write the representation of a structured value of any type to a channel. Circularities and sharing inside the value are detected and preserved. 
The object can be read back, - by the function [input_value]. The format is compatible across - all machines for a given version of Objective Caml. *) + by the function [input_value]. See the description of module + [Marshal] for more information. [output_value] is equivalent + to [Marshal.to_channel] with an empty list of flags. *) val seek_out : out_channel -> int -> unit (* [seek_out chan pos] sets the current writing position to [pos] for channel [chan]. This works only for regular files. On @@ -510,14 +511,9 @@ val input_binary_int : in_channel -> int val input_value : in_channel -> 'a (* Read the representation of a structured value, as produced by [output_value], and return the corresponding value. - This is not type-safe. The type of the returned object is - not ['a] properly speaking: the returned object has one - unique type, which cannot be determined at compile-time. - The programmer should explicitly give the expected type of the - returned value, using the following syntax: - [(input_value chan : type)]. - The behavior is unspecified if the object in the file does not - belong to the given type. *) + This function is identical to [Marshal.from_channel]; + see the description of module [Marshal] for more information, + in particular concerning the lack of type safety. *) val seek_in : in_channel -> int -> unit (* [seek_in chan pos] sets the current reading position to [pos] for channel [chan]. This works only for regular files. On |