summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--otherlibs/str/str.ml33
-rw-r--r--otherlibs/str/str.mli23
2 files changed, 55 insertions, 1 deletions
diff --git a/otherlibs/str/str.ml b/otherlibs/str/str.ml
index bc7a0661f..db430ac19 100644
--- a/otherlibs/str/str.ml
+++ b/otherlibs/str/str.ml
@@ -113,3 +113,36 @@ let bounded_split expr text num =
let split expr text = bounded_split expr text 0
+(* [bounded_split_delim expr text num] cuts [text] at each match of
+   [expr] and returns the pieces between the matches, in order.  A match
+   at the very beginning or the very end of [text] produces an empty
+   piece.  When [num >= 1] at most [num] pieces are returned, the last
+   one holding the unsplit remainder; any smaller [num] means no limit.
+   An empty [text] gives the empty list. *)
+let bounded_split_delim expr text num =
+  let len = String.length text in
+  (* Same as [search_forward expr text start], except that an empty match
+     sitting exactly at [start] is skipped by searching again one
+     character further.  On success [match_end() > start] holds, so
+     [split] always advances, even when [expr] can match the empty
+     string.  Raises [Not_found] when there is no such match. *)
+  let search_progress start =
+    let pos = search_forward expr text start in
+    if match_end() > start then pos
+    else if start < len then search_forward expr text (start + 1)
+    else raise Not_found in
+  let rec split start n =
+    if start > len then [] else
+    if n = 1 then [string_after text start] else
+      try
+        let pos = search_progress start in
+        (* Read the end of this match before recursing: the recursive
+           call performs further searches. *)
+        let stop = match_end() in
+        String.sub text start (pos-start) :: split stop (n-1)
+      with Not_found ->
+        [string_after text start] in
+  if text = "" then [] else split 0 num
+
+(* [split_delim expr text] is [bounded_split_delim expr text 0], i.e. the
+   same splitting with no limit on the number of pieces. *)
+let split_delim expr text = bounded_split_delim expr text 0
+
+(* One element of the list built by [bounded_full_split]: [Delim]
+   carries a substring matched by the regexp, [Text] carries a stretch
+   of the input lying between matches. *)
+type split_result =
+    Text of string
+  | Delim of string
+
+(* [bounded_full_split expr text num] cuts [text] at each match of [expr]
+   and returns, in order of appearance, the matched delimiters tagged
+   [Delim] and the non-empty stretches of text around them tagged [Text].
+   When [num >= 1] at most [num - 1] delimiters are looked for and
+   whatever text remains is returned as a single [Text]; any smaller
+   [num] means no limit.  An empty [text] gives the empty list. *)
+let bounded_full_split expr text num =
+  let len = String.length text in
+  (* Same as [search_forward expr text start], except that an empty match
+     sitting exactly at [start] is skipped by searching again one
+     character further, so that [split] always advances even when [expr]
+     can match the empty string.  Only called with [start < len], hence
+     [start + 1] is still a valid search position. *)
+  let search_progress start =
+    let pos = search_forward expr text start in
+    if match_end() > start then pos
+    else search_forward expr text (start + 1) in
+  let rec split start n =
+    if start >= len then [] else
+    if n = 1 then [Text(string_after text start)] else
+      try
+        let pos = search_progress start in
+        (* Capture the delimiter and its end position before recursing.
+           The recursive call runs further searches, and the evaluation
+           order of the operands of [::] is not specified, so reading
+           [matched_string] inside the list expression could report a
+           later match instead of this one. *)
+        let delim = Delim(matched_string text) in
+        let stop = match_end() in
+        let rest = delim :: split stop (n-1) in
+        if pos > start
+        then Text(String.sub text start (pos-start)) :: rest
+        else rest
+      with Not_found ->
+        [Text(string_after text start)] in
+  split 0 num
+
+(* [full_split expr text] is [bounded_full_split expr text 0], i.e. the
+   same splitting with no limit on the number of delimiters. *)
+let full_split expr text = bounded_full_split expr text 0
diff --git a/otherlibs/str/str.mli b/otherlibs/str/str.mli
index 87edc7309..97882fb8f 100644
--- a/otherlibs/str/str.mli
+++ b/otherlibs/str/str.mli
@@ -129,11 +129,32 @@ val split: regexp -> string -> string list
(* [split r s] splits [s] into substrings, taking as delimiters
the substrings that match [r], and returns the list of substrings.
For instance, [split (regexp "[ \t]+") s] splits [s] into
- blank-separated words. *)
+ blank-separated words. An occurrence of the delimiter at the
+ beginning or at the end of the string is ignored. *)
val bounded_split: regexp -> string -> int -> string list
(* Same as [split], but splits into at most [n] substrings,
where [n] is the extra integer parameter. *)
+val split_delim: regexp -> string -> string list
+val bounded_split_delim: regexp -> string -> int -> string list
+ (* Same as [split] and [bounded_split], but occurrences of the
+ delimiter at the beginning or at the end of the string are
+ recognized and returned as empty strings in the result.
+ For instance, [split_delim (regexp " ") " abc "]
+ returns [[""; "abc"; ""]], while [split] with the same
+ arguments returns [["abc"]]. The empty string yields [[]]. *)
+
+type split_result =
+    Text of string
+  | Delim of string
+        (* Type of the elements of the lists returned by [full_split]
+           and [bounded_full_split]: [Delim] tags a delimiter, [Text]
+           tags a substring found between delimiters. *)
+
+val full_split: regexp -> string -> split_result list
+val bounded_full_split: regexp -> string -> int -> split_result list
+ (* Same as [split_delim] and [bounded_split_delim], but returns
+ the delimiters as well as the substrings found between them.
+ Delimiters are tagged [Delim], other substrings are tagged [Text].
+ Unlike [split_delim], empty substrings are omitted. For instance,
+ [full_split (regexp "[{}]") "{ab}"] returns
+ [[Delim "{"; Text "ab"; Delim "}"]]. *)
+
(*** Extracting substrings *)
val string_before: string -> int -> string