diff options
-rw-r--r-- | otherlibs/str/str.ml | 33 | ||||
-rw-r--r-- | otherlibs/str/str.mli | 23 |
2 files changed, 55 insertions, 1 deletions
diff --git a/otherlibs/str/str.ml b/otherlibs/str/str.ml
index bc7a0661f..db430ac19 100644
--- a/otherlibs/str/str.ml
+++ b/otherlibs/str/str.ml
@@ -113,3 +113,72 @@ let bounded_split expr text num =
 
 let split expr text = bounded_split expr text 0
 
+(* Like [search_forward], but never returns a match that ends at [start]:
+   when the match found at [start] is empty, the search is retried one
+   character further on.  Without this, a regexp that can match the empty
+   string would make the unbounded splitting functions recurse forever,
+   since each step would restart the search from the same position.
+   Raises [Not_found] if no such match exists. *)
+let search_forward_progress expr text start =
+  let pos = search_forward expr text start in
+  if match_end() > start then pos
+  else if start < String.length text then search_forward expr text (start+1)
+  else raise Not_found
+
+(* [bounded_split_delim expr text num] cuts [text] at every match of [expr]
+   and returns the pieces between the matches, in order.  A match at the
+   very beginning or the very end of [text] produces an empty piece.
+   Splitting stops as soon as the countdown started at [num] reaches 1,
+   the rest of [text] then being returned as the last piece; with [num]
+   equal to 0 the countdown never reaches 1, so there is no limit.
+   An empty [text] gives the empty list. *)
+let bounded_split_delim expr text num =
+  let rec split start n =
+    if start > String.length text then [] else
+    if n = 1 then [string_after text start] else
+      try
+        let pos = search_forward_progress expr text start in
+        (* Read the end of the match before recursing: the recursive call
+           searches again and replaces the current match. *)
+        let next = match_end() in
+        String.sub text start (pos-start) :: split next (n-1)
+      with Not_found ->
+        [string_after text start] in
+  if text = "" then [] else split 0 num
+
+(* [split_delim expr text] is [bounded_split_delim] with no limit on the
+   number of pieces. *)
+let split_delim expr text = bounded_split_delim expr text 0
+
+(* One element of the list returned by [full_split]: either a piece of
+   text lying between delimiters, or the text of a delimiter. *)
+type split_result = Text of string | Delim of string
+
+(* [bounded_full_split expr text num] works like [bounded_split_delim] but
+   also returns the delimiters: each match of [expr] appears as a [Delim]
+   and the text between two matches as a [Text].  No empty [Text] is ever
+   produced, so two adjacent delimiters give two consecutive [Delim]s. *)
+let bounded_full_split expr text num =
+  let rec split start n =
+    if start >= String.length text then [] else
+    if n = 1 then [Text(string_after text start)] else
+      try
+        let pos = search_forward_progress expr text start in
+        (* The matched string and the end of the match must be read here,
+           before the recursive call runs another search.  The evaluation
+           order of the arguments of [::] is unspecified (right to left in
+           practice), so calling [matched_string] inside the list
+           expression would return the text of a later delimiter. *)
+        let delim = Delim(matched_string text) in
+        let next = match_end() in
+        let rest = delim :: split next (n-1) in
+        if pos > start
+        then Text(String.sub text start (pos-start)) :: rest
+        else rest
+      with Not_found ->
+        [Text(string_after text start)] in
+  split 0 num
+
+(* [full_split expr text] is [bounded_full_split] with no limit on the
+   number of elements. *)
+let full_split expr text = bounded_full_split expr text 0
diff --git a/otherlibs/str/str.mli b/otherlibs/str/str.mli
index 87edc7309..97882fb8f 100644
--- a/otherlibs/str/str.mli
+++ b/otherlibs/str/str.mli
@@ -129,11 +129,32 @@ val split: regexp -> string -> string list
         (* [split r s] splits [s] into substrings, taking as delimiters
            the substrings that match [r], and returns the list of substrings.
            For instance, [split (regexp "[ \t]+") s] splits [s] into
-           blank-separated words. *)
+           blank-separated words. An occurrence of the delimiter at the
+           beginning and at the end of the string is ignored. *)
 val bounded_split: regexp -> string -> int -> string list
         (* Same as [split], but splits into at most [n] substrings,
            where [n] is the extra integer parameter.
*) +val split_delim: regexp -> string -> string list +val bounded_split_delim: regexp -> string -> int -> string list + (* Same as [split] and [bounded_split], but a delimiter found at the very beginning or at the very end of the string is not ignored: each one contributes an empty string to the result. For instance, [split_delim (regexp " ") " abc "] returns [[""; "abc"; ""]], while [split] with the same arguments returns [["abc"]]. Splitting the empty string returns the empty list. *) + +type split_result = Text of string | Delim of string + +val full_split: regexp -> string -> split_result list +val bounded_full_split: regexp -> string -> int -> split_result list + (* Like [split_delim] and [bounded_split_delim], but the result also contains the delimiters, in the order in which they occur in the string. Delimiters are tagged [Delim]; the substrings found between them are tagged [Text]. Unlike [split_delim], no empty [Text] element is produced for a delimiter at the beginning or at the end of the string, nor between two adjacent delimiters. For instance, [full_split (regexp "[{}]") "{ab}"] returns [[Delim "{"; Text "ab"; Delim "}"]]. *) + (*** Extracting substrings *) val string_before: string -> int -> string |