split
builtins.split
Primop
Docs pulled from | This Revision | about 19 hours ago
Nix manual
Takes 2 arguments
regex, str
Returns a list composed of the non-matched strings interleaved with lists of the matches of the extended POSIX regular expression regex in str. Each item in a list of matched sequences is a regex group.
builtins.split "(a)b" "abc"
Evaluates to [ "" [ "a" ] "c" ].
builtins.split "([ac])" "abc"
Evaluates to [ "" [ "a" ] "b" [ "c" ] "" ].
builtins.split "(a)|(c)" "abc"
Evaluates to [ "" [ "a" null ] "b" [ null "c" ] "" ].
builtins.split "([[:upper:]]+)" " FOO "
Evaluates to [ " " [ "FOO" ] " " ].
Noogle detected
Detected Type
split :: String -> String -> [String]
Implementation
This function is implemented in C++ and is part of the native Nix runtime.
/**
 * Primop body for `builtins.split regex str`.
 *
 * `args[0]` is forced as the regular expression (via `forceStringNoCtx`) and
 * `args[1]` as the string to split. The result stored in `v` is a list of
 * `2 * n + 1` elements for `n` matches: the non-matched pieces of the string
 * alternate with one inner list per match. Each inner list holds one entry
 * per capture group, which is null when that group did not take part in the
 * match. When nothing matches, the result is the one-element list containing
 * `args[1]` itself.
 *
 * A `std::regex_error` raised inside the `try` block is reported as an
 * `EvalError` at `pos`.
 */
void prim_split(EvalState & state, const PosIdx pos, Value ** args, Value & v)
{
    auto re = state.forceStringNoCtx(*args[0], pos, "while evaluating the first argument passed to builtins.split");

    try {
        auto regex = state.regexCache->get(re);

        // The context gathered here is not used again in this function.
        NixStringContext context;
        const auto str =
            state.forceString(*args[1], context, pos, "while evaluating the second argument passed to builtins.split");

        auto firstMatch = std::cregex_iterator(str.begin(), str.end(), *regex);
        auto pastLastMatch = std::cregex_iterator();

        // Every match is surrounded by non-matching pieces, hence n matches
        // produce n + 1 strings and n inner lists.
        const size_t matchCount = std::distance(firstMatch, pastLastMatch);
        const size_t resultSize = 2 * matchCount + 1;
        auto result = state.buildList(resultSize);

        // No match at all: hand back the input value unchanged as the only element.
        if (matchCount == 0) {
            result[0] = args[1];
            v.mkList(result);
            return;
        }

        size_t out = 0;
        for (auto it = firstMatch; it != pastLastMatch; ++it) {
            // Each iteration writes at least two slots, and the last one writes three.
            assert(out + 3 <= resultSize);
            const auto & match = *it;

            // Text between the previous match (or the start) and this match.
            result[out++] = mkString(state, match.prefix());

            // Sub-match 0 is the whole match, so the capture groups are 1..groupCount.
            const size_t groupCount = match.size() - 1;
            auto groups = state.buildList(groupCount);
            for (size_t g = 0; g < groupCount; ++g) {
                if (match[g + 1].matched)
                    groups[g] = mkString(state, match[g + 1]);
                else
                    groups[g] = &Value::vNull;
            }

            Value * groupsValue = state.allocValue();
            result[out++] = groupsValue;
            groupsValue->mkList(groups);

            // Only the final match contributes its trailing non-matched text.
            if (out == resultSize - 1)
                result[out++] = mkString(state, match.suffix());
        }

        assert(out == resultSize);
        v.mkList(result);
    } catch (std::regex_error & e) {
        if (e.code() != std::regex_constants::error_space) {
            state.error<EvalError>("invalid regular expression '%s'", re).atPos(pos).debugThrow();
        } else {
            // limit is _GLIBCXX_REGEX_STATE_LIMIT for libstdc++
            state.error<EvalError>("memory limit exceeded by regular expression '%s'", re).atPos(pos).debugThrow();
        }
    }
}