query
On this page

split

builtins.split

Primop
Docs pulled from | This Revision | about 19 hours ago


Nix manual

Takes 2 arguments

regex, str

Returns a list composed of the non-matched strings interleaved with lists of the matches of the extended POSIX regular expression regex in str. Each item in a list of matched sequences is a regex group.

builtins.split "(a)b" "abc"

Evaluates to [ "" [ "a" ] "c" ].

builtins.split "([ac])" "abc"

Evaluates to [ "" [ "a" ] "b" [ "c" ] "" ].

builtins.split "(a)|(c)" "abc"

Evaluates to [ "" [ "a" null ] "b" [ null "c" ] "" ].

builtins.split "([[:upper:]]+)" " FOO "

Evaluates to [ " " [ "FOO" ] " " ].

Noogle detected

Aliases

Detected Type
split :: String -> String -> [String]

Implementation

This function is implemented in C++ and is part of the native Nix runtime.

src/libexpr/primops.cc:4992

/**
 * Primop backing `builtins.split regex str`.
 *
 * Forces `args[0]` to a context-free string, looks it up in the evaluator's
 * regex cache, forces `args[1]` to a string and walks every match of the
 * regex in it. The result written to `v` is a list of `2 * matches + 1`
 * elements: the unmatched text before each match, followed by a list with
 * one entry per capture group of that match (null for a group that did not
 * participate), and finally the unmatched text after the last match.
 *
 * When nothing matches, the result is a one-element list holding `args[1]`
 * itself.
 *
 * A `std::regex_error` raised inside the body is turned into an `EvalError`
 * positioned at `pos`.
 *
 * Note: the string context collected while forcing `args[1]` is not used
 * again in this function.
 */
void prim_split(EvalState & state, const PosIdx pos, Value ** args, Value & v)
{
    auto re = state.forceStringNoCtx(*args[0], pos, "while evaluating the first argument passed to builtins.split");

    try {

        auto regex = state.regexCache->get(re);

        NixStringContext context;
        const auto str =
            state.forceString(*args[1], context, pos, "while evaluating the second argument passed to builtins.split");

        const auto firstMatch = std::cregex_iterator(str.begin(), str.end(), *regex);
        const auto pastLastMatch = std::cregex_iterator();

        // Every match contributes a prefix string and a group list; one
        // trailing suffix string closes the result.
        const size_t matchCount = std::distance(firstMatch, pastLastMatch);
        const size_t slotCount = 2 * matchCount + 1;
        auto result = state.buildList(slotCount);

        // No match at all: the single slot is the input value itself.
        if (matchCount == 0) {
            result[0] = args[1];
            v.mkList(result);
            return;
        }

        size_t next = 0;
        for (auto it = firstMatch; it != pastLastMatch; ++it) {
            // Each iteration fills at least two slots, and the final suffix
            // slot must still be free afterwards.
            assert(next + 3 <= slotCount);
            const auto & found = *it;

            // Text between the previous match (or the start) and this one.
            result[next] = mkString(state, found.prefix());
            ++next;

            // Sub-match 0 is the whole match, so the capture groups are
            // sub-matches 1 .. size() - 1.
            const size_t groupCount = found.size() - 1;
            auto groups = state.buildList(groupCount);
            for (size_t g = 0; g < groupCount; ++g) {
                const auto & sub = found[g + 1];
                if (sub.matched)
                    groups[g] = mkString(state, sub);
                else
                    groups[g] = &Value::vNull;
            }

            result[next] = state.allocValue();
            result[next]->mkList(groups);
            ++next;

            // Only the last match fills the trailing slot with its suffix.
            if (next + 1 == slotCount) {
                result[next] = mkString(state, found.suffix());
                ++next;
            }
        }

        assert(next == slotCount);

        v.mkList(result);

    } catch (const std::regex_error & e) {
        const bool outOfSpace = e.code() == std::regex_constants::error_space;
        if (!outOfSpace) {
            state.error<EvalError>("invalid regular expression '%s'", re).atPos(pos).debugThrow();
        } else {
            // limit is _GLIBCXX_REGEX_STATE_LIMIT for libstdc++
            state.error<EvalError>("memory limit exceeded by regular expression '%s'", re).atPos(pos).debugThrow();
        }
    }
}