%% src/gliff@internal@tokenize.erl

-module(gliff@internal@tokenize).
-compile([no_auto_import, nowarn_unused_vars, nowarn_unused_function, nowarn_nomatch, inline]).
-define(FILEPATH, "src/gliff/internal/tokenize.gleam").
-export([words/1]).

-if(?OTP_RELEASE >= 27).
-define(MODULEDOC(Str), -moduledoc(Str)).
-define(DOC(Str), -doc(Str)).
-else.
-define(MODULEDOC(Str), -compile([])).
-define(DOC(Str), -compile([])).
-endif.

?MODULEDOC(false).

-file("src/gliff/internal/tokenize.gleam", 36).
?DOC(false).
-spec is_whitespace(binary()) -> boolean().
is_whitespace(G) ->
    (((G =:= <<" "/utf8>>) orelse (G =:= <<"\t"/utf8>>)) orelse (G =:= <<"\n"/utf8>>))
    orelse (G =:= <<"\r"/utf8>>).

-file("src/gliff/internal/tokenize.gleam", 10).
?DOC(false).
-spec tokenize_loop(list(binary()), binary(), boolean(), list(binary())) -> list(binary()).
tokenize_loop(Graphemes, Current, In_whitespace, Acc) ->
    case Graphemes of
        [] ->
            case Current of
                <<""/utf8>> ->
                    Acc;

                _ ->
                    [Current | Acc]
            end;

        [G | Rest] ->
            G_is_ws = is_whitespace(G),
            case G_is_ws =:= In_whitespace of
                true ->
                    tokenize_loop(
                        Rest,
                        <<Current/binary, G/binary>>,
                        In_whitespace,
                        Acc
                    );

                false ->
                    case Current of
                        <<""/utf8>> ->
                            tokenize_loop(Rest, G, G_is_ws, Acc);

                        _ ->
                            tokenize_loop(Rest, G, G_is_ws, [Current | Acc])
                    end
            end
    end.

-file("src/gliff/internal/tokenize.gleam", 4).
?DOC(false).
-spec words(binary()) -> list(binary()).
words(Text) ->
    Graphemes = gleam@string:to_graphemes(Text),
    _pipe = tokenize_loop(Graphemes, <<""/utf8>>, false, []),
    lists:reverse(_pipe).