Skip to main content

src/magic_string@position_index.erl

-module(magic_string@position_index).
-compile([no_auto_import, nowarn_unused_vars, nowarn_unused_function, nowarn_nomatch, inline]).
-define(FILEPATH, "src/magic_string/position_index.gleam").
-export([new/1, line_count/1, lookup/2]).
-export_type([position/0, position_index/0]).

%% `-moduledoc`/`-doc` attributes are only emitted on OTP 27 and later. On
%% older releases both macros ignore their argument and expand to a no-op
%% `-compile([])` attribute, so every `?MODULEDOC(...)`/`?DOC(...)` call site
%% still compiles.
-if(?OTP_RELEASE >= 27).
-define(MODULEDOC(Str), -moduledoc(Str)).
-define(DOC(Str), -doc(Str)).
-else.
-define(MODULEDOC(Str), -compile([])).
-define(DOC(Str), -compile([])).
-endif.

?MODULEDOC(
    " UTF-8 byte offset -> (line, UTF-16 column) conversion.\n"
    "\n"
    " Source Map v3 columns count UTF-16 code units, not bytes, so for any\n"
    " non-ASCII source a byte offset is not a column. Build a `PositionIndex`\n"
    " once with `new`, then `lookup` each offset. Line and column are 0-based.\n"
).

%% A resolved source position: `{position, Line, Column}`. Both numbers are
%% 0-based; the column counts UTF-16 code units.
-type position() :: {position, integer(), integer()}.

%% Lookup table built by `new/1`:
%% `{position_index, Bytes, LineStarts, ByteLen}` holding the source bytes,
%% the ascending byte offsets at which lines begin (the first is always 0),
%% and the total byte size of the source.
-opaque position_index() :: {position_index,
        bitstring(),
        list(integer()),
        integer()}.

-file("src/magic_string/position_index.gleam", 56).
?DOC(
    " Scan `bytes` one byte at a time, pushing the offset that follows every\n"
    " `\\n` (0x0A) byte onto `acc`. `pos` is the offset of the first remaining\n"
    " byte within the whole source. The result is newest-first; the caller\n"
    " reverses it to obtain ascending line starts.\n"
).
-spec line_starts_loop(bitstring(), integer(), list(integer())) -> list(integer()).
line_starts_loop(<<>>, _Pos, Acc) ->
    Acc;
line_starts_loop(<<16#0A, Tail/bitstring>>, Pos, Acc) ->
    %% The byte right after a newline opens the next line.
    Next = Pos + 1,
    line_starts_loop(Tail, Next, [Next | Acc]);
line_starts_loop(<<_, Tail/bitstring>>, Pos, Acc) ->
    line_starts_loop(Tail, Pos + 1, Acc);
line_starts_loop(_, _Pos, Acc) ->
    %% Anything that is not at least one whole byte ends the scan.
    Acc.

-file("src/magic_string/position_index.gleam", 32).
?DOC(
    " Index `source` for later `lookup` calls. Line starts are byte offsets: 0,\n"
    " plus the offset just past every `\\n` (0x0A) byte. In a `\\r\\n` pair only\n"
    " the `\\n` ends the line, so the `\\r` stays as the final byte of the line\n"
    " it terminates. Searching for the raw 0x0A byte is UTF-8 safe, since that\n"
    " byte never occurs inside a multibyte sequence.\n"
).
-spec new(binary()) -> position_index().
new(Source) ->
    Bytes = gleam_stdlib:identity(Source),
    %% The scan yields starts newest-first; flip them into ascending order.
    Newest_first = line_starts_loop(Bytes, 0, [0]),
    {position_index, Bytes, lists:reverse(Newest_first), erlang:byte_size(Bytes)}.

-file("src/magic_string/position_index.gleam", 40).
?DOC(" How many lines the indexed source has; never less than 1.\n").
-spec line_count(position_index()) -> integer().
line_count({position_index, _Bytes, Line_starts, _Byte_len}) ->
    %% One recorded line start per line.
    erlang:length(Line_starts).

-file("src/magic_string/position_index.gleam", 99).
?DOC(
    " Length of a string measured in UTF-16 code units: astral codepoints\n"
    " (>= U+10000) need a surrogate pair and count as 2, every other codepoint\n"
    " counts as 1. `text` must be valid UTF-8; the binary is decoded in place\n"
    " without building an intermediate codepoint list.\n"
).
-spec utf16_len(binary()) -> integer().
utf16_len(Text) ->
    utf16_len_loop(Text, 0).

?DOC(
    " Tail-recursive worker for `utf16_len`: decodes one UTF-8 codepoint per\n"
    " step and adds its UTF-16 width to `units`.\n"
).
-spec utf16_len_loop(binary(), integer()) -> integer().
utf16_len_loop(<<Cp/utf8, Rest/binary>>, Units) when Cp >= 16#10000 ->
    %% Outside the Basic Multilingual Plane: encoded as a surrogate pair.
    utf16_len_loop(Rest, Units + 2);
utf16_len_loop(<<_/utf8, Rest/binary>>, Units) ->
    utf16_len_loop(Rest, Units + 1);
utf16_len_loop(<<>>, Units) ->
    Units.

-file("src/magic_string/position_index.gleam", 87).
?DOC(
    " UTF-16 code units covered by the bytes in `[start, end)`. When the slice\n"
    " cannot be taken or cannot be decoded as a string (a boundary falls inside\n"
    " a codepoint, which spans and line starts from valid source never do),\n"
    " the plain byte distance `end - start` is returned instead.\n"
).
-spec utf16_count(bitstring(), integer(), integer()) -> integer().
utf16_count(Bytes, Start, End) ->
    Span = End - Start,
    case gleam_stdlib:bit_array_slice(Bytes, Start, Span) of
        {ok, Slice} ->
            case gleam@bit_array:to_string(Slice) of
                {ok, Text} ->
                    utf16_len(Text);

                {error, _} ->
                    %% Slice is not decodable text: fall back to bytes.
                    Span
            end;

        {error, _} ->
            %% Range could not be sliced: fall back to bytes.
            Span
    end.

-file("src/magic_string/position_index.gleam", 69).
?DOC(
    " Locate the greatest line start that is `<= offset` by walking the\n"
    " ascending `line_starts` list. `idx` counts the starts accepted so far and\n"
    " `cur_start` is the most recent of them. Returns `(line_index, line_start)`\n"
    " with `line_index = idx - 1`. Callers pass a list that begins with 0 and an\n"
    " `offset >= 0`, so at least one start is accepted and the returned index\n"
    " is never negative.\n"
).
-spec find_line(list(integer()), integer(), integer(), integer()) -> {integer(),
    integer()}.
%% Guarded clauses keep the scan a direct tail call per element, with no
%% closure allocation or remote call just to compare two integers.
find_line([Start | _], Offset, Idx, Cur_start) when Start > Offset ->
    %% First start beyond the offset: the previously accepted one wins.
    {Idx - 1, Cur_start};
find_line([Start | Rest], Offset, Idx, _Cur_start) ->
    find_line(Rest, Offset, Idx + 1, Start);
find_line([], _Offset, Idx, Cur_start) ->
    %% Ran out of starts: the offset lies on the last line.
    {Idx - 1, Cur_start}.

-file("src/magic_string/position_index.gleam", 47).
?DOC(
    " Resolve a byte offset to a 0-based `(line, UTF-16 column)` position.\n"
    " The offset is first clamped into `[0, byte_len]`, so an end-exclusive\n"
    " span boundary at end-of-file maps to the position just past the last\n"
    " byte. The column is the UTF-16 length of the text between the start of\n"
    " the containing line and the offset.\n"
).
-spec lookup(position_index(), integer()) -> position().
lookup(Index, Byte_offset) ->
    {position_index, Bytes, Line_starts, Byte_len} = Index,
    Clamped = gleam@int:clamp(Byte_offset, 0, Byte_len),
    {Line, Line_start} = find_line(Line_starts, Clamped, 0, 0),
    {position, Line, utf16_count(Bytes, Line_start, Clamped)}.