//// String editing by byte offset, with a source map as a byproduct.
////
//// Keep the original source, record overwrite/remove/insert edits keyed by
//// UTF-8 byte offset, then `to_string` and `generate_map` replay the log.
//// Surviving slices of the original produce mapping segments; inserted text
//// does not.
import gleam/bit_array
import gleam/dict.{type Dict}
import gleam/int
import gleam/list
import gleam/option.{type Option, None, Some}
import gleam/result
import gleam/string
import magic_string/codec.{type Segment, type SourceMap}
import magic_string/position_index
/// One overwrite/remove over the half-open byte range `[start, end)`. `content`
/// is the replacement text (empty string for a removal). The replacement is
/// emitted verbatim and produces NO mapping segment (it is synthesized text).
type Edit {
Edit(start: Int, end: Int, content: String)
}
/// Immutable string editor. Keeps the original `source` plus a sorted edit log
/// and is materialized lazily by `to_string` / `generate_map`.
///
/// - `intro` / `outro` accumulate `prepend` / `append` content wrapping the
/// whole string.
/// - `edits` maps an edit's start offset to the overwrite/remove at that point.
/// - `left_inserts` / `right_inserts` map a byte offset to text inserted just
/// left of (after the preceding content) or right of (before the following
/// content) that offset.
pub opaque type MagicString {
MagicString(
source: String,
intro: String,
outro: String,
edits: Dict(Int, Edit),
left_inserts: Dict(Int, String),
right_inserts: Dict(Int, String),
)
}
/// A materialized output fragment. `orig` is `Some(byte_offset)` for a surviving
/// slice of the original source (it produces a mapping) and `None` for inserted
/// or replacement text (no mapping).
type Piece {
Piece(text: String, orig: Option(Int))
}
/// Why an edit was rejected.
pub type EditError {
/// `attempted` shares at least one byte with an `existing` range.
Overlap(existing: #(Int, Int), attempted: #(Int, Int))
/// An insert at `at` falls strictly inside `range`
/// (`range.0 < at < range.1`). Boundaries are fine.
SwallowedInsert(at: Int, range: #(Int, Int))
/// `offset` is outside `[0, source_length]`.
OutOfBounds(offset: Int, source_length: Int)
/// `end < start`.
InvertedRange(start: Int, end: Int)
}
/// Human-readable description of an `EditError`.
pub fn describe_error(err: EditError) -> String {
case err {
Overlap(existing:, attempted:) ->
"edit "
<> range_str(attempted)
<> " overlaps existing edit "
<> range_str(existing)
SwallowedInsert(at:, range:) ->
"insert at offset "
<> int.to_string(at)
<> " falls inside overwrite/remove range "
<> range_str(range)
OutOfBounds(offset:, source_length:) ->
"offset "
<> int.to_string(offset)
<> " is out of bounds (source is "
<> int.to_string(source_length)
<> " bytes)"
InvertedRange(start:, end:) ->
"range ["
<> int.to_string(start)
<> ", "
<> int.to_string(end)
<> ") has end < start"
}
}
fn range_str(r: #(Int, Int)) -> String {
"[" <> int.to_string(r.0) <> ", " <> int.to_string(r.1) <> ")"
}
/// Create an editor over `source` with no edits.
pub fn new(source: String) -> MagicString {
MagicString(
source: source,
intro: "",
outro: "",
edits: dict.new(),
left_inserts: dict.new(),
right_inserts: dict.new(),
)
}
/// Replace the original byte range `[start, end)` with `with`. The
/// replacement is inserted text and produces no mapping segment.
///
/// Returns `Error` if `[start, end)` overlaps an existing range, would
/// swallow an existing insert, or is out of bounds. Same-start is
/// last-write-wins; adjacent ranges that only share a boundary are fine.
pub fn overwrite(
ms: MagicString,
start: Int,
end: Int,
with content: String,
) -> Result(MagicString, EditError) {
let len = string.byte_size(ms.source)
use Nil <- result.try(check_range(start, end, len))
use Nil <- result.try(check_no_overlap(ms, start, end))
use Nil <- result.try(check_no_swallowed_inserts(ms, start, end))
Ok(
MagicString(
..ms,
edits: dict.insert(ms.edits, start, Edit(start:, end:, content:)),
),
)
}
/// Delete the original byte range `[start, end)`. Returns `Error` for the
/// same reasons as `overwrite` (it is `overwrite` with an empty replacement).
pub fn remove(
ms: MagicString,
start: Int,
end: Int,
) -> Result(MagicString, EditError) {
overwrite(ms, start, end, "")
}
/// Insert `content` just LEFT of `index`: after the content that ends at
/// `index`, before anything inserted to the right of `index`. Repeated calls
/// at the same index append in call order.
///
/// Returns `Error(SwallowedInsert)` if `index` falls strictly inside an
/// existing overwrite/remove range (the insert would never be visited and so
/// would be silently dropped), or `Error(OutOfBounds)` if `index` is outside
/// the source.
pub fn append_left(
ms: MagicString,
index: Int,
content: String,
) -> Result(MagicString, EditError) {
use Nil <- result.try(check_insert_at(ms, index))
let existing = dict.get(ms.left_inserts, index) |> result.unwrap("")
Ok(
MagicString(
..ms,
left_inserts: dict.insert(ms.left_inserts, index, existing <> content),
),
)
}
/// Insert `content` just RIGHT of `index`: before the content that starts at
/// `index`, after anything inserted to the left of `index`. Repeated calls at
/// the same index append in call order.
///
/// Returns `Error` for the same reasons as `append_left`.
pub fn append_right(
ms: MagicString,
index: Int,
content: String,
) -> Result(MagicString, EditError) {
use Nil <- result.try(check_insert_at(ms, index))
let existing = dict.get(ms.right_inserts, index) |> result.unwrap("")
Ok(
MagicString(
..ms,
right_inserts: dict.insert(ms.right_inserts, index, existing <> content),
),
)
}
// --- Eager edit validation --------------------------------------------------
fn check_range(start: Int, end: Int, len: Int) -> Result(Nil, EditError) {
use Nil <- result.try(check_bound(start, len))
use Nil <- result.try(check_bound(end, len))
case end < start {
True -> Error(InvertedRange(start:, end:))
False -> Ok(Nil)
}
}
fn check_bound(offset: Int, len: Int) -> Result(Nil, EditError) {
case offset < 0 || offset > len {
True -> Error(OutOfBounds(offset:, source_length: len))
False -> Ok(Nil)
}
}
/// `[start, end)` overlaps an existing edit if any recorded range shares at
/// least one byte with it. An edit at the SAME start is last-write-wins, not
/// an overlap.
fn check_no_overlap(
ms: MagicString,
start: Int,
end: Int,
) -> Result(Nil, EditError) {
let conflict =
dict.values(ms.edits)
|> list.find(fn(e) {
// Overlap iff the ranges intersect on at least one byte, excluding the
// same-start last-write-wins case.
e.start != start && e.start < end && start < e.end
})
case conflict {
Ok(e) ->
Error(Overlap(existing: #(e.start, e.end), attempted: #(start, end)))
Error(Nil) -> Ok(Nil)
}
}
/// A new range swallows an insert if any recorded `append_left`/`append_right`
/// index lies strictly between `start` and `end`.
fn check_no_swallowed_inserts(
ms: MagicString,
start: Int,
end: Int,
) -> Result(Nil, EditError) {
let swallowed =
list.append(dict.keys(ms.left_inserts), dict.keys(ms.right_inserts))
|> list.find(fn(at) { at > start && at < end })
case swallowed {
Ok(at) -> Error(SwallowedInsert(at:, range: #(start, end)))
Error(Nil) -> Ok(Nil)
}
}
/// An insert at `index` is rejected if it lies strictly inside an existing
/// overwrite/remove range, or is out of bounds.
fn check_insert_at(ms: MagicString, index: Int) -> Result(Nil, EditError) {
use Nil <- result.try(check_bound(index, string.byte_size(ms.source)))
let enclosing =
dict.values(ms.edits)
|> list.find(fn(e) { index > e.start && index < e.end })
case enclosing {
Ok(e) -> Error(SwallowedInsert(at: index, range: #(e.start, e.end)))
Error(Nil) -> Ok(Nil)
}
}
/// Prepend `content` to the very start of the output. Repeated calls stack so
/// the most recent prepend ends up first.
pub fn prepend(ms: MagicString, content: String) -> MagicString {
MagicString(..ms, intro: content <> ms.intro)
}
/// Append `content` to the very end of the output.
pub fn append(ms: MagicString, content: String) -> MagicString {
MagicString(..ms, outro: ms.outro <> content)
}
/// Materialize the edited output as a string.
pub fn to_string(ms: MagicString) -> String {
materialize(ms)
|> list.fold("", fn(acc, piece) { acc <> piece.text })
}
// --- Materialization --------------------------------------------------------
/// Replay the edit log into an ordered list of output pieces.
///
/// Walks the original source from offset 0: at each position it emits any
/// left-then-right inserts bound to that offset, then either the replacement of
/// an edit beginning there (advancing past the edit's end) or the surviving
/// original slice up to the next boundary (carrying its original offset). `intro`
/// and `outro` wrap the result.
fn materialize(ms: MagicString) -> List(Piece) {
let bytes = bit_array.from_string(ms.source)
let byte_len = bit_array.byte_size(bytes)
// Conflicts (overlapping ranges, swallowed inserts, out-of-bounds offsets)
// are rejected eagerly by `overwrite`/`remove`/`append_left`/`append_right`,
// so by the time we reach here every recorded edit composes cleanly.
let boundaries = compute_boundaries(ms, byte_len)
let body = walk(ms, bytes, byte_len, boundaries, 0, [])
let intro_pieces = case ms.intro {
"" -> []
text -> [Piece(text, None)]
}
let outro_pieces = case ms.outro {
"" -> []
text -> [Piece(text, None)]
}
list.flatten([intro_pieces, list.reverse(body), outro_pieces])
}
/// Sorted, de-duplicated set of offsets at which a surviving slice must stop:
/// every edit start and every insert index, plus the end of the source.
fn compute_boundaries(ms: MagicString, byte_len: Int) -> List(Int) {
list.flatten([
dict.keys(ms.edits),
dict.keys(ms.left_inserts),
dict.keys(ms.right_inserts),
[byte_len],
])
|> list.unique
|> list.sort(int.compare)
}
/// Tail-recursive walk building the body piece list in reverse.
fn walk(
ms: MagicString,
bytes: BitArray,
byte_len: Int,
boundaries: List(Int),
pos: Int,
acc: List(Piece),
) -> List(Piece) {
let acc = push_insert(acc, ms.left_inserts, pos)
let acc = push_insert(acc, ms.right_inserts, pos)
use <- bool_at_end(pos >= byte_len, acc)
case dict.get(ms.edits, pos) {
Ok(edit) ->
case edit.end > pos {
True ->
walk(ms, bytes, byte_len, boundaries, edit.end, [
Piece(edit.content, None),
..acc
])
// Zero-width or already-consumed edit at this offset: treat as a plain
// boundary and emit the surviving slice.
False -> emit_slice(ms, bytes, byte_len, boundaries, pos, acc)
}
Error(Nil) -> emit_slice(ms, bytes, byte_len, boundaries, pos, acc)
}
}
/// Emit the surviving original slice from `pos` to the next boundary, carrying
/// `pos` as the mapping offset, then continue the walk.
fn emit_slice(
ms: MagicString,
bytes: BitArray,
byte_len: Int,
boundaries: List(Int),
pos: Int,
acc: List(Piece),
) -> List(Piece) {
let stop = next_boundary(boundaries, pos, byte_len)
let text = slice_bytes(bytes, pos, stop - pos)
walk(ms, bytes, byte_len, boundaries, stop, [Piece(text, Some(pos)), ..acc])
}
/// Helper so the end-of-source check reads as an early return without nesting.
fn bool_at_end(
at_end: Bool,
acc: List(Piece),
cont: fn() -> List(Piece),
) -> List(Piece) {
case at_end {
True -> acc
False -> cont()
}
}
fn push_insert(
acc: List(Piece),
inserts: Dict(Int, String),
pos: Int,
) -> List(Piece) {
case dict.get(inserts, pos) {
Ok("") -> acc
Ok(text) -> [Piece(text, None), ..acc]
Error(Nil) -> acc
}
}
fn next_boundary(boundaries: List(Int), pos: Int, byte_len: Int) -> Int {
boundaries
|> list.filter(fn(b) { b > pos })
|> list.first
|> result.unwrap(byte_len)
}
/// Slice `[start, start + len)` bytes of `bytes` back into a String. The range
/// always lands on UTF-8 boundaries (offsets come from edit/insert positions and
/// byte lengths), so a slice/decode failure can only mean an out-of-range
/// request, for which the empty string is the safe fragment.
fn slice_bytes(bytes: BitArray, start: Int, len: Int) -> String {
case bit_array.slice(bytes, start, len) {
Ok(slice) ->
case bit_array.to_string(slice) {
Ok(text) -> text
Error(Nil) -> ""
}
Error(Nil) -> ""
}
}
// --- Segment generation -----------------------------------------------------
/// Convert a piece list into mapping segments, threading the running generated
/// position. `gen_line` / `gen_col` accumulate across the whole materialized
/// output (and, for bundles, across sources). Returns the produced segments plus
/// the generated position just past the last piece.
fn pieces_to_segments(
pieces: List(Piece),
index: position_index.PositionIndex,
source_idx: Int,
gen_line: Int,
gen_col: Int,
) -> #(List(Segment), Int, Int) {
list.fold(pieces, #([], gen_line, gen_col), fn(acc, piece) {
let #(segs, gline, gcol) = acc
case piece.orig {
None -> {
let #(next_line, next_col) = advance(gline, gcol, piece.text)
#(segs, next_line, next_col)
}
Some(offset) -> {
let new_segs =
slice_segments(piece.text, offset, index, source_idx, gline, gcol)
let #(next_line, next_col) = advance(gline, gcol, piece.text)
#(list.append(new_segs, segs), next_line, next_col)
}
}
})
}
/// Produce one segment per generated line covered by a surviving slice. The
/// first line maps the slice's start (at the current generated column); each
/// subsequent line (after a newline in the slice) maps the corresponding
/// original line start at generated column 0. `hires` is false, so there is one
/// segment per line, not per token.
fn slice_segments(
text: String,
offset: Int,
index: position_index.PositionIndex,
source_idx: Int,
gen_line: Int,
gen_col: Int,
) -> List(Segment) {
let lines = string.split(text, "\n")
let #(segs, _byte_off) =
list.index_fold(lines, #([], 0), fn(state, line, i) {
let #(acc, byte_off) = state
let pos = position_index.lookup(index, offset + byte_off)
let seg_gen_col = case i {
0 -> gen_col
_ -> 0
}
let seg =
codec.Segment(
gen_line: gen_line + i,
gen_col: seg_gen_col,
source_idx: source_idx,
orig_line: pos.line,
orig_col: pos.column,
)
// +1 for the '\n' that separated this line from the next.
let next_off = byte_off + byte_size(line) + 1
#([seg, ..acc], next_off)
})
segs
}
/// Advance a generated `(line, col)` position past `text`. Columns are UTF-16
/// code units; lines increment per `\n`.
fn advance(gen_line: Int, gen_col: Int, text: String) -> #(Int, Int) {
case string.split(text, "\n") {
[] -> #(gen_line, gen_col)
[single] -> #(gen_line, gen_col + utf16_len(single))
lines -> {
let newlines = list.length(lines) - 1
let last = list.last(lines) |> result.unwrap("")
#(gen_line + newlines, utf16_len(last))
}
}
}
fn byte_size(text: String) -> Int {
bit_array.byte_size(bit_array.from_string(text))
}
/// Length of `text` in UTF-16 code units: astral codepoints (>= U+10000) count
/// as 2 (a surrogate pair), everything else as 1.
fn utf16_len(text: String) -> Int {
text
|> string.to_utf_codepoints
|> list.fold(0, fn(acc, cp) {
case string.utf_codepoint_to_int(cp) >= 0x10000 {
True -> acc + 2
False -> acc + 1
}
})
}
// --- Single-source map ------------------------------------------------------
/// Generate a Source Map v3 for a single edited source. `filename` is recorded
/// as the sole entry of `sources`; `include_content` controls whether the
/// original text is embedded in `sourcesContent`.
pub fn generate_map(
ms: MagicString,
filename: String,
opts: MapOptions,
) -> SourceMap {
let index = position_index.new(ms.source)
let pieces = materialize(ms)
let #(segments, _line, _col) = pieces_to_segments(pieces, index, 0, 0, 0)
let sources_content = case opts.include_content {
True -> [Some(ms.source)]
False -> [None]
}
codec.SourceMap(
version: 3,
file: opts.file,
source_root: opts.source_root,
sources: [filename],
sources_content: sources_content,
names: [],
mappings: codec.generate_mappings(segments),
)
}
/// Options shared by `generate_map` and `bundle_generate_map`.
pub type MapOptions {
MapOptions(
file: Option(String),
source_root: Option(String),
include_content: Bool,
)
}
/// Sensible defaults: no `file`, no `sourceRoot`, embed original content.
pub fn default_map_options() -> MapOptions {
MapOptions(file: None, source_root: None, include_content: True)
}
// --- Bundle -----------------------------------------------------------------
/// One source within a bundle: its display `filename` (entered into `sources`),
/// its original `content`, and the `MagicString` describing its edits.
type BundleSource {
BundleSource(filename: String, content: String, ms: MagicString)
}
/// A concatenation of edited sources. Materializes to the joined output text and
/// a single Source Map v3 spanning every source.
pub opaque type Bundle {
// Stored newest-first for O(1) `add_source`; reversed to output order on read.
Bundle(sources_rev: List(BundleSource))
}
/// An empty bundle.
pub fn bundle() -> Bundle {
Bundle(sources_rev: [])
}
/// Append a source to the bundle. `filename` is the name recorded in the map's
/// `sources`; `content` is the original text; `ms` is its edit log.
pub fn add_source(
b: Bundle,
filename: String,
content: String,
ms: MagicString,
) -> Bundle {
Bundle(sources_rev: [BundleSource(filename:, content:, ms:), ..b.sources_rev])
}
fn bundle_sources(b: Bundle) -> List(BundleSource) {
list.reverse(b.sources_rev)
}
/// Materialize the bundle output: each source's edited text, joined by newlines
/// (one line break between sources).
pub fn bundle_to_string(b: Bundle) -> String {
bundle_sources(b)
|> list.map(fn(src) { to_string(src.ms) })
|> string.join("\n")
}
/// Generate one Source Map v3 spanning every source in output order.
///
/// Walks each source's pieces, offsetting their segments into chunk-space: the
/// generated line/column accumulates across the whole output, and the newline
/// joining each source to the next advances the generated line by one (and
/// resets the column). Each source is assigned its index in `sources`.
pub fn bundle_generate_map(b: Bundle, opts: MapOptions) -> SourceMap {
let srcs = bundle_sources(b)
let #(segments, _line, _col, _idx) =
list.fold(srcs, #([], 0, 0, 0), fn(acc, src) {
let #(segs, gen_line, _gen_col, source_idx) = acc
let index = position_index.new(src.content)
let pieces = materialize(src.ms)
let #(new_segs, end_line, _end_col) =
pieces_to_segments(pieces, index, source_idx, gen_line, 0)
// The separator newline between this source and the next advances one
// generated line and resets the column.
#(list.append(new_segs, segs), end_line + 1, 0, source_idx + 1)
})
let sources = list.map(srcs, fn(src) { src.filename })
let sources_content =
list.map(srcs, fn(src) {
case opts.include_content {
True -> Some(src.content)
False -> None
}
})
codec.SourceMap(
version: 3,
file: opts.file,
source_root: opts.source_root,
sources: sources,
sources_content: sources_content,
names: [],
mappings: codec.generate_mappings(segments),
)
}