//// Molt's TOML parser.
////
//// This is an optimized recursive descent parser that combines scanning and
//// tokenization into a single step. The optimization begins with TOML being
//// a line-oriented format, where table headers, table array headers, and
//// key-value assignments must be on a single line with only whitespace leading
//// and comments following.
////
//// It uses multiple [splitter][splitter] instances to efficiently feed the
//// parser levels with token-appropriate data and constructs that are permitted
//// to cross lines (multi-line strings, array values) can pull additional lines
//// from the source.
////
//// The performance of this parser is acceptable up to 25% slower than `tom`
//// version 1 (initial benchmarks suggest it may be 25% faster).
////
//// [splitter]: https://splitter.hexdocs.pm/
import gleam/bool
import gleam/list
import gleam/option.{None, Some}
import gleam/result
import gleam/string
import greenwood.{
type Element, type Node, type Token, Bare, Node, NodeElement as N, Token,
TokenElement as T, Trivia,
}
import molt/error.{type MoltError, ParseError}
import molt/internal/classifier
import molt/internal/parser/core.{
type Splitters, type State, Building, NoTable, State,
}
import molt/internal/parser/scanners
import molt/internal/utils
import molt/types.{type TomlKind}
import splitter
// --- Public API ---
/// Parse TOML source into a Document CST node.
///
/// An up-front scan halts on any forbidden control character (per the TOML
/// spec: U+0000–U+0008, U+000B, U+000C, U+000E–U+001F, U+007F, and bare CR not
/// followed by LF) and is treated as corrupt input.
///
/// If there is a leading UTF-8 BOM, we keep it.
pub fn parse(source: String) -> Result(Node(TomlKind), MoltError) {
let #(bom, source) = case source {
"\u{FEFF}" <> rest -> #("\u{FEFF}", rest)
_ -> #("", source)
}
use _ <- result.try(scan_control(source))
core.new_state(bom)
|> do_parse(source)
}
/// Single-pass control-char scan using a splitter (native fast scan).
/// On hit, computes the byte offset from the prefix length.
fn scan_control(source: String) -> Result(Nil, MoltError) {
splitter.new([
"\u{0000}", "\u{0001}", "\u{0002}", "\u{0003}", "\u{0004}", "\u{0005}",
"\u{0006}", "\u{0007}", "\u{0008}", "\u{000B}", "\u{000C}", "\u{000E}",
"\u{000F}", "\u{0010}", "\u{0011}", "\u{0012}", "\u{0013}", "\u{0014}",
"\u{0015}", "\u{0016}", "\u{0017}", "\u{0018}", "\u{0019}", "\u{001A}",
"\u{001B}", "\u{001C}", "\u{001D}", "\u{001E}", "\u{001F}", "\u{007F}",
"\u{FEFF}", "\r",
])
|> do_scan_control(input: source, offset: 0)
}
fn do_scan_control(
controls controls: splitter.Splitter,
input input: String,
offset offset: Int,
) -> Result(Nil, MoltError) {
let #(prefix, delim, input) = splitter.split(controls, input)
let here = offset + string.byte_size(prefix)
case delim {
"" -> Ok(Nil)
"\u{FEFF}" -> Error(ParseError("BOM not at start of file", here))
"\u{0000}" -> Error(ParseError("Null byte in input", here))
"\r" ->
case input {
"\n" <> more ->
do_scan_control(controls:, input: more, offset: here + 2)
_ -> Error(ParseError("Bare carriage return", here))
}
_ -> Error(ParseError("Forbidden control character", here))
}
}
fn do_parse(state: State, input: String) -> Result(Node(TomlKind), MoltError) {
use Nil <- result.try(state.error)
case input {
"" -> core.finalize(state)
_ -> {
let #(state, remaining) = process_line(state, input)
do_parse(state, remaining)
}
}
}
fn process_line(state: State, input: String) -> #(State, String) {
dispatch_line(state, splitter.split(state.sp.line, input))
}
fn dispatch_line(
state: State,
split_line: #(String, String, String),
) -> #(State, String) {
let #(line, nl, rest) = split_line
case core.peek_first_significant(line) {
"" -> #(handle_blank_line(state:, line:, nl:), rest)
"#" <> _ -> #(handle_comment_line(state:, line:, nl:), rest)
"[[" <> _ -> #(
handle_table_line(state:, kind: types.ArrayOfTables, line:, nl:),
rest,
)
"[" <> _ -> #(
handle_table_line(state:, kind: types.Table, line:, nl:),
rest,
)
_ -> handle_kv_line(state:, line:, nl:, source_rest: rest)
}
}
fn handle_blank_line(
state state: State,
line line: String,
nl nl: String,
) -> State {
let pending = case line {
"" -> state.pending_trivia
_ -> [greenwood.token(types.Whitespace, line), ..state.pending_trivia]
}
let pending = case nl {
"" -> pending
_ -> [greenwood.token(types.Newline, nl), ..pending]
}
State(..state, pending_trivia: pending)
}
fn handle_comment_line(
state state: State,
line line: String,
nl nl: String,
) -> State {
let #(pending, rest) = case core.take_ws(line) {
#("", rest) -> #(state.pending_trivia, rest)
#(ws, rest) -> #(
[greenwood.token(types.Whitespace, ws), ..state.pending_trivia],
rest,
)
}
// rest starts with "#"; entire remainder of line is one comment token.
let pending = [greenwood.token(types.Comment, rest), ..pending]
let pending = case nl {
"" -> pending
_ -> [greenwood.token(types.Newline, nl), ..pending]
}
State(..state, pending_trivia: pending)
}
fn handle_table_line(
state state: State,
kind kind: TomlKind,
line line: String,
nl nl: String,
) -> State {
// Flush any currently-open table.
let doc_acc = core.flush_current_table(state)
// Tokenize the header line (reverse-ordered).
let header = scanners.tokenize_header_line(sp: state.sp, input: line, acc: [])
let header = case nl {
"" -> header
_ -> [greenwood.token(types.Newline, nl), ..header]
}
// Split the header line's trailing comment off into trailing trivia.
let #(children, trailing) =
core.peel_trailing_comment_rev(list.map(header, T))
// At the document head, route a blank-separated leading comment block to Root.
let #(leading, state) = head_split(state)
State(
..state,
doc_acc:,
current_table: Building(kind:, children:, trivia: leading, trailing:),
pending_trivia: [],
)
}
/// At the true document head (no statements yet, no open table), peel a
/// blank-separated leading comment block off the pending trivia and stash it as
/// Root's leading trivia, returning the trivia that stays on the first node. The
/// BOM is held separately (`state.bom`) and prepended to Root's leading trivia
/// at finalize, so it stays first regardless. Elsewhere this is a no-op.
fn head_split(state: State) -> #(List(Token(TomlKind)), State) {
case state.current_table, state.doc_acc {
NoTable, [] -> {
let #(root_rev, node_rev) = core.peel_head_block(state.pending_trivia)
#(node_rev, State(..state, root_leading: root_rev))
}
_, _ -> #(state.pending_trivia, state)
}
}
/// Process a key-value line. The value may span multiple source lines (open
/// arrays, inline tables, or multi-line strings); when that happens the
/// scanners pull more lines from `source_rest` via the line splitter, emitting
/// the in-between newlines as content of the spanning construct.
///
/// `pending_nl` is the newline terminating the _current_ line. If the value
/// spans, it gets emitted as content; otherwise it becomes the KV's trailing
/// newline. Returns the updated state and the remaining unconsumed source.
fn handle_kv_line(
state state: State,
line line: String,
nl nl: String,
source_rest source_rest: String,
) -> #(State, String) {
let #(children_rev, after_eq) =
scanners.consume_key_and_eq(sp: state.sp, input: line, acc: [])
let #(children_rev, final_nl, final_rest) =
consume_value(
sp: state.sp,
input: after_eq,
acc: children_rev,
pending_nl: nl,
source_rest:,
)
let children_rev = case final_nl {
"" -> children_rev
_ -> [T(Token(kind: types.Newline, text: final_nl)), ..children_rev]
}
let children_rev = wrap_dotted_key(children_rev)
let full = greenwood.node(types.KeyValue, children_rev)
// Validity is judged on the full line (a `key = # c` line is a KV with a
// missing value, not unparsable junk). A valid KV carries its trailing
// comment on `trivia.trailing`; an invalid line keeps the comment inline,
// wrapped verbatim in an Error node, and takes only leading trivia.
// At the document head, route a blank-separated leading comment block to Root.
let #(leading, state) = head_split(state)
let node = case is_valid_kv(full) {
True -> {
let #(clean_rev, trailing) = core.peel_trailing_comment_rev(children_rev)
attach_line_trivia(
node: greenwood.node(types.KeyValue, clean_rev),
leading:,
trailing:,
)
}
False ->
greenwood.node(types.Error, [N(full)])
|> attach_pending_trivia(leading)
}
let state = State(..state, pending_trivia: [])
#(add_kv(state, node), final_rest)
}
/// A KeyValue node is valid if it has an Equals token, a value after the
/// equals, and all BareKey tokens contain only valid bare key characters.
fn is_valid_kv(node: Node(TomlKind)) -> Bool {
// node.children are in reverse order at this point
let children = list.reverse(node.children)
has_equals(children)
&& has_value_after_equals(children)
&& all_bare_keys_valid(children)
}
fn has_equals(children: List(Element(TomlKind))) -> Bool {
list.any(children, fn(el) {
case el {
T(Token(kind: types.Equals, ..)) -> True
_ -> False
}
})
}
fn has_value_after_equals(children: List(Element(TomlKind))) -> Bool {
do_has_value_after_equals(children, False)
}
fn do_has_value_after_equals(
children: List(Element(TomlKind)),
past_equals: Bool,
) -> Bool {
case children, past_equals {
[], False -> True
[], True -> False
[T(Token(kind: types.Equals, ..)), ..rest], _ ->
do_has_value_after_equals(rest, True)
[T(Token(kind: types.Whitespace, ..)), ..rest], pe ->
do_has_value_after_equals(rest, pe)
[T(Token(kind: types.Newline, ..)), ..], True -> False
[_, ..], True -> True
[_, ..rest], False -> do_has_value_after_equals(rest, False)
}
}
fn all_bare_keys_valid(children: List(Element(TomlKind))) -> Bool {
list.all(children, fn(el) {
case el {
T(Token(kind: types.BareKey, text:)) -> utils.is_bare_key(text)
N(node) if node.kind == types.Key -> all_bare_keys_valid(node.children)
_ -> True
}
})
}
/// Consume the value portion of a KV line. Does not span lines itself:
/// only the constructs it dispatches into (arrays, inline tables, multi-line
/// strings) span. Returns the updated children, the pending newline (the one
/// terminating the line where the value ended), and the unconsumed source.
fn consume_value(
sp sp: Splitters,
input input: String,
acc acc: List(Element(TomlKind)),
pending_nl pending_nl: String,
source_rest source_rest: String,
) -> #(List(Element(TomlKind)), String, String) {
case input {
"" -> #(acc, pending_nl, source_rest)
_ -> {
let #(prefix, delim, rest) = splitter.split(sp.value, input)
let acc = emit_value_prefix(prefix, acc)
case delim {
"" -> #(acc, pending_nl, source_rest)
" " | "\t" ->
consume_value_after_whitespace(
sp:,
input: rest,
acc:,
pending_nl:,
source_rest:,
delim:,
)
"[" -> {
let #(node, rem, rem_nl, rem_rest) =
consume_array(sp:, input: rest, pending_nl:, source_rest:)
consume_value(
sp:,
input: rem,
acc: [N(node), ..acc],
pending_nl: rem_nl,
source_rest: rem_rest,
)
}
"{" -> {
let #(node, rem, rem_nl, rem_rest) =
consume_inline_table(sp:, input: rest, pending_nl:, source_rest:)
consume_value(
sp:,
input: rem,
acc: [N(node), ..acc],
pending_nl: rem_nl,
source_rest: rem_rest,
)
}
"\"" -> {
let #(tok_, rem, rem_nl, rem_rest) =
scanners.scan_basic_string(sp, rest, pending_nl, source_rest)
consume_value(
sp:,
input: rem,
acc: [T(tok_), ..acc],
pending_nl: rem_nl,
source_rest: rem_rest,
)
}
"'" -> {
let #(tok_, rem, rem_nl, rem_rest) =
scanners.scan_literal_string(
sp:,
input: rest,
pending_nl:,
source_rest:,
)
consume_value(
sp:,
input: rem,
acc: [T(tok_), ..acc],
pending_nl: rem_nl,
source_rest: rem_rest,
)
}
"#" -> #(
[T(Token(kind: types.Comment, text: "#" <> rest)), ..acc],
pending_nl,
source_rest,
)
"," ->
consume_value(
sp:,
input: rest,
acc: [T(Token(kind: types.Comma, text: "")), ..acc],
pending_nl:,
source_rest:,
)
"]" ->
consume_value(
sp:,
input: rest,
acc: [T(Token(kind: types.RightBracket, text: "")), ..acc],
pending_nl:,
source_rest:,
)
"}" ->
consume_value(
sp:,
input: rest,
acc: [T(Token(kind: types.RightBrace, text: "")), ..acc],
pending_nl:,
source_rest:,
)
"=" ->
consume_value(
sp:,
input: rest,
acc: [T(Token(kind: types.Equals, text: "")), ..acc],
pending_nl:,
source_rest:,
)
_ -> consume_value(sp:, input: rest, acc:, pending_nl:, source_rest:)
}
}
}
}
fn emit_value_prefix(
prefix: String,
acc: List(Element(TomlKind)),
) -> List(Element(TomlKind)) {
case prefix {
"" -> acc
_ -> [T(core.tokenize_value(prefix)), ..acc]
}
}
/// If `acc` ends with a `LocalDate` token and the next value-splitter prefix
/// together with the date forms a valid `LocalDateTime` or `OffsetDateTime`,
/// fold the three pieces (date, single-space ws, time) into a single
/// datetime token. Otherwise return Error so the caller emits the ws
/// normally.
fn try_merge_datetime(
acc acc: List(Element(TomlKind)),
ws ws: String,
rest rest: String,
sp sp: Splitters,
) -> Result(#(List(Element(TomlKind)), String), Nil) {
case ws, acc {
" ", [T(Token(kind: types.LocalDate, text: date_text)), ..rest_acc] -> {
let #(prefix, _delim, _r) = splitter.split(sp.value, rest)
use <- bool.guard(prefix == "", return: Error(Nil))
let combined = date_text <> " " <> prefix
case classifier.match_datetime(combined) {
Some(k) -> {
let new_rest = string.drop_start(rest, string.length(prefix))
Ok(#([T(Token(kind: k, text: combined)), ..rest_acc], new_rest))
}
None -> Error(Nil)
}
}
_, _ -> Error(Nil)
}
}
// --- Arrays (may span lines) ---
fn consume_array(
sp sp: Splitters,
input input: String,
pending_nl pending_nl: String,
source_rest source_rest: String,
) -> #(Node(TomlKind), String, String, String) {
let #(children_rev, rem, rem_nl, rem_rest) =
do_consume_array(
sp:,
input:,
acc: [T(Token(kind: types.LeftBracket, text: ""))],
pending_nl:,
source_rest:,
)
let children_rev = group_array_elements(children_rev)
let node = greenwood.node(types.Array, children_rev)
#(node, rem, rem_nl, rem_rest)
}
/// Group flat array children (in reverse order) into ArrayElement nodes.
/// Each element gets leading trivia (comments/newlines/whitespace before value)
/// and contains the value + trailing comma/whitespace.
/// LeftBracket and RightBracket stay as direct children of the Array node.
fn group_array_elements(
children: List(Element(TomlKind)),
) -> List(Element(TomlKind)) {
// Work in forward order, then reverse back
list.reverse(children)
|> do_group_array_forward(trivia_acc: [], element_acc: [], result: [])
}
/// Walk forward through array children, grouping into ArrayElement nodes.
/// - `trivia_acc`: accumulated trivia tokens (comments, newlines, whitespace)
/// that will become leading trivia on the next element
/// - `element_acc`: tokens belonging to the current element (value + trailing)
/// - `result`: completed elements/brackets in reverse order
fn do_group_array_forward(
children children: List(Element(TomlKind)),
trivia_acc trivia_acc: List(Token(TomlKind)),
element_acc element_acc: List(Element(TomlKind)),
result result: List(Element(TomlKind)),
) -> List(Element(TomlKind)) {
case children {
[] -> flush_element(trivia_acc:, element_acc:, result:)
[T(Token(kind: types.LeftBracket, text: _)), ..rest] ->
do_group_array_forward(children: rest, trivia_acc:, element_acc:, result: [
T(Token(kind: types.LeftBracket, text: "")),
..result
])
[T(Token(kind: types.RightBracket, text: _)), ..rest] -> {
let result = flush_element(trivia_acc:, element_acc:, result:)
do_group_array_forward(
children: rest,
trivia_acc: [],
element_acc: [],
result: [T(Token(kind: types.RightBracket, text: "")), ..result],
)
}
[T(Token(kind: types.Comment, ..) as tok), ..rest] ->
case element_acc {
[] ->
do_group_array_forward(
children: rest,
trivia_acc: [tok, ..trivia_acc],
element_acc: [],
result:,
)
_ ->
do_group_array_forward(
children: rest,
trivia_acc:,
element_acc: [T(tok), ..element_acc],
result:,
)
}
[T(Token(kind: types.Newline, ..) as tok), ..rest] ->
case element_acc {
[] ->
do_group_array_forward(
children: rest,
trivia_acc: [tok, ..trivia_acc],
element_acc: [],
result:,
)
_ -> {
let result =
flush_element(
trivia_acc:,
element_acc: [T(tok), ..element_acc],
result:,
)
do_group_array_forward(
children: rest,
trivia_acc: [],
element_acc: [],
result:,
)
}
}
[T(Token(kind: types.Whitespace, ..) as tok), ..rest] ->
case element_acc {
[] ->
do_group_array_forward(
children: rest,
trivia_acc: [tok, ..trivia_acc],
element_acc: [],
result:,
)
_ ->
do_group_array_forward(
children: rest,
trivia_acc:,
element_acc: [T(tok), ..element_acc],
result:,
)
}
[T(Token(kind: types.Comma, ..) as tok), ..rest] -> {
let element_acc = [T(tok), ..element_acc]
// Everything on the same line after comma belongs to this element
let #(element_acc, rest) = take_same_line(rest, element_acc)
let result = flush_element(trivia_acc:, element_acc:, result:)
do_group_array_forward(
children: rest,
trivia_acc: [],
element_acc: [],
result:,
)
}
[el, ..rest] ->
do_group_array_forward(
children: rest,
trivia_acc:,
element_acc: [el, ..element_acc],
result:,
)
}
}
/// Flush accumulated element tokens into an ArrayElement node with trivia.
fn flush_element(
trivia_acc trivia_acc: List(Token(TomlKind)),
element_acc element_acc: List(Element(TomlKind)),
result result: List(Element(TomlKind)),
) -> List(Element(TomlKind)) {
case element_acc {
[] ->
// No element to flush: put trivia back as bare tokens in correct order
case trivia_acc {
[] -> result
_ ->
list.fold(list.reverse(trivia_acc), result, fn(acc, tok) {
[T(tok), ..acc]
})
}
_ -> {
// Leave children and trivia in reverse order: reverse_tree will fix them.
// The element's trailing comment lives in trivia.trailing, like every
// other line-terminated node.
let #(children, trailing) = core.peel_trailing_comment_rev(element_acc)
let trivia = case trivia_acc, trailing {
[], [] -> Bare
_, _ -> Trivia(leading: trivia_acc, trailing:)
}
let node = Node(kind: types.ArrayElement, children:, trivia:)
[N(node), ..result]
}
}
}
/// Consume tokens on the same line (up to and including the newline).
/// Returns updated element_acc (reversed) and remaining children.
fn take_same_line(
children: List(Element(TomlKind)),
acc: List(Element(TomlKind)),
) -> #(List(Element(TomlKind)), List(Element(TomlKind))) {
case children {
[] -> #(acc, [])
[T(Token(kind: types.Newline, ..) as tok), ..rest] -> #(
[T(tok), ..acc],
rest,
)
[T(Token(kind: types.Whitespace, ..)) as el, ..rest] ->
take_same_line(rest, [el, ..acc])
[T(Token(kind: types.Comment, ..)) as el, ..rest] ->
take_same_line(rest, [el, ..acc])
_ -> #(acc, children)
}
}
/// Group flat inline table children (in reverse order) into KeyValue nodes.
/// LeftBrace and RightBrace stay as direct children. Each key=value entry
/// between commas becomes a KeyValue node with leading trivia.
fn group_inline_table_entries(
children: List(Element(TomlKind)),
) -> List(Element(TomlKind)) {
list.reverse(children)
|> do_group_inline_forward(trivia_acc: [], entry_acc: [], result: [])
}
fn do_group_inline_forward(
children children: List(Element(TomlKind)),
trivia_acc trivia_acc: List(Token(TomlKind)),
entry_acc entry_acc: List(Element(TomlKind)),
result result: List(Element(TomlKind)),
) -> List(Element(TomlKind)) {
case children {
[] -> flush_inline_entry(trivia_acc:, entry_acc:, result:)
[T(Token(kind: types.LeftBrace, ..)), ..rest] ->
do_group_inline_forward(children: rest, trivia_acc:, entry_acc:, result: [
T(Token(kind: types.LeftBrace, text: "")),
..result
])
[T(Token(kind: types.RightBrace, ..)), ..rest] -> {
let result = flush_inline_entry(trivia_acc:, entry_acc:, result:)
do_group_inline_forward(
children: rest,
trivia_acc: [],
entry_acc: [],
result: [T(Token(kind: types.RightBrace, text: "")), ..result],
)
}
[T(Token(kind: types.Comma, ..) as tok), ..rest] -> {
// Comma belongs to current entry, then close it
let entry_acc = [T(tok), ..entry_acc]
let #(entry_acc, rest) = take_same_line(rest, entry_acc)
let result = flush_inline_entry(trivia_acc:, entry_acc:, result:)
do_group_inline_forward(
children: rest,
trivia_acc: [],
entry_acc: [],
result:,
)
}
[T(Token(kind: types.Comment, ..) as tok), ..rest] ->
case entry_acc {
[] ->
do_group_inline_forward(
children: rest,
trivia_acc: [tok, ..trivia_acc],
entry_acc: [],
result:,
)
_ ->
do_group_inline_forward(
children: rest,
trivia_acc:,
entry_acc: [T(tok), ..entry_acc],
result:,
)
}
[T(Token(kind: types.Newline, ..) as tok), ..rest] ->
case entry_acc {
[] ->
do_group_inline_forward(
children: rest,
trivia_acc: [tok, ..trivia_acc],
entry_acc: [],
result:,
)
_ -> {
let entry_acc = [T(tok), ..entry_acc]
let result = flush_inline_entry(trivia_acc:, entry_acc:, result:)
do_group_inline_forward(
children: rest,
trivia_acc: [],
entry_acc: [],
result:,
)
}
}
[T(Token(kind: types.Whitespace, ..) as tok), ..rest] ->
case entry_acc {
[] ->
do_group_inline_forward(
children: rest,
trivia_acc: [tok, ..trivia_acc],
entry_acc: [],
result:,
)
_ ->
do_group_inline_forward(
children: rest,
trivia_acc:,
entry_acc: [T(tok), ..entry_acc],
result:,
)
}
[el, ..rest] ->
do_group_inline_forward(
children: rest,
trivia_acc:,
entry_acc: [el, ..entry_acc],
result:,
)
}
}
fn flush_inline_entry(
trivia_acc trivia_acc: List(Token(TomlKind)),
entry_acc entry_acc: List(Element(TomlKind)),
result result: List(Element(TomlKind)),
) -> List(Element(TomlKind)) {
case entry_acc {
[] ->
case trivia_acc {
[] -> result
_ ->
list.fold(list.reverse(trivia_acc), result, fn(acc, tok) {
[T(tok), ..acc]
})
}
_ -> {
let #(children, trailing) = core.peel_trailing_comment_rev(entry_acc)
let trivia = case trivia_acc, trailing {
[], [] -> Bare
_, _ -> Trivia(leading: trivia_acc, trailing:)
}
let node = Node(kind: types.KeyValue, children:, trivia:)
[N(node), ..result]
}
}
}
fn do_consume_array(
sp sp: Splitters,
input input: String,
acc acc: List(Element(TomlKind)),
pending_nl pending_nl: String,
source_rest source_rest: String,
) -> #(List(Element(TomlKind)), String, String, String) {
case input {
"" -> pull_next_line_array(sp:, acc:, pending_nl:, source_rest:)
_ -> {
let #(prefix, delim, rest) = splitter.split(sp.value, input)
let acc = emit_value_prefix(prefix, acc)
case delim {
"" -> pull_next_line_array(sp:, acc:, pending_nl:, source_rest:)
"]" -> #(
[T(Token(kind: types.RightBracket, text: "")), ..acc],
rest,
pending_nl,
source_rest,
)
"," ->
do_consume_array(
sp:,
input: rest,
acc: [T(Token(kind: types.Comma, text: "")), ..acc],
pending_nl:,
source_rest:,
)
" " | "\t" ->
consume_array_after_whitespace(
sp:,
input: rest,
acc:,
pending_nl:,
source_rest:,
delim:,
)
"[" -> {
let #(nested, rem, rem_nl, rem_rest) =
consume_array(sp:, input: rest, pending_nl:, source_rest:)
do_consume_array(
sp:,
input: rem,
acc: [N(nested), ..acc],
pending_nl: rem_nl,
source_rest: rem_rest,
)
}
"{" -> {
let #(nested, rem, rem_nl, rem_rest) =
consume_inline_table(sp:, input: rest, pending_nl:, source_rest:)
do_consume_array(
sp:,
input: rem,
acc: [N(nested), ..acc],
pending_nl: rem_nl,
source_rest: rem_rest,
)
}
"\"" -> {
let #(tok_, rem, rem_nl, rem_rest) =
scanners.scan_basic_string(sp, rest, pending_nl, source_rest)
do_consume_array(
sp:,
input: rem,
acc: [T(tok_), ..acc],
pending_nl: rem_nl,
source_rest: rem_rest,
)
}
"'" -> {
let #(tok_, rem, rem_nl, rem_rest) =
scanners.scan_literal_string(
sp:,
input: rest,
pending_nl:,
source_rest:,
)
do_consume_array(
sp:,
input: rem,
acc: [T(tok_), ..acc],
pending_nl: rem_nl,
source_rest: rem_rest,
)
}
"#" -> {
// Comment extends to end of current line. Emit it, then continue
// by pulling the next line (still inside the array).
let acc = [T(Token(kind: types.Comment, text: "#" <> rest)), ..acc]
pull_next_line_array(sp:, acc:, pending_nl:, source_rest:)
}
"=" ->
do_consume_array(
sp:,
input: rest,
acc: [T(Token(kind: types.InvalidValue, text: "=")), ..acc],
pending_nl:,
source_rest:,
)
_ -> do_consume_array(sp:, input: rest, acc:, pending_nl:, source_rest:)
}
}
}
}
fn consume_value_after_whitespace(
sp sp: Splitters,
input input: String,
acc acc: List(Element(TomlKind)),
pending_nl pending_nl: String,
source_rest source_rest: String,
delim delim: String,
) -> #(List(Element(TomlKind)), String, String) {
{
let #(ws, rest) = core.scan_ws(delim, input)
case try_merge_datetime(acc:, ws:, rest: rest, sp:) {
Ok(#(merged_acc, new_rest)) ->
consume_value(
sp:,
input: new_rest,
acc: merged_acc,
pending_nl:,
source_rest:,
)
Error(Nil) ->
consume_value(
sp:,
input: rest,
acc: [T(Token(kind: types.Whitespace, text: ws)), ..acc],
pending_nl:,
source_rest:,
)
}
}
}
fn consume_array_after_whitespace(
sp sp: Splitters,
input input: String,
acc acc: List(Element(TomlKind)),
pending_nl pending_nl: String,
source_rest source_rest: String,
delim delim: String,
) -> #(List(Element(TomlKind)), String, String, String) {
let #(ws, rest) = core.scan_ws(delim, input)
case try_merge_datetime(acc:, ws:, rest:, sp:) {
Ok(#(merged_acc, rest)) ->
do_consume_array(
sp:,
input: rest,
acc: merged_acc,
pending_nl:,
source_rest:,
)
Error(Nil) ->
do_consume_array(
sp:,
input: rest,
acc: [T(Token(kind: types.Whitespace, text: ws)), ..acc],
pending_nl:,
source_rest:,
)
}
}
/// Inside an open `[ ... ]`: emit the pending newline as content, pull the
/// next source line, and resume scanning. If the source is exhausted the
/// array is unterminated: we surface what we have rather than loop forever.
fn pull_next_line_array(
sp sp: Splitters,
acc acc: List(Element(TomlKind)),
pending_nl pending_nl: String,
source_rest source_rest: String,
) -> #(List(Element(TomlKind)), String, String, String) {
let acc = case pending_nl {
"" -> acc
_ -> [T(Token(kind: types.Newline, text: pending_nl)), ..acc]
}
case source_rest {
"" -> #(acc, "", "", "")
_ -> {
let #(next_line, next_nl, more_rest) =
splitter.split(sp.line, source_rest)
do_consume_array(
sp:,
input: next_line,
acc:,
pending_nl: next_nl,
source_rest: more_rest,
)
}
}
}
// --- Inline tables (may span lines; TOML 1.1) ---
fn consume_inline_table(
sp sp: Splitters,
input input: String,
pending_nl pending_nl: String,
source_rest source_rest: String,
) -> #(Node(TomlKind), String, String, String) {
let #(children_rev, rem, rem_nl, rem_rest) =
do_consume_inline_table(
sp:,
input:,
acc: [T(Token(kind: types.LeftBrace, text: ""))],
pending_nl:,
source_rest:,
after_eq: False,
)
let children_rev = group_inline_table_entries(children_rev)
let node = greenwood.node(types.InlineTable, children_rev)
#(node, rem, rem_nl, rem_rest)
}
/// `after_eq` distinguishes key position (False) from value position (True).
/// In key position, prefixes are emitted as BareKey. In value position they
/// are classified by `tokenize_value` so unquoted barewords surface as
/// `types.InvalidValue` for the validator to reject.
fn do_consume_inline_table(
sp sp: Splitters,
input input: String,
acc acc: List(Element(TomlKind)),
pending_nl pending_nl: String,
source_rest source_rest: String,
after_eq after_eq: Bool,
) -> #(List(Element(TomlKind)), String, String, String) {
case input {
"" ->
pull_next_line_inline_table(
sp:,
acc:,
pending_nl:,
source_rest:,
after_eq:,
)
_ -> {
let #(prefix, delim, rest) = splitter.split(sp.value, input)
let acc = case prefix, after_eq {
"", _ -> acc
_, True -> [T(core.tokenize_value(prefix)), ..acc]
_, False -> [T(Token(kind: types.BareKey, text: prefix)), ..acc]
}
case delim {
"" ->
pull_next_line_inline_table(
sp:,
acc:,
pending_nl:,
source_rest:,
after_eq:,
)
"}" -> #(
[T(Token(kind: types.RightBrace, text: "")), ..acc],
rest,
pending_nl,
source_rest,
)
"," ->
do_consume_inline_table(
sp:,
input: rest,
acc: [T(Token(kind: types.Comma, text: "")), ..acc],
pending_nl:,
source_rest:,
after_eq: False,
)
" " | "\t" -> {
let #(ws, rest_after) = core.scan_ws(delim, rest)
do_consume_inline_table(
sp:,
input: rest_after,
acc: [T(Token(kind: types.Whitespace, text: ws)), ..acc],
pending_nl:,
source_rest:,
after_eq:,
)
}
"=" ->
do_consume_inline_table(
sp:,
input: rest,
acc: [T(Token(kind: types.Equals, text: "")), ..acc],
pending_nl:,
source_rest:,
after_eq: True,
)
"[" -> {
let #(nested, input, pending_nl, source_rest) =
consume_array(sp:, input: rest, pending_nl:, source_rest:)
do_consume_inline_table(
sp:,
input:,
acc: [N(nested), ..acc],
pending_nl:,
source_rest:,
after_eq:,
)
}
"{" -> {
let #(nested, input, pending_nl, source_rest) =
consume_inline_table(sp:, input: rest, pending_nl:, source_rest:)
do_consume_inline_table(
sp:,
input:,
acc: [N(nested), ..acc],
pending_nl:,
source_rest:,
after_eq:,
)
}
"\"" -> {
let #(tok_, input, pending_nl, source_rest) =
scanners.scan_basic_string(sp, rest, pending_nl, source_rest)
do_consume_inline_table(
sp:,
input:,
acc: [T(tok_), ..acc],
pending_nl:,
source_rest:,
after_eq:,
)
}
"'" -> {
let #(tok_, input, pending_nl, source_rest) =
scanners.scan_literal_string(
sp:,
input: rest,
pending_nl:,
source_rest:,
)
do_consume_inline_table(
sp:,
input:,
acc: [T(tok_), ..acc],
pending_nl:,
source_rest:,
after_eq:,
)
}
"#" -> {
let acc = [T(Token(kind: types.Comment, text: "#" <> rest)), ..acc]
pull_next_line_inline_table(
sp:,
acc:,
pending_nl:,
source_rest:,
after_eq:,
)
}
_ ->
do_consume_inline_table(
sp:,
input: rest,
acc:,
pending_nl:,
source_rest:,
after_eq:,
)
}
}
}
}
fn pull_next_line_inline_table(
sp sp: Splitters,
acc acc: List(Element(TomlKind)),
pending_nl pending_nl: String,
source_rest source_rest: String,
after_eq after_eq: Bool,
) -> #(List(Element(TomlKind)), String, String, String) {
let acc = case pending_nl {
"" -> acc
_ -> [T(Token(kind: types.Newline, text: pending_nl)), ..acc]
}
case source_rest {
"" -> #(acc, "", "", "")
_ -> {
let #(next_line, pending_nl, source_rest) =
splitter.split(sp.line, source_rest)
do_consume_inline_table(
sp:,
input: next_line,
acc:,
pending_nl:,
source_rest:,
after_eq:,
)
}
}
}
// --- String scanners ---
/// If the key tokens contain a Dot, wrap the contiguous key tokens in a Key
/// node. Operates on reverse-ordered children, returns reverse-ordered
/// children.
///
/// Structure of `children_rev` for `[leading_ws] key_tokens [ws] = ws value`:
/// head→tail = [..value.., Equals, ws_before_eq, key_tokens.., leading_ws].
/// The Key node only encloses the key_tokens segment. Whitespace flanking it
/// (leading line ws, ws between key and `=`) stays inline as siblings.
///
/// All list segments below preserve the original head-to-tail (reverse-of-
/// forward) order of `children_rev`, so the final flatten yields a valid
/// reverse-ordered children list that `reverse_tree` flips to forward order.
fn wrap_dotted_key(
children_rev: List(Element(TomlKind)),
) -> List(Element(TomlKind)) {
let #(after_eq_rev, eq_and_key_rev) = split_at_equals_rev(children_rev, [])
case eq_and_key_rev {
[] -> children_rev
[eq_el, ..key_rev] ->
case has_dot_token(key_rev) {
False -> children_rev
True -> {
let #(trailing_ws_rev, after_ws_rev) = take_trivia_prefix(key_rev, [])
let #(key_inner_rev, leading_rev) = take_key_tokens(after_ws_rev, [])
let key_node = greenwood.node(types.Key, key_inner_rev)
list.flatten([
after_eq_rev,
[eq_el],
trailing_ws_rev,
[N(key_node)],
leading_rev,
])
}
}
}
}
fn split_at_equals_rev(
children_rev: List(Element(TomlKind)),
acc: List(Element(TomlKind)),
) -> #(List(Element(TomlKind)), List(Element(TomlKind))) {
case children_rev {
[] -> #(list.reverse(acc), [])
[T(Token(kind: types.Equals, ..)), ..] as tail -> #(list.reverse(acc), tail)
[el, ..rest] -> split_at_equals_rev(rest, [el, ..acc])
}
}
fn has_dot_token(children: List(Element(TomlKind))) -> Bool {
list.any(children, fn(el) {
case el {
T(Token(kind: types.Dot, ..)) -> True
_ -> False
}
})
}
/// Take whitespace tokens off the head of a list (in head order). Returns the
/// taken prefix in its original head order, and the unconsumed tail.
fn take_trivia_prefix(
items: List(Element(TomlKind)),
acc: List(Element(TomlKind)),
) -> #(List(Element(TomlKind)), List(Element(TomlKind))) {
case items {
[T(Token(kind: types.Whitespace, ..)) as el, ..rest] ->
take_trivia_prefix(rest, [el, ..acc])
_ -> #(list.reverse(acc), items)
}
}
/// Take the contiguous key-run from the head of a list (in head order).
/// A key-run is `BareKey | Dot | BasicString | LiteralString | Whitespace`:
/// TOML allows whitespace around dots (`a . b` is the same as `a.b`),
/// so interior whitespace belongs inside the Key node.
///
/// After greedy collection, strip any trailing whitespace (in walk order,
/// i.e., the most recently taken items). That trailing run corresponds to
/// leading line whitespace in forward order, which is _not_ part of the key.
fn take_key_tokens(
items: List(Element(TomlKind)),
acc: List(Element(TomlKind)),
) -> #(List(Element(TomlKind)), List(Element(TomlKind))) {
case items {
[T(Token(kind: types.BareKey, ..)) as el, ..rest]
| [T(Token(kind: types.Dot, ..)) as el, ..rest]
| [T(Token(kind: types.BasicString, ..)) as el, ..rest]
| [T(Token(kind: types.LiteralString, ..)) as el, ..rest]
| [T(Token(kind: types.Whitespace, ..)) as el, ..rest] ->
take_key_tokens(rest, [el, ..acc])
_ -> {
let #(stripped_acc, ws_tail) = peel_leading_ws_from_acc(acc, [])
#(list.reverse(stripped_acc), list.append(ws_tail, items))
}
}
}
/// Pop whitespace tokens off the head of `acc` (i.e., the most recently
/// taken items, which sit at the tail of the key-run in walk order).
/// Returns the trimmed acc and the popped whitespace in walk order
/// (which is `acc`-head order).
fn peel_leading_ws_from_acc(
acc: List(Element(TomlKind)),
ws_acc: List(Element(TomlKind)),
) -> #(List(Element(TomlKind)), List(Element(TomlKind))) {
case acc {
[T(Token(kind: types.Whitespace, ..)) as el, ..rest] ->
peel_leading_ws_from_acc(rest, [el, ..ws_acc])
_ -> #(acc, list.reverse(ws_acc))
}
}
// --- Table management ---
fn add_kv(state: State, kv_node: Node(TomlKind)) -> State {
case state.current_table {
NoTable -> State(..state, doc_acc: [N(kv_node), ..state.doc_acc])
Building(kind: k, children: c, trivia: t, trailing: tr) ->
State(
..state,
current_table: Building(
kind: k,
children: [N(kv_node), ..c],
trivia: t,
trailing: tr,
),
)
}
}
/// Attach leading (reverse-ordered pending) and trailing (reverse-ordered)
/// trivia to a node, leaving it `Bare` when both are empty.
fn attach_line_trivia(
node node: Node(TomlKind),
leading leading: List(Token(TomlKind)),
trailing trailing: List(Token(TomlKind)),
) -> Node(TomlKind) {
case leading, trailing {
[], [] -> node
_, _ -> Node(..node, trivia: Trivia(leading:, trailing:))
}
}
fn attach_pending_trivia(
node: Node(TomlKind),
pending_rev: List(Token(TomlKind)),
) -> Node(TomlKind) {
case pending_rev {
[] -> node
_ -> Node(..node, trivia: Trivia(leading: pending_rev, trailing: []))
}
}
// --- Finalization ---