Skip to main content

src/swatch.gleam

//// Swatch is a CSS syntax highlighter.
////
//// Use [`to_tokens`](#to_tokens) for classified tokens, or
//// [`to_html`](#to_html) / [`to_ansi`](#to_ansi) to render directly.
////
//// Based on CSS Syntax Level 3.

import gleam/list
import gleam/string
import gleam_community/ansi
import houdini
import swatch/internal/lexer.{
  type LexToken, LexAtKeyword, LexCdc, LexCdo, LexCloseBrace, LexCloseBracket,
  LexCloseParen, LexColon, LexComma, LexComment, LexDelim, LexDimension,
  LexFunction, LexHash, LexIdent, LexNumber, LexOpenBrace, LexOpenBracket,
  LexOpenParen, LexPercentage, LexSemicolon, LexString, LexUrange, LexUrlBody,
  LexWhitespace, annotate_nested, is_valid_hex_color, lex, lex_token_to_source,
}

/// A classified CSS token. Every variant carries the exact slice of source
/// text it was cut from.
pub type Token {
  /// Spaces, tabs, newlines.
  Whitespace(String)
  /// A `/* ... */` block comment, including the delimiters. The legacy
  /// `<!--` and `-->` markers are also emitted as `Comment`.
  Comment(String)
  /// An element selector (`div`, `h1`, `*`) or other bare selector identifier.
  Selector(String)
  /// A class selector, including the leading `.` (e.g. `.btn`).
  ClassSelector(String)
  /// An id selector, including the leading `#` (e.g. `#header`).
  IdSelector(String)
  /// A pseudo-class or pseudo-element (e.g. `:hover`, `::before`).
  PseudoSelector(String)
  /// The attribute name in an attribute selector (`type` in `[type="text"]`),
  /// or a named namespace prefix (`ns` in `[ns|attr]`). The universal prefix
  /// (`*` in `[*|attr]`) emits as `Selector`.
  AttributeName(String)
  /// An unquoted attribute value (`text` in `[type=text]`). A quoted
  /// value emits as `String`.
  AttributeValue(String)
  /// The case-sensitivity flag (`i` or `s`) at the end of an attribute
  /// selector.
  AttributeFlag(String)
  /// An at-rule keyword, including the leading `@` (e.g. `@media`).
  AtRule(String)
  /// A property name on the left-hand side of a declaration (e.g. `color`).
  Property(String)
  /// A custom property name, including the leading `--` (e.g. `--brand`).
  Variable(String)
  /// A quoted string (including the quotes), or the unquoted body of a
  /// `url(...)` call.
  String(String)
  /// A numeric literal (e.g. `10`, `1.5`, `-2`). For a percentage or a
  /// dimension this is the numeric part; the `%` or unit follows as `Unit`.
  Number(String)
  /// A unit attached to a number (e.g. `px`, `em`, `%`).
  Unit(String)
  /// A `#` followed by hex digits in value position (e.g. `#fff`).
  HexColor(String)
  /// A function name immediately followed by `(` (e.g. `rgb`, `var`, `calc`).
  Function(String)
  /// An identifier in value position or at-rule prelude (e.g. `red`,
  /// `print`, `and`), or a `<urange>` (e.g. `U+0025-00FF`).
  Keyword(String)
  /// The `!important` annotation.
  Important(String)
  /// Arithmetic and combinators (`+`, `-`, `*`, `/`, `>`, `~`, `=`, `!`),
  /// range comparisons (`<`, `<=`, `>=`), `|` / `||` (namespace
  /// separator and column combinator), and attribute-selector matchers
  /// (`=`, `~=`, `|=`, `^=`, `$=`, `*=`). Nesting `&` is a `Selector`.
  Operator(String)
  /// Structural punctuation: `{`, `}`, `;`, `,`, `(`, `)`, `[`, `]`, a `:`
  /// that is not part of a pseudo selector, and a `.` that does not begin a
  /// class selector.
  Punctuation(String)
  /// Anything that did not match a more specific category.
  Other(String)
}

/// Split CSS source into classified tokens. Joining the source text of the
/// returned tokens, in order, gives back the original input.
pub fn to_tokens(code: String) -> List(Token) {
  code
  |> lex
  |> classify
}

/// Return the source text held by a token, unchanged. This includes any
/// sigil or delimiters stored with it (the `.` of a class, the quotes of a
/// string, the `/* */` of a comment).
pub fn token_to_source(token: Token) -> String {
  // Every variant has a single string payload; hand it back as-is.
  case token {
    Whitespace(text)
    | Comment(text)
    | Selector(text)
    | ClassSelector(text)
    | IdSelector(text)
    | PseudoSelector(text)
    | AttributeName(text)
    | AttributeValue(text)
    | AttributeFlag(text)
    | AtRule(text)
    | Property(text)
    | Variable(text)
    | String(text)
    | Number(text)
    | Unit(text)
    | HexColor(text)
    | Function(text)
    | Keyword(text)
    | Important(text)
    | Operator(text)
    | Punctuation(text)
    | Other(text) -> text
  }
}

/// Join the source text of each token, in order, into one string. This is
/// the inverse of [`to_tokens`](#to_tokens): `to_source(to_tokens(css)) == css`
/// holds for any input.
pub fn to_source(tokens: List(Token)) -> String {
  string.concat(list.map(tokens, token_to_source))
}

/// Highlight CSS source as HTML. Every token except whitespace is wrapped in
/// a `<span>` whose CSS class names the token kind. Put the result inside
/// `<pre><code>...</code></pre>` and style the classes listed below.
///
/// | Token          | CSS class          |
/// | -------------- | ------------------ |
/// | Whitespace     | (no wrapper)       |
/// | Comment        | hl-comment         |
/// | Selector       | hl-selector        |
/// | ClassSelector  | hl-class           |
/// | IdSelector     | hl-id              |
/// | PseudoSelector | hl-pseudo          |
/// | AttributeName  | hl-attribute       |
/// | AttributeValue | hl-attribute-value |
/// | AttributeFlag  | hl-attribute-flag  |
/// | AtRule         | hl-at-rule         |
/// | Property       | hl-property        |
/// | Variable       | hl-variable        |
/// | String         | hl-string          |
/// | Number         | hl-number          |
/// | Unit           | hl-unit            |
/// | HexColor       | hl-hex             |
/// | Function       | hl-function        |
/// | Keyword        | hl-keyword         |
/// | Important      | hl-important       |
/// | Operator       | hl-operator        |
/// | Punctuation    | hl-punctuation     |
/// | Other          | hl-other           |
///
/// A stylesheet to start from:
///
/// ```css
/// pre code .hl-comment         { color: #6a737d; font-style: italic }
/// pre code .hl-selector        { color: #d73a49 }
/// pre code .hl-class           { color: #6f42c1 }
/// pre code .hl-id              { color: #6f42c1 }
/// pre code .hl-pseudo          { color: #6f42c1 }
/// pre code .hl-attribute       { color: #6f42c1 }
/// pre code .hl-attribute-value { color: #032f62 }
/// pre code .hl-attribute-flag  { color: #6f42c1 }
/// pre code .hl-at-rule         { color: #d73a49 }
/// pre code .hl-property        { color: #005cc5 }
/// pre code .hl-variable        { color: #e36209 }
/// pre code .hl-string          { color: #032f62 }
/// pre code .hl-number          { color: #005cc5 }
/// pre code .hl-unit            { color: #005cc5 }
/// pre code .hl-hex             { color: #005cc5 }
/// pre code .hl-function        { color: #6f42c1 }
/// pre code .hl-keyword         { color: #22863a }
/// pre code .hl-important       { color: #d73a49; font-weight: bold }
/// pre code .hl-operator        { color: #d73a49 }
/// pre code .hl-punctuation     { color: #24292e }
/// pre code .hl-other           { color: #24292e }
/// ```
pub fn to_html(code: String) -> String {
  tokens_to_html(to_tokens(code))
}

/// HTML-render a token list you already have. Produces the same markup as
/// [`to_html`](#to_html) without tokenizing the source again.
pub fn tokens_to_html(tokens: List(Token)) -> String {
  string.concat(list.map(tokens, token_to_html))
}

/// Highlight CSS source for a terminal by wrapping tokens in ANSI escapes.
///
/// | Token                                                                                         | Color       |
/// | --------------------------------------------------------------------------------------------- | ----------- |
/// | Selector, ClassSelector, IdSelector, PseudoSelector, AttributeName, AttributeFlag, Keyword    | yellow      |
/// | Property, Variable                                                                            | cyan        |
/// | String, Number, Unit, HexColor, AttributeValue                                                | green       |
/// | Function                                                                                      | blue        |
/// | AtRule, Operator                                                                              | magenta     |
/// | Important                                                                                     | bold red    |
/// | Comment                                                                                       | italic gray |
/// | Whitespace, Punctuation, Other                                                                | reset       |
///
/// Structural tokens go through `ansi.reset` so that an attribute left open
/// by upstream text cannot bleed into characters such as `{` and `}`.
pub fn to_ansi(code: String) -> String {
  tokens_to_ansi(to_tokens(code))
}

/// ANSI-render a token list you already have. Produces the same output as
/// [`to_ansi`](#to_ansi) without tokenizing the source again.
pub fn tokens_to_ansi(tokens: List(Token)) -> String {
  string.concat(list.map(tokens, token_to_ansi))
}

// --- Classifier -------------------------------------------------------------
//
// Walks the flat `LexToken` stream and threads mode + brace/parenthesis stacks
// to decide which public `Token` each lex token (or short run) becomes.

// A lex token paired with its `annotate_nested` flag (see lexer): does the
// forward run from here reach a top-level `{` before `;`/`}`? The classifier
// reads this flag to decide whether a token inside a block starts a
// nested-rule prelude rather than a declaration.
type Annotated =
  #(LexToken, Bool)

// Nested-rule flag of the first token in `tokens`; `False` when the list is
// empty (EOF).
fn head_flag(tokens: List(Annotated)) -> Bool {
  case tokens {
    [] -> False
    [#(_, nested), ..] -> nested
  }
}

// Pair every lex token with its nested-rule flag, then run the classifier
// loop from the initial state with an empty output list.
fn classify(tokens: List(LexToken)) -> List(Token) {
  let flags = annotate_nested(tokens)
  let annotated = list.zip(tokens, flags)
  classify_loop(annotated, initial_state(), [])
}

// Drive `classify_one` until the input is exhausted. `out` is built in
// reverse (newest token first) and flipped once at the end.
fn classify_loop(
  tokens: List(Annotated),
  state: State,
  out: List(Token),
) -> List(Token) {
  case tokens {
    [head, ..tail] -> {
      // `classify_one` may consume more than the head, so continue from the
      // remainder it reports rather than from `tail`.
      let #(remaining, next_state, emitted) =
        classify_one(head, tail, state, out)
      classify_loop(remaining, next_state, emitted)
    }
    [] -> list.reverse(out)
  }
}

// Classify one lex token (plus any lookahead it merges with).
//
// `atoken` is the current token with its nested-rule flag, `rest` the tokens
// after it, `state` the classifier state, and `out` the output so far in
// reverse order (newest first). Returns the tokens still to be processed, the
// updated state, and the extended reversed output.
fn classify_one(
  atoken: Annotated,
  rest: List(Annotated),
  state: State,
  out: List(Token),
) -> #(List(Annotated), State, List(Token)) {
  let #(token, flag) = atoken
  case token {
    // Context-free tokens: emitted directly, state untouched.
    LexWhitespace(s) -> #(rest, state, [Whitespace(s), ..out])
    LexComment(s) -> #(rest, state, [Comment(s), ..out])
    LexCdo -> #(rest, state, [Comment("<!--"), ..out])
    LexCdc -> #(rest, state, [Comment("-->"), ..out])
    LexString(s) -> #(rest, state, [String(s), ..out])
    LexUrlBody(s) -> #(rest, state, [String(s), ..out])
    LexNumber(n) -> #(rest, state, [Number(n), ..out])
    // `out` is reversed, so the unit is pushed ahead of the number to end up
    // after it in the final list.
    LexPercentage(n) -> #(rest, state, [Unit("%"), Number(n), ..out])
    LexDimension(n, u) -> #(rest, state, [Unit(u), Number(n), ..out])
    LexUrange(s) -> #(rest, state, [Keyword(s), ..out])

    LexComma -> #(rest, state, [Punctuation(","), ..out])
    LexCloseBracket -> #(rest, state, [Punctuation("]"), ..out])

    // `{` opens a block: enter PropertyMode, push it on the brace stack, and
    // forget the active at-rule.
    LexOpenBrace -> {
      let state2 =
        State(
          ..state,
          mode: PropertyMode,
          brace_stack: [PropertyMode, ..state.brace_stack],
          at_rule: "",
        )
      #(rest, state2, [Punctuation("{"), ..out])
    }

    // `}` closes a block: pop one entry (an unmatched `}` leaves the empty
    // stack empty) and resume the enclosing block's mode, or SelectorMode at
    // top level.
    LexCloseBrace -> {
      let stack2 = case state.brace_stack {
        [] -> []
        [_, ..tail] -> tail
      }
      let outer_mode = case stack2 {
        [] -> SelectorMode
        [mode, ..] -> mode
      }
      let state2 =
        State(..state, mode: outer_mode, brace_stack: stack2, at_rule: "")
      #(rest, state2, [Punctuation("}"), ..out])
    }

    // `;` ends a declaration value (back to PropertyMode) or an at-rule
    // statement (back to the enclosing block's mode); other modes are kept.
    // The active at-rule is cleared either way.
    LexSemicolon -> {
      let new_mode = case state.mode {
        ValueMode -> PropertyMode
        AtRuleMode ->
          case state.brace_stack {
            [mode, ..] -> mode
            [] -> SelectorMode
          }
        other -> other
      }
      let state2 = State(..state, mode: new_mode, at_rule: "")
      #(rest, state2, [Punctuation(";"), ..out])
    }

    // `::name` is merged into one pseudo selector; a `::` with no name after
    // it is emitted as `Other`. A single `:` is context dependent.
    LexColon ->
      case rest {
        [#(LexColon, _), ..rest2] ->
          case leading_name(rest2) {
            Ok(#(name, rest3)) -> #(rest3, state, [
              PseudoSelector("::" <> name),
              ..out
            ])
            Error(_) -> #(rest2, state, [Other("::"), ..out])
          }
        _ -> classify_colon(rest, state, out)
      }

    // An at-keyword enters AtRuleMode and records which at-rule is active so
    // prelude parentheses and functions can be classified per rule.
    LexAtKeyword(s) -> {
      let state2 = State(..state, mode: AtRuleMode, at_rule: s)
      #(rest, state2, [AtRule(s), ..out])
    }

    // `#name`: hex color in value position, id selector in selector position.
    // Inside a block, the next token's flag decides whether this starts a
    // nested-rule prelude.
    LexHash(s) -> {
      let state = case state.mode {
        PropertyMode -> promote_if_nested(state, head_flag(rest))
        _ -> state
      }
      // `s` includes the leading `#`; validate only the part after it.
      let name = string.drop_start(s, 1)
      let token = case state.mode {
        ValueMode ->
          case is_valid_hex_color(name) {
            True -> HexColor(s)
            False -> Other(s)
          }
        SelectorMode -> IdSelector(s)
        _ -> Other(s)
      }
      #(rest, state, [token, ..out])
    }

    // Identifiers: a `--` prefix is always a custom property; otherwise the
    // current mode picks the variant. In PropertyMode the next token's flag
    // separates a nested selector from a property name.
    LexIdent(name) ->
      case string.starts_with(name, "--") {
        True -> #(rest, state, [Variable(name), ..out])
        False ->
          case state.mode {
            SelectorMode -> #(rest, state, [Selector(name), ..out])
            ValueMode -> #(rest, state, [Keyword(name), ..out])
            AtRuleMode -> #(rest, state, [Keyword(name), ..out])
            PropertyMode ->
              case head_flag(rest) {
                True -> #(rest, State(..state, mode: SelectorMode), [
                  Selector(name),
                  ..out
                ])
                False -> #(rest, state, [Property(name), ..out])
              }
          }
      }

    // A function name followed by `(`: emit both, record the mode to restore
    // at the matching `)` (the mode before any flip), and switch mode when
    // the active at-rule lists this function as a prelude function.
    LexFunction(name) ->
      case rest {
        [#(LexOpenParen, _), ..after] -> {
          let new_mode =
            function_context_flip(
              string.lowercase(name),
              string.lowercase(state.at_rule),
              state.mode,
            )
          let state2 =
            State(..state, mode: new_mode, parenthesis_stack: [
              FunctionCall(state.mode),
              ..state.parenthesis_stack
            ])
          #(after, state2, [Punctuation("("), Function(name), ..out])
        }
        _ -> #(rest, state, [Function(name), ..out])
      }

    // A bare `(`: at at-rule prelude grouping level it opens that at-rule's
    // prelude mode; anywhere else the mode is kept.
    LexOpenParen -> {
      let new_mode = case
        at_prelude_grouping_level(state.at_rule, state.parenthesis_stack)
      {
        True -> prelude_parenthesis_mode(state.at_rule)
        False -> state.mode
      }
      let state2 =
        State(
          ..state,
          parenthesis_stack: [Grouping, ..state.parenthesis_stack],
          mode: new_mode,
        )
      #(rest, state2, [Punctuation("("), ..out])
    }

    // `)`: pop the matching entry. A function call restores the mode it was
    // opened in; a prelude grouping returns to AtRuleMode; an unmatched `)`
    // leaves the mode alone.
    LexCloseParen -> {
      let #(popped, rest_stack) = case state.parenthesis_stack {
        [head, ..tail] -> #(Ok(head), tail)
        [] -> #(Error(Nil), [])
      }
      let new_mode = case popped {
        Ok(FunctionCall(restore_mode)) -> restore_mode
        Ok(Grouping) ->
          case at_prelude_grouping_level(state.at_rule, rest_stack) {
            True -> AtRuleMode
            False -> state.mode
          }
        Error(_) -> state.mode
      }
      let state2 = State(..state, mode: new_mode, parenthesis_stack: rest_stack)
      #(rest, state2, [Punctuation(")"), ..out])
    }

    // `[`: in selector position the whole attribute selector is classified in
    // one go; elsewhere it is plain punctuation.
    LexOpenBracket -> {
      // The `[`'s own flag (scan starts at the bracket, not the next token).
      let state = case state.mode {
        PropertyMode -> promote_if_nested(state, flag)
        _ -> state
      }
      case state.mode {
        SelectorMode -> classify_attribute_selector(rest, state, out)
        _ -> #(rest, state, [Punctuation("["), ..out])
      }
    }

    LexDelim(s) -> classify_delim(s, rest, state, out)
  }
}

// When `nested` is set (the flag marks a nested-rule prelude), return the
// state switched to SelectorMode; otherwise return it untouched. Callers use
// this to flip PropertyMode → SelectorMode.
fn promote_if_nested(state: State, nested: Bool) -> State {
  case nested {
    False -> state
    True -> State(..state, mode: SelectorMode)
  }
}

// Classify a single `:`. In PropertyMode it separates a property from its
// value (after first checking for a nested-rule prelude). In SelectorMode, or
// in an at-rule prelude outside any parentheses, it starts a pseudo selector.
// Anywhere else it is plain punctuation.
fn classify_colon(
  rest: List(Annotated),
  state: State,
  out: List(Token),
) -> #(List(Annotated), State, List(Token)) {
  let promoted = case state.mode {
    PropertyMode -> promote_if_nested(state, head_flag(rest))
    _ -> state
  }
  let with_colon = [Punctuation(":"), ..out]
  case promoted.mode, promoted.parenthesis_stack {
    PropertyMode, _ -> #(rest, State(..promoted, mode: ValueMode), with_colon)
    SelectorMode, _ | AtRuleMode, [] -> classify_pseudo(rest, promoted, out)
    _, _ -> #(rest, promoted, with_colon)
  }
}

// Merge a `:` with the name that follows it into one `PseudoSelector`. When
// no name follows, the colon is emitted on its own as punctuation.
fn classify_pseudo(
  rest: List(Annotated),
  state: State,
  out: List(Token),
) -> #(List(Annotated), State, List(Token)) {
  case leading_name(rest) {
    Error(_) -> #(rest, state, [Punctuation(":"), ..out])
    Ok(#(name, after_name)) -> {
      let pseudo = PseudoSelector(":" <> name)
      #(after_name, state, [pseudo, ..out])
    }
  }
}

// If the list starts with an identifier or a function name, return that name
// together with the tokens after it; otherwise `Error(Nil)`.
fn leading_name(
  tokens: List(Annotated),
) -> Result(#(String, List(Annotated)), Nil) {
  case tokens {
    [#(LexIdent(name), _), ..after] | [#(LexFunction(name), _), ..after] ->
      Ok(#(name, after))
    _ -> Error(Nil)
  }
}

// Classify a single-character delimiter `s`, merging it with following
// tokens where they form one construct (`<=`, `>=`, `||`, `.class`,
// `!important`).
fn classify_delim(
  s: String,
  rest: List(Annotated),
  state: State,
  out: List(Token),
) -> #(List(Annotated), State, List(Token)) {
  case s {
    // The nesting selector switches declaration context to SelectorMode.
    "&" -> {
      let next_state = case state.mode {
        PropertyMode | ValueMode -> State(..state, mode: SelectorMode)
        SelectorMode | AtRuleMode -> state
      }
      #(rest, next_state, [Selector("&"), ..out])
    }

    // Universal selector in selector position, multiplication elsewhere.
    "*" -> {
      let promoted = case state.mode {
        PropertyMode -> promote_if_nested(state, head_flag(rest))
        _ -> state
      }
      let star = case promoted.mode {
        SelectorMode -> Selector("*")
        _ -> Operator("*")
      }
      #(rest, promoted, [star, ..out])
    }

    // `.name` in selector position is a class selector; any other `.` is
    // punctuation.
    "." -> {
      let promoted = case state.mode {
        PropertyMode -> promote_if_nested(state, head_flag(rest))
        _ -> state
      }
      case promoted.mode, leading_name(rest) {
        SelectorMode, Ok(#(name, after_name)) -> #(after_name, promoted, [
          ClassSelector("." <> name),
          ..out
        ])
        _, _ -> #(rest, promoted, [Punctuation("."), ..out])
      }
    }

    "<" -> merge_delim_pair("<", "=", rest, state, out)
    ">" -> merge_delim_pair(">", "=", rest, state, out)
    "|" -> merge_delim_pair("|", "|", rest, state, out)

    "!" ->
      case important_match(rest) {
        Error(_) -> #(rest, state, [Operator("!"), ..out])
        Ok(#(text, after)) -> #(after, state, [Important(text), ..out])
      }

    "/" | "~" | "=" | "+" | "-" -> #(rest, state, [Operator(s), ..out])

    _ -> #(rest, state, [Other(s), ..out])
  }
}

// Emit `first` as an operator, fused with the next token when that token is
// the delimiter `second` (e.g. `<` + `=` → `<=`).
fn merge_delim_pair(
  first: String,
  second: String,
  rest: List(Annotated),
  state: State,
  out: List(Token),
) -> #(List(Annotated), State, List(Token)) {
  case rest {
    [#(LexDelim(next), _), ..after] if next == second -> #(after, state, [
      Operator(first <> second),
      ..out
    ])
    _ -> #(rest, state, [Operator(first), ..out])
  }
}

// Try to read the remainder of an `!important` annotation from the tokens
// after the `!`. On success, returns the full annotation text (starting with
// `!`) and the tokens left over.
fn important_match(
  tokens: List(Annotated),
) -> Result(#(String, List(Annotated)), Nil) {
  tokens
  |> important_match_loop("!")
}

// Skip whitespace and comments, appending their text to `acc`, until an
// identifier is reached. Succeeds only when that identifier is `important`
// (compared case-insensitively); any other token fails the match.
fn important_match_loop(
  tokens: List(Annotated),
  acc: String,
) -> Result(#(String, List(Annotated)), Nil) {
  case tokens {
    [#(LexWhitespace(filler), _), ..after]
    | [#(LexComment(filler), _), ..after] ->
      important_match_loop(after, acc <> filler)
    [#(LexIdent(name), _), ..after] ->
      case string.lowercase(name) {
        "important" -> Ok(#(acc <> name, after))
        _ -> Error(Nil)
      }
    _ -> Error(Nil)
  }
}

// Classify an attribute selector. Emits the opening `[`, then consumes the
// body through the closing `]` or EOF. `state` is returned unchanged: the
// body touches neither the mode nor the stacks.
fn classify_attribute_selector(
  rest: List(Annotated),
  state: State,
  out: List(Token),
) -> #(List(Annotated), State, List(Token)) {
  let opened = [Punctuation("["), ..out]
  let #(remaining, emitted) =
    classify_attribute_body_loop(rest, BeforeMatcher, opened)
  #(remaining, state, emitted)
}

// Walk an attribute-selector body. Each head token is judged by
// `attribute_head` (shared with `take_attribute_other_token_run`). Every
// recursive call is a direct self-call in tail position, so a long body is
// TCO'd and JS-stack-safe.
fn classify_attribute_body_loop(
  tokens: List(Annotated),
  position: AttributePosition,
  out: List(Token),
) -> #(List(Annotated), List(Token)) {
  case tokens {
    [] -> #([], out)
    [#(head, _), ..tail] ->
      case attribute_head(head, tail, position) {
        HeadClose(after) -> #(after, [Punctuation("]"), ..out])
        HeadEmit(emitted, after, next_position) -> {
          // `out` is newest-first, so the emitted run goes on reversed.
          let pushed = list.append(list.reverse(emitted), out)
          classify_attribute_body_loop(after, next_position, pushed)
        }
        // An `Other` run leaves `position` alone. The head itself is
        // `HeadOther`, so the run takes at least one token and the loop
        // always progresses.
        HeadOther -> {
          let #(text, after) =
            take_attribute_other_token_run(tokens, position, "")
          classify_attribute_body_loop(after, position, [Other(text), ..out])
        }
      }
  }
}

// The head of an attribute body at `position`: a construct to emit
// (`HeadEmit`), the closing `]` (`HeadClose`), or a token to fold into `Other`
// (`HeadOther`).
type AttributeHead {
  // `emitted`: tokens to output, in source order. `remaining`: input left
  // after the construct. `position`: where the body stands afterwards.
  HeadEmit(
    emitted: List(Token),
    remaining: List(Annotated),
    position: AttributePosition,
  )
  // The closing `]`; `remaining` is the input after it.
  HeadClose(remaining: List(Annotated))
  // Not recognized at this position; the caller decides how much input the
  // `Other` run covers.
  HeadOther
}

// Judge one token of an attribute-selector body. `rest` is the input after
// `token`; `position` says which slot of the selector we are in.
fn attribute_head(
  token: LexToken,
  rest: List(Annotated),
  position: AttributePosition,
) -> AttributeHead {
  case token {
    LexCloseBracket -> HeadClose(rest)
    LexWhitespace(space) -> HeadEmit([Whitespace(space)], rest, position)
    LexComment(text) -> HeadEmit([Comment(text)], rest, position)
    LexString(text) -> {
      let next_position = advance_past_attribute_value(position)
      HeadEmit([String(text)], rest, next_position)
    }
    // A function name classifies like an identifier; its `(` is a separate
    // token that later folds into `Other`.
    LexIdent(name) | LexFunction(name) -> {
      let #(classified, next_position) =
        emit_attribute_identifier(name, position)
      HeadEmit([classified], rest, next_position)
    }
    LexDelim(delim) -> attribute_delim_head(delim, rest, position)
    _ -> HeadOther
  }
}

// Delimiters inside an attribute selector: matchers and the namespace bar.
// A two-delimiter matcher (`~` then `=`, and likewise `^`, `$`, `|`, `*`) is
// fused into one operator here. `*|` is the universal namespace prefix only
// before a matcher; elsewhere the `*` folds into `Other`.
fn attribute_delim_head(
  d: String,
  rest: List(Annotated),
  position: AttributePosition,
) -> AttributeHead {
  case d, rest {
    "=", _ -> HeadEmit([Operator("=")], rest, AfterMatcher)

    "~", [#(LexDelim("="), _), ..after]
    | "^", [#(LexDelim("="), _), ..after]
    | "$", [#(LexDelim("="), _), ..after]
    | "|", [#(LexDelim("="), _), ..after]
    | "*", [#(LexDelim("="), _), ..after]
    -> HeadEmit([Operator(d <> "=")], after, AfterMatcher)

    // A `|` not followed by `=` is the namespace separator.
    "|", _ -> HeadEmit([Operator("|")], rest, position)

    "*", [#(LexDelim("|"), _), ..after] ->
      case position {
        BeforeMatcher ->
          HeadEmit([Selector("*"), Operator("|")], after, position)
        _ -> HeadOther
      }

    _, _ -> HeadOther
  }
}

// Gather the longest leading run of tokens that `attribute_head` judges as
// `HeadOther` into a single string appended to `acc`. Stops at the first
// recognized token or at EOF and returns the text with the input still
// pending. Directly self-recursive in tail position → TCO'd.
fn take_attribute_other_token_run(
  tokens: List(Annotated),
  position: AttributePosition,
  acc: String,
) -> #(String, List(Annotated)) {
  case tokens {
    [] -> #(acc, [])
    [#(head, _), ..tail] ->
      case attribute_head(head, tail, position) {
        HeadOther -> {
          let grown = acc <> lex_token_to_source(head)
          take_attribute_other_token_run(tail, position, grown)
        }
        // Recognized token: leave it in the input for the caller.
        _ -> #(acc, tokens)
      }
  }
}

// --- Tokenizer state machine ------------------------------------------------

// The classifier's current context; decides how context-dependent tokens
// (identifiers, `:`, `#`, `.`, `*`, `[`) are classified.
type Mode {
  // Selector position. The initial mode, and the mode after the outermost `}`.
  SelectorMode
  // Inside a `{ ... }` block, before a declaration's `:`.
  PropertyMode
  // After a declaration's `:`, until the `;`.
  ValueMode
  // In an at-rule prelude, entered at the at-keyword.
  AtRuleMode
}

// Each open `(` is recorded so the matching `)` can restore mode. `Grouping`:
// bare `(`, `)` falls back on context. `FunctionCall`: carries the mode the
// function opened in.
type Parenthesis {
  // A `(` not preceded by a function name.
  Grouping
  // The `(` of a function call; `restore_mode` is reinstated at its `)`.
  FunctionCall(restore_mode: Mode)
}

// Everything the classifier threads from one token to the next.
type State {
  State(
    // How context-dependent tokens are currently classified.
    mode: Mode,
    // One entry per open `{`, innermost first; consulted at `}` and `;` to
    // find the mode to fall back to.
    brace_stack: List(Mode),
    // One entry per open `(`, innermost first.
    parenthesis_stack: List(Parenthesis),
    // The at-keyword whose prelude is being read (as written in the source),
    // or "" when no at-rule prelude is active.
    at_rule: String,
  )
}

// State at the start of a stylesheet: selector position, nothing open, no
// active at-rule.
fn initial_state() -> State {
  State(
    mode: SelectorMode,
    brace_stack: [],
    parenthesis_stack: [],
    at_rule: "",
  )
}

// Whether a `(` seen now counts as a prelude grouping: an at-rule must be
// active, and the innermost open parenthesis (if any) must itself be a
// grouping rather than a function call.
// `@supports (a) and (b)` → True; `@supports selector(:not(.a))` → False.
fn at_prelude_grouping_level(
  at_rule: String,
  stack: List(Parenthesis),
) -> Bool {
  case at_rule, stack {
    "", _ -> False
    _, [] | _, [Grouping, ..] -> True
    _, _ -> False
  }
}

// Per-at-rule prelude classification, keyed on the lowercased at-keyword.
// Built by `at_rule_context`; at-rules without a dedicated entry get
// `PropertyMode` parentheses and no prelude functions.
type AtRuleContext {
  AtRuleContext(
    // Mode a bare prelude `(` opens: `SelectorMode` for a selector list
    // (`@scope`), `PropertyMode` for feature/media queries (`@supports`,
    // `@media`).
    prelude_parenthesis: Mode,
    // Prelude functional notations whose body opens a fixed mode (lowercase
    // names); an unlisted function leaves the mode unchanged.
    prelude_functions: List(#(String, Mode)),
  )
}

// Look up how the prelude of `at_rule` is classified. The keyword is matched
// case-insensitively and includes its leading `@`.
fn at_rule_context(at_rule: String) -> AtRuleContext {
  let keyword = string.lowercase(at_rule)

  // Only `@scope` takes a selector list inside its bare parentheses.
  let parenthesis_mode = case keyword {
    "@scope" -> SelectorMode
    _ -> PropertyMode
  }

  // Functional notations with a fixed body mode, per at-rule.
  let functions = case keyword {
    "@supports" -> [#("selector", SelectorMode)]
    "@container" -> [#("style", PropertyMode), #("scroll-state", PropertyMode)]
    "@import" -> [#("supports", PropertyMode)]
    _ -> []
  }

  AtRuleContext(
    prelude_parenthesis: parenthesis_mode,
    prelude_functions: functions,
  )
}

// Mode for the body of a functional notation. If the active at-rule lists
// `fn_name` as a prelude function, that function's fixed mode is used;
// otherwise `current_mode` is kept. The check is gated only on the active
// at-rule, so it also applies inside an outer grouping
// (`@supports (selector(.a))`).
fn function_context_flip(
  fn_name: String,
  at_rule: String,
  current_mode: Mode,
) -> Mode {
  let context = at_rule_context(at_rule)
  case list.key_find(context.prelude_functions, fn_name) {
    Error(_) -> current_mode
    Ok(fixed_mode) -> fixed_mode
  }
}

// Mode opened by a bare `(` in the prelude of `at_rule`.
fn prelude_parenthesis_mode(at_rule: String) -> Mode {
  let context = at_rule_context(at_rule)
  context.prelude_parenthesis
}

// Position in the attribute-selector body — drives which variant an identifier
// emits as: name (default / after `|`), value (after a matcher), flag (after a
// value), `Other` (after a flag). `Other` emission never advances the position.
type AttributePosition {
  // No matcher seen yet: identifiers are attribute names / namespace prefixes.
  BeforeMatcher
  // Just after a matcher (`=`, `~=`, ...): the next identifier or string is
  // the value.
  AfterMatcher
  // The value has been read: the next identifier is the flag.
  AfterValue
  // The flag has been read: further identifiers are `Other`.
  AfterFlag
}

// Classify an identifier inside an attribute selector by its slot, and
// return it with the position that follows.
fn emit_attribute_identifier(
  name: String,
  position: AttributePosition,
) -> #(Token, AttributePosition) {
  let classified = case position {
    BeforeMatcher -> AttributeName(name)
    AfterMatcher -> AttributeValue(name)
    AfterValue -> AttributeFlag(name)
    AfterFlag -> Other(name)
  }
  // A name keeps the position (more names may follow, e.g. after `|`); a
  // value or a flag moves one slot on; past the flag nothing advances.
  let next_position = case position {
    BeforeMatcher -> BeforeMatcher
    AfterMatcher -> AfterValue
    AfterValue | AfterFlag -> AfterFlag
  }
  #(classified, next_position)
}

// Position after a quoted string. In the value slot the string is the value,
// so the position moves on; in every other slot it stays where it is.
fn advance_past_attribute_value(
  position: AttributePosition,
) -> AttributePosition {
  case position {
    AfterMatcher -> AfterValue
    BeforeMatcher | AfterValue | AfterFlag -> position
  }
}

// --- HTML rendering ---------------------------------------------------------

// Render one token as HTML. Whitespace is emitted without a wrapper; every
// other token becomes a `<span>` carrying the class for its kind.
//
// All token text is HTML-escaped, whitespace included. Lexer-produced
// whitespace has nothing to escape, so output for tokenized source is
// unchanged. `tokens_to_html` is public, though, and accepts hand-built
// tokens, so a `Whitespace` payload must not reach the page as raw markup.
fn token_to_html(token: Token) -> String {
  case token {
    Whitespace(s) -> houdini.escape(s)
    Comment(s) -> wrap("hl-comment", s)
    Selector(s) -> wrap("hl-selector", s)
    ClassSelector(s) -> wrap("hl-class", s)
    IdSelector(s) -> wrap("hl-id", s)
    PseudoSelector(s) -> wrap("hl-pseudo", s)
    AttributeName(s) -> wrap("hl-attribute", s)
    AttributeValue(s) -> wrap("hl-attribute-value", s)
    AttributeFlag(s) -> wrap("hl-attribute-flag", s)
    AtRule(s) -> wrap("hl-at-rule", s)
    Property(s) -> wrap("hl-property", s)
    Variable(s) -> wrap("hl-variable", s)
    String(s) -> wrap("hl-string", s)
    Number(s) -> wrap("hl-number", s)
    Unit(s) -> wrap("hl-unit", s)
    HexColor(s) -> wrap("hl-hex", s)
    Function(s) -> wrap("hl-function", s)
    Keyword(s) -> wrap("hl-keyword", s)
    Important(s) -> wrap("hl-important", s)
    Operator(s) -> wrap("hl-operator", s)
    Punctuation(s) -> wrap("hl-punctuation", s)
    Other(s) -> wrap("hl-other", s)
  }
}

// Build `<span class="CLASS">CONTENT</span>`. `content` is HTML-escaped;
// `class` is inserted as given.
fn wrap(class: String, content: String) -> String {
  let escaped = houdini.escape(content)
  string.concat(["<span class=\"", class, "\">", escaped, "</span>"])
}

// --- ANSI rendering ---------------------------------------------------------

// Render one token for the terminal. Arms are grouped by colour.
fn token_to_ansi(token: Token) -> String {
  case token {
    Comment(s) -> s |> ansi.gray |> ansi.italic
    Important(s) -> s |> ansi.red |> ansi.bold

    Selector(s)
    | ClassSelector(s)
    | IdSelector(s)
    | PseudoSelector(s)
    | AttributeName(s)
    | AttributeFlag(s)
    | Keyword(s) -> ansi.yellow(s)

    Property(s) | Variable(s) -> ansi.cyan(s)

    String(s) | Number(s) | Unit(s) | HexColor(s) | AttributeValue(s) ->
      ansi.green(s)

    Function(s) -> ansi.blue(s)

    AtRule(s) | Operator(s) -> ansi.magenta(s)

    // Structural and unclassified text is reset rather than coloured.
    Whitespace(s) | Punctuation(s) | Other(s) -> ansi.reset(s)
  }
}