Skip to main content

lib/dot_prompt/parser/lexer.ex

defmodule DotPrompt.Parser.Lexer do
  alias DotPrompt.Parser.Lexer.Token

  @moduledoc """
  Tokenizes .prompt files line by line.
  """

  def tokenize(content) do
    content
    |> String.split(["\r\n", "\n"])
    |> Enum.with_index(1)
    |> Enum.flat_map(fn {line, line_no} -> tokenize_line(line, line_no) end)
  end

  defp tokenize_line(line, line_no) do
    {line, doc_token} = extract_doc(line, line_no)

    trimmed = String.trim_leading(line)
    indent = String.length(line) - String.length(trimmed)
    trimmed = String.trim_trailing(trimmed)

    tokens = tokenize_trimmed(trimmed, line, line_no, indent)

    tokens = Enum.map(tokens, &%{&1 | indent: indent})
    tokens ++ Enum.map(doc_token, &%{&1 | indent: indent})
  end

  defp extract_doc(line, line_no) do
    if String.contains?(line, "->") do
      [text, doc] = String.split(line, "->", parts: 2)
      {text, [%Token{type: :doc, value: String.trim(doc), line: line_no}]}
    else
      {line, []}
    end
  end

  defp tokenize_trimmed("", _line, line_no, _indent) do
    [%Token{type: :text, value: "", line: line_no}]
  end

  defp tokenize_trimmed(trimmed, original_line, line_no, _indent) do
    cond do
      trimmed == "" ->
        [%Token{type: :text, value: "", line: line_no}]

      String.starts_with?(trimmed, "#") ->
        []

      match_block_start(trimmed, line_no) ->
        match_block_start(trimmed, line_no)

      match_block_end(trimmed, line_no) ->
        match_block_end(trimmed, line_no)

      match_condition(trimmed, line_no) ->
        match_condition(trimmed, line_no)

      trimmed == "else" ->
        [%Token{type: :else, value: "else", line: line_no}]

      match_case_start(trimmed, line_no) ->
        match_case_start(trimmed, line_no)

      match_case_label(trimmed, line_no) ->
        match_case_label(trimmed, line_no)

      match_vary_start(trimmed, line_no) ->
        match_vary_start(trimmed, line_no)

      match_fragment_static(trimmed, line_no) ->
        match_fragment_static(trimmed, line_no)

      match_fragment_dynamic(trimmed, line_no) ->
        match_fragment_dynamic(trimmed, line_no)

      match_param_def(trimmed, line_no) ->
        match_param_def(trimmed, line_no)

      match_fragment_def(trimmed, line_no) ->
        match_fragment_def(trimmed, line_no)

      match_init_item(trimmed, line_no) ->
        match_init_item(trimmed, line_no)

      true ->
        [%Token{type: :text, value: original_line, line: line_no}]
    end
  end

  defp match_block_start(trimmed, line_no) do
    cond do
      String.starts_with?(trimmed, "init do") ->
        [%Token{type: :block_start, value: "init", line: line_no}]

      String.starts_with?(trimmed, "docs do") ->
        [%Token{type: :block_start, value: "docs", line: line_no}]

      String.starts_with?(trimmed, "response do") ->
        [%Token{type: :block_start, value: "response", line: line_no}]

      # Message section blocks (system, user, context)
      String.starts_with?(trimmed, "system do") ->
        [%Token{type: :block_start, value: "system", line: line_no}]

      String.starts_with?(trimmed, "user do") ->
        [%Token{type: :block_start, value: "user", line: line_no}]

      String.starts_with?(trimmed, "context do") ->
        [%Token{type: :block_start, value: "context", line: line_no}]

      true ->
        nil
    end
  end

  defp match_block_end(trimmed, line_no) do
    cond do
      Regex.match?(~r/^end\s+(@?[\w\d_]+)$/, trimmed) ->
        [_, value] = Regex.run(~r/^end\s+(@?[\w\d_]+)$/, trimmed)
        [%Token{type: :block_end, value: String.trim(value), line: line_no}]

      trimmed == "end" ->
        [%Token{type: :block_end, value: nil, line: line_no}]

      Regex.match?(~r/^end\s+init$/, trimmed) ->
        [%Token{type: :block_end, value: "init", line: line_no}]

      Regex.match?(~r/^end\s+docs$/, trimmed) ->
        [%Token{type: :block_end, value: "docs", line: line_no}]

      Regex.match?(~r/^end\s+response$/, trimmed) ->
        [%Token{type: :block_end, value: "response", line: line_no}]

      true ->
        nil
    end
  end

  defp match_condition(trimmed, line_no) do
    if Regex.match?(~r/^(if|elif)\s+(@\w+).*?\sdo$/, trimmed) do
      [_, kind, var, cond_str] = Regex.run(~r/^(if|elif)\s+(@\w+)\s*(.*?)\sdo$/, trimmed)

      [
        %Token{
          type: :condition,
          value: %{kind: kind, var: var, cond: String.trim(cond_str)},
          line: line_no
        }
      ]
    end
  end

  defp match_case_start(trimmed, line_no) do
    if Regex.match?(~r/^case @\w+ do/, trimmed) do
      [_, var] = Regex.run(~r/^case (@\w+) do/, trimmed)
      [%Token{type: :case_start, value: var, line: line_no}]
    end
  end

  defp match_case_label(trimmed, line_no) do
    if Regex.match?(~r/^[a-zA-Z_]\w*\s+do$/, trimmed) and
         trimmed not in ["vary do", "init do", "docs do", "response do", "else"] do
      [label] = Regex.run(~r/^([a-zA-Z_]\w*)\s+do$/, trimmed)
      [%Token{type: :case_label, value: label, line: line_no}]
    end
  end

  defp match_vary_start(trimmed, line_no) do
    cond do
      Regex.match?(~r/^vary\s+(@\w+)\s+do/, trimmed) ->
        [_, var] = Regex.run(~r/^vary\s+(@\w+) do/, trimmed)
        [%Token{type: :vary_start, value: var, line: line_no}]

      trimmed == "vary do" ->
        [%Token{type: :vary_start, value: nil, line: line_no}]

      true ->
        nil
    end
  end

  defp match_fragment_static(trimmed, line_no) do
    if Regex.match?(~r/^\{[\w\-\.\/]+\}$/, trimmed) and trimmed != "{response_contract}" do
      [%Token{type: :fragment_static, value: trimmed, line: line_no}]
    end
  end

  defp match_fragment_dynamic(trimmed, line_no) do
    if Regex.match?(~r/^\{\{[\w\-\.\/]+\}\}$/, trimmed) do
      [%Token{type: :fragment_dynamic, value: trimmed, line: line_no}]
    end
  end

  defp match_param_def(trimmed, line_no) do
    if Regex.match?(~r/^@[\w\d_]+:\s*(.*)$/, trimmed) do
      [_, name, type_info] = Regex.run(~r/^(@[\w\d_]+):\s*(.*)$/, trimmed)

      if name == "@version" do
        [%Token{type: :init_item, value: "version", meta: String.trim(type_info), line: line_no}]
      else
        [%Token{type: :param_def, value: name, meta: String.trim(type_info), line: line_no}]
      end
    end
  end

  defp match_fragment_def(trimmed, line_no) do
    if Regex.match?(~r/^\{{1,2}[\w\-\.\/]+\}{1,2}: ?/, trimmed) do
      [_, name, type_info] = Regex.run(~r/^(\{{1,2}[\w\-\.\/]+\}{1,2}):\s*(.*)$/, trimmed)
      [%Token{type: :fragment_def, value: name, meta: String.trim(type_info), line: line_no}]
    end
  end

  defp match_init_item(trimmed, line_no) do
    cond do
      Regex.match?(~r/^(def|params|fragments):\s*(.*)$/, trimmed) ->
        [_, key, val] = Regex.run(~r/^(def|params|fragments):\s*(.*)$/, trimmed)

        if String.trim(val) == "" do
          [%Token{type: :case_label, value: key, line: line_no}]
        else
          [%Token{type: :init_item, value: key, meta: String.trim(val), line: line_no}]
        end

      Regex.match?(~r/^([a-zA-Z0-9_\-\.]+):\s*(.*)$/, trimmed) ->
        [_, key, meta] = Regex.run(~r/^([a-zA-Z0-9_\-\.]+):\s*(.*)$/, trimmed)
        [%Token{type: :init_item, value: key, meta: String.trim(meta), line: line_no}]

      true ->
        nil
    end
  end
end