lib/xpeg.ex

defmodule Xpeg do

  require Record
  
  Record.defrecord(:state, 
    func: :nil,
    back_stack: [],
    ret_stack: [],
    captures: [],
    cap_stack: [],
    status: :running
  )

  @moduledoc """

    XPeg is a pure Elixir pattern matching library. It provides macros to compile
    patterns and grammars (PEGs) to Elixir function which will parse a string and
    collect selected parts of the input. PEGs are not unlike regular expressions,
    but offer more power and flexibility, and have less ambiguities. (More about 
    PEGs on [Wikipedia](https://en.wikipedia.org/wiki/Parsing_expression_grammar))

    Some use cases where XPeg is useful are configuration or data file parsers,
    robust protocol implementations, input validation, lexing of programming
    languages or domain specific languages.

      p = Xpeg.peg :dict do
        :dict <- :pair * star("," * :pair) * !1
        :pair <- :word * "=" * :number * fn [a,b|cs] -> [{b,a}|cs] end
        :word <- cap(+{'a'..'z'})
        :number <- cap(+{'0'..'9'}) * fn [v|cs] -> [String.to_integer(v) | cs] end
      end

      Xpeg.match(p, "grass=4,horse=1,star=2")

  """

  def collect_captures(stack, acc, caps) do
    case {stack, acc} do
      {[{:open, s, si} | stack], _} ->
        collect_captures(stack, [{:open, s, si} | acc], caps)

      {[{:close, _sc, sic} | stack], [{:open, so, sio} | acc]} ->
        len = sic - sio
        cap = to_string(Enum.take(so, len))
        collect_captures(stack, acc, [cap | caps])

      {_, acc} ->
        {acc, caps}
    end
  end

  def collect_captures(ctx) do
    {cap_stack, captures} =
      state(ctx, :cap_stack)
      |> Enum.reverse()
      |> Xpeg.collect_captures([], [])
    state(ctx, cap_stack: cap_stack, captures: captures ++ state(ctx, :captures))
  end

  def trace(ip, cmd, s) do
    ip = to_string(ip) |> String.pad_leading(4, " ")
    s = inspect(s) |> String.slice(0, 20) |> String.pad_trailing(20, " ")
    IO.puts(" #{ip} | #{s} | #{cmd} ")
  end

  defp emit_inst(ip, inst, _options) do
    case inst do
      {:nop} ->
        quote do
        {ctx, s, si, unquote(ip + 1)}
        end

      {:any, n} ->
        quote do
          if Enum.count(s) >= unquote(n) do
            {ctx, Enum.drop(s, unquote(n)), si + unquote(n), unquote(ip + 1)}
          else
            {ctx, s, si, :fail}
          end
        end

      {:chr, c} ->
        quote do
          case s do
            [unquote(c) | s] -> {ctx, s, si + 1, unquote(ip + 1)}
            _ -> {ctx, s, si, :fail}
          end
        end

      {:set, cs} ->
        quote do
          case s do
            [c | s] when c in unquote(cs) -> {ctx, s, si + 1, unquote(ip + 1)}
            _ -> {ctx, s, si, :fail}
          end
        end

      {:span, cs} ->
        quote do
          {s1, s2} = Enum.split_while(s, fn c -> c in unquote(cs) end)
          {ctx, s2, si + Enum.count(s1), unquote(ip) + 1}
        end

      {:return} ->
        quote do
          case Xpeg.state(ctx, :ret_stack) do
            [ip | ret_stack] -> 
              ctx = Xpeg.state(ctx, ret_stack: ret_stack)
              {ctx, s, si, ip}
            [] ->
              ctx = Xpeg.state(ctx, status: :ok)
              {ctx, s, si, ip}
          end
        end

      {:choice, off_back, off_commit} ->
        quote do
          frame = %{
            ip_back: ip + unquote(off_back),
            ip_commit: ip + unquote(off_commit),
            ret_stack: Xpeg.state(ctx, :ret_stack),
            cap_stack: Xpeg.state(ctx, :cap_stack),
            s: s,
            si: si
          }

          back_stack = Xpeg.state(ctx, :back_stack)
          ctx = Xpeg.state(ctx, back_stack: [frame | back_stack])
          {ctx, s, si, unquote(ip + 1)}
        end

      {:commit} ->
        quote do
          [frame | back_stack] = Xpeg.state(ctx, :back_stack)
          ctx = Xpeg.state(ctx, back_stack: back_stack)
          {ctx, s, si, frame.ip_commit}
        end

      {:call, addr} ->
        quote do
          ret_stack = Xpeg.state(ctx, :ret_stack)
          ctx = Xpeg.state(ctx, ret_stack: [ip+1 | ret_stack])
          {ctx, s, si, unquote(addr)}
        end

      {:jump, addr} ->
        quote do
          {ctx, s, si, unquote(addr)}
        end

      {:capopen} ->
        quote do
          cap_stack = Xpeg.state(ctx, :cap_stack)
          ctx = Xpeg.state(ctx, cap_stack: [{:open, s, si} | cap_stack])
          {ctx, s, si, unquote(ip + 1)}
        end

      {:capclose} ->
        quote do
          cap_stack = Xpeg.state(ctx, :cap_stack)
          ctx = Xpeg.state(ctx, cap_stack: [{:close, s, si} | cap_stack])
          {ctx, s, si, unquote(ip + 1)}
        end

      {:code, code} ->
        quote do
          ctx = Xpeg.collect_captures(ctx)
          func = unquote(code)
          captures = func.(Xpeg.state(ctx, :captures))
          ctx = Xpeg.state(ctx, captures: captures)
          {ctx, s, si, unquote(ip + 1)}
        end

      {:fail} ->
        quote do
          case Xpeg.state(ctx, :back_stack) do
            [frame | back_stack] ->
              ctx = Xpeg.state(ctx,
                back_stack: back_stack,
                ret_stack: frame.ret_stack,
                cap_stack: frame.cap_stack
              )

              {ctx, frame.s, frame.si, frame.ip_back}

            [] ->
              ctx = Xpeg.state(ctx, status: :error)
              {ctx, s, si, 0}
          end
        end
    end
  end

  defp emit(program, options \\ []) do
    cases =
      program.instructions
      |> Enum.map(fn {ip, inst} ->
        body = emit_inst(ip, inst, options)
        body =
          if options[:trace] do
            trace = quote do: Xpeg.trace(unquote(ip), unquote(inspect(inst)), s)
            {:__block__, [], [trace, body]}
          else
            body
          end
        {:->, [], [[ip], body]}
      end)

    f = quote do
      fn ctx, s, si, ip ->
        {ctx, s, si, ip} = case ip do unquote(cases) end
        func = state(ctx, :func)
        case Xpeg.state(ctx, :status) do
          :running -> func.(ctx, s, si, ip)
          _ -> {ctx, si}
        end
      end
    end

    if options[:debug] do
      IO.puts(Macro.to_string(f))
    end

    f
  end

  defp link_one(program, rules, name) do
    instructions = rules[name]

    program = %{
      program
      | symtab: Map.put(program.symtab, name, Enum.count(program.instructions)),
        instructions: program.instructions ++ instructions ++ [{:return}]
    }
    #|> IO.inspect

    Enum.reduce(instructions, program, fn inst, program ->
      case inst do
        {:call, callname} ->
          if !Map.has_key?(rules, callname) do
            raise("XPeg: rule '#{name}' is referencing undefined rule '#{callname}'")
          end

          if !Map.has_key?(program.symtab, callname) do
            link_one(program, rules, callname)
          else
            program
          end

        _ ->
          program
      end
    end)
  end

  defp link_grammar(grammar, options) do
    if options[:dump_ir] do
      IO.inspect(grammar)
    end

    program = %{
      instructions: [],
      symtab: %{}
    }

    program = link_one(program, grammar.rules, grammar.start)

    is = program.instructions
         #|> IO.inspect(limit: :infinity)
         |> peephole()
         |> resolve_addresses(program)

    is = is ++ [{:fail, {:fail}}]

    program = %{program | instructions: is}
    program
  end

  def peephole(insts) do
    case insts do
      # tail call optimization
      [{:call, name}, {:return} | rest] ->
        [{:jump, name}, {:nop} | peephole(rest)]
      [a | rest] -> [a | peephole(rest)]
      e -> e
    end
  end

  defp resolve_addresses(insts, program) do
    Enum.with_index(insts, fn inst, ip ->
      case inst do
        {op, name} when op in [:call, :jump] ->
          {ip, {op, program.symtab[name]}}

        inst ->
          {ip, inst}
      end
    end)
  end

  def make(start, rules, options) do
    %{start: start, rules: rules}
    |> link_grammar(options)
    |> emit(options)
  end

  defmacro peg(start, [{:do, v}]) do
    make(start, Parsepatt.parse(v), [])
  end

  defmacro peg(start, options, [{:do, v}]) do
    make(start, Parsepatt.parse(v), options)
  end

  defmacro patt(v) do
    make(:anon, %{anon: Parsepatt.parse(%{}, v) ++ [{:return}]}, [])
  end

  def match(func, s) do
    ctx = state(func: func)

    {time, {ctx, match_len}} = :timer.tc(fn ->
      func.(ctx, String.to_charlist(s), 0, 0)
    end)

    ctx = collect_captures(ctx)
    %{
      captures: state(ctx, :captures),
      status: state(ctx, :status),
      time: time / 1.0e6,
      match_len: match_len
    }
  end

end

# set ft=elixir