lib/expression/v2/parser.ex

defmodule Expression.V2.Parser do
  @moduledoc """
  A NimbleParsec parser for FLOIP expressions.

  FLOIP Expressions consist of plain text and of blocks. Plain text is returned untouched
  but blocks are evaluated.

  Blocks are prefixed with an `@` sign. Blocks can either have expressions between brackets or
  be used in a shorthand form when wanting to use a single function or variable substitution.

  As an example, the following are identical:

  * `@(now())` and `@now()`
  * `@contact.name` and `@(contact.name)`

  However, a full expression needs to be within brackets:

  `Tomorrow's is @(today().day + 1)`

  This parses it into an Abstract Syntax Tree (AST) which follows a style much like a Lisp would.
  It parses expressions in [Infix notation](https://en.wikipedia.org/wiki/Infix_notation) such as
  `1 + 1` and parses it into lists where the operator is the first element and the second element
  is the list of arguments for the operator.

  ```
  ["+", [1, 1]]
  ```

  Functions are expressed as:

  ```
  {"function name", [arg1, arg2]}
  ```

  Until we have a fixed scope of allowed functions, or if we can dynamically look up whether an
  `atom` is a function or a variable reference, we will need to rely on tuples to represent functions
  as otherwise the system has no means to distinguish the following AST as being a function call
  or a list as a variable:

  ```
  ["echo", [1, 2, 3]]
  ```

  Without being able to say _ahead_ of time whether or not `echo/1` is a known function, the
  system cannot reliable determine whether the result of this AST should be `["echo", [1, 2, 3]]`
  or the result of `echo(1, 2, 3)`.

  Variable references are single values.

  ```
  "contact"
  ```

  This module provides two functions for parsing. `parse/2` which will parse a full FLOIP expression
  including text and blocks, and `expression/2` which will parse expression blocks.

  Internally `parse/2` refers to the same parsers as `expression/2` for things that are expressions.
  """
  import NimbleParsec
  import Expression.DateHelpers

  # Booleans can be spelled in any mixed case
  boolean_true =
    choice([string("t"), string("T")])
    |> choice([string("r"), string("R")])
    |> choice([string("u"), string("U")])
    |> choice([string("e"), string("E")])
    |> replace(true)

  boolean_false =
    choice([string("f"), string("F")])
    |> choice([string("a"), string("A")])
    |> choice([string("l"), string("L")])
    |> choice([string("s"), string("S")])
    |> choice([string("e"), string("E")])
    |> replace(false)

  boolean =
    choice([
      boolean_true,
      boolean_false
    ])

  int =
    optional(string("-"))
    |> concat(ascii_string([?0..?9], min: 1))
    |> reduce({Enum, :join, [""]})
    |> map({String, :to_integer, []})

  # These are just regular floats, previous iteration used the
  # Decimal library but that just made some simple arithmetic
  # and comparisons more complicated than needed to be.
  float =
    optional(string("-"))
    |> integer(min: 1)
    |> string(".")
    # Using ascii string here instead of integer/2 to prevent us chopping
    # off leading zeros after the period.
    |> concat(ascii_string([?0..?9], min: 1))
    |> reduce({Enum, :join, [""]})
    |> map({String, :to_float, []})

  # This is inspired  by the NimbleParsec docs
  # https://hexdocs.pm/nimble_parsec/NimbleParsec.html#repeat_while/4
  defparsecp(
    :double_quoted_string,
    ascii_char([?"])
    |> repeat_while(
      choice([
        string(~S(\")) |> replace("\\\""),
        utf8_char([])
      ]),
      {:not_double_quote, []}
    )
    |> ascii_char([?"])
    |> reduce({List, :to_string, []})
  )

  @doc false
  defp not_double_quote(<<?", _::binary>>, context, _, _), do: {:halt, context}
  defp not_double_quote(_, context, _, _), do: {:cont, context}

  defparsecp(
    :single_quoted_string,
    ignore(ascii_char([?']))
    |> repeat_while(
      choice([
        string(~S(\')) |> replace("'"),
        utf8_char([])
      ]),
      {:not_single_quote, []}
    )
    |> ignore(ascii_char([?']))
    |> reduce(:to_double_quoted_string)
  )

  # Helper to convert a 'hello' parsed by parsec(:single_quoted_string) into
  # a "hello"
  def to_double_quoted_string(charlist) when is_list(charlist),
    do: inspect(to_string(charlist))

  @doc false
  def not_single_quote(<<?', _::binary>>, context, _, _), do: {:halt, context}
  def not_single_quote(_, context, _, _), do: {:cont, context}

  # We support single & double quoted strings.
  string_with_quotes =
    choice([
      parsec(:single_quoted_string),
      parsec(:double_quoted_string)
    ])

  # Atoms are names, these can be variable names or function names etc.
  atom =
    ascii_string([?a..?z, ?A..?Z, ?0..?9], min: 1)
    |> ascii_string([?a..?z, ?A..?Z, ?0..?9, ?_, ?-], min: 0)
    |> map({String, :downcase, []})
    |> reduce({Enum, :join, []})

  whitespace =
    choice([
      string(" "),
      string("\n"),
      string("\r")
    ])

  # Helper function to wrap parsers in, resulting in them ignoring
  # surrounding whitespace.
  #
  # This has to be an anonymous function otherwise the compiler cannot
  # find it during compilation steps.
  ignore_surrounding_whitespace = fn p ->
    ignore(repeat(whitespace))
    |> concat(p)
    |> ignore(repeat(whitespace))
  end

  list =
    ignore(string("["))
    |> wrap(
      repeat(
        parsec(:term_operator)
        |> optional(ignore(ignore_surrounding_whitespace.(string(","))))
      )
    )
    |> ignore(string("]"))

  function_arguments =
    ignore(string("("))
    |> concat(
      ignore_surrounding_whitespace.(
        wrap(
          repeat(
            parsec(:term_operator)
            |> optional(ignore(ignore_surrounding_whitespace.(string(","))))
          )
        )
      )
    )
    |> ignore(string(")"))

  function =
    atom
    |> concat(function_arguments)
    |> reduce(:as_function_tuple)

  lambda_capture =
    string("&")
    |> concat(integer(min: 1))
    |> reduce({Enum, :join, []})

  lambda =
    string("&")
    |> optional(ignore(string(" ")))
    |> choice([
      # either we get a block as a function
      function_arguments,
      # or we refer to a function directly
      wrap(parsec(:term_operator))
    ])
    |> reduce(:as_function_tuple)

  @doc false
  def as_function_tuple([binary]) when is_binary(binary), do: {binary, []}

  def as_function_tuple([binary, args]) when is_binary(binary) and is_list(args),
    do: {binary, args}

  range =
    integer(min: 1)
    |> ignore(string(".."))
    |> concat(integer(min: 1))
    |> optional(
      ignore(string("//"))
      |> concat(integer(min: 1))
    )
    |> reduce(:ensure_range)

  @doc false
  def ensure_range([first, last, step]), do: Range.new(first, last, step)
  def ensure_range([first, last]), do: Range.new(first, last)

  # A block is a expression that can be parsed and is surrounded
  # by opening & closing brackets
  block =
    ignore(string("("))
    |> parsec(:term_operator)
    |> ignore(string(")"))

  # A term is the lowest level types in an Expression
  term =
    times(
      choice([
        label(datetime(), "a datetime"),
        label(date(), "a date"),
        label(time(), "a time"),
        label(range, "a range"),
        label(float, "a float"),
        label(int, "an integer"),
        label(string_with_quotes, "a quoted string"),
        label(lambda_capture, "a capture"),
        label(lambda, "an anonymous function"),
        label(function, "a function"),
        label(boolean, "a boolean"),
        label(atom, "an atom")
      ]),
      min: 1
    )

  # Properties are only allowed to be atoms
  # So if we have a foo.bar, `bar` is property the atom
  # If we allow more complicated types here it causes confusion
  # because in `foo.false`, `false` would be parsed as a boolean type
  property =
    times(
      replace(string("."), "__property__")
      |> concat(atom),
      min: 1
    )

  # We allow parsec operators here to allow expressions
  # to be evaluated and their result to be used as a key
  # to lookup an attribute value
  attribute =
    times(
      replace(string("["), "__attribute__")
      |> concat(parsec(:term_operator))
      |> ignore(string("]")),
      min: 1
    )

  property_or_attribute = repeat(choice([property, attribute]))

  # A compound term is either a term or a term made up of terms
  # using a list or a block which may or may not have a property
  # or an attribute
  #
  # As an example `foo.bar` calls the property `bar` on term `foo`.
  compound_term_with_property_or_attribute =
    choice([
      label(list, "a list"),
      label(block, "a group"),
      term
    ])
    |> optional(property_or_attribute)

  # The following operators determine the order of operations, as per
  # https://en.wikipedia.org/wiki/Order_of_operations

  # Normally this would also have root but we don't have a shorthand for that
  # and so rather than a list of options, this is just the exponent operator.
  # This has higher precedence.
  exponentiation_operator = string("^")

  # Multiplication & division is second
  multiplication_division_operator =
    choice([
      string("*"),
      string("/")
    ])

  # Addition & subtraction are last
  addition_subtraction_operator =
    choice([
      string("+"),
      string("-"),
      string("<>"),
      string(">="),
      string(">"),
      string("!="),
      string("<="),
      string("<"),
      string("=="),
      string("&"),
      replace(string("="), "==")
    ])

  # Below are the precedence parsers, each gives the higher precedence
  # a change to parse its things _before_ it itself attempts to do so.
  # This is how the precedence is guaranteed.

  # First operator precedence parser
  defparsecp(
    :exponentiation,
    compound_term_with_property_or_attribute
    |> label("an expression")
    |> repeat(
      exponentiation_operator
      |> label("an operator")
      |> ignore_surrounding_whitespace.()
      |> concat(compound_term_with_property_or_attribute)
    )
    |> reduce(:fold_infixl)
  )

  # Second operator precedence parser
  defparsecp(
    :multiplication_division,
    parsec(:exponentiation)
    |> repeat(
      multiplication_division_operator
      |> ignore_surrounding_whitespace.()
      |> concat(parsec(:exponentiation))
    )
    |> reduce(:fold_infixl)
  )

  # Third operator precedence parser
  defparsecp(
    :term_operator,
    parsec(:multiplication_division)
    |> repeat(
      addition_subtraction_operator
      |> ignore_surrounding_whitespace.()
      |> concat(parsec(:multiplication_division))
    )
    |> reduce(:fold_infixl)
  )

  # Parses a block such as `@(1 + 1)`
  expression_block =
    ignore(string("@"))
    |> concat(wrap(block))

  # Parsed a short hand such as `@now()`
  expression_shorthand =
    ignore(string("@"))
    |> concat(wrap(reduce(compound_term_with_property_or_attribute, :fold_infixl)))

  single_at = string("@")

  # @@ should be treated as an @
  escaped_at =
    ignore(string("@"))
    |> string("@")

  # Parses any old text as long as it doesn't have
  # any @ expression markers
  text =
    empty()
    |> lookahead_not(string("@"))
    |> utf8_string([], 1)
    |> times(min: 1)
    |> reduce({Enum, :join, []})

  @doc false
  def fold_infixl(acc) do
    acc
    |> Enum.reverse()
    |> Enum.chunk_every(2)
    |> List.foldr([], fn
      [l], [] -> l
      [r, op], l -> {op, [l, r]}
    end)
  end

  @doc """
  Parse a block and return the AST

  ## Example

      iex> Expression.V2.Parser.expression("contact.age + 1")
      {:ok,  [{"+", [{"__property__", ["contact", "age"]}, 1]}], "", %{}, {1, 0}, 15}

  """
  defparsec(:expression, parsec(:term_operator))

  @doc """
  Parse an expression and return the AST

  ## Example

      iex> Expression.V2.Parser.parse("hello @world the time is @now()")
      {:ok, ["hello ", ["world"], " the time is ", [{"now", []}]], "", %{}, {1, 0}, 31}

  """
  defparsec(
    :parse,
    repeat(choice([text, escaped_at, expression_block, expression_shorthand, single_at]))
  )
end