defmodule Makeup.Lexers.SwiftLexer do
@moduledoc """
Lexer for the Swift language to be used with the Makeup package.

Based on the official language reference [1].

[1] https://docs.swift.org/swift-book/documentation/the-swift-programming-language/aboutthelanguagereference
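
## Usage

A minimal example (the exact token metadata depends on the options passed
to `lex/2` and on the Makeup version in use):

    Makeup.Lexers.SwiftLexer.lex("42 // the answer")
    #=> a list of `{token_type, metadata, value}` tuples: here an integer
    #=> literal, a whitespace token, and a single-line comment token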
"""
import NimbleParsec
import Makeup.Lexer.Combinators
import Makeup.Lexer.Groups
@behaviour Makeup.Lexer
###################################################################
# Step #1: tokenize the input (into a list of tokens)
###################################################################
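# Whether to inline the generated combinators at compile time. Inlining makes
# lexing faster at the cost of longer compilation, and is controlled through
# the :makeup_swift application environment.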
@inline Application.compile_env(:makeup_swift, :inline, false)
# Lines
# LF/U+000A
line_feed = ascii_char([?\n])
# CR/U+000D
carriage_return = ascii_char([?\r])
line_break =
  choice([
    # A carriage return followed by a line feed must be tried first,
    # otherwise the lone carriage return branch always wins and CRLF is
    # split into two line breaks.
    concat(carriage_return, line_feed),
    line_feed,
    carriage_return
  ])
# Whitespace
# Space, horizontal tab, vertical tab, form feed, and line break characters
whitespace_item = [?\s, ?\t, ?\v, ?\f, ?\r, ?\n]
whitespace = whitespace_item |> ascii_string(min: 1) |> token(:whitespace)
# Literals
#
# A literal is the source code representation of a value of a type, such as a number or string.
#
# The following are examples of literals:
#
# 42 // Integer literal
# 3.14159 // Floating-point literal
# "Hello, world!" // String literal
# /Hello, .*/ // Regular expression literal
# true // Boolean literal
## String
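# Only single-line static string literals are recognised for now; escape
# sequences and interpolation (`\(...)`) are lexed as plain quoted text,
# and multiline `"""` strings are not given dedicated tokens yet.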
# Any single character except a line break, so a string literal cannot
# span lines
quoted_text_item = utf8_string([not: ?\n, not: ?\r], 1)
quoted_text = quoted_text_item
string_literal_opening_delimiter = string("\"")
string_literal_closing_delimiter = string("\"")
static_string_literal =
many_surrounded_by(
quoted_text,
string_literal_opening_delimiter,
string_literal_closing_delimiter
)
string_literal = static_string_literal |> token(:literal)
## nil
nil_literal = string("nil") |> token(:literal)
## Boolean
boolean_literal = choice([string("true"), string("false")]) |> token(:literal)
## Numeric
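# Only plain decimal forms are handled below: binary/octal/hexadecimal
# literals, `_` digit separators, exponents, and leading signs are not
# recognised yet.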
### Integer
#### Decimal
decimal_digit = ascii_string([?0..?9], min: 1)
decimal_literal = times(decimal_digit, min: 1)
integer_literal = decimal_literal |> token(:number_integer)
### Float
decimal_fraction = concat(string("."), decimal_literal)
floating_point_literal = concat(decimal_literal, decimal_fraction) |> token(:number_float)
## All
numeric_literal =
choice([
floating_point_literal,
integer_literal
])
## All
literal =
choice([
nil_literal,
boolean_literal,
string_literal,
numeric_literal
])
# Comments
comment_text = utf8_string([], 1)
## Inline
inline_comment =
string("//")
|> concat(repeat(lookahead_not(line_break) |> utf8_string([], 1)))
|> token(:comment_single)
## Multi-line
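# Note that Swift block comments may nest (`/* outer /* inner */ */`);
# nesting is not tracked here, so the first `*/` closes the comment.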
multiline_comment_text = comment_text
# The delimiters are passed as combinators so that the whole match collapses
# into a single :comment_multiline token.
multiline_comment =
  multiline_comment_text
  |> many_surrounded_by(string("/*"), string("*/"))
  |> token(:comment_multiline)
# Catch-all for any character the combinators above do not recognise
# (identifiers, keywords, operators, ...), so the parser never gets stuck.
any_char = utf8_char([]) |> token(:error)
# Root
root_element_combinator =
  choice([
    # Whitespace
    whitespace,
    # Literals
    literal,
    # Comments
    inline_comment,
    multiline_comment,
    # Anything else
    any_char
  ])
@doc false
def __as_swift_language__({ttype, meta, value}) do
{ttype, Map.put(meta, :language, :swift), value}
end
##############################################################################
# Semi-public API: these two functions can be used by someone who wants to
# embed this lexer into another lexer, but other than that, they are not
# meant to be used by end-users
##############################################################################
@impl Makeup.Lexer
defparsec(
:root_element,
root_element_combinator |> map({__MODULE__, :__as_swift_language__, []}),
inline: @inline
)
@impl Makeup.Lexer
defparsec(
:root,
repeat(parsec(:root_element)),
inline: @inline
)
###################################################################
# Step #2: postprocess the list of tokens
###################################################################
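# No postprocessing is needed yet. This callback is where Makeup lexers
# usually patch up the raw token stream, for example by reclassifying
# name tokens that match reserved words as keywords.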
@impl Makeup.Lexer
def postprocess(tokens, _opts \\ []) do
tokens
end
#######################################################################
# Step #3: highlight matching delimiters
#######################################################################
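# No delimiter groups (parentheses, brackets, braces, ...) are defined yet,
# so group matching is effectively a no-op.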
@impl Makeup.Lexer
defgroupmatcher(:match_groups, [])
# Finally, the public API for the lexer
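# `lex/2` parses the whole input, postprocesses the token list, and matches
# delimiter groups under the given (or a randomly generated) group prefix.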
@impl Makeup.Lexer
def lex(text, opts \\ []) do
group_prefix = Keyword.get(opts, :group_prefix, random_prefix(10))
{:ok, tokens, "", _, _, _} = root(text)
tokens
|> postprocess()
|> match_groups(group_prefix)
end
end