# lib/makeup_sql.ex

defmodule MakeupSql do
  @moduledoc """
  A `Makeup` lexer for ANSI SQL.
  """

  import Makeup.Lexer.Combinators
  import Makeup.Lexer.Groups
  import NimbleParsec

  @behaviour Makeup.Lexer

  # Tokenizes `text` as SQL and returns the resulting list of tokens after
  # `postprocess/2` and `match_groups/2` have been applied.
  #
  # Options:
  #   * `:group_prefix` - prefix handed to `match_groups/2`. When absent, one is
  #     produced with `random_prefix(10)`.
  #
  # Raises a `RuntimeError` ("unsupported expression: ...") when the parser
  # stops before consuming the whole input.
  @impl Makeup.Lexer
  def lex(text, opts \\ []) do
    # Build the default lazily so no random prefix is generated when the caller
    # already supplied one.
    group_prefix = Keyword.get_lazy(opts, :group_prefix, fn -> random_prefix(10) end)

    case root(text) do
      {:ok, tokens, "", _, _, _} ->
        tokens
        |> postprocess([])
        |> match_groups(group_prefix)

      {:ok, _tokens, not_parsed, _, _, _} ->
        raise "unsupported expression: " <> not_parsed
    end
  end

  # No post-processing is needed for SQL: the token list is returned untouched
  # and the options are ignored.
  @impl Makeup.Lexer
  def postprocess(tokens, _opts \\ []) do
    tokens
  end

  # Group matcher generated by `defgroupmatcher`. Each group lists the
  # punctuation token that opens it and the one that closes it: round
  # parentheses, square brackets and curly braces.
  @impl Makeup.Lexer
  defgroupmatcher(:match_groups,
    parentheses: [
      open: [[{:punctuation, %{language: :sql}, "("}]],
      close: [[{:punctuation, %{language: :sql}, ")"}]]
    ],
    list: [
      open: [[{:punctuation, %{language: :sql}, "["}]],
      close: [[{:punctuation, %{language: :sql}, "]"}]]
    ],
    curly: [
      open: [[{:punctuation, %{language: :sql}, "{"}]],
      close: [[{:punctuation, %{language: :sql}, "}"}]]
    ]
  )

  # Reserved words, written in ascending order and then reversed. The reversal
  # puts a longer word ahead of any word that is its prefix (e.g. `ABSOLUTE`
  # before `ABS`), which matters because the keyword combinator built from this
  # list tries the alternatives in list order.
  keywords =
    Enum.reverse(~w(
      ABORT ABORTSESSION ABS ABSOLUTE ACCESS ACCESSIBLE ACCESS_LOCK ACCOUNT ACOS ACOSH
      ACTION ADD ADD_MONTHS ADMIN AFTER AGGREGATE ALIAS ALL ALLOCATE ALLOW ALTER ALTERAND
      AMP ANALYSE ANALYZE AND ANSIDATE ANY ARE ARRAY ARRAY_AGG ARRAY_EXISTS
      ARRAY_MAX_CARDINALITY AS ASC ASENSITIVE ASIN ASINH ASSERTION ASSOCIATE ASUTIME
      ASYMMETRIC AT ATAN ATAN2 ATANH ATOMIC AUDIT AUTHORIZATION AUX AUXILIARY AVE AVERAGE
      AVG
      BACKUP BEFORE BEGIN BEGIN_FRAME BEGIN_PARTITION BETWEEN BIGINT BINARY BIT BLOB
      BOOLEAN BOTH BREADTH BREAK BROWSE BT BUFFERPOOL BULK BUT BY BYTE BYTEINT BYTES
      CALL CALLED CAPTURE CARDINALITY CASCADE CASCADED CASE CASESPECIFIC CASE_N CAST
      CATALOG CCSID CD CEIL CEILING CHANGE CHAR CHAR2HEXINT CHARACTER CHARACTERS
      CHARACTER_LENGTH CHARS CHAR_LENGTH CHECK CHECKPOINT CLASS CLASSIFIER CLOB CLONE
      CLOSE CLUSTER CLUSTERED CM COALESCE COLLATE COLLATION COLLECT COLLECTION COLLID
      COLUMN COLUMN_VALUE COMMENT COMMIT COMPLETION COMPRESS COMPUTE CONCAT CONCURRENTLY
      CONDITION CONNECT CONNECTION CONSTRAINT CONSTRAINTS CONSTRUCTOR CONTAINS
      CONTAINSTABLE CONTENT CONTINUE CONVERT CONVERT_TABLE_HEADER COPY CORR CORRESPONDING
      COS COSH COUNT COVAR_POP COVAR_SAMP CREATE CROSS CS CSUM CT CUBE CUME_DIST CURRENT
      CURRENT_CATALOG CURRENT_DATE CURRENT_DEFAULT_TRANSFORM_GROUP CURRENT_LC_CTYPE
      CURRENT_PATH CURRENT_ROLE CURRENT_ROW CURRENT_SCHEMA CURRENT_SERVER CURRENT_TIME
      CURRENT_TIMESTAMP CURRENT_TIMEZONE CURRENT_TRANSFORM_GROUP_FOR_TYPE CURRENT_USER
      CURRVAL CURSOR CV CYCLE
      DATA DATABASE DATABASES DATABLOCKSIZE DATE DATEFORM DAY DAYS DAY_HOUR
      DAY_MICROSECOND DAY_MINUTE DAY_SECOND DBCC DBINFO DEALLOCATE DEC DECFLOAT DECIMAL
      DECLARE DEFAULT DEFERRABLE DEFERRED DEFINE DEGREES DEL DELAYED DELETE DENSE_RANK
      DENY DEPTH DEREF DESC DESCRIBE DESCRIPTOR DESTROY DESTRUCTOR DETERMINISTIC
      DIAGNOSTIC DIAGNOSTICS DICTIONARY DISABLE DISABLED DISALLOW DISCONNECT DISK DISTINCT
      DISTINCTROW DISTRIBUTED DIV DO DOCUMENT DOMAIN DOUBLE DROP DSSIZE DUAL DUMP DYNAMIC
      EACH ECHO EDITPROC ELEMENT ELSE ELSEIF EMPTY ENABLED ENCLOSED ENCODING ENCRYPTION
      END END-EXEC ENDING END_FRAME END_PARTITION EQ EQUALS ERASE ERRLVL ERROR ERRORFILES
      ERRORTABLES ESCAPE ESCAPED ET EVERY EXCEPT EXCEPTION EXCLUSIVE EXEC EXECUTE EXISTS
      EXIT EXP EXPLAIN EXTERNAL EXTRACT
      FALLBACK FASTEXPORT FENCED FETCH FIELDPROC FILE FILLFACTOR FILTER FINAL FIRST
      FIRST_VALUE FLOAT FLOAT4 FLOAT8 FLOOR FOR FORCE FOREIGN FORMAT FOUND FRAME_ROW FREE
      FREESPACE FREETEXT FREETEXTTABLE FREEZE FROM FULL FULLTEXT FUNCTION FUSION
      GE GENERAL GENERATED GET GIVE GLOBAL GO GOTO GRANT GRAPHIC GROUP GROUPING GROUPS GT
      HANDLER HASH HASHAMP HASHBAKAMP HASHBUCKET HASHROW HAVING HELP HIGH_PRIORITY HOLD
      HOLDLOCK HOST HOUR HOURS HOUR_MICROSECOND HOUR_MINUTE HOUR_SECOND
      IDENTIFIED IDENTITY IDENTITYCOL IDENTITY_INSERT IF IGNORE ILIKE IMMEDIATE IN
      INCLUSIVE INCONSISTENT INCREMENT INDEX INDICATOR INFILE INHERIT INITIAL INITIALIZE
      INITIALLY INITIATE INNER INOUT INPUT INS INSENSITIVE INSERT INSTEAD INT INT1 INT2
      INT3 INT4 INT8 INTEGER INTEGERDATE INTERSECT INTERSECTION INTERVAL INTO
      IO_AFTER_GTIDS IO_BEFORE_GTIDS IS ISNULL ISOBID ISOLATION ITERATE
      JAR JOIN JOURNAL JSON_ARRAY JSON_ARRAYAGG JSON_EXISTS JSON_OBJECT JSON_OBJECTAGG
      JSON_QUERY JSON_TABLE JSON_TABLE_PRIMITIVE JSON_VALUE
      KEEP KEY KEYS KILL KURTOSIS
      LABEL LAG LANGUAGE LARGE LAST LAST_VALUE LATERAL LC_CTYPE LE LEAD LEADING LEAVE LEFT
      LESS LEVEL LIKE LIKE_REGEX LIMIT LINEAR LINENO LINES LISTAGG LN LOAD LOADING LOCAL
      LOCALE LOCALTIME LOCALTIMESTAMP LOCATOR LOCATORS LOCK LOCKING LOCKMAX LOCKSIZE LOG
      LOG10 LOGGING LOGON LONG LONGBLOB LONGTEXT LOOP LOWER LOW_PRIORITY LT
      MACRO MAINTAINED MAP MASTER_BIND MASTER_SSL_VERIFY_SERVER_CERT MATCH MATCHES
      MATCH_NUMBER MATCH_RECOGNIZE MATERIALIZED MAVG MAX MAXEXTENTS MAXIMUM MAXVALUE
      MCHARACTERS MDIFF MEDIUMBLOB MEDIUMINT MEDIUMTEXT MEMBER MERGE METHOD MICROSECOND
      MICROSECONDS MIDDLEINT MIN MINDEX MINIMUM MINUS MINUTE MINUTES MINUTE_MICROSECOND
      MINUTE_SECOND MLINREG MLOAD MLSLABEL MOD MODE MODIFIES MODIFY MODULE MONITOR
      MONRESOURCE MONSESSION MONTH MONTHS MSUBSTR MSUM MULTISET
      NAMED NAMES NATIONAL NATURAL NCHAR NCLOB NE NESTED_TABLE_ID NEW NEW_TABLE NEXT
      NEXTVAL NO NOAUDIT NOCHECK NOCOMPRESS NONCLUSTERED NONE NORMALIZE NOT NOTNULL NOWAIT
      NO_WRITE_TO_BINLOG NTH_VALUE NTILE NULLIF NULLIFZERO NULLS NUMBER NUMERIC NUMPARTS
      OBID OBJECT OBJECTS OCCURRENCES_REGEX OCTET_LENGTH OF OFF OFFLINE OFFSET OFFSETS OLD
      OLD_TABLE OMIT ON ONE ONLINE ONLY OPEN OPENDATASOURCE OPENQUERY OPENROWSET OPENXML
      OPERATION OPTIMIZATION OPTIMIZE OPTIMIZER_COSTS OPTION OPTIONALLY OR ORDER
      ORDINALITY ORGANIZATION OUT OUTER OUTFILE OUTPUT OVER OVERLAPS OVERLAY OVERRIDE
      PACKAGE PAD PADDED PARAMETER PARAMETERS PART PARTIAL PARTITION PARTITIONED
      PARTITIONING PASSWORD PATH PATTERN PCTFREE PER PERCENT PERCENTILE_CONT
      PERCENTILE_DISC PERCENT_RANK PERIOD PERM PERMANENT PIECESIZE PIVOT PLACING PLAN
      PORTION POSITION POSITION_REGEX POSTFIX POWER PRECEDES PRECISION PREFIX PREORDER
      PREPARE PRESERVE PREVVAL PRIMARY PRINT PRIOR PRIQTY PRIVATE PRIVILEGES PROC
      PROCEDURE PROFILE PROGRAM PROPORTIONAL PROTECTION PSID PTF PUBLIC PURGE
      QUALIFIED QUALIFY QUANTILE QUERY QUERYNO
      RADIANS RAISERROR RANDOM RANGE RANGE_N RANK RAW READ READS READTEXT READ_WRITE REAL
      RECONFIGURE RECURSIVE REF REFERENCES REFERENCING REFRESH REGEXP REGR_AVGX REGR_AVGY
      REGR_COUNT REGR_INTERCEPT REGR_R2 REGR_SLOPE REGR_SXX REGR_SXY REGR_SYY RELATIVE
      RELEASE RENAME REPEAT REPLACE REPLICATION REPOVERRIDE REQUEST REQUIRE RESIGNAL
      RESOURCE RESTART RESTORE RESTRICT RESULT RESULT_SET_LOCATOR RESUME RET RETRIEVE
      RETURN RETURNING RETURNS REVALIDATE REVERT REVOKE RIGHT RIGHTS RLIKE ROLE ROLLBACK
      ROLLFORWARD ROLLUP ROUND_CEILING ROUND_DOWN ROUND_FLOOR ROUND_HALF_DOWN
      ROUND_HALF_EVEN ROUND_HALF_UP ROUND_UP ROUTINE ROW ROWCOUNT ROWGUIDCOL ROWID ROWNUM
      ROWS ROWSET ROW_NUMBER RULE RUN RUNNING
      SAMPLE SAMPLEID SAVE SAVEPOINT SCHEMA SCHEMAS SCOPE SCRATCHPAD SCROLL SEARCH SECOND
      SECONDS SECOND_MICROSECOND SECQTY SECTION SECURITY SECURITYAUDIT SEEK SEL SELECT
      SEMANTICKEYPHRASETABLE SEMANTICSIMILARITYDETAILSTABLE SEMANTICSIMILARITYTABLE
      SENSITIVE SEPARATOR SEQUENCE SESSION SESSION_USER SET SETRESRATE SETS SETSESSRATE
      SETUSER SHARE SHOW SHUTDOWN SIGNAL SIMILAR SIMPLE SIN SINH SIZE SKEW SKIP SMALLINT
      SOME SOUNDEX SOURCE SPACE SPATIAL SPECIFIC SPECIFICTYPE SPOOL SQL SQLEXCEPTION
      SQLSTATE SQLTEXT SQLWARNING SQL_BIG_RESULT SQL_CALC_FOUND_ROWS SQL_SMALL_RESULT SQRT
      SS SSL STANDARD START STARTING STARTUP STATE STATEMENT STATIC STATISTICS STAY
      STDDEV_POP STDDEV_SAMP STEPINFO STOGROUP STORED STORES STRAIGHT_JOIN STRING_CS
      STRUCTURE STYLE SUBMULTISET SUBSCRIBER SUBSET SUBSTR SUBSTRING SUBSTRING_REGEX
      SUCCEEDS SUCCESSFUL SUM SUMMARY SUSPEND SYMMETRIC SYNONYM SYSDATE SYSTEM SYSTEM_TIME
      SYSTEM_USER SYSTIMESTAMP
      TABLE TABLESAMPLE TABLESPACE TAN TANH TBL_CS TEMPORARY TERMINATE TERMINATED TEXTSIZE
      THAN THEN THRESHOLD TIME TIMESTAMP TIMEZONE_HOUR TIMEZONE_MINUTE TINYBLOB TINYINT
      TINYTEXT TITLE TO TOP TRACE TRAILING TRAN TRANSACTION TRANSLATE TRANSLATE_CHK
      TRANSLATE_REGEX TRANSLATION TREAT TRIGGER TRIM TRIM_ARRAY TRUNCATE TRY_CONVERT
      TSEQUAL TYPE
      UC UESCAPE UID UNDEFINED UNDER UNDO UNION UNIQUE UNKNOWN UNLOCK UNNEST UNPIVOT
      UNSIGNED UNTIL UPD UPDATE UPDATETEXT UPPER UPPERCASE USAGE USE USER USING UTC_DATE
      UTC_TIME UTC_TIMESTAMP
      VALIDATE VALIDPROC VALUE VALUES VALUE_OF VARBINARY VARBYTE VARCHAR VARCHAR2
      VARCHARACTER VARGRAPHIC VARIABLE VARIADIC VARIANT VARYING VAR_POP VAR_SAMP VCAT
      VERBOSE VERSIONING VIEW VIRTUAL VOLATILE VOLUMES
      WAIT WAITFOR WHEN WHENEVER WHERE WHILE WIDTH_BUCKET WINDOW WITH WITHIN WITHIN_GROUP
      WITHOUT WLM WORK WRITE WRITETEXT
      XMLCAST XMLEXISTS XMLNAMESPACES XOR
      YEAR YEARS YEAR_MONTH
      ZEROFILL ZEROIFNULL ZONE
    ))

  # Built-in type names. The list is reversed so that the later entries
  # (`INT8`, `SERIAL8`) end up ahead of their shorter prefixes (`INT`, `SERIAL`).
  built_in =
    Enum.reverse(~w(
      ARRAY BIGINT BINARY BIT BLOB BOOLEAN CHAR CHARACTER DATE DEC DECIMAL FLOAT
      INT INTEGER INTERVAL NUMBER NUMERIC REAL SERIAL SMALLINT VARCHAR VARYING
      INT8 SERIAL8 TEXT
    ))

  # Negative lookahead: a word only counts as a keyword when it is not
  # immediately followed by another identifier character, so a keyword is never
  # cut out of the front of a longer identifier.
  not_identifier_char = lookahead_not(ascii_char([?_, ?0..?9, ?a..?z, ?A..?Z]))

  # Every reserved word is accepted fully upper-cased or fully lower-cased.
  keyword_strings =
    for word <- keywords ++ Enum.map(keywords, &String.downcase/1), do: string(word)

  keywords =
    keyword_strings
    |> choice()
    |> concat(not_identifier_char)
    |> token(:keyword_reserved)

  # Same treatment for the built-in type names.
  built_in_strings =
    for word <- built_in ++ Enum.map(built_in, &String.downcase/1), do: string(word)

  built_in =
    built_in_strings
    |> choice()
    |> concat(not_identifier_char)
    |> token(:keyword_type)

  # Carriage return, space, newline, form feed and tab.
  @known_whitespace_characters [?\r, ?\s, ?\n, ?\f, ?\t]

  # One or more consecutive whitespace characters form a single token.
  whitespace =
    @known_whitespace_characters
    |> ascii_string(min: 1)
    |> token(:whitespace)

  # Boolean and null literals, upper- or lower-cased.
  #
  # Like the reserved words, a constant must not be directly followed by an
  # identifier character. Without this lookahead an identifier such as
  # `nullable` is split into the constant `null` and the name `able`.
  keyword_constants =
    ["TRUE", "FALSE", "NULL", "true", "false", "null"]
    |> Enum.map(&string/1)
    |> choice()
    |> concat(not_identifier_char)
    |> token(:keyword_constant)

  # Fallback: a single character that no other rule accepted is an error token.
  any_char = token(utf8_char([]), :error)

  # `--` followed by everything up to (not including) a line break.
  single_line_comment =
    "--"
    |> string()
    |> repeat(utf8_char(not: ?\r, not: ?\n))
    |> token(:comment_single)

  # `/* ... */` block comment.
  multi_line_comment =
    token(many_surrounded_by(utf8_char([]), "/*", "*/"), :comment_multiline)

  # A backslash followed by exactly one character, allowed inside string literals.
  escaped_char = utf8_string(string("\\"), [], 1)

  double_quote_string = string_like("\"", "\"", [escaped_char], :string_double)
  single_quote_string = string_like("\'", "\'", [escaped_char], :string_single)

  # One or more decimal digits.
  digit = ascii_char([?0..?9])
  number = digit |> repeat(digit) |> token(:number)

  # An identifier: a letter or underscore, then letters, digits or underscores.
  name_start = ascii_char([?_, ?a..?z, ?A..?Z])
  name_continue = ascii_char([?_, ?0..?9, ?a..?z, ?A..?Z])
  name = name_start |> repeat(name_continue) |> token(:name)

  # Positional query parameters: `$1`, `$2`, ..., `$10`, `$11`, ...
  #
  # The parameter number is a non-zero digit followed by any number of digits,
  # so two-digit (and longer) placeholders are a single token instead of `$1`
  # followed by a stray number. The matched pieces are joined back into one
  # binary so the token value keeps the shape it had before ("$1", not a list).
  interpolation =
    string("$")
    |> ascii_char([?1..?9])
    |> repeat(ascii_char([?0..?9]))
    |> reduce({List, :to_string, []})
    |> token(:name)

  # Every operator is lexed one character at a time.
  operator_chars = [?+, ?*, ?/, ?<, ?>, ?=, ?~, ?!, ?@, ?#, ?%, ?^, ?&, ?|, ?`, ??, ?-]
  operator = token(ascii_char(operator_chars), :operator)

  # Single-character punctuation. Curly braces are included because the group
  # matcher pairs `{` / `}` as `:punctuation` tokens; without them here a brace
  # falls through to the `:error` fallback and that group can never match.
  punctuation =
    ascii_char([?;, ?:, ?(, ?), ?[, ?], ?{, ?}, ?., ?,])
    |> token(:punctuation)

  # A single token. `choice/1` commits to the first alternative that matches,
  # so the order below is significant:
  #
  #   * comments come before `operator`, otherwise `--` and `/*` would be lexed
  #     as the operators `-` and `/`;
  #   * `built_in` comes before `keywords`: most type names (`INT`, `VARCHAR`,
  #     `DATE`, ...) are also in the reserved-word list, and with the opposite
  #     order they are always tagged `:keyword_reserved`, never `:keyword_type`.
  #     Longer reserved words that merely start with a type name (`INTERSECT`,
  #     `DATEFORM`) are unaffected: the type alternative fails its word-boundary
  #     lookahead and the keyword alternative is tried next;
  #   * `name` comes after every word-based rule.
  token =
    choice([
      whitespace,
      single_line_comment,
      multi_line_comment,
      built_in,
      keywords,
      operator,
      double_quote_string,
      single_quote_string,
      interpolation,
      number,
      keyword_constants,
      name,
      punctuation,
      # fallback for not known
      any_char
    ])

  # Parses one token and passes it through `__as_sql_language__/1`.
  defparsec(:root_element, map(token, {__MODULE__, :__as_sql_language__, []}))

  # Parses zero or more tokens, one `root_element` at a time.
  defparsec(:root, repeat(parsec(:root_element)))

  # Tags a `{type, meta, value}` token with `language: :sql` in its metadata
  # map, leaving the type and the value unchanged.
  def __as_sql_language__({ttype, meta, value}) do
    sql_meta = Map.put(meta, :language, :sql)
    {ttype, sql_meta, value}
  end
end