defmodule MakeupSql do
@moduledoc """
A `Makeup` lexer for ANSI SQL.
"""
import Makeup.Lexer.Combinators
import Makeup.Lexer.Groups
import NimbleParsec
@behaviour Makeup.Lexer
@impl Makeup.Lexer
def lex(text, opts \\ []) do
group_prefix = Keyword.get(opts, :group_prefix, random_prefix(10))
{:ok, tokens, "", _, _, _} = root(text)
tokens
|> postprocess([])
|> match_groups(group_prefix)
end
@impl Makeup.Lexer
def postprocess(tokens, _opts \\ []), do: tokens
@impl Makeup.Lexer
defgroupmatcher(:match_groups,
parentheses: [
open: [[{:punctuation, %{language: :sql}, "("}]],
close: [[{:punctuation, %{language: :sql}, ")"}]]
],
list: [
open: [
[{:punctuation, %{language: :sql}, "["}]
],
close: [
[{:punctuation, %{language: :sql}, "]"}]
]
],
curly: [
open: [
[{:punctuation, %{language: :sql}, "{"}]
],
close: [
[{:punctuation, %{language: :sql}, "}"}]
]
]
)
keywords =
[
"ABORT",
"ABORTSESSION",
"ABS",
"ABSOLUTE",
"ACCESS",
"ACCESSIBLE",
"ACCESS_LOCK",
"ACCOUNT",
"ACOS",
"ACOSH",
"ACTION",
"ADD",
"ADD_MONTHS",
"ADMIN",
"AFTER",
"AGGREGATE",
"ALIAS",
"ALL",
"ALLOCATE",
"ALLOW",
"ALTER",
"ALTERAND",
"AMP",
"ANALYSE",
"ANALYZE",
"AND",
"ANSIDATE",
"ANY",
"ARE",
"ARRAY",
"ARRAY_AGG",
"ARRAY_EXISTS",
"ARRAY_MAX_CARDINALITY",
"AS",
"ASC",
"ASENSITIVE",
"ASIN",
"ASINH",
"ASSERTION",
"ASSOCIATE",
"ASUTIME",
"ASYMMETRIC",
"AT",
"ATAN",
"ATAN2",
"ATANH",
"ATOMIC",
"AUDIT",
"AUTHORIZATION",
"AUX",
"AUXILIARY",
"AVE",
"AVERAGE",
"AVG",
"BACKUP",
"BEFORE",
"BEGIN",
"BEGIN_FRAME",
"BEGIN_PARTITION",
"BETWEEN",
"BIGINT",
"BINARY",
"BIT",
"BLOB",
"BOOLEAN",
"BOTH",
"BREADTH",
"BREAK",
"BROWSE",
"BT",
"BUFFERPOOL",
"BULK",
"BUT",
"BY",
"BYTE",
"BYTEINT",
"BYTES",
"CALL",
"CALLED",
"CAPTURE",
"CARDINALITY",
"CASCADE",
"CASCADED",
"CASE",
"CASESPECIFIC",
"CASE_N",
"CAST",
"CATALOG",
"CCSID",
"CD",
"CEIL",
"CEILING",
"CHANGE",
"CHAR",
"CHAR2HEXINT",
"CHARACTER",
"CHARACTERS",
"CHARACTER_LENGTH",
"CHARS",
"CHAR_LENGTH",
"CHECK",
"CHECKPOINT",
"CLASS",
"CLASSIFIER",
"CLOB",
"CLONE",
"CLOSE",
"CLUSTER",
"CLUSTERED",
"CM",
"COALESCE",
"COLLATE",
"COLLATION",
"COLLECT",
"COLLECTION",
"COLLID",
"COLUMN",
"COLUMN_VALUE",
"COMMENT",
"COMMIT",
"COMPLETION",
"COMPRESS",
"COMPUTE",
"CONCAT",
"CONCURRENTLY",
"CONDITION",
"CONNECT",
"CONNECTION",
"CONSTRAINT",
"CONSTRAINTS",
"CONSTRUCTOR",
"CONTAINS",
"CONTAINSTABLE",
"CONTENT",
"CONTINUE",
"CONVERT",
"CONVERT_TABLE_HEADER",
"COPY",
"CORR",
"CORRESPONDING",
"COS",
"COSH",
"COUNT",
"COVAR_POP",
"COVAR_SAMP",
"CREATE",
"CROSS",
"CS",
"CSUM",
"CT",
"CUBE",
"CUME_DIST",
"CURRENT",
"CURRENT_CATALOG",
"CURRENT_DATE",
"CURRENT_DEFAULT_TRANSFORM_GROUP",
"CURRENT_LC_CTYPE",
"CURRENT_PATH",
"CURRENT_ROLE",
"CURRENT_ROW",
"CURRENT_SCHEMA",
"CURRENT_SERVER",
"CURRENT_TIME",
"CURRENT_TIMESTAMP",
"CURRENT_TIMEZONE",
"CURRENT_TRANSFORM_GROUP_FOR_TYPE",
"CURRENT_USER",
"CURRVAL",
"CURSOR",
"CV",
"CYCLE",
"DATA",
"DATABASE",
"DATABASES",
"DATABLOCKSIZE",
"DATE",
"DATEFORM",
"DAY",
"DAYS",
"DAY_HOUR",
"DAY_MICROSECOND",
"DAY_MINUTE",
"DAY_SECOND",
"DBCC",
"DBINFO",
"DEALLOCATE",
"DEC",
"DECFLOAT",
"DECIMAL",
"DECLARE",
"DEFAULT",
"DEFERRABLE",
"DEFERRED",
"DEFINE",
"DEGREES",
"DEL",
"DELAYED",
"DELETE",
"DENSE_RANK",
"DENY",
"DEPTH",
"DEREF",
"DESC",
"DESCRIBE",
"DESCRIPTOR",
"DESTROY",
"DESTRUCTOR",
"DETERMINISTIC",
"DIAGNOSTIC",
"DIAGNOSTICS",
"DICTIONARY",
"DISABLE",
"DISABLED",
"DISALLOW",
"DISCONNECT",
"DISK",
"DISTINCT",
"DISTINCTROW",
"DISTRIBUTED",
"DIV",
"DO",
"DOCUMENT",
"DOMAIN",
"DOUBLE",
"DROP",
"DSSIZE",
"DUAL",
"DUMP",
"DYNAMIC",
"EACH",
"ECHO",
"EDITPROC",
"ELEMENT",
"ELSE",
"ELSEIF",
"EMPTY",
"ENABLED",
"ENCLOSED",
"ENCODING",
"ENCRYPTION",
"END",
"END-EXEC",
"ENDING",
"END_FRAME",
"END_PARTITION",
"EQ",
"EQUALS",
"ERASE",
"ERRLVL",
"ERROR",
"ERRORFILES",
"ERRORTABLES",
"ESCAPE",
"ESCAPED",
"ET",
"EVERY",
"EXCEPT",
"EXCEPTION",
"EXCLUSIVE",
"EXEC",
"EXECUTE",
"EXISTS",
"EXIT",
"EXP",
"EXPLAIN",
"EXTERNAL",
"EXTRACT",
"FALLBACK",
"FALSE",
"FASTEXPORT",
"FENCED",
"FETCH",
"FIELDPROC",
"FILE",
"FILLFACTOR",
"FILTER",
"FINAL",
"FIRST",
"FIRST_VALUE",
"FLOAT",
"FLOAT4",
"FLOAT8",
"FLOOR",
"FOR",
"FORCE",
"FOREIGN",
"FORMAT",
"FOUND",
"FRAME_ROW",
"FREE",
"FREESPACE",
"FREETEXT",
"FREETEXTTABLE",
"FREEZE",
"FROM",
"FULL",
"FULLTEXT",
"FUNCTION",
"FUSION",
"GE",
"GENERAL",
"GENERATED",
"GET",
"GIVE",
"GLOBAL",
"GO",
"GOTO",
"GRANT",
"GRAPHIC",
"GROUP",
"GROUPING",
"GROUPS",
"GT",
"HANDLER",
"HASH",
"HASHAMP",
"HASHBAKAMP",
"HASHBUCKET",
"HASHROW",
"HAVING",
"HELP",
"HIGH_PRIORITY",
"HOLD",
"HOLDLOCK",
"HOST",
"HOUR",
"HOURS",
"HOUR_MICROSECOND",
"HOUR_MINUTE",
"HOUR_SECOND",
"IDENTIFIED",
"IDENTITY",
"IDENTITYCOL",
"IDENTITY_INSERT",
"IF",
"IGNORE",
"ILIKE",
"IMMEDIATE",
"IN",
"INCLUSIVE",
"INCONSISTENT",
"INCREMENT",
"INDEX",
"INDICATOR",
"INFILE",
"INHERIT",
"INITIAL",
"INITIALIZE",
"INITIALLY",
"INITIATE",
"INNER",
"INOUT",
"INPUT",
"INS",
"INSENSITIVE",
"INSERT",
"INSTEAD",
"INT",
"INT1",
"INT2",
"INT3",
"INT4",
"INT8",
"INTEGER",
"INTEGERDATE",
"INTERSECT",
"INTERSECTION",
"INTERVAL",
"INTO",
"IO_AFTER_GTIDS",
"IO_BEFORE_GTIDS",
"IS",
"ISNULL",
"ISOBID",
"ISOLATION",
"ITERATE",
"JAR",
"JOIN",
"JOURNAL",
"JSON_ARRAY",
"JSON_ARRAYAGG",
"JSON_EXISTS",
"JSON_OBJECT",
"JSON_OBJECTAGG",
"JSON_QUERY",
"JSON_TABLE",
"JSON_TABLE_PRIMITIVE",
"JSON_VALUE",
"KEEP",
"KEY",
"KEYS",
"KILL",
"KURTOSIS",
"LABEL",
"LAG",
"LANGUAGE",
"LARGE",
"LAST",
"LAST_VALUE",
"LATERAL",
"LC_CTYPE",
"LE",
"LEAD",
"LEADING",
"LEAVE",
"LEFT",
"LESS",
"LEVEL",
"LIKE",
"LIKE_REGEX",
"LIMIT",
"LINEAR",
"LINENO",
"LINES",
"LISTAGG",
"LN",
"LOAD",
"LOADING",
"LOCAL",
"LOCALE",
"LOCALTIME",
"LOCALTIMESTAMP",
"LOCATOR",
"LOCATORS",
"LOCK",
"LOCKING",
"LOCKMAX",
"LOCKSIZE",
"LOG",
"LOG10",
"LOGGING",
"LOGON",
"LONG",
"LONGBLOB",
"LONGTEXT",
"LOOP",
"LOWER",
"LOW_PRIORITY",
"LT",
"MACRO",
"MAINTAINED",
"MAP",
"MASTER_BIND",
"MASTER_SSL_VERIFY_SERVER_CERT",
"MATCH",
"MATCHES",
"MATCH_NUMBER",
"MATCH_RECOGNIZE",
"MATERIALIZED",
"MAVG",
"MAX",
"MAXEXTENTS",
"MAXIMUM",
"MAXVALUE",
"MCHARACTERS",
"MDIFF",
"MEDIUMBLOB",
"MEDIUMINT",
"MEDIUMTEXT",
"MEMBER",
"MERGE",
"METHOD",
"MICROSECOND",
"MICROSECONDS",
"MIDDLEINT",
"MIN",
"MINDEX",
"MINIMUM",
"MINUS",
"MINUTE",
"MINUTES",
"MINUTE_MICROSECOND",
"MINUTE_SECOND",
"MLINREG",
"MLOAD",
"MLSLABEL",
"MOD",
"MODE",
"MODIFIES",
"MODIFY",
"MODULE",
"MONITOR",
"MONRESOURCE",
"MONSESSION",
"MONTH",
"MONTHS",
"MSUBSTR",
"MSUM",
"MULTISET",
"NAMED",
"NAMES",
"NATIONAL",
"NATURAL",
"NCHAR",
"NCLOB",
"NE",
"NESTED_TABLE_ID",
"NEW",
"NEW_TABLE",
"NEXT",
"NEXTVAL",
"NO",
"NOAUDIT",
"NOCHECK",
"NOCOMPRESS",
"NONCLUSTERED",
"NONE",
"NORMALIZE",
"NOT",
"NOTNULL",
"NOWAIT",
"NO_WRITE_TO_BINLOG",
"NTH_VALUE",
"NTILE",
"NULL",
"NULLIF",
"NULLIFZERO",
"NULLS",
"NUMBER",
"NUMERIC",
"NUMPARTS",
"OBID",
"OBJECT",
"OBJECTS",
"OCCURRENCES_REGEX",
"OCTET_LENGTH",
"OF",
"OFF",
"OFFLINE",
"OFFSET",
"OFFSETS",
"OLD",
"OLD_TABLE",
"OMIT",
"ON",
"ONE",
"ONLINE",
"ONLY",
"OPEN",
"OPENDATASOURCE",
"OPENQUERY",
"OPENROWSET",
"OPENXML",
"OPERATION",
"OPTIMIZATION",
"OPTIMIZE",
"OPTIMIZER_COSTS",
"OPTION",
"OPTIONALLY",
"OR",
"ORDER",
"ORDINALITY",
"ORGANIZATION",
"OUT",
"OUTER",
"OUTFILE",
"OUTPUT",
"OVER",
"OVERLAPS",
"OVERLAY",
"OVERRIDE",
"PACKAGE",
"PAD",
"PADDED",
"PARAMETER",
"PARAMETERS",
"PART",
"PARTIAL",
"PARTITION",
"PARTITIONED",
"PARTITIONING",
"PASSWORD",
"PATH",
"PATTERN",
"PCTFREE",
"PER",
"PERCENT",
"PERCENTILE_CONT",
"PERCENTILE_DISC",
"PERCENT_RANK",
"PERIOD",
"PERM",
"PERMANENT",
"PIECESIZE",
"PIVOT",
"PLACING",
"PLAN",
"PORTION",
"POSITION",
"POSITION_REGEX",
"POSTFIX",
"POWER",
"PRECEDES",
"PRECISION",
"PREFIX",
"PREORDER",
"PREPARE",
"PRESERVE",
"PREVVAL",
"PRIMARY",
"PRINT",
"PRIOR",
"PRIQTY",
"PRIVATE",
"PRIVILEGES",
"PROC",
"PROCEDURE",
"PROFILE",
"PROGRAM",
"PROPORTIONAL",
"PROTECTION",
"PSID",
"PTF",
"PUBLIC",
"PURGE",
"QUALIFIED",
"QUALIFY",
"QUANTILE",
"QUERY",
"QUERYNO",
"RADIANS",
"RAISERROR",
"RANDOM",
"RANGE",
"RANGE_N",
"RANK",
"RAW",
"READ",
"READS",
"READTEXT",
"READ_WRITE",
"REAL",
"RECONFIGURE",
"RECURSIVE",
"REF",
"REFERENCES",
"REFERENCING",
"REFRESH",
"REGEXP",
"REGR_AVGX",
"REGR_AVGY",
"REGR_COUNT",
"REGR_INTERCEPT",
"REGR_R2",
"REGR_SLOPE",
"REGR_SXX",
"REGR_SXY",
"REGR_SYY",
"RELATIVE",
"RELEASE",
"RENAME",
"REPEAT",
"REPLACE",
"REPLICATION",
"REPOVERRIDE",
"REQUEST",
"REQUIRE",
"RESIGNAL",
"RESOURCE",
"RESTART",
"RESTORE",
"RESTRICT",
"RESULT",
"RESULT_SET_LOCATOR",
"RESUME",
"RET",
"RETRIEVE",
"RETURN",
"RETURNING",
"RETURNS",
"REVALIDATE",
"REVERT",
"REVOKE",
"RIGHT",
"RIGHTS",
"RLIKE",
"ROLE",
"ROLLBACK",
"ROLLFORWARD",
"ROLLUP",
"ROUND_CEILING",
"ROUND_DOWN",
"ROUND_FLOOR",
"ROUND_HALF_DOWN",
"ROUND_HALF_EVEN",
"ROUND_HALF_UP",
"ROUND_UP",
"ROUTINE",
"ROW",
"ROWCOUNT",
"ROWGUIDCOL",
"ROWID",
"ROWNUM",
"ROWS",
"ROWSET",
"ROW_NUMBER",
"RULE",
"RUN",
"RUNNING",
"SAMPLE",
"SAMPLEID",
"SAVE",
"SAVEPOINT",
"SCHEMA",
"SCHEMAS",
"SCOPE",
"SCRATCHPAD",
"SCROLL",
"SEARCH",
"SECOND",
"SECONDS",
"SECOND_MICROSECOND",
"SECQTY",
"SECTION",
"SECURITY",
"SECURITYAUDIT",
"SEEK",
"SEL",
"SELECT",
"SEMANTICKEYPHRASETABLE",
"SEMANTICSIMILARITYDETAILSTABLE",
"SEMANTICSIMILARITYTABLE",
"SENSITIVE",
"SEPARATOR",
"SEQUENCE",
"SESSION",
"SESSION_USER",
"SET",
"SETRESRATE",
"SETS",
"SETSESSRATE",
"SETUSER",
"SHARE",
"SHOW",
"SHUTDOWN",
"SIGNAL",
"SIMILAR",
"SIMPLE",
"SIN",
"SINH",
"SIZE",
"SKEW",
"SKIP",
"SMALLINT",
"SOME",
"SOUNDEX",
"SOURCE",
"SPACE",
"SPATIAL",
"SPECIFIC",
"SPECIFICTYPE",
"SPOOL",
"SQL",
"SQLEXCEPTION",
"SQLSTATE",
"SQLTEXT",
"SQLWARNING",
"SQL_BIG_RESULT",
"SQL_CALC_FOUND_ROWS",
"SQL_SMALL_RESULT",
"SQRT",
"SS",
"SSL",
"STANDARD",
"START",
"STARTING",
"STARTUP",
"STATE",
"STATEMENT",
"STATIC",
"STATISTICS",
"STAY",
"STDDEV_POP",
"STDDEV_SAMP",
"STEPINFO",
"STOGROUP",
"STORED",
"STORES",
"STRAIGHT_JOIN",
"STRING_CS",
"STRUCTURE",
"STYLE",
"SUBMULTISET",
"SUBSCRIBER",
"SUBSET",
"SUBSTR",
"SUBSTRING",
"SUBSTRING_REGEX",
"SUCCEEDS",
"SUCCESSFUL",
"SUM",
"SUMMARY",
"SUSPEND",
"SYMMETRIC",
"SYNONYM",
"SYSDATE",
"SYSTEM",
"SYSTEM_TIME",
"SYSTEM_USER",
"SYSTIMESTAMP",
"TABLE",
"TABLESAMPLE",
"TABLESPACE",
"TAN",
"TANH",
"TBL_CS",
"TEMPORARY",
"TERMINATE",
"TERMINATED",
"TEXTSIZE",
"THAN",
"THEN",
"THRESHOLD",
"TIME",
"TIMESTAMP",
"TIMEZONE_HOUR",
"TIMEZONE_MINUTE",
"TINYBLOB",
"TINYINT",
"TINYTEXT",
"TITLE",
"TO",
"TOP",
"TRACE",
"TRAILING",
"TRAN",
"TRANSACTION",
"TRANSLATE",
"TRANSLATE_CHK",
"TRANSLATE_REGEX",
"TRANSLATION",
"TREAT",
"TRIGGER",
"TRIM",
"TRIM_ARRAY",
"TRUE",
"TRUNCATE",
"TRY_CONVERT",
"TSEQUAL",
"TYPE",
"UC",
"UESCAPE",
"UID",
"UNDEFINED",
"UNDER",
"UNDO",
"UNION",
"UNIQUE",
"UNKNOWN",
"UNLOCK",
"UNNEST",
"UNPIVOT",
"UNSIGNED",
"UNTIL",
"UPD",
"UPDATE",
"UPDATETEXT",
"UPPER",
"UPPERCASE",
"USAGE",
"USE",
"USER",
"USING",
"UTC_DATE",
"UTC_TIME",
"UTC_TIMESTAMP",
"VALIDATE",
"VALIDPROC",
"VALUE",
"VALUES",
"VALUE_OF",
"VARBINARY",
"VARBYTE",
"VARCHAR",
"VARCHAR2",
"VARCHARACTER",
"VARGRAPHIC",
"VARIABLE",
"VARIADIC",
"VARIANT",
"VARYING",
"VAR_POP",
"VAR_SAMP",
"VCAT",
"VERBOSE",
"VERSIONING",
"VIEW",
"VIRTUAL",
"VOLATILE",
"VOLUMES",
"WAIT",
"WAITFOR",
"WHEN",
"WHENEVER",
"WHERE",
"WHILE",
"WIDTH_BUCKET",
"WINDOW",
"WITH",
"WITHIN",
"WITHIN_GROUP",
"WITHOUT",
"WLM",
"WORK",
"WRITE",
"WRITETEXT",
"XMLCAST",
"XMLEXISTS",
"XMLNAMESPACES",
"XOR",
"YEAR",
"YEARS",
"YEAR_MONTH",
"ZEROFILL",
"ZEROIFNULL",
"ZONE"
]
|> Enum.reverse()
built_in =
[
"ARRAY",
"BIGINT",
"BINARY",
"BIT",
"BLOB",
"BOOLEAN",
"CHAR",
"CHARACTER",
"DATE",
"DEC",
"DECIMAL",
"FLOAT",
"INT",
"INTEGER",
"INTERVAL",
"NUMBER",
"NUMERIC",
"REAL",
"SERIAL",
"SMALLINT",
"VARCHAR",
"VARYING",
"INT8",
"SERIAL8",
"TEXT"
]
|> Enum.reverse()
keywords =
choice((keywords ++ Enum.map(keywords, &String.downcase/1)) |> Enum.map(&string/1))
|> token(:keyword_reserved)
built_in =
choice((built_in ++ Enum.map(built_in, &String.downcase/1)) |> Enum.map(&string/1))
|> token(:keyword_type)
whitespace = ascii_char([?\s, ?\t, ?\r, ?\n, ?\f]) |> token(:whitespace)
newline = ascii_char([?\r, ?\n])
single_line_comment =
string("--") |> lookahead_not(newline) |> repeat(utf8_char([])) |> token(:comment_single)
multi_line_comment = many_surrounded_by(utf8_char([]), "/*", "*/") |> token(:comment_multiline)
escaped_char = string("\\") |> utf8_string([], 1)
double_quote_string = string_like("\"", "\"", [escaped_char], :string_double)
single_quote_string = string_like("\'", "\'", [escaped_char], :string_single)
name =
ascii_char([?_, ?a..?z, ?A..?Z])
|> repeat(ascii_char([?_, ?0..?9, ?a..?z, ?A..?Z]))
|> token(:name)
operator =
ascii_char([?+, ?*, ?/, ?<, ?>, ?=, ?~, ?!, ?@, ?#, ?%, ?^, ?&, ?|, ?`, ??, ?-])
|> token(:operator)
punctuation = ascii_char([?;, ?:, ?(, ?), ?[, ?], ?.]) |> token(:punctuation)
token =
choice([
whitespace,
single_line_comment,
multi_line_comment,
keywords,
built_in,
operator,
integer(min: 1),
double_quote_string,
single_quote_string,
name,
punctuation
])
defparsec(
:root_element,
token |> map({__MODULE__, :__as_sql_language__, []})
)
defparsec(
:root,
repeat(parsec(:root_element))
)
def __as_sql_language__({ttype, meta, value}) do
{ttype, Map.put(meta, :language, :sql), value}
end
end