defmodule Ch.Types do
@moduledoc """
Helpers to turn ClickHouse types into Elixir terms for easier processing.
"""
types =
[
{_encoded = "String", _decoded = :string, _args = []},
{"Bool", :boolean, []},
for size <- [8, 16, 32, 64, 128, 256] do
[
{"UInt#{size}", :"u#{size}", []},
{"Int#{size}", :"i#{size}", []}
]
end,
for size <- [32, 64] do
{"Float#{size}", :"f#{size}", []}
end,
{"Array", :array, [:type]},
{"Tuple", :tuple, [:type]},
{"Map", :map, [:type]},
{"FixedString", :fixed_string, [:int]},
{"Nullable", :nullable, [:type]},
{"DateTime64", :datetime64, [:int, :string]},
{"DateTime", :datetime, [:string]},
# {"DateTime", :datetime, []},
{"Date32", :date32, []},
{"Date", :date, []},
{"LowCardinality", :low_cardinality, [:type]},
for size <- [32, 64, 128, 256] do
{"Decimal#{size}", :"decimal#{size}", [:int]}
end,
{"Decimal", :decimal, [:int, :int]},
{"SimpleAggregateFunction", :simple_aggregate_function, [:identifier, :type]},
{"Enum8", :enum8, [:string, :eq, :int]},
{"Enum16", :enum16, [:string, :eq, :int]},
{"UUID", :uuid, []},
{"IPv4", :ipv4, []},
{"IPv6", :ipv6, []},
{"Point", :point, []},
{"Ring", :ring, []},
{"Polygon", :polygon, []},
{"MultiPolygon", :multipolygon, []},
{"Nothing", :nothing, []}
]
|> List.flatten()
for {encoded, name, []} <- types do
@doc """
Helper for `#{encoded}` ClickHouse type:
iex> #{name}()
:#{name}
iex> encode(#{name}())
"#{encoded}"
iex> decode("#{encoded}")
#{name}()
"""
def unquote(name)(), do: unquote(name)
end
@doc """
Helper for `DateTime` ClickHouse type:
iex> datetime()
:datetime
iex> to_string(encode(datetime()))
"DateTime"
iex> decode("DateTime")
datetime()
"""
def datetime, do: :datetime
@doc """
Helper for `DateTime(timezone)` ClickHouse type:
iex> datetime("Europe/Vienna")
{:datetime, "Europe/Vienna"}
iex> to_string(encode(datetime("UTC")))
"DateTime('UTC')"
iex> decode("DateTime('UTC')")
datetime("UTC")
"""
def datetime(timezone) when is_binary(timezone), do: {:datetime, timezone}
@doc """
Helper for `DateTime64(precision)` ClickHouse type:
iex> datetime64(3)
{:datetime64, 3}
iex> to_string(encode(datetime64(3)))
"DateTime64(3)"
iex> decode("DateTime64(3)")
datetime64(3)
"""
def datetime64(precision) when is_integer(precision), do: {:datetime64, precision}
@doc """
Helper for `DateTime64(precision, timezone)` ClickHouse type:
iex> datetime64(3, "UTC")
{:datetime64, 3, "UTC"}
iex> to_string(encode(datetime64(3, "UTC")))
"DateTime64(3, 'UTC')"
iex> decode("DateTime64(3, 'UTC')")
datetime64(3, "UTC")
"""
def datetime64(precision, timezone) when is_integer(precision) and is_binary(timezone) do
{:datetime64, precision, timezone}
end
@doc """
Helper for `FixedString(n)` ClickHouse type:
iex> fixed_string(3)
{:fixed_string, 3}
iex> to_string(encode(fixed_string(16)))
"FixedString(16)"
iex> decode("FixedString(16)")
fixed_string(16)
"""
def fixed_string(n) when is_integer(n), do: {:fixed_string, n}
@doc """
Helper for `Decimal(P, S)` ClickHouse type:
iex> decimal(18, 4)
{:decimal, 18, 4}
iex> to_string(encode(decimal(18, 4)))
"Decimal(18, 4)"
iex> decode("Decimal(18, 4)")
decimal(18, 4)
"""
def decimal(precision, scale) when is_integer(precision) and is_integer(scale) do
{:decimal, precision, scale}
end
for size <- [32, 64, 128, 256] do
name = :"decimal#{size}"
# `select toTypeName(cast(1 as Decimal32(2)))` etc.
precision =
case size do
32 -> 9
64 -> 18
128 -> 38
256 -> 76
end
@doc """
Helper for `Decimal#{size}(S)` ClickHouse type:
iex> #{name}(4)
{:#{name}, 4}
iex> to_string(encode(#{name}(4)))
"Decimal(#{precision}, 4)"
iex> decode("Decimal#{size}(4)")
{:#{name}, 4}
"""
def unquote(name)(scale) when is_integer(scale), do: {unquote(name), scale}
end
defguardp is_type(type) when is_atom(type) or is_tuple(type)
@doc """
Helper for `Array(T)` ClickHouse type:
iex> array(u64())
{:array, :u64}
iex> to_string(encode(array(u64())))
"Array(UInt64)"
iex> decode("Array(UInt64)")
array(u64())
"""
def array(type) when is_type(type), do: {:array, type}
@doc """
Helper for `Tuple(T1, T2, ...)` ClickHouse type:
iex> tuple([u64(), array(string())])
{:tuple, [:u64, {:array, :string}]}
iex> to_string(encode(tuple([u64(), array(string())])))
"Tuple(UInt64, Array(String))"
iex> decode("Tuple(UInt64, Array(String))")
tuple([u64(), array(string())])
"""
def tuple(types) when is_list(types), do: {:tuple, types}
@doc """
Helper for `Map(K, V)` ClickHouse type:
iex> map(string(), array(string()))
{:map, :string, {:array, :string}}
iex> to_string(encode(map(string(), array(string()))))
"Map(String, Array(String))"
iex> decode("Map(String, Array(String))")
map(string(), array(string()))
"""
def map(key_type, value_type) when is_type(key_type) and is_type(value_type) do
{:map, key_type, value_type}
end
@doc """
Helper for `Nullable(T)` ClickHouse type:
iex> nullable(array(boolean()))
{:nullable, {:array, :boolean}}
iex> to_string(encode(nullable(array(boolean()))))
"Nullable(Array(Bool))"
iex> decode("Nullable(Array(Bool))")
nullable(array(boolean()))
"""
def nullable(type) when is_type(type), do: {:nullable, type}
@doc """
Helper for `LowCardinality(T)` ClickHouse type:
iex> low_cardinality(string())
{:low_cardinality, :string}
iex> to_string(encode(low_cardinality(string())))
"LowCardinality(String)"
iex> decode("LowCardinality(String)")
low_cardinality(string())
"""
def low_cardinality(type) when is_type(type), do: {:low_cardinality, type}
@doc """
Helper for `SimpleAggregateFunction(name, type)` ClickHouse type:
iex> simple_aggregate_function("any", u8())
{:simple_aggregate_function, "any", :u8}
iex> to_string(encode(simple_aggregate_function("any", u8())))
"SimpleAggregateFunction(any, UInt8)"
iex> decode("SimpleAggregateFunction(any, UInt8)")
simple_aggregate_function("any", u8())
"""
def simple_aggregate_function(name, type) when is_binary(name) and is_type(type) do
{:simple_aggregate_function, name, type}
end
for size <- [8, 16] do
name = :"enum#{size}"
@doc """
Helper for `Enum#{size}` ClickHouse type:
iex> #{name}([{"hello", 1}, {"world", 2}])
{:#{name}, [{"hello", 1}, {"world", 2}]}
iex> to_string(encode(#{name}([{"hello", 1}, {"world", 2}])))
"Enum#{size}('hello' = 1, 'world' = 2)"
iex> decode("Enum#{size}('hello' = 1, 'world' = 2)")
#{name}([{"hello", 1}, {"world", 2}])
"""
def unquote(name)(mapping) when is_list(mapping), do: {unquote(name), mapping}
end
@doc """
Decodes a ClickHouse type into an intermediary Elixir term.
iex> decode("String")
:string
iex> decode("Array(String)")
{:array, :string}
iex> decode("Enum8('hello' = 1, 'world' = 2)")
{:enum8, [{"hello", 1}, {"world", 2}]}
iex> decode("Nullable(Decimal(18, 4))")
{:nullable, {:decimal, 18, 4}}
"""
def decode(type)
for {encoded, decoded, []} <- types do
def decode(unquote(encoded)), do: unquote(decoded)
end
def decode("DateTime"), do: :datetime
def decode(type) do
try do
decode([:type], type, [])
rescue
e ->
message = "failed to decode #{inspect(type)} as ClickHouse type (#{Exception.message(e)})"
reraise(ArgumentError, message, __STACKTRACE__)
end
end
defguardp is_whitespace(char) when char == ?\s or char == ?\t
defp decode(stack, <<whitespace, rest::bytes>>, acc) when is_whitespace(whitespace) do
decode(stack, rest, acc)
end
for {encoded, decoded, [_ | _] = args} <- types do
defp decode([:type | stack], unquote(encoded) <> rest, acc) do
decode(
[:open | unquote(args)] ++ [:close, {unquote(decoded), unquote(args)}, acc | stack],
rest,
[]
)
end
end
for {encoded, decoded, []} <- types do
defp decode([:type | stack], unquote(encoded) <> rest, acc) do
decode(stack, rest, [unquote(decoded) | acc])
end
end
defp decode([:open | stack], <<rest::bytes>>, acc) do
case rest do
<<?(, rest::bytes>> ->
decode(stack, rest, acc)
_ ->
# handles DateTime and Type()
[{type, _args}, prev_acc | stack] = close(stack)
decode(stack, rest, [type | prev_acc])
end
end
defp decode(stack, <<?), rest::bytes>>, acc) do
[{type, _args}, prev_acc | stack] = close(stack)
decode(stack, rest, [build_type(type, acc) | prev_acc])
end
defp decode([:close, {_type, args} | _] = stack, <<?,, rest::bytes>>, acc) do
decode(args ++ stack, rest, acc)
end
defp decode(stack, <<?,, rest::bytes>>, acc) do
decode(stack, rest, acc)
end
defp decode([:string | stack], <<?', rest::bytes>>, acc) do
decode_string(rest, 0, rest, stack, acc)
end
defp decode([:int | stack], <<rest::bytes>>, acc) do
decode_int(rest, stack, acc)
end
defp decode([:identifier | stack], <<rest::bytes>>, acc) do
decode_identifier(rest, 0, rest, stack, acc)
end
defp decode([:eq | stack], <<?=, rest::bytes>>, acc) do
decode(stack, rest, acc)
end
defp decode([], <<>>, [type]), do: type
defp close([:close | stack]), do: stack
defp close([_ | stack]), do: close(stack)
defp build_type(:array = a, [t]), do: {a, t}
defp build_type(:tuple = t, ts), do: {t, :lists.reverse(ts)}
defp build_type(:fixed_string = fs, [n]), do: {fs, n}
defp build_type(:datetime = d, [tz]), do: {d, tz}
defp build_type(:datetime64 = d, [precision]), do: {d, precision}
defp build_type(:datetime64 = d, [tz, p]), do: {d, p, tz}
defp build_type(:map = m, [v, k]), do: {m, k, v}
defp build_type(:nullable = n, [t]), do: {n, t}
defp build_type(:low_cardinality = l, [t]), do: {l, t}
defp build_type(:enum8 = e, mapping), do: {e, build_enum_mapping(mapping)}
defp build_type(:enum16 = e, mapping), do: {e, build_enum_mapping(mapping)}
defp build_type(:simple_aggregate_function = saf, [t, f]), do: {saf, f, t}
defp build_type(:decimal32 = d, [s]), do: {d, s}
defp build_type(:decimal64 = d, [s]), do: {d, s}
defp build_type(:decimal128 = d, [s]), do: {d, s}
defp build_type(:decimal256 = d, [s]), do: {d, s}
defp build_type(:decimal = d, [s, p]), do: {d, p, s}
defp build_enum_mapping(mapping) do
mapping |> :lists.reverse() |> Enum.chunk_every(2) |> Enum.map(fn [k, v] -> {k, v} end)
end
# TODO '', \'
defp decode_string(<<?', rest::bytes>>, len, original, stack, acc) do
part = :binary.part(original, 0, len)
decode(stack, rest, [:binary.copy(part) | acc])
end
defp decode_string(<<u::utf8, rest::bytes>>, len, original, stack, acc) do
decode_string(rest, len + utf8_size(u), original, stack, acc)
end
@compile inline: [utf8_size: 1]
defp utf8_size(codepoint) when codepoint <= 0x7F, do: 1
defp utf8_size(codepoint) when codepoint <= 0x7FF, do: 2
defp utf8_size(codepoint) when codepoint <= 0xFFFF, do: 3
defp utf8_size(codepoint) when codepoint <= 0x10FFFF, do: 4
defguardp is_alpha(a) when (a >= ?a and a <= ?z) or (a >= ?A and a <= ?Z)
defp decode_identifier(<<a, rest::bytes>>, len, original, stack, acc) when is_alpha(a) do
decode_identifier(rest, len + 1, original, stack, acc)
end
defp decode_identifier(<<rest::bytes>>, len, original, stack, acc) do
part = :binary.part(original, 0, len)
decode(stack, rest, [:binary.copy(part) | acc])
end
defguardp is_numeric(char) when char >= ?0 and char <= ?9
defp decode_int(<<?-, i, rest::bytes>>, stack, outer_acc) when is_numeric(i) do
decode_int_cont(rest, -(i - ?0), stack, outer_acc)
end
defp decode_int(<<i, rest::bytes>>, stack, outer_acc) when is_numeric(i) do
decode_int_cont(rest, i - ?0, stack, outer_acc)
end
defp decode_int_cont(<<i, rest::bytes>>, acc, stack, outer_acc) when is_numeric(i) do
decode_int_cont(rest, acc * 10 + i - ?0, stack, outer_acc)
end
defp decode_int_cont(<<rest::bytes>>, int, stack, acc) do
decode(stack, rest, [int | acc])
end
@doc """
Encodes a type from Elixir atom / tuple to proper ClickHouse name.
iex> encode(:string)
"String"
iex> IO.iodata_to_binary(encode({:nullable, :i8}))
"Nullable(Int8)"
"""
def encode(type)
for {encoded, decoded, []} <- types do
def encode(unquote(decoded)), do: unquote(encoded)
end
def encode(:datetime), do: "DateTime"
def encode({:nullable, type}), do: ["Nullable(", encode(type), ?)]
def encode({:fixed_string, n}), do: ["FixedString(", String.Chars.Integer.to_string(n), ?)]
def encode({:array, type}), do: ["Array(", encode(type), ?)]
def encode({:tuple, types}), do: ["Tuple(", encode_intersperse(types, ", "), ?)]
def encode({:map, key_type, value_type}) do
["Map(", encode(key_type), ", ", encode(value_type), ?)]
end
def encode({:low_cardinality, type}), do: ["LowCardinality(", encode(type), ?)]
for size <- [32, 64, 128, 256] do
# `select toTypeName(cast(1 as Decimal32(2)))` etc.
precision =
case size do
32 -> 9
64 -> 18
128 -> 38
256 -> 76
end
def encode({unquote(:"decimal#{size}"), scale}) do
encode({:decimal, unquote(precision), scale})
end
end
def encode({:decimal, precision, scale}) do
[
"Decimal(",
String.Chars.Integer.to_string(precision),
", ",
String.Chars.Integer.to_string(scale),
?)
]
end
def encode({:datetime, timezone}) when is_binary(timezone) do
["DateTime('", timezone, "')"]
end
def encode({:datetime64, precision}) do
["DateTime64(", String.Chars.Integer.to_string(precision), ?)]
end
def encode({:datetime64, precision, timezone}) when is_binary(timezone) do
["DateTime64(", String.Chars.Integer.to_string(precision), ", '", timezone, "')"]
end
def encode({:enum8, mapping}) do
["Enum8('", encode_mapping(mapping), ?)]
end
def encode({:enum16, mapping}) do
["Enum16('", encode_mapping(mapping), ?)]
end
def encode({:simple_aggregate_function, name, type}) when is_binary(name) do
["SimpleAggregateFunction(", name, ", ", encode(type), ?)]
end
defp encode_intersperse([last_type], _separator) do
[encode(last_type)]
end
defp encode_intersperse([type | types], separator) do
[encode(type), separator | encode_intersperse(types, separator)]
end
defp encode_intersperse([] = empty, _separator), do: empty
defp encode_mapping([{k, v}]) when is_binary(k) do
[k, "' = ", String.Chars.Integer.to_string(v)]
end
defp encode_mapping([{k, v} | mapping]) when is_binary(k) do
[k, "' = ", String.Chars.Integer.to_string(v), ", '" | encode_mapping(mapping)]
end
defp encode_mapping([] = empty), do: empty
end