defmodule Re do
@moduledoc """
Write readable regular expressions in functional style.
## Examples
Match subdomains of `example.com`:
iex> require Re
iex> require Re.Chars
iex> regex =
...> Re.sequence([
...> Re.one_or_more(Re.any_of([Re.Chars.any_ascii, Re.any_of('.-_')])),
...> Re.text(".example.com")
...> ]) |> Re.compile()
~r/(?:[\\\\0-\\x7f]|\\.|\\-|_)+\\.example\\.com/
iex> "hello.example.com" =~ regex
true
iex> "hello.world.example.com" =~ regex
true
iex> "hello.orsinium.dev" =~ regex
false
"""
require Re.Chars
@typedoc """
Internal Re representation of regular expressions.

A two-element tuple: a tag (`:re_expr` or `:re_group`) followed by the pattern source.
"""
@opaque re_ast :: {:re_expr, String.t()} | {:re_group, String.t()}
@doc """
Guard that tells whether a term has the shape of the internal Re AST.

A term passes when it is a two-element tuple whose first element is
`:re_expr` or `:re_group`.

## Examples

    iex> Re.is_re(Re.text("hello"))
    true
    iex> Re.is_re("something else")
    false
    iex> Re.is_re(~r"hi")
    false
"""
@spec is_re(any) :: any
defguard is_re(v)
         when is_tuple(v) and tuple_size(v) == 2 and elem(v, 0) in [:re_expr, :re_group]
# Internal macro: when all params are static literals, the quoted block is
# evaluated immediately and the resulting term is escaped back into AST;
# otherwise the quoted block itself is returned.
defmacrop eager(params, do: block) do
  quote generated: true do
    case Macro.quoted_literal?(unquote(params)) do
      true ->
        {value, _bindings} = Code.eval_quoted(unquote(block))
        Macro.escape(value)

      false ->
        unquote(block)
    end
  end
end
@doc """
Render the given Re AST as a pattern string.

* a tagged tuple yields its second element;
* an integer is treated as a single character and escaped;
* a string is returned unchanged.

## Examples

    iex> Re.to_string(Re.Chars.any_digit)
    "\\\\d"
    iex> Re.to_string(Re.Chars.any_ascii)
    "[\\\\\\\\0-\\\\x7f]"
"""
@spec to_string(re_ast() | String.t() | char()) :: String.t()
defmacro to_string(expr) do
  expanded = Macro.expand(expr, __ENV__)

  eager [expanded] do
    quote generated: true do
      case unquote(expanded) do
        source when is_bitstring(source) ->
          source

        codepoint when is_integer(codepoint) ->
          Regex.escape(List.to_string([codepoint]))

        {tag, source} when is_atom(tag) ->
          source
      end
    end
  end
end
@doc """
Compile Re AST (or string) into native Regex type.

The result can be used with any functions from the Regex module.
The pattern is rendered with `Re.to_string/1` and passed, together with
`options`, to `Regex.compile!/2`, which raises if the pattern is invalid.

https://hexdocs.pm/elixir/1.13/Regex.html#compile!/2

## Examples

    iex> "1" =~ Re.compile(Re.Chars.any_digit)
    true
    iex> "a" =~ Re.compile(Re.Chars.any_digit)
    false
"""
@spec compile(re_ast() | String.t(), binary() | [term()]) :: any()
defmacro compile(expr, options \\ "") do
  expr = Macro.expand(expr, __ENV__)

  # `options` takes part in the literal check too: the block below unquotes it,
  # so evaluating the block ahead of time with non-literal options (for example
  # a variable) would fail. In that case the block is emitted for runtime instead.
  eager [expr, options] do
    quote generated: true do
      require Re
      unquote(expr) |> Re.to_string() |> Regex.compile!(unquote(options))
    end
  end
end
@doc """
Group (but not capture) the pattern if needed.

Usually, you don't need to call this function.
All other functions call this one when needed.

An already grouped pattern is returned unchanged. A single character
(integer) is escaped, the same way `Re.to_string/1` and `Re.text/1` do it,
and needs no parentheses. A one-byte string is used as is. Everything else
is wrapped into a non-capturing group.

PCRE: `(?:X)`

## Examples

    iex> 'abc' |> Re.raw |> Re.group |> Re.to_string
    "(?:abc)"
"""
@spec group(re_ast() | String.t()) :: re_ast()
defmacro group(expr) do
  expr = Macro.expand(expr, __ENV__)

  eager [expr] do
    quote generated: true do
      case unquote(expr) do
        {:re_group, _} = grouped ->
          grouped

        {:re_expr, val} ->
          {:re_group, "(?:#{val})"}

        # An integer stands for a literal character, so special symbols such as
        # `?.` must be escaped before a quantifier is attached to them.
        val when is_integer(val) ->
          {:re_group, Regex.escape(List.to_string([val]))}

        val when byte_size(val) == 1 ->
          {:re_group, val}

        val ->
          {:re_group, "(?:#{val})"}
      end
    end
  end
end
@doc """
Include a raw regex as is into the resulting pattern.

Can be dangerous. Don't let untrusted users pass values there.
Use `Re.text` if you need the input text to be escaped.

Accepts a string, a compiled `Regex` (its source is used), or a charlist
(converted into a string).

## Examples

    iex> "example.com" =~ Re.raw("example.com") |> Re.compile()
    true
    iex> "examplescom" =~ Re.raw("example.com") |> Re.compile()
    true
    iex> "examplscom" =~ Re.raw("example.com") |> Re.compile()
    false
"""
@spec raw(String.t() | Regex.t()) :: re_ast()
defmacro raw(expr) do
  expr = Macro.expand(expr, __ENV__)

  eager [expr] do
    quote generated: true do
      case unquote(expr) do
        %Regex{} = regex ->
          {:re_expr, Regex.source(regex)}

        # Keep the stored source a binary, so that `Re.to_string/1` and
        # `Re.compile/2` also work for patterns given as charlists.
        chars when is_list(chars) ->
          {:re_expr, List.to_string(chars)}

        source ->
          {:re_expr, source}
      end
    end
  end
end
@doc """
Include a literal text into the resulting pattern.

Every symbol that has a special meaning in a regular expression is escaped.
The argument is either a string or a single character (integer).

## Examples

    iex> rex = Re.text("example.com") |> Re.compile()
    iex> "example.com" =~ rex
    true
    iex> "examplescom" =~ rex
    false
"""
@spec text(String.t() | integer()) :: re_ast()
defmacro text(expr) do
  literal = Macro.expand(expr, __ENV__)

  eager [literal] do
    quote generated: true do
      source = unquote(literal)

      escaped =
        if is_integer(source) do
          [source] |> List.to_string() |> Regex.escape()
        else
          Regex.escape(source)
        end

      # Once escaping succeeded, `source` is either an integer or a binary.
      # A single symbol never needs extra grouping, longer text does.
      tag = if is_integer(source) or byte_size(source) == 1, do: :re_group, else: :re_expr
      {tag, escaped}
    end
  end
end
@doc """
Chain multiple patterns together.

The argument is a list of patterns. When it is written as a literal list,
each element is expanded on its own; any other expression (for example a
variable holding a list) is handled at runtime.

PCRE: `XY`

## Examples

    iex> rex = Re.sequence([Re.text("a"), Re.Chars.any_digit]) |> Re.compile
    iex> "a1" =~ rex
    true
    iex> "a" =~ rex
    false
    iex> "1" =~ rex
    false
"""
@spec sequence([re_ast() | String.t()]) :: re_ast()
defmacro sequence(exprs) do
  # Only a literal list can be traversed here; anything else is an AST node
  # (variable, function call, ...) and must not be enumerated.
  exprs =
    if is_list(exprs) do
      Enum.map(exprs, &Macro.expand(&1, __ENV__))
    else
      Macro.expand(exprs, __ENV__)
    end

  eager exprs do
    quote generated: true do
      require Re
      result = unquote(exprs) |> Enum.map_join(&Re.to_string/1)
      {:re_expr, result}
    end
  end
end
@doc """
Match any of the given patterns or symbols.

When every rendered alternative is exactly one byte long, a character class
is produced; otherwise the alternatives are joined with `|`.

The argument is a list of patterns. When it is written as a literal list,
each element is expanded on its own; any other expression (for example a
variable holding a list) is handled at runtime.

PCRE: `[XY]` and `X|Y`

## Examples

    iex> rex = Re.any_of([Re.text(?a), Re.text(?b)]) |> Re.compile
    iex> "a" =~ rex
    true
    iex> "b" =~ rex
    true
    iex> "c" =~ rex
    false
    iex> "a" =~ Re.any_of([?a, ?b]) |> Re.compile
    true
"""
@spec any_of([re_ast() | String.t() | char()]) :: re_ast()
defmacro any_of(exprs) do
  # Only a literal list can be traversed here; anything else is an AST node
  # (variable, function call, ...) and must not be enumerated.
  exprs =
    if is_list(exprs) do
      Enum.map(exprs, &Macro.expand(&1, __ENV__))
    else
      Macro.expand(exprs, __ENV__)
    end

  eager exprs do
    quote generated: true do
      require Re
      strings = unquote(exprs) |> Enum.map(&Re.to_string/1)

      if strings |> Enum.all?(&(byte_size(&1) == 1)) do
        {:re_group, "[#{Enum.join(strings)}]"}
      else
        {:re_expr, Enum.join(strings, "|")}
      end
    end
  end
end
@doc """
Match anything except the given symbols.

The symbols are taken literally: characters that have a special meaning
inside a character class (such as `-` or `]`) are escaped.

PCRE: `[^XY]`

## Examples

    iex> "a" =~ Re.none_of('abc') |> Re.compile()
    false
    iex> "d" =~ Re.none_of('abc') |> Re.compile()
    true
"""
@spec none_of(list(char())) :: re_ast()
defmacro none_of(expr) do
  expr = Macro.expand(expr, __ENV__)

  eager [expr] do
    quote generated: true do
      # Interpolation turns the charlist into a string; escaping keeps symbols
      # like `-`, `]` and the backslash from changing the meaning of the class.
      symbols = Regex.escape("#{unquote(expr)}")
      {:re_group, "[^#{symbols}]"}
    end
  end
end
@doc """
Match any symbol between the two given bounds (inclusive).

Each bound may be a character (integer), which is converted into a string,
or a value that is interpolated into the pattern as is.

PCRE: `[X-Y]`

## Examples

    iex> rex = Re.in_range(?a, ?d) |> Re.compile()
    ~r/[a-d]/
    iex> "a" =~ rex
    true
    iex> "c" =~ rex
    true
    iex> "d" =~ rex
    true
    iex> "e" =~ rex
    false
"""
@spec in_range(char(), char()) :: re_ast()
defmacro in_range(expr1, expr2) do
  expr1 = Macro.expand(expr1, __ENV__)
  expr2 = Macro.expand(expr2, __ENV__)

  eager [expr1, expr2] do
    quote bind_quoted: [expr1: expr1, expr2: expr2] do
      [lower, upper] =
        for bound <- [expr1, expr2] do
          case bound do
            codepoint when is_integer(codepoint) -> List.to_string([codepoint])
            other -> other
          end
        end

      {:re_expr, "[#{lower}-#{upper}]"}
    end
  end
end
@doc """
Match the pattern repeated any number of times, including none.

The pattern is grouped with `Re.group/1` before the quantifier is appended.

PCRE: `X*`
"""
@spec zero_or_more(any) :: re_ast()
defmacro zero_or_more(expr) do
  pattern = Macro.expand(expr, __ENV__)

  eager [pattern] do
    quote generated: true do
      require Re
      {:re_group, grouped} = unquote(pattern) |> Re.group()
      {:re_group, "#{grouped}*"}
    end
  end
end
@doc """
Match the pattern repeated at least once.

The pattern is grouped with `Re.group/1` before the quantifier is appended.

PCRE: `X+`

## Examples

    iex> "a" =~ "a" |> Re.text |> Re.one_or_more |> Re.compile()
    true
    iex> "aaa" =~ "a" |> Re.text |> Re.one_or_more |> Re.compile()
    true
    iex> "b" =~ "a" |> Re.text |> Re.one_or_more |> Re.compile()
    false
    iex> "" =~ "a" |> Re.text |> Re.one_or_more |> Re.compile()
    false
"""
@spec one_or_more(any) :: re_ast()
defmacro one_or_more(expr) do
  pattern = Macro.expand(expr, __ENV__)

  eager [pattern] do
    quote generated: true do
      require Re
      {:re_group, grouped} = unquote(pattern) |> Re.group()
      {:re_group, "#{grouped}+"}
    end
  end
end
@doc """
Match the pattern once or not at all.

The pattern is grouped with `Re.group/1` before the quantifier is appended.

PCRE: `X?`
"""
@spec optional(any) :: re_ast()
defmacro optional(expr) do
  pattern = Macro.expand(expr, __ENV__)

  eager [pattern] do
    quote generated: true do
      require Re
      {:re_group, grouped} = unquote(pattern) |> Re.group()
      {:re_group, "#{grouped}?"}
    end
  end
end
@doc """
Match the pattern repeated exactly `n` times.

The pattern is grouped with `Re.group/1` before the quantifier is appended.

PCRE: `X{N}`

## Examples

    iex> rex = Re.text("ab") |> Re.repeated(2) |> Re.compile
    ~r/(?:ab){2}/
    iex> "ab" =~ rex
    false
    iex> "abab" =~ rex
    true
"""
@spec repeated(re_ast() | String.t(), integer()) :: re_ast()
defmacro repeated(expr, n) do
  pattern = Macro.expand(expr, __ENV__)
  times = Macro.expand(n, __ENV__)

  eager [pattern, times] do
    quote generated: true do
      require Re
      {:re_group, grouped} = Re.group(unquote(pattern))
      count = unquote(times)
      {:re_group, "#{grouped}{#{count}}"}
    end
  end
end
@doc """
Match the pattern repeated between `at_least` and `at_most` times.

The pattern is grouped with `Re.group/1` before the quantifier is appended.

PCRE: `X{N,M}`
"""
@spec repeated(any, any, any) :: re_ast()
defmacro repeated(expr, at_least, at_most) do
  pattern = Macro.expand(expr, __ENV__)
  lower = Macro.expand(at_least, __ENV__)
  upper = Macro.expand(at_most, __ENV__)

  eager [pattern, lower, upper] do
    quote generated: true do
      require Re
      {:re_group, grouped} = Re.group(unquote(pattern))
      min = unquote(lower)
      max = unquote(upper)
      {:re_group, "#{grouped}{#{min},#{max}}"}
    end
  end
end
@doc """
Wrap the pattern into a capturing group.

https://hexdocs.pm/elixir/1.13/Regex.html#module-captures

PCRE: `(X)`

## Examples

    iex> rex = Re.sequence([Re.text(?a), Re.capture(Re.Chars.any_digit)]) |> Re.compile
    ~r/a(\\d)/
    iex> Regex.run(rex, "a1", capture: :all_but_first)
    ["1"]
"""
@spec capture(any) :: re_ast()
defmacro capture(expr) do
  pattern = Macro.expand(expr, __ENV__)

  eager [pattern] do
    quote generated: true do
      require Re
      source = Re.to_string(unquote(pattern))
      {:re_group, "(#{source})"}
    end
  end
end
@doc """
Wrap the pattern into a capturing group with the given name.

https://hexdocs.pm/elixir/1.13/Regex.html#module-captures

PCRE: `(?P<N>X)`

## Examples

    iex> rex = Re.sequence([Re.text(?a), Re.capture(Re.Chars.any_digit, "number")]) |> Re.compile
    ~r/a(?P<number>\\d)/
    iex> Regex.named_captures(rex, "a1")
    %{"number" => "1"}
"""
@spec capture(any, any) :: re_ast()
defmacro capture(expr, name) do
  pattern = Macro.expand(expr, __ENV__)
  label = Macro.expand(name, __ENV__)

  eager [pattern, label] do
    quote generated: true do
      require Re
      source = Re.to_string(unquote(pattern))
      group_name = unquote(label)
      {:re_group, "(?P<#{group_name}>#{source})"}
    end
  end
end
@doc """
Make the pattern "ungreedy" (lazy).

By default, all patterns are greedy and try to match as much as possible.
This function reverts this behavior for the given pattern,
making it match as little as possible.

The argument must be an already grouped pattern (for example the result of
`Re.one_or_more/1`); a `?` is appended to it, so `X+` becomes `X+?`.

PCRE: `X?`

## Examples

    iex> rex = Re.sequence([
    ...> Re.text(?a),
    ...> Re.Chars.any_digit |> Re.one_or_more() |> Re.capture
    ...> ]) |> Re.compile()
    ~r/a(\\d+)/
    iex> Regex.run(rex, "a111", capture: :all_but_first)
    ["111"]
    iex> rex = Re.sequence([
    ...> Re.text(?a),
    ...> Re.Chars.any_digit |> Re.one_or_more() |> Re.lazy |> Re.capture
    ...> ]) |> Re.compile()
    ~r/a(\\d+?)/
    iex> Regex.run(rex, "a111", capture: :all_but_first)
    ["1"]
"""
@spec lazy(re_ast()) :: re_ast()
defmacro lazy(expr) do
  quantified = Macro.expand(expr, __ENV__)

  eager [quantified] do
    quote generated: true do
      require Re
      {:re_group, pattern} = unquote(quantified)
      {:re_group, "#{pattern}?"}
    end
  end
end
end