lib/glob_ex.ex

defmodule GlobEx do
  @moduledoc """
  Provides glob expressions for Elixir.

  A glob expression looks like an ordinary path, except that the following
  "wildcard characters" are interpreted in a special way:

    * `?` - matches one character.
    * `*` - matches any number of characters up to the end of the filename, the
      next dot, or the next slash.
    * `**` - two adjacent `*`'s used as a single pattern will match all
      files and zero or more directories and subdirectories.
    * `[char1,char2,...]` - matches any of the characters listed; two
      characters separated by a hyphen will match a range of characters.
      Do not add spaces before and after the comma as it would then match
      paths containing the space character itself.
    * `{item1,item2,...}` - matches one of the alternatives.
      Do not add spaces before and after the comma as it would then match
      paths containing the space character itself.

  Other characters represent themselves. Only paths that have
  exactly the same character in the same position will match. Note
  that matching is case-sensitive: `"a"` will not match `"A"`.

  Directory separators must always be written as `/`, even on Windows.
  You may call `Path.expand/1` to normalize the path before invoking
  this function.

  A character preceded by \ loses its special meaning.
  Note that \ must be written as \\ in a string literal.
  For example, "\\?*" will match any filename starting with ?.

  Glob expressions in can be created using `compile/2`, `compile!/2` or the
  sigils ~g (see `GlobEx.Sigils.sigil_g/2`) or ~G (see
  `GlobEx.Sigils.sigil_G/2`).

  ```elixir
  # A simple glob expressions matching all `.exs` files in a tree. By default,
  # the patterns `*` and `?` do not match files starting with a `.`.
  ~g|**/*.exs|

  # The modifier `d` let the glob treat files starting with a `.` as any other
  # file.
  ~g|**/*.exs|d
  ```
  """

  alias GlobEx.CompileError
  alias GlobEx.Compiler

  defstruct source: "", match_dot: false, compiled: []

  @type t :: %__MODULE__{source: binary(), match_dot: boolean(), compiled: [term()]}

  @cwd '.'
  @root '/'

  @doc """
  Compiles the glob expression and raises GlobEx.CompileError in case of errors.

  See the module documentation for how to write a glob expression and
  `compile/2` for the available options.
  """
  @spec compile!(binary(), keyword()) :: t()
  def compile!(glob, opts \\ []) do
    case compile(glob, opts) do
      {:ok, glob} -> glob
      {:error, exception} -> raise exception
    end
  end

  @doc """
  Compiles the glob expression.

  It returns `{:ok, regex}` in case of success, `{:error, reason}` otherwise.

  See the module documentation for how to write a glob expression.

  By default, the patterns `*` and `?` do not match files starting with a `.`.
  This behaviour can be change with the option `:match_dot`.

  ## Options

    * `:match_dot` - (boolean) if `false`, the special wildcard characters `*` and `?`
      will not match files starting with a `.`. If `true`, files starting with
      a `.` will not be treated specially. Defaults to `false`.

  ## Examples

      iex> GlobEx.compile("src/**/?oo.ex")
      {:ok, ~g|src/**/?oo.ex|}

      iex> GlobEx.compile("src/{a,b/?oo.ex")
      {:error, %GlobEx.CompileError{reason: {:missing_delimiter, 5}}}
  """
  @spec compile(binary(), keyword()) :: {:ok, t()} | {:error, CompileError.t()}
  def compile(glob, opts \\ []) do
    match_dot = Keyword.get(opts, :match_dot, false)

    case Compiler.compile(glob) do
      {:ok, compiled} ->
        {:ok, %GlobEx{source: glob, compiled: compiled, match_dot: match_dot}}

      {:error, reason} ->
        {:error, %CompileError{reason: reason}}
    end
  end

  @doc """
  Traverses paths according to the given `glob` expression and returns a list of
  matches.

  See the module documentation for how to write a glob expression.

  ## Examples

      iex> GlobEx.ls(~g|{lib,test}/**/*.{ex,exs}|)
      [
        "lib/glob_ex.ex",
        "lib/glob_ex/compiler.ex",
        "lib/glob_ex/compiler_error.ex",
        "lib/glob_ex/sigils.ex",
        "test/glob_ex/compiler_test.exs",
        "test/glob_ex_test.exs",
        "test/test_helper.exs"
      ]
  """
  @spec ls(t()) :: [Path.t()]
  def ls(%GlobEx{compiled: compiled, match_dot: match_dot}) do
    compiled
    |> list(match_dot)
    |> Enum.map(&IO.chardata_to_string/1)
    |> Enum.sort()
  end

  @doc """
  Returns a boolean indicating whether there was a match or not.

  See the module documentation for how to write a glob expression.

  ## Examples

      iex> GlobEx.match?(~g|{lib,test}/**/*.{ex,exs}|, "lib/foo/bar.ex")
      true

      iex> GlobEx.match?(~g|{lib,test}/**/*.{ex,exs}|, "lib/foo/bar.java")
      false
  """
  @spec match?(t(), Path.t()) :: boolean()
  def match?(%GlobEx{compiled: compiled, match_dot: match_dot}, path) do
    path = split(path, [[]])

    with {compiled, path} <- exact(compiled, path) do
      match?(compiled, match_dot, path)
    end
  end

  defp match?([{:exact, '..'} | glob], match_dot, [comp | path]) do
    if '..' == comp, do: match?(glob, match_dot, path), else: false
  end

  defp match?([:double_star], _match_dot, []) do
    true
  end

  defp match?([:double_star], true, _path) do
    true
  end

  defp match?([:double_star] = glob, false, [comp | path]) do
    if hd(comp) == ?., do: false, else: match?(glob, false, path)
  end

  defp match?([:double_star, pattern | rest] = glob, match_dot, [comp | path]) do
    with true <- match_dot?(comp, match_dot) do
      case match_comp?(comp, pattern) do
        true -> match?(rest, match_dot, path) or match?(glob, match_dot, path)
        false -> match?(glob, match_dot, path)
      end
    end
  end

  defp match?([], _match_dot, []) do
    true
  end

  defp match?([], _match_dot, _path) do
    false
  end

  defp match?(_glob, _match_dot, []) do
    false
  end

  defp match?([pattern | glob], match_dot, [comp | path]) do
    with true <- match_comp?(comp, match_dot, pattern) do
      match?(glob, match_dot, path)
    end
  end

  defp exact([{:exact, exact} | glob], [comp | path]) do
    if exact == comp, do: exact(glob, path), else: false
  end

  defp exact([], []), do: true

  defp exact(glob, path), do: {glob, path}

  defp list([:root | glob], match_dot), do: list(glob, match_dot, [@root])

  defp list([{:root, vol} | glob], match_dot) do
    list(glob, match_dot, [vol])
  end

  defp list(glob, match_dot), do: list(glob, match_dot, [@cwd])

  defp list(_glob, _match_dot, []), do: []

  defp list([], _match_dot, result), do: result

  defp list([{:exact, _file} | _glob] = glob, match_dot, [match]) when match in [@cwd, @root] do
    list_exact(glob, match_dot, match)
  end

  defp list([{:exact, _file} | _glob] = glob, match_dot, [[_vol | ':/'] = match]) do
    list_exact(glob, match_dot, match)
  end

  defp list([:double_star], match_dot, matches) do
    trees(matches, match_dot)
  end

  defp list([:double_star, :star | glob], match_dot, matches) do
    list([:double_star | glob], match_dot, matches)
  end

  defp list([:double_star, next | glob], match_dot, matches) do
    matches = double_star_matches(matches, match_dot, next, [])
    list(glob, match_dot, matches)
  end

  defp list([{:exact, '..'} | glob], match_dot, matches) do
    matches =
      Enum.reduce(matches, [], fn match, acc ->
        case :filelib.is_dir(match) do
          true -> [join(match, '..') | acc]
          false -> acc
        end
      end)

    list(glob, match_dot, matches)
  end

  defp list([pattern | glob], match_dot, matches) do
    matches = matches(matches, match_dot, pattern)
    list(glob, match_dot, matches)
  end

  defp list_exact([{:exact, file} | glob], match_dot, dir) do
    {path, glob} = path(file, glob)

    path =
      case dir do
        @cwd -> path
        @root -> '/' ++ path
        vol -> vol ++ path
      end

    case file_exists?(path) do
      true -> list(glob, match_dot, [path])
      false -> []
    end
  end

  defp path(path, [{:exact, file} | glob]) do
    path(:filename.join(path, file), glob)
  end

  defp path(path, glob), do: {path, glob}

  defp matches(matches, match_dot, pattern) do
    Enum.reduce(matches, [], fn match, acc ->
      match
      |> list_dir()
      |> Enum.reduce([], fn comp, acc ->
        match = if match == '/', do: '', else: match

        case match_comp?(comp, match_dot, pattern) do
          true -> [join(match, comp) | acc]
          false -> acc
        end
      end)
      |> Enum.concat(acc)
    end)
  end

  defp match_comp?([?. | _rest], false, _pattern) do
    false
  end

  defp match_comp?(comp, _match_dot, pattern) do
    match_comp?(comp, pattern)
  end

  defp match_comp?(comp, {:exact, exact}) do
    exact == comp
  end

  defp match_comp?(_comp, :star) do
    true
  end

  defp match_comp?(comp, {:seq, seq}) do
    seq?(seq, comp)
  end

  defp match_comp?(comp, {:ends_with, ends_with}) do
    ends_with?(comp, ends_with)
  end

  defp match_comp?(comp, {:alt, alt}) do
    Enum.any?(alt, fn
      :empty -> true
      [pattern] -> match_comp?(comp, pattern)
    end)
  end

  defp match_comp?([comp], {:one_of, one_of}) do
    comp in one_of
  end

  defp match_comp?(comp, :root) do
    comp == []
  end

  defp match_comp?(_comp, {:one_of, _one_of}) do
    false
  end

  defp match_dot?([?. | _charlist], false) do
    false
  end

  defp match_dot?(_charlist, _match_dot) do
    true
  end

  defp join(@cwd, right) do
    right
  end

  defp join(left, right) do
    left ++ '/' ++ right
  end

  defp seq?([:star], _file) do
    true
  end

  defp seq?([x | seq], [x | file]) do
    seq?(seq, file)
  end

  defp seq?([:question | seq], [_x | file]) do
    seq?(seq, file)
  end

  defp seq?([], []) do
    true
  end

  defp seq?([_ | _], []) do
    false
  end

  defp seq?([:star, next | seq], [next | file]) do
    seq?(seq, file)
  end

  defp seq?([:star, next | _rest] = seq, [_char | file]) when is_integer(next) do
    seq?(seq, file)
  end

  defp seq?([:star | seq], file) do
    with false <- seq?(seq, file) do
      seq?([:star | seq], tl(file))
    end
  end

  defp seq?([{:alt, _alt} = pattern], file) do
    match_comp?(file, pattern)
  end

  defp seq?([{:one_of, one_of} | seq], [char | file]) do
    with true <- char in one_of do
      seq?(seq, file)
    end
  end

  defp seq?(_seq, _file) do
    false
  end

  defp ends_with?(a, b), do: a |> Enum.reverse() |> do_ends_with?(b)

  defp do_ends_with?(_a, []), do: true
  defp do_ends_with?([x | as], [x | bs]), do: do_ends_with?(as, bs)
  defp do_ends_with?(_a, _b), do: false

  defp list_dir(cwd) do
    case :file.list_dir(cwd) do
      {:ok, files} -> files
      {:error, _reason} -> []
    end
  end

  defp trees(dirs, match_dot) do
    trees(dirs, match_dot, [])
  end

  defp trees([], _match_dot, acc) do
    acc
  end

  defp trees(dirs, match_dot, acc) do
    dirs = dirs(dirs, match_dot)
    trees(dirs, match_dot, dirs ++ acc)
  end

  defp dirs(dirs, match_dot) do
    Enum.reduce(dirs, [], fn dir, acc ->
      case list_dir(dir) do
        [] -> acc
        list -> sub_dirs(list, dir, match_dot, acc)
      end
    end)
  end

  defp sub_dirs(sub_dirs, dir, match_dot, acc) do
    sub_dirs
    |> Enum.reduce([], fn sub_dir, sub_dirs ->
      case match_dot?(sub_dir, match_dot) do
        true -> [join(dir, sub_dir) | sub_dirs]
        false -> sub_dirs
      end
    end)
    |> Enum.concat(acc)
  end

  defp double_star_matches([], _match_dot, _pattern, acc), do: acc

  defp double_star_matches(matches, match_dot, pattern, acc) do
    {matches, acc} =
      Enum.reduce(matches, {[], acc}, fn match, {matches, acc} ->
        match
        |> list_dir()
        |> Enum.reduce({matches, acc}, fn item, {matches, acc} ->
          double_star_match(item, match_dot, pattern, match, matches, acc)
        end)
      end)

    double_star_matches(matches, match_dot, pattern, acc)
  end

  defp double_star_match([?. | _rest], false, _pattern, _match, matches, acc) do
    {matches, acc}
  end

  defp double_star_match(item, _match_dot, pattern, match, matches, acc) do
    path = join(match, item)

    case match_comp?(item, pattern) do
      true -> {[path | matches], [path | acc]}
      false -> {[path | matches], acc}
    end
  end

  defp split(<<>>, [head | tail]) do
    Enum.reverse([Enum.reverse(head) | tail])
  end

  defp split(<<?/>>, [head | tail]) do
    Enum.reverse([Enum.reverse(head) | tail])
  end

  defp split(<<?/, rest::binary>>, [head | tail]) do
    split(rest, [[], Enum.reverse(head) | tail])
  end

  defp split(<<char::utf8, rest::binary>>, [comp | acc]) do
    split(rest, [[char | comp] | acc])
  end

  defp file_exists?(file) do
    # On Windows, "pathname/.." will seem to exist even if pathname does not
    # refer to a directory.
    case :os.type() do
      {:win32, _} -> do_file_exists?(file)
      _else -> File.exists?(file)
    end
  end

  defp do_file_exists?([vol, ?:, ?/ | file]) do
    file
    |> IO.chardata_to_string()
    |> do_file_exists?([vol | ':/'])
  end

  defp do_file_exists?(file) do
    file
    |> IO.chardata_to_string()
    |> do_file_exists?(@cwd)
  end

  defp do_file_exists?(file, dir) when is_binary(file) do
    file
    |> split([[]])
    |> do_file_exists?(dir)
  end

  defp do_file_exists?([comp, '..' | path], acc) do
    dir = :filename.join(acc, comp)

    case :filelib.is_dir(dir) do
      true -> do_file_exists?(path, :filename.join(dir, '..'))
      false -> false
    end
  end

  defp do_file_exists?([comp | path], acc) do
    acc = :filename.join(acc, comp)
    do_file_exists?(path, acc)
  end

  defp do_file_exists?([], acc), do: File.exists?(acc)

  @doc false
  # Unescape map function used by Macro.unescape_string.
  def unescape_map(:newline), do: true
  def unescape_map(?f), do: ?\f
  def unescape_map(?n), do: ?\n
  def unescape_map(?r), do: ?\r
  def unescape_map(?t), do: ?\t
  def unescape_map(?v), do: ?\v
  def unescape_map(?a), do: ?\a
  def unescape_map(_), do: false

  defimpl Inspect do
    def inspect(%{source: source, match_dot: match_dot}, _opts) do
      modifier = if match_dot, do: "d", else: ""

      {escaped, _} =
        source
        |> normalize(<<>>)
        |> Code.Identifier.escape(?|, :infinity, &escape_map/1)

      IO.iodata_to_binary([?~, ?g, ?|, escaped, ?|, modifier])
    end

    defp normalize(<<?\\, ?\\, rest::binary>>, acc),
      do: normalize(rest, <<acc::binary, ?\\, ?\\>>)

    defp normalize(<<?\\, ?|, rest::binary>>, acc), do: normalize(rest, <<acc::binary, ?|>>)
    defp normalize(<<char, rest::binary>>, acc), do: normalize(rest, <<acc::binary, char>>)
    defp normalize(<<>>, acc), do: acc

    defp escape_map(?\a), do: [?\\, ?a]
    defp escape_map(?\f), do: [?\\, ?f]
    defp escape_map(?\n), do: [?\\, ?n]
    defp escape_map(?\r), do: [?\\, ?r]
    defp escape_map(?\t), do: [?\\, ?t]
    defp escape_map(?\v), do: [?\\, ?v]
    defp escape_map(_), do: false
  end
end