# lib/xfile.ex

defmodule Xfile do
  @moduledoc """
  `Xfile` contains augmentations of the built-in `File` module, including the
  support of streams, the recursive listing of files, counting lines, grep, and
  programmatic filtering.
  """

  @doc """
  Like the venerable command-line utility, `grep` searches the lines of the given
  file using the given pattern, returning only the matching lines as a stream.

  The given pattern can be one of the following:

  - an arity 1 function which receives a line and returns a boolean; `true`
    indicates a match.
  - a string
  - a list of strings
  - a regular expression

  Strings and lists of strings are matched using `String.contains?/2` (see that
  function for viable inputs); regular expressions are matched using `Regex.match?/2`.

  > #### Stream {: .info}
  >
  > `Xfile.grep/2` returns its result as a `Stream`, so you must remember to convert
  > it to a list via `Enum.to_list/1` if you are not lazily evaluating its result.

  ## Examples

      iex> Xfile.grep(~r/needle/, "path/to/file")
      #Function<59.58486609/2 in Stream.transform/3>

      iex> Xfile.grep("dir", ".gitignore") |> Enum.to_list()
      ["# The directory Mix will write compiled artifacts to.\\n",
      "# The directory Mix downloads your dependencies sources to.\\n"]

  Using a function to evaluate file lines:

      iex> f = fn line ->
        [serial_number | _] = String.split(line, ",")
        String.to_integer(serial_number) > 214
      end
      iex> Xfile.grep(f, "store/products.csv") |> Enum.to_list()
      ["215,Sprocket,9.99\\n", "216,Gear,5.00\\n", ...]
  """
  @doc since: "0.2.0"
  @spec grep(
          pattern :: String.pattern() | Regex.t() | (String.t() -> boolean()),
          file :: Path.t()
        ) :: Enumerable.t()
  def grep(pattern, file) do
    file
    |> File.stream!()
    |> Stream.filter(line_matcher(pattern))
  end

  # Builds the per-line predicate used by `grep/2` for each supported pattern type.
  defp line_matcher(predicate) when is_function(predicate, 1), do: predicate
  defp line_matcher(%Regex{} = regex), do: &Regex.match?(regex, &1)
  # Anything else is handed to `String.contains?/2`, which validates it lazily.
  defp line_matcher(pattern), do: &String.contains?(&1, pattern)

  @doc """
  This function mimics the functionality of `grep -rl`: it recursively searches
  all files in the given path, returning only a list of file names (i.e. paths)
  whose contents have one or more lines that match the pattern.

  ## Pattern

  The given pattern can be one of the following:

  - an arity 1 function which returns a boolean; `true` indicates a match.
  - a string
  - a list of strings
  - a regular expression

  Internally, this relies on `grep/2`. Each file is read only up to its first
  matching line.

  > #### Stream {: .info}
  >
  > `Xfile.grep_rl/3` returns its result as a `Stream`, so you must
  > remember to convert it to a list via `Enum.to_list/1` if you are not lazily
  > evaluating its result.

  ## Options

  Options are the same as those supported by `ls/2`. Use them to control which files
  are subjected to the pattern matching.

  The files are listed via `ls!/2`, so this function raises if the given path is
  not a directory.

  ## Examples

      iex> Xfile.grep_rl("[error]", "tmp/logs", recursive: false) |> Enum.to_list()
      [
        "tmp/logs/server.1.log",
        "tmp/logs/cache.log",
        "tmp/logs/server.2.log"
      ]

  ## See Also

  - `grep/2` for searching a single file and returning the matching lines
  - `ls/2` using the `:filter` option to evaluate only the _names_ of the files.
  """
  @doc since: "0.2.0"
  @spec grep_rl(
          pattern :: String.pattern() | Regex.t() | (String.t() -> boolean()),
          path :: Path.t(),
          opts :: Keyword.t()
        ) :: Enumerable.t()
  def grep_rl(pattern, path, opts \\ []) do
    path
    |> ls!(opts)
    |> Stream.filter(fn file ->
      # One matching line is enough: `Enum.empty?/1` halts the stream at the first
      # match instead of counting every matching line in the file.
      not (pattern |> grep(file) |> Enum.empty?())
    end)
  end

  @doc """
  Displays the first `n` lines of the file, returned as an enumerable stream.

  If the file has fewer than `n` lines, all of its lines are returned. When `n` is
  `0`, the result is empty.

  ## Examples

      iex> Xfile.head(".gitignore", 3) |> Enum.to_list()
      [
        "# The directory Mix will write compiled artifacts to.\\n",
        "/_build/\\n",
        "\\n"
      ]
  """
  @doc since: "0.3.0"
  @spec head(file :: Path.t(), n :: non_neg_integer()) :: Enumerable.t()
  def head(file, n) when is_binary(file) and is_integer(n) and n >= 0 do
    # `Stream.take/2` halts as soon as `n` lines have been emitted.
    file
    |> File.stream!()
    |> Stream.take(n)
  end

  @doc """
  Counts the number of lines in the given file, offering functionality similar to `wc -l`.
  Directories are not allowed. This is just some sugar around `File.stream!/1`.

  Returns `{:ok, count}` on success. Returns `{:error, "Invalid input"}` when the
  given path is a directory, and `{:error, message}` when the file cannot be
  streamed (for example, because it does not exist).

  > #### Newlines {: .info}
  >
  > Unlike `wc -l`, which counts newline characters, this function counts the lines
  > yielded by `File.stream!/1`. A final line that is not terminated with a newline
  > is still counted, so the result may be one higher than what `wc -l` reports.

  ## Examples

      iex> Xfile.line_count(".gitignore")
      {:ok, 27}
      iex> Xfile.line_count("/tmp")
      {:error, "Invalid input"}
  """
  @doc since: "0.2.0"
  @spec line_count(file :: Path.t()) :: {:ok, non_neg_integer()} | {:error, any()}
  def line_count(file) when is_binary(file) do
    if File.dir?(file) do
      {:error, "Invalid input"}
    else
      {:ok, file |> File.stream!() |> Enum.count()}
    end
  rescue
    # The non-bang variant reports unreadable or missing files as an error tuple
    # instead of letting the `File.Error` from streaming escape to the caller.
    error in File.Error -> {:error, Exception.message(error)}
  end

  @doc """
  Bang variant of `Xfile.line_count/1`: returns the bare line count on success and
  raises instead of returning an `:error` tuple.

  ## Examples

      iex> Xfile.line_count!(".gitignore")
      27
  """
  @doc since: "0.2.0"
  @spec line_count!(file :: Path.t()) :: non_neg_integer() | none()
  def line_count!(file) when is_binary(file) do
    lines = File.stream!(file)
    Enum.count(lines)
  end

  @doc """
  Returns the list of _files_ in the given directory with the ability to control
  listing files recursively and filtering results programmatically.

  On success the result is `{:ok, stream}`. An `{:error, message}` tuple is returned
  when the given path is not a directory or when the directory cannot be listed.

  > #### Stream {: .info}
  >
  > Unlike `File.ls/1`, `Xfile.ls/2` returns its result as a `Stream`, so you must
  > remember to convert it to a list via `Enum.to_list/1` if you are not lazily
  > evaluating its result.

  ## Differences from `File.ls/1`

  - `Xfile.ls/2` returns results as a `Stream`
  - `Xfile.ls/2` returns full paths (relative or absolute) instead of just basenames.

  ## Options

  - `:recursive` indicates whether the directory and its subdirectories should be
    recursively searched. This can be expressed either as a simple boolean or as a
    positive integer indicating the maximum depth (where `false` is equivalent to `0`
    and would list only the contents of the given directory). Default: `true`

  - `:filter` can be either a regular expression to be used with `String.match?/2`,
    a string or a list of strings to be used with `String.contains?/2`, OR an
    arity 1 function that receives the full file path and returns a boolean value.
    If the filter operation returns `true`, the file will be included in the
    output. Any other output will cause the file to be filtered from the output. Optional.

  - `:show_dirs?` boolean. When listing the contents of a directory that contains
    sub-directories _and_ `:recursive` option is not `true`, this boolean controls
    whether or not the sub-directories should be included in the output (provided
    they pass any defined `:filter`). This option is ignored when `:recursive` is
    `true`. Setting this option to `true` will yield results closer to what `File.ls/1`
    returns. Default: `false`.

  ## Examples

  Use a regular expression to return only `.txt` files:

      iex> {:ok, stream} = Xfile.ls("path/to/files", filter: ~r/\\.txt$/)
      {:ok, #Function<59.58486609/2 in Stream.transform/3>}
      iex> Enum.to_list(stream)
      [
        "path/to/files/a.txt",
        "path/to/files/b.txt",
        "path/to/files/subdir/c.txt"
      ]

  Use a function to apply more complex logic to filter the results:

      iex> {:ok, stream} = Xfile.ls("mydir", filter: fn x ->
        stat = File.stat!(x)
        stat.size > 1024
      end)
      {:ok, #Function<59.58486609/2 in Stream.transform/3>}
      iex> Enum.to_list(stream)
      [
        "mydir/big-file",
        "mydir/big-file2",
        # ...
      ]

  Limit the depth of the recursion to the given directory and its subdirectories,
  but no further:

      iex> {:ok, stream} = Xfile.ls("top/dir", recursive: 1)
      {:ok, #Function<59.58486609/2 in Stream.transform/3>}
      iex> Enum.to_list(stream)
      [
        "top/dir/a",
        "top/dir/b",
        # ...
        "top/dir/sub1/x",
        "top/dir/sub1/y"
      ]

  """
  @spec ls(directory :: Path.t(), opts :: Keyword.t()) :: {:ok, Enumerable.t()} | {:error, any()}
  def ls(directory, opts \\ []) when is_binary(directory) do
    # `recursive: false` means "do not descend", i.e. a maximum depth of 0; `true`
    # and integer depths are passed through unchanged.
    max_depth =
      case Keyword.get(opts, :recursive, true) do
        false -> 0
        other -> other
      end

    if File.dir?(directory) do
      case File.ls(directory) do
        {:ok, _entries} = listing ->
          {:ok, traverse(listing, directory, opts, 0, max_depth)}

        {:error, reason} ->
          # A directory that exists but cannot be listed must not be handed on as an
          # error result, because the traversal treats an error result as "this path
          # is a file" and would report the directory itself as a file.
          {:error, "#{directory} could not be listed: #{:file.format_error(reason)}"}
      end
    else
      {:error, "#{directory} is not a directory"}
    end
  end

  @doc """
  Bang variant of `Xfile.ls/2`: returns the bare stream on success and raises with
  the error reason when `Xfile.ls/2` returns an `:error` tuple.
  """
  @spec ls!(directory :: Path.t(), opts :: Keyword.t()) :: Enumerable.t() | none()
  def ls!(directory, opts \\ []) when is_binary(directory) do
    directory
    |> ls(opts)
    |> case do
      {:error, reason} -> raise reason
      {:ok, stream} -> stream
    end
  end

  @doc """
  Displays the last `n` lines of the file, returned as an enumerable stream.

  If the file has fewer than `n` lines, all of its lines are returned. When `n` is
  `0`, the result is empty.

  The file's lines are counted (via `line_count!/1`) when this function is called,
  so the file is read once for counting and again when the returned stream is
  evaluated.

  ## Examples

      iex> Xfile.tail(".gitignore", 3) |> Enum.to_list()
      [
        "\\n",
        "# Temporary files for e.g. tests\\n",
        "/tmp\\n"
      ]
  """
  @doc since: "0.3.0"
  @spec tail(file :: Path.t(), n :: non_neg_integer()) :: Enumerable.t()
  def tail(file, n) when is_binary(file) and is_integer(n) and n >= 0 do
    # Number of leading lines to skip. Clamped at zero so a file shorter than `n`
    # is returned whole; a negative count would make `Stream.drop/2` drop lines
    # from the end instead.
    skip = max(line_count!(file) - n, 0)

    file
    |> File.stream!()
    |> Stream.drop(skip)
  end

  # `traverse/5` receives the result of `File.ls/1`, which doubles as a directory
  # check in the manner of `File.dir?/1`.
  # If the result is `:ok`, we proceed deeper into the directory structure.
  # If the result is an `:error`, then the path being evaluated is accumulated as a file.
  #
  # This clause handles a listable directory that is still above the maximum depth
  # (or any depth when `max_depth` is `true`): every entry is traversed in turn.
  defp traverse({:ok, files}, path, opts, current_depth, max_depth)
       when max_depth == true or current_depth < max_depth do
    Stream.flat_map(files, fn f ->
      entry = "#{path}/#{f}"
      entry |> File.ls() |> traverse(entry, opts, current_depth + 1, max_depth)
    end)
  end

  # At max depth: the directory's entries are listed without descending any further.
  defp traverse({:ok, files}, path, opts, _current_depth, _max_depth) do
    show_dirs? = Keyword.get(opts, :show_dirs?, false)
    filter = Keyword.get(opts, :filter)

    Stream.flat_map(files, fn f ->
      entry = "#{path}/#{f}"

      # Sub-directories are only candidates for the output when `:show_dirs?` is set.
      candidate? = !File.dir?(entry) || show_dirs?

      # Only a literal `true` from the filter includes the entry, so that filtering
      # behaves the same here as in the `{:error, _}` clause below.
      if candidate? && filter_file(entry, filter) == true, do: [entry], else: []
    end)
  end

  # `File.ls/1` failed, so the path is treated as a file and subjected to the filter.
  defp traverse({:error, _}, file, opts, _, _) do
    case filter_file(file, Keyword.get(opts, :filter)) do
      true -> [file]
      _ -> []
    end
  end

  # Applies the optional `:filter` to a path and returns the filter's verdict.
  # With no filter (`nil`) every path passes. An arity 1 function is called with the
  # path, a regular expression goes through `String.match?/2`, and anything else is
  # passed to `String.contains?/2`.
  defp filter_file(file, filter) do
    case filter do
      nil -> true
      callback when is_function(callback, 1) -> callback.(file)
      %Regex{} = regex -> String.match?(file, regex)
      substrings -> String.contains?(file, substrings)
    end
  end
end