lib/git_module.ex

# Copyright (C) 2020 by the Georgia Tech Research Institute (GTRI)
# This software may be modified and distributed under the terms of
# the BSD 3-Clause license. See the LICENSE file for details.

defmodule GitModule do
  @moduledoc """
  Collection of functions that query a repository through the `git` command.
  """
  require Logger

  @doc """
  clone_repo/2: clones the repository at `url` into a sub-directory of `tmp_path`.

  The sub-directory is named after the last element produced by
  `Helpers.split_slug/1` for the slug that `Helpers.get_slug/1` derives from
  `url`. Once cloned, `Git.log/1` is run against the new checkout as a sanity
  check.

  Returns `{:ok, repo}` with the handle produced by `Git.clone/1`, or
  `{:error, "Repository not found"}` when any step (slug parsing, clone, log)
  does not succeed.
  """
  @spec clone_repo(String.t(), String.t()) ::
          {:ok, Git.Repository.t()} | {:error, String.t()}
  def clone_repo(url, tmp_path) do
    # Slug parsing lives inside the `with` so that an unparsable URL yields the
    # documented error tuple instead of raising a MatchError.
    with {:ok, slug} <- Helpers.get_slug(url),
         {:ok, _, repo_name} <- Helpers.split_slug(slug),
         {:ok, repo} <- Git.clone([url, Path.join(tmp_path, repo_name)]),
         {:ok, _log} <- Git.log(repo) do
      {:ok, repo}
    else
      _error -> {:error, "Repository not found"}
    end
  end

  @doc """
  get_repo/1: builds a repository handle for `path` with `Git.new/1` and
  checks it by running `Git.status/1`.

  Returns `{:ok, repo}` when the status call succeeds; an `{:error, reason}`
  tuple from `Git.status/1` is handed back unchanged.
  """
  @spec get_repo(String.t()) :: {:ok, Git.Repository.t()} | {:error, String.t()}
  def get_repo(path) do
    repo = Git.new(path)

    with {:ok, _status} <- Git.status(repo) do
      {:ok, repo}
    else
      {:error, reason} -> {:error, reason}
    end
  end

  @doc """
  get_contributor_count/1: returns the number of contributors for
  a given Git repo.

  Runs `git shortlog -s -n HEAD --`, which prints one summary line per
  author, and counts the non-blank lines of that output.

  Returns `{:ok, count}`. Raises if the underlying `Git.shortlog!/2` call fails.
  """
  @spec get_contributor_count(Git.Repository.t()) :: {:ok, non_neg_integer}
  def get_contributor_count(repo) do
    count =
      repo
      |> Git.shortlog!(["-s", "-n", "HEAD", "--"])
      # Split on line breaks rather than on runs of whitespace: an author name
      # containing consecutive spaces must not be counted twice, and a commit
      # count wide enough to use up the left padding must not merge two lines.
      |> String.split("\n", trim: true)
      |> Enum.count(fn line -> String.trim(line) != "" end)

    {:ok, count}
  end

  @doc """
  get_last_commit_date/1: returns the date of the most recent commit.

  Asks `git log` for a single entry formatted with `%cI` (git's committer
  date in strict ISO 8601 form) and returns the last line of the filtered
  output as `{:ok, date}`.
  """
  @spec get_last_commit_date(Git.Repository.t()) :: {:ok, String.t()}
  def get_last_commit_date(repo) do
    newest =
      repo
      |> git_log_split(["-1", "--pretty=format:%cI"])
      |> List.last()

    {:ok, newest}
  end

  @doc """
  delete_repo/1: recursively removes the directory stored under `repo.path`.

  Delegates to `File.rm_rf!/1`, so the result is the list of removed files
  and directories, and an exception is raised if the removal fails.
  """
  @spec delete_repo(
          atom
          | %{
              :path =>
                binary
                | maybe_improper_list(
                    binary | maybe_improper_list(any, binary | []) | char,
                    binary | []
                  ),
              optional(any) => any
            }
        ) :: [binary]
  def delete_repo(repo) do
    target = repo.path
    File.rm_rf!(target)
  end

  @doc """
  get_hash/1: returns the hash that the repo's `HEAD` resolves to.

  The output of `git rev-parse HEAD` is stripped of surrounding whitespace
  and returned as `{:ok, hash}`. Raises if `Git.rev_parse!/2` fails.
  """
  @spec get_hash(Git.Repository.t()) :: {:ok, String.t()}
  def get_hash(repo) do
    head =
      repo
      |> Git.rev_parse!("HEAD")
      |> String.trim()

    {:ok, head}
  end

  @doc """
  get_default_branch/1: returns the default branch of the remote repo.

  Reads the symbolic ref `refs/remotes/origin/HEAD` and returns its trimmed
  target as `{:ok, ref}`. When git cannot resolve that ref (a `Git.Error` is
  raised), the placeholder `{:ok, "undeterminable, not at HEAD"}` is returned
  instead.
  """
  @spec get_default_branch(Git.Repository.t()) :: {:ok, String.t()}
  def get_default_branch(repo) do
    target = Git.symbolic_ref!(repo, "refs/remotes/origin/HEAD")
    {:ok, String.trim(target)}
  rescue
    _e in Git.Error -> {:ok, "undeterminable, not at HEAD"}
  end

  @doc """
  get_total_commit_count/1: returns the number of commits reachable from
  `refs/remotes/origin/HEAD`.

  Returns `{:ok, count}` with an integer count. When git cannot resolve the
  ref (a `Git.Error` is raised), the placeholder string
  `{:ok, "undeterminable, branch issue"}` is returned instead, so callers
  must be prepared for either an integer or a string in the second element.
  """
  @spec get_total_commit_count(Git.Repository.t()) :: {:ok, non_neg_integer | String.t()}
  def get_total_commit_count(repo) do
    count =
      repo
      |> Git.rev_list!(["--count", "refs/remotes/origin/HEAD"])
      |> String.trim_trailing()
      |> String.to_integer()

    {:ok, count}
  rescue
    _e in Git.Error -> {:ok, "undeterminable, branch issue"}
  end

  @doc """
  get_commit_dates/1: returns a list of unix timestamps representing commit times.

  Each line of `git log --pretty=format:%ct` (after warning lines are
  filtered out) is parsed as a base-10 integer. Returns `{:ok, timestamps}`
  in the order git printed them; raises if a line is not an integer.
  """
  @spec get_commit_dates(Git.Repository.t()) :: {:ok, [non_neg_integer]}
  def get_commit_dates(repo) do
    timestamps =
      for line <- git_log_split(repo, ["--pretty=format:%ct"]) do
        String.to_integer(line, 10)
      end

    {:ok, timestamps}
  end

  @doc """
  get_tag_and_commit_dates/1: returns a list of lists of unix timestamps
  representing commit times, with each list belonging to a different tag.

  Every commit is logged as `<ref decoration>$<unix timestamp>`. The
  decoration is stripped of surrounding whitespace and parentheses (it is the
  empty string for undecorated commits) and paired with the integer
  timestamp. The pairs are then grouped by `GitHelper.split_commits_by_tag/1`,
  whose result is returned as-is.
  """
  @spec get_tag_and_commit_dates(Git.Repository.t()) :: {:ok, [[...]]}
  def get_tag_and_commit_dates(repo) do
    repo
    |> git_log_split(["--pretty=format:%d$%ct"])
    |> Enum.map(fn line ->
      [decoration | rest] = String.split(line, "$")

      tag =
        decoration
        |> String.trim()
        |> String.trim("(")
        |> String.trim(")")

      # Deliberately a cons cell with the integer as its tail (not a
      # two-element list): this is the shape handed to
      # GitHelper.split_commits_by_tag/1.
      [tag | String.to_integer(List.first(rest), 10)]
    end)
    |> GitHelper.split_commits_by_tag()
  end

  @doc """
  get_last_n_commits/2: returns a list of the short hashes of the last `n`
  non-merge commits.

  The hashes come from `git log --pretty=format:%h --no-merges -<n>` and are
  returned as `{:ok, hashes}` in the order git printed them.
  """
  @spec get_last_n_commits(Git.Repository.t(), non_neg_integer) :: {:ok, [any]}
  def get_last_n_commits(repo, n) do
    limit = "-#{n}"
    {:ok, git_log_split(repo, ["--pretty=format:%h", "--no-merges", limit])}
  end

  @doc """
  get_diff_2_commits/2: returns a list of lines generated from the diff of two commits.

  Expects exactly two commit identifiers in the list and runs
  `git diff --stat <first> <second>`. On success the output, minus trailing
  newlines, is split into lines and returned as `{:ok, lines}`. Any other
  result from `Git.diff/2` yields a bare `[]`.
  """
  @spec get_diff_2_commits(Git.Repository.t(), [any]) :: {:ok, [String.t()]} | []
  def get_diff_2_commits(repo, [first, second]) do
    case Git.diff(repo, ["--stat", first, second]) do
      {:ok, stat} ->
        lines =
          stat
          |> String.trim_trailing("\n")
          |> String.split("\n")

        {:ok, lines}

      _ ->
        []
    end
  end

  @doc """
  get_total_lines/1: returns the total lines and files contained in a repo.

  The totals are read from `git diff --shortstat <empty tree>`: the hash of
  an empty tree is obtained by hashing `/dev/null` as a tree object, and
  diffing against it reports every file and line as an addition.

  Returns `{:ok, line_count, file_count}`. When git prints no statistics at
  all (nothing to diff), `{:ok, 0, 0}` is returned. Raises if either git
  call fails.
  """
  @spec get_total_lines(Git.Repository.t()) :: {:ok, non_neg_integer, non_neg_integer}
  def get_total_lines(repo) do
    {:ok, hash} = Git.hash_object(repo, ["-t", "tree", "/dev/null"])
    {:ok, diff} = Git.diff(repo, ["--shortstat", String.replace_suffix(hash, "\n", "")])

    # Expected shape: " 12 files changed, 345 insertions(+)".
    case diff |> String.trim() |> String.split(", ") do
      [""] ->
        # No statistics line: report zero totals rather than crashing on the
        # destructuring below.
        {:ok, 0, 0}

      [files_changed, lines_changed | _rest] ->
        [file_num | _] = String.split(files_changed, " ")
        [line_num | _] = String.split(lines_changed, " ")
        {:ok, String.to_integer(line_num), String.to_integer(file_num)}
    end
  end

  @doc """
  get_recent_changes/1: returns how much of the repo the latest change touched.

  Combines `get_total_lines/1` with `get_last_2_delta/1` and returns
  `{:ok, line_ratio, file_ratio}`, where `line_ratio` is
  (insertions + deletions) / total lines and `file_ratio` is
  changed files / total files, both rounded to 5 decimal places. When the
  repo has zero total lines, `{:ok, 0, 0}` is returned.
  """
  @spec get_recent_changes(Git.Repository.t()) :: {:ok, number, number}
  def get_recent_changes(repo) do
    with {:ok, total_lines, total_files} <- get_total_lines(repo) do
      {:ok, files_touched, added, removed} = get_last_2_delta(repo)

      if total_lines != 0 do
        line_ratio = Float.round((added + removed) / total_lines, 5)
        file_ratio = Float.round(files_touched / total_files, 5)
        {:ok, line_ratio, file_ratio}
      else
        {:ok, 0, 0}
      end
    end
  end

  @doc """
  get_last_2_delta/1: returns the change statistics between the two most
  recent non-merge commits.

  With fewer than two commits, or when the diff between them is empty,
  `{:ok, 0, 0, 0}` is returned. Otherwise the `--stat` lines are handed to
  `GitHelper.parse_diff/1` and its result is returned unchanged.
  """
  @spec get_last_2_delta(Git.Repository.t()) ::
          {:ok, non_neg_integer, non_neg_integer, non_neg_integer}
  def get_last_2_delta(repo) do
    {:ok, recent} = get_last_n_commits(repo, 2)

    case recent do
      [_, _ | _] ->
        {:ok, stat_lines} = get_diff_2_commits(repo, recent)

        case stat_lines do
          [""] -> {:ok, 0, 0, 0}
          _ -> GitHelper.parse_diff(stat_lines)
        end

      _ ->
        {:ok, 0, 0, 0}
    end
  end

  @doc """
  get_contributors/1: returns the contributors parsed from `git shortlog -n -e HEAD --`.

  Before parsing, the raw output is walked codepoint by codepoint; any piece
  that is not valid UTF-8 has each of its bytes re-encoded as the UTF-8
  codepoint with the same numeric value. The cleaned text is then passed to
  `GitHelper.parse_shortlog/1`, and the result is returned as `{:ok, list}`.
  """
  @spec get_contributors(Git.Repository.t()) :: {:ok, [Contributor.t()]}
  def get_contributors(repo) do
    contributors =
      repo
      |> Git.shortlog!(["-n", "-e", "HEAD", "--"])
      |> String.codepoints()
      |> Enum.map_join(fn piece ->
        if String.valid?(piece) do
          piece
        else
          for <<byte <- piece>>, into: "", do: <<byte::utf8>>
        end
      end)
      |> GitHelper.parse_shortlog()

    {:ok, contributors}
  end

  @doc """
  get_contributor_distribution/1: returns per-contributor commit counts and their sum.

  The map is keyed by `"Name <email>"` and holds each contributor's `count`.
  Returns `{:ok, counts, total}`, where `total` is the sum of all counts.
  """
  @spec get_contributor_distribution(Git.Repository.t()) :: {:ok, map, non_neg_integer}
  def get_contributor_distribution(repo) do
    {:ok, contributors} = get_contributors(repo)

    # Per-contributor tally, keyed by sign-off string.
    counts =
      Map.new(contributors, fn person ->
        {person.name <> " <" <> person.email <> ">", person.count}
      end)

    # Overall tally across every contributor entry.
    total_contributions =
      contributors
      |> Enum.map(fn person -> person.count end)
      |> Enum.sum()

    {:ok, counts, total_contributions}
  end

  @doc """
  get_functional_contributors/1: returns the contributors kept by
  `GitHelper.get_filtered_contributor_count/2`.

  Feeds the contributor distribution into the helper and returns
  `{:ok, count, names}`, where `names` are the keys of the filtered entries.
  """
  @spec get_functional_contributors(Git.Repository.t()) :: {:ok, non_neg_integer, [any]}
  def get_functional_contributors(repo) do
    {:ok, distribution, total} = get_contributor_distribution(repo)
    {:ok, kept_count, kept} = GitHelper.get_filtered_contributor_count(distribution, total)

    names = Enum.map(kept, fn {signoff, _commits} -> signoff end)
    {:ok, kept_count, names}
  end

  @doc """
  get_contributions_map/1: returns a list with one map of contribution data
  per git user.

  Each entry holds the contributor's `:name`, their commit count under
  `:contributions`, and a `:last_contribution_date` looked up with
  `get_last_contribution_date_by_contributor/2` (one `git log` call per
  contributor).

  note: this list is unfiltered, dupes aren't identified
  """
  @spec get_contributions_map(Git.Repository.t()) ::
          {:ok,
           [
             %{
               name: String.t(),
               contributions: non_neg_integer,
               last_contribution_date: String.t() | nil
             }
           ]}
  def get_contributions_map(repo) do
    {:ok, contributors} = get_contributors(repo)

    entries =
      Enum.map(contributors, fn contributor ->
        %{
          name: contributor.name,
          contributions: contributor.count,
          last_contribution_date:
            get_last_contribution_date_by_contributor(repo, contributor.name)
        }
      end)

    {:ok, entries}
  end

  @doc """
  get_clean_contributions_map/1: returns one map per contributor with a
  cleaned-up name.

  Contributors are parsed from `git shortlog -n -e HEAD --` by
  `GitHelper.parse_shortlog/1`. A `nil` or empty name becomes `"UNKNOWN"`;
  any other name is passed through `raw_binary_to_string/1`. Each map also
  carries the contributor's `:contributions` (count), `:merges`, `:email`
  and `:last_contribution_date`.

  Returns `{:ok, list}`.
  """
  @spec get_clean_contributions_map(Git.Repository.t()) :: {:ok, list}
  def get_clean_contributions_map(repo) do
    contributions =
      repo
      |> Git.shortlog!(["-n", "-e", "HEAD", "--"])
      |> GitHelper.parse_shortlog()
      |> Enum.map(fn contributor ->
        # The name is converted exactly once; a second pass over an already
        # converted name would be a no-op.
        name =
          case contributor.name do
            blank when blank in [nil, ""] -> "UNKNOWN"
            raw -> raw_binary_to_string(raw)
          end

        %{
          name: name,
          contributions: contributor.count,
          merges: contributor.merges,
          email: contributor.email,
          last_contribution_date: contributor.last_contribution_date
        }
      end)

    {:ok, contributions}
  end

  @doc """
  get_top10_contributors_map/1: returns the ten contributors with the highest
  commit counts as plain maps.

  Each entry gains `:contributions` (a copy of `count`) and
  `:last_contribution_date`, and loses the `:commits`, `:count` and
  `:__struct__` keys. Returns `{:ok, list}` ordered from most to fewest
  commits.
  """
  @spec get_top10_contributors_map(Git.Repository.t()) :: {:ok, [any]}
  def get_top10_contributors_map(repo) do
    {:ok, contributors} = get_contributors(repo)

    top_ten =
      contributors
      |> Enum.sort_by(fn person -> person.count end, &>=/2)
      |> Enum.take(10)
      |> Enum.map(fn person ->
        last_date = get_last_contribution_date_by_contributor(repo, person.name)

        person
        |> Map.put(:contributions, person.count)
        |> Map.put(:last_contribution_date, last_date)
        |> Map.drop([:commits, :count, :__struct__])
      end)

    {:ok, top_ten}
  end

  @doc """
  get_last_contribution_date_by_contributor/2: returns the date of the most
  recent commit authored by `contributor`.

  Commits are selected by author (the author is the contributor even when
  someone else committed the change); the reported value is that commit's
  `%cI` date (git's committer date in strict ISO 8601 form).

  Returns the last line of the filtered `git log` output, which is `""` when
  no commit matches, or `nil` if every output line was filtered away.
  """
  @spec get_last_contribution_date_by_contributor(Git.Repository.t(), String.t()) ::
          String.t() | nil
  def get_last_contribution_date_by_contributor(repo, contributor) do
    repo
    |> git_log_split([
      # `--author` is a regular expression by default, so a name such as
      # "dependabot[bot]" would not match itself. Treat it literally.
      "--fixed-strings",
      "--author=#{contributor}",
      "-1",
      "--pretty=format:%cI"
    ])
    |> List.last()
  end

  @doc """
  get_repo_size/1: returns the size figure reported by `git count-objects`.

  The command is run inside `repo.path`; the second comma-separated field of
  its output is taken, and the first word of that field is returned as
  `{:ok, size}` (a string).
  """
  # NOTE(review): without `-v`, `git count-objects` is documented to cover
  # unpacked objects only — confirm this is the intended size metric.
  @spec get_repo_size(Git.Repository.t()) :: {:ok, String.t()}
  def get_repo_size(repo) do
    {output, _exit_status} = System.cmd("git", ["count-objects"], cd: repo.path)

    size_field =
      output
      |> String.trim()
      |> String.split(",")
      |> Enum.at(1)

    [space | _unit] =
      size_field
      |> String.trim()
      |> String.split(" ")

    {:ok, space}
  end

  # Converts a binary that may contain bytes which are not valid UTF-8 into a
  # valid string. Valid codepoints are kept as they are; every invalid byte is
  # re-encoded as the UTF-8 codepoint with the same numeric value. An empty
  # binary yields an empty string.
  @spec raw_binary_to_string(binary) :: String.t()
  defp raw_binary_to_string(raw) do
    raw
    |> String.codepoints()
    # map_join (rather than reduce/2 without an initial accumulator) converts
    # the first codepoint as well and copes with an empty list.
    |> Enum.map_join(fn piece ->
      if String.valid?(piece) do
        piece
      else
        <<byte::8>> = piece
        <<byte::utf8>>
      end
    end)
  end

  # Stand-in for calling Git.log!() followed by String.split(): runs the log,
  # splits the output into lines and discards every line that contains
  # "warning:". Until a Git.log! option is found that keeps such lines out of
  # the output, they have to be removed by hand here.

  @spec git_log_split(Git.Repository.t(), [String.t()]) :: [String.t()]
  defp git_log_split(repo, args) do
    repo
    |> Git.log!(args)
    |> String.split("\n")
    |> Enum.reject(&String.contains?(&1, "warning:"))
  end
end