# Copyright (C) 2020 by the Georgia Tech Research Institute (GTRI)
# This software may be modified and distributed under the terms of
# the BSD 3-Clause license. See the LICENSE file for details.
defmodule GitHelper do
@moduledoc """
Collection of lower-level functions for analyzing the output of git commands.
"""
require Logger
@type contrib_count :: %{String.t() => integer}
@doc """
parse_diff/1: returns the file, insertion and deletion counts found in the last array position of a diff array

The last entry is expected to be a git "shortstat" style summary such as
`" 3 files changed, 10 insertions(+), 2 deletions(-)"`. Each count is looked
up by its label rather than by its position, so a summary that lists only
deletions (git leaves out the insertions part when there are none) is still
attributed to the right field.

Any count that is not present is reported as 0. An empty list, an empty last
entry, or a last entry that is not a summary line yields `{:ok, 0, 0, 0}`.

Returns `{:ok, files_changed, insertions, deletions}`.
"""
@spec parse_diff([String.t()]) :: {:ok, non_neg_integer, non_neg_integer, non_neg_integer}
def parse_diff(list) do
  # List.last/1 returns nil for an empty list; treat that like an empty summary.
  summary = String.trim(List.last(list) || "")

  {:ok, shortstat_count(summary, ~r/(\d+) file/), shortstat_count(summary, ~r/(\d+) insertion/),
   shortstat_count(summary, ~r/(\d+) deletion/)}
end

# Returns the integer captured by `pattern` in `summary`, or 0 when the pattern does not match.
defp shortstat_count(summary, pattern) do
  case Regex.run(pattern, summary, capture: :all_but_first) do
    [digits] -> String.to_integer(digits)
    _no_match -> 0
  end
end
@doc """
get_avg_tag_commit_time_diff/1: return the average time between commits within each subarray representing a tag

Delegates to the accumulating two-argument helper, starting from an empty
result list. The result is `{:ok, averages}`; values are prepended as the
sublists are visited, so `averages` is in reverse order of the input sublists.
"""
def get_avg_tag_commit_time_diff(list), do: get_avg_tag_commit_time_diff(list, [])
@doc """
get_total_tag_commit_time_diff/1: return the total time between commits within each subarray representing a tag

Delegates to the accumulating two-argument helper, starting from an empty
result list. The result is `{:ok, totals}`; values are prepended as the
sublists are visited, so `totals` is in reverse order of the input sublists.
"""
def get_total_tag_commit_time_diff(list), do: get_total_tag_commit_time_diff(list, [])
@doc """
split_commits_by_tag/1: returns a list with sublists arranged by tag

Delegates to the accumulating two-argument helper, starting with no groups.
The result is `{:ok, groups}`. A new sublist is started whenever the first
element of an entry contains "tag"; both the groups and the entries inside
each group are built by prepending, so they appear in reverse of input order.
"""
def split_commits_by_tag(list), do: split_commits_by_tag(list, [])
@doc """
get_contributor_counts/1: Gets the number of contributions belonging to each author

Delegates to the accumulating two-argument helper, starting from an empty map,
and returns its result unchanged: `{:ok, counts}` where `counts` is a map of
%{name => number}.
"""
def get_contributor_counts(list), do: get_contributor_counts(list, %{})
@doc """
get_filtered_contributor_count/2: Gets the resolved list of contributors, return count and list

A contributor is kept when its share of `total` is at least an equal share,
i.e. `value / total >= 1 / map_size(map)`. The check is carried out on
integers as `value * map_size(map) >= total`, which is the same inequality
for a positive `total` but does not divide, so a `total` of 0 does not raise
an ArithmeticError (every contributor is kept in that case).

Returns `{:ok, count, contributors}` where `contributors` is the list of
`{name, value}` tuples that passed the filter and `count` is its length.
"""
@spec get_filtered_contributor_count(contrib_count, non_neg_integer) ::
        {:ok, non_neg_integer, [{String.t(), integer}]}
def get_filtered_contributor_count(map, total) do
  contributor_total = map_size(map)

  # Cross-multiplied form of value / total >= 1 / contributor_total.
  filtered_list =
    Enum.filter(map, fn {_name, value} -> value * contributor_total >= total end)

  {:ok, length(filtered_list), filtered_list}
end
@doc """
parse_shortlog/1: turns a shortlog string into a list of Contributor structs

The log is split into per-contributor entries, each entry's header and commit
lines are parsed, and the resulting contributors are passed through
`filter_contributors/1`, which merges entries that share an email address
(compared case-insensitively).

Raises a MatchError when the count captured from a header does not start
with an integer.
"""
@spec parse_shortlog(String.t()) :: [Contributor.t()]
def parse_shortlog(log) do
  contributors =
    for entry <- split_shortlog(log) do
      {name, email, raw_count} = parse_header(entry)
      {merges, commits} = parse_commits(entry)
      # The header yields the count as a string; keep only the leading integer.
      {count, _rest} = Integer.parse(raw_count)

      %Contributor{
        name: String.trim(name),
        email: String.trim(email),
        count: count,
        merges: merges,
        commits: commits
      }
    end

  filter_contributors(contributors)
end
# Trims surrounding whitespace from the log and splits it wherever two
# consecutive newlines occur, giving one string per entry.
defp split_shortlog(log) do
  String.split(String.trim(log), ~r{\n\n})
end
@doc """
parse_header/1: extracts the name, email and count strings from the first line of a shortlog entry

Only the text before the first newline is examined. When it matches the
header pattern, the three captured strings are returned untrimmed as
`{name, email, count}`. When it does not match, an error is logged and
`{"Could not process", "Could not process", "0"}` is returned.
"""
def parse_header(contributor) do
  [first_line | _commit_lines] = String.split(contributor, "\n")

  case Regex.scan(~r{(\d*|[^<]+)<([^;]*)>.\(([^:]+)\)}, first_line) do
    [] ->
      Logger.error("Failed to process: " <> contributor)
      {"Could not process", "Could not process", "0"}

    # Only the first match on the line is used.
    [[_whole, name, email, count] | _other_matches] ->
      {name, email, count}
  end
end
# Returns {merges, commits} for one shortlog entry: `commits` is every line
# after the first, trimmed; `merges` is how many of those lines start with
# "merge" (case-insensitive).
defp parse_commits(contributor) do
  commits =
    contributor
    |> String.split("\n")
    |> tl()
    |> Enum.map(&String.trim/1)

  merges = Enum.count(commits, fn message -> Regex.match?(~r/^(merge)+/i, message) end)
  {merges, commits}
end
# Accumulating worker for split_commits_by_tag/1.
# `groups` is a list of sublists; the head sublist is the one currently being filled.
defp split_commits_by_tag([], groups), do: {:ok, groups}

# The very first entry always opens the first group, whatever it contains.
defp split_commits_by_tag([commit | remaining], []) do
  split_commits_by_tag(remaining, [[commit]])
end

defp split_commits_by_tag([commit | remaining], groups) do
  [label | _fields] = commit

  updated_groups =
    case String.contains?(label, "tag") do
      true ->
        # An entry whose first element mentions "tag" starts a new group.
        [[commit] | groups]

      false ->
        # Otherwise the entry is prepended to the group currently being filled.
        [open_group | closed_groups] = groups
        [[commit | open_group] | closed_groups]
    end

  split_commits_by_tag(remaining, updated_groups)
end
# Accumulating worker for get_total_tag_commit_time_diff/1.
# Once every group has been visited, the collected totals are returned as {:ok, totals}.
defp get_total_tag_commit_time_diff([], totals), do: {:ok, totals}

# Asks TimeHelper.sum_ts_diff/1 for the group's total and prepends it to the results.
defp get_total_tag_commit_time_diff([group | remaining], totals) do
  {:ok, group_total} = TimeHelper.sum_ts_diff(group)
  get_total_tag_commit_time_diff(remaining, [group_total | totals])
end
# Accumulating worker for get_avg_tag_commit_time_diff/1.
# Once every group has been visited, the collected averages are returned as {:ok, averages}.
defp get_avg_tag_commit_time_diff([], averages), do: {:ok, averages}

# Divides the group's total (from TimeHelper.sum_ts_diff/1) by the number of
# entries in the group and prepends the quotient to the results.
# NOTE(review): an empty group would make the divisor 0 — confirm callers never pass one.
defp get_avg_tag_commit_time_diff([group | remaining], averages) do
  {:ok, group_total} = TimeHelper.sum_ts_diff(group)
  entry_count = length(group)
  get_avg_tag_commit_time_diff(remaining, [group_total / entry_count | averages])
end
# Accumulating worker for get_contributor_counts/1.
#
# Every entry of `list` is trimmed once and the trimmed name is used both to
# create and to increment its counter, so " Alice " and "Alice" are counted
# under the single key "Alice". Entries that are empty after trimming are
# skipped. Counts are added on top of whatever `accumulator` already holds.
#
# Returns {:ok, counts} where counts is a map of %{name => number}.
defp get_contributor_counts(list, accumulator) do
  counts =
    list
    |> Enum.map(&String.trim/1)
    |> Enum.reject(&(&1 == ""))
    |> Enum.reduce(accumulator, fn name, acc ->
      # First sighting starts at 1; later sightings add 1.
      Map.update(acc, name, 1, &(&1 + 1))
    end)

  {:ok, counts}
end
# Scores a name for comparison: ten points for every piece obtained by
# splitting on a single space, plus the name's String.length/1.
defp name_sorter(x) do
  piece_count = x |> String.split(" ") |> length()
  piece_count * 10 + String.length(x)
end
# Merges contributors that share an email address (compared case-insensitively).
#
# The first contributor in the list picks the email; every entry with that
# email is folded into one Contributor whose commits are concatenated and
# whose merges and counts are summed. The remaining entries are processed the
# same way recursively, so the result keeps first-appearance order.
@spec filter_contributors([Contributor.t()]) :: [Contributor.t()]
defp filter_contributors([]), do: []

defp filter_contributors([lead | _rest] = list) do
  lead_email = String.downcase(lead.email)

  # Divide the list into entries for the lead's email and everything else.
  {same_author, remaining} =
    Enum.split_with(list, fn entry -> String.downcase(entry.email) == lead_email end)

  # The name with the highest name_sorter/1 score wins; on a tie the earliest entry is kept.
  best_name =
    same_author
    |> Enum.map(& &1.name)
    |> Enum.max_by(&name_sorter/1)

  merged = %Contributor{
    name: best_name,
    email: lead.email,
    commits: same_author |> Enum.map(& &1.commits) |> List.flatten(),
    merges: same_author |> Enum.map(& &1.merges) |> Enum.sum(),
    count: same_author |> Enum.map(& &1.count) |> Enum.sum()
  }

  [merged | filter_contributors(remaining)]
end
end