defmodule Statistics do
alias Statistics.Math
@moduledoc """
Descriptive statistics functions
"""
@doc """
Sum the contents of a list
Calls Enum.sum/1
"""
@spec sum([number]) :: number
def sum(list) when is_list(list), do: do_sum(list, 0)
defp do_sum([], t), do: t
defp do_sum([x | xs], t), do: do_sum(xs, t + x)
@doc """
Calculate the mean from a list of numbers
## Examples
iex> Statistics.mean([])
nil
iex> Statistics.mean([1,2,3])
2.0
"""
@spec mean([number]) :: float() | nil
def mean(list) when is_list(list), do: do_mean(list, 0, 0)
defp do_mean([], 0, 0), do: nil
defp do_mean([], t, l), do: t / l
defp do_mean([x | xs], t, l) do
do_mean(xs, t + x, l + 1)
end
@doc """
Get the median value from a list.
## Examples
iex> Statistics.median([])
nil
iex> Statistics.median([1,2,3])
2
iex> Statistics.median([1,2,3,4])
2.5
"""
@spec median([number]) :: number | nil
def median([]), do: nil
def median(list) when is_list(list) do
midpoint =
(length(list) / 2)
|> Float.floor()
|> round
{l1, l2} =
Enum.sort(list)
|> Enum.split(midpoint)
case length(l2) > length(l1) do
true ->
[med | _] = l2
med
false ->
[m1 | _] = l2
[m2 | _] = Enum.reverse(l1)
mean([m1, m2])
end
end
@doc """
Get the most frequently occuring value
## Examples
iex> Statistics.mode([])
nil
iex> Statistics.mode([1,2,3,2,4,5,2,6,7,2,8,9])
2
"""
@spec mode([number]) :: number | nil
def mode([]), do: nil
def mode(list) when is_list(list) do
h = hist(list)
max = Map.values(h) |> Enum.max()
h |> Enum.find(fn {_, val} -> val == max end) |> elem(0)
end
@doc """
Get a frequency count of the values in a list
## Examples
iex> Statistics.hist([])
nil
iex> Statistics.hist([1,2,3,2,4,5,2,5,1,2,5,5])
%{1 => 2, 2 => 4, 3 => 1, 4 => 1, 5 => 4}
"""
@spec hist([number]) :: map | nil
def hist([]), do: nil
def hist(list) when is_list(list) do
list
|> Enum.reduce(%{}, fn tag, acc -> Map.update(acc, tag, 1, &(&1 + 1)) end)
end
@doc """
Get the minimum value from a list
iex> Statistics.min([])
nil
iex> Statistics.min([1,2,3])
1
If a non-empty list is provided, it is a call to Enum.min/1
"""
@spec min([number]) :: number | nil
def min([]), do: nil
def min(list) do
Enum.min(list)
end
@doc """
Get the maximum value from a list
iex> Statistics.max([])
nil
iex> Statistics.max([1,2,3])
3
If a non-empty list is provided, it is a call to Enum.max/1
"""
@spec max([number]) :: number | nil
def max([]), do: nil
def max(list) do
Enum.max(list)
end
@doc """
Get the quartile cutoff value from a list
responds to only first and third quartile.
## Examples
iex> Statistics.quartile([1,2,3,4,5,6,7,8,9],:first)
3
iex> Statistics.quartile([1,2,3,4,5,6,7,8,9],:third)
7
"""
# TODO change these to call `percentile/2`
@spec quartile([number], :first | :third) :: number
def quartile(list, :first) do
list |> split |> elem(0) |> median
end
def quartile(list, :third) do
list |> split |> elem(1) |> median
end
@doc """
Get the nth percentile cutoff from a list
## Examples
iex> Statistics.percentile([], 50)
nil
iex> Statistics.percentile([1], 50)
1
iex> Statistics.percentile([1,2,3,4,5,6,7,8,9],80)
7.4
iex> Statistics.percentile([1,2,3,4,5,6,7,8,9],100)
9
"""
@spec percentile([number], number) :: number | nil
def percentile([], _), do: nil
def percentile([x], _), do: x
def percentile(list, 0), do: min(list)
def percentile(list, 100), do: max(list)
def percentile(list, n) when is_list(list) and is_number(n) do
s = Enum.sort(list)
r = n / 100.0 * (length(list) - 1)
f = :erlang.trunc(r)
lower = Enum.at(s, f)
upper = Enum.at(s, f + 1)
lower + (upper - lower) * (r - f)
end
@doc """
Get range of data
## Examples
iex> Statistics.range([1,2,3,4,5,6])
5
"""
@spec range([number]) :: number | nil
def range([]), do: nil
def range(list) when is_list(list) do
max(list) - min(list)
end
@doc """
Calculate the inter-quartile range
## Examples
iex> Statistics.iqr([])
nil
iex> Statistics.iqr([1,2,3,4,5,6,7,8,9])
4
"""
@spec iqr([number]) :: number | nil
def iqr([]), do: nil
def iqr(list) when is_list(list) do
{first, second} = split(list)
median(second) - median(first)
end
@doc """
Calculate variance from a list of numbers
## Examples
iex> Statistics.variance([])
nil
iex> Statistics.variance([1,2,3,4])
1.25
iex> Statistics.variance([55,56,60,65,54,51,39])
56.48979591836735
"""
@spec variance([number]) :: number | nil
def variance([]), do: nil
def variance(list) when is_list(list) do
list_mean = mean(list)
list |> Enum.map(fn x -> (list_mean - x) * (list_mean - x) end) |> mean
end
@doc """
Calculate the standard deviation of a list
## Examples
iex> Statistics.stdev([])
nil
iex> Statistics.stdev([1,2])
0.5
"""
@spec stdev([number]) :: number | nil
def stdev([]), do: nil
def stdev(list) do
list |> variance |> Math.sqrt()
end
@doc """
Calculate the trimmed mean of a list.
Can specify cutoff values as a tuple, or simply choose the IQR min/max as the cutoffs
## Examples
iex> Statistics.trimmed_mean([], :iqr)
nil
iex> Statistics.trimmed_mean([1,2,3], {1,3})
2.0
iex> Statistics.trimmed_mean([1,2,3,4,5,5,6,6,7,7,8,8,10,11,12,13,14,15], :iqr)
7.3
"""
@spec trimmed_mean([number], atom | tuple) :: number | nil
def trimmed_mean([], _), do: nil
def trimmed_mean(list, :iqr) do
{first, second} = split(list)
trimmed_mean(list, {median(first), median(second)})
end
def trimmed_mean(list, {low, high}) do
list |> Enum.reject(fn x -> x < low or x > high end) |> mean
end
@doc """
Calculates the harmonic mean from a list
Harmonic mean is the number of values divided by
the sum of the reciprocal of all the values.
## Examples
iex> Statistics.harmonic_mean([])
nil
iex> Statistics.harmonic_mean([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15])
4.5204836768674568
"""
@spec harmonic_mean([number]) :: number | nil
def harmonic_mean([]), do: nil
def harmonic_mean(list) when is_list(list) do
do_harmonic_mean(list, 0, 0)
end
defp do_harmonic_mean([], t, l), do: l / t
defp do_harmonic_mean([x | xs], t, l) do
do_harmonic_mean(xs, t + 1 / x, l + 1)
end
@doc """
Calculate the geometric mean of a list
Geometric mean is the nth root of the product of n values
## Examples
iex> Statistics.geometric_mean([])
nil
iex> Statistics.geometric_mean([1,2,3])
1.8171205928321397
"""
@spec geometric_mean([number]) :: number | nil
def geometric_mean([]), do: nil
def geometric_mean(list) when is_list(list) do
do_geometric_mean(list, 1, 0)
end
defp do_geometric_mean([], p, l), do: Math.pow(p, 1 / l)
defp do_geometric_mean([x | xs], p, l) do
do_geometric_mean(xs, p * x, l + 1)
end
@doc """
Calculates the nth moment about the mean for a sample.
Generally used to calculate coefficients of skewness and kurtosis.
Returns the n-th central moment as a float
The denominator for the moment calculation is the number of
observations, no degrees of freedom correction is done.
## Examples
iex> Statistics.moment([1,2,3,4,5,6,7,8,9,8,7,6,5,4,3],3)
-1.3440000000000025
iex> Statistics.moment([], 2)
nil
"""
@spec moment([number], pos_integer) :: number | nil
def moment(list, n \\ 1)
# empty list has no moment
def moment([], _), do: nil
# By definition the first moment about the mean is 0.
def moment(_, 1), do: 0.0
# Otherwise
def moment(list, n) when is_list(list) and is_number(n) do
lmean = mean(list)
list |> Enum.map(&Math.pow(&1 - lmean, n)) |> mean
end
@doc """
Computes the skewness of a data set.
For normally distributed data, the skewness should be about 0. A skewness
value > 0 means that there is more weight in the left tail of the
distribution.
## Examples
iex> Statistics.skew([])
nil
iex> Statistics.skew([1,2,3,2,1])
0.3436215967445454
"""
@spec skew([number]) :: number | nil
def skew([]), do: nil
def skew(list) do
m2 = moment(list, 2)
m3 = moment(list, 3)
m3 / Math.pow(m2, 1.5)
end
@doc """
Computes the kurtosis (Fisher) of a list.
Kurtosis is the fourth central moment divided by the square of the variance.
## Examples
iex> Statistics.kurtosis([])
nil
iex> Statistics.kurtosis([1,2,3,2,1])
-1.1530612244897964
"""
@spec kurtosis([number]) :: number | nil
def kurtosis([]), do: nil
def kurtosis(list) do
m2 = moment(list, 2)
m4 = moment(list, 4)
# pearson
p = m4 / Math.pow(m2, 2.0)
# fisher
p - 3
end
@doc """
Calculate a standard `z` score for each item in a list
## Examples
iex> Statistics.zscore([3,2,3,4,5,6,5,4,3])
[-0.7427813527082074, -1.5784103745049407, -0.7427813527082074,
0.09284766908852597, 0.9284766908852594, 1.7641057126819928,
0.9284766908852594, 0.09284766908852597, -0.7427813527082074]
"""
@spec zscore([number]) :: list | nil
def zscore(list) when is_list(list) do
lmean = mean(list)
lstdev = stdev(list)
for n <- list, do: (n - lmean) / lstdev
end
@doc """
Calculate the the Pearson product-moment correlation coefficient of two lists.
The two lists are presumed to represent matched pairs of observations, the `x` and `y` of a simple regression.
## Examples
iex> Statistics.correlation([1,2,3,4], [1,3,5,6])
0.9897782665572894
"""
@spec correlation([number], [number]) :: number
def correlation(x, y) when length(x) == length(y) do
xmean = mean(x)
ymean = mean(y)
numer =
Enum.zip(x, y)
|> Enum.map(fn {xi, yi} -> (xi - xmean) * (yi - ymean) end)
|> sum
denom_x =
x
|> Enum.map(fn xi -> (xi - xmean) * (xi - xmean) end)
|> sum
denom_y =
y
|> Enum.map(fn yi -> (yi - ymean) * (yi - ymean) end)
|> sum
numer / Math.sqrt(denom_x * denom_y)
end
@doc """
Calculate the covariance of two lists.
Covariance is a measure of how much two random variables change together.
The two lists are presumed to represent matched pairs of observations, such as the `x` and `y` of a simple regression.
## Examples
iex> Statistics.covariance([1,2,3,2,1], [1,4,5.2,7,99])
-17.89
"""
@spec covariance([number], [number]) :: number
def covariance(x, y) when length(x) == length(y) do
xmean = mean(x)
ymean = mean(y)
size = length(x)
Enum.zip(x, y)
|> Enum.map(fn {xi, yi} -> (xi - xmean) * (yi - ymean) end)
|> Enum.map(fn i -> i / (size - 1) end)
|> sum
end
## helpers and other flotsam
import Integer, only: [is_even: 1, is_odd: 1]
# Split a list into two equal lists.
# Needed for getting the quartiles.
defp split(list) when is_list(list) do
do_split(Enum.sort(list), length(list))
end
defp do_split(sorted_list, l) when is_even(l) do
m = :erlang.trunc(l / 2)
{Enum.take(sorted_list, m), Enum.drop(sorted_list, m)}
end
defp do_split(sorted_list, l) when is_odd(l) do
m = :erlang.trunc((l + 1) / 2)
{Enum.take(sorted_list, m), Enum.drop(sorted_list, m - 1)}
end
end