lib/distributions/distribution_utilities.ex

defmodule Chi2fit.Distribution.Utilities do

  # Copyright 2012-2017 Pieter Rijken
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
  #
  #     http://www.apache.org/licenses/LICENSE-2.0
  #
  # Unless required by applicable law or agreed to in writing, software
  # distributed under the License is distributed on an "AS IS" BASIS,
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.

  @moduledoc """
  Provides various distributions.
  """

  import Chi2fit.Statistics
  alias Chi2fit.Distribution, as: D

  @type model :: any

  defmodule UnsupportedDistributionError do
    defexception message: "Unsupported distribution function"
  end

  @doc """
  Returns the model for a name.

  The kurtosis is the so-called 'excess kurtosis'.

  Supported disributions:
      "wald" - The Wald or Inverse Gauss distribution,
      "weibull" - The Weibull distribution,
      "exponential" - The exponential distribution,
      "poisson" - The Poisson distribution,
      "normal" - The normal or Gaussian distribution,
      "frechet" - The Fréchet distribution,
      "nakagami" - The Nakagami distribution,
      "sep" - The Skewed Exponential Power distribution (Azzalini),
      "erlang" - The Erlang distribution,
      "sep0" - The Skewed Exponential Power distribution (Azzalini) with location parameter set to zero (0),
      "tw" / "tw1" - The Tracy-Widom distributions TW1,
      "tw2" - The Tracy-Widom distributions TW2,
      "tw4" - The Tracy-Widom distributions TW4,
      "wishart" - The Wishart distribution.
      
  ## Options
  Available only for the SEP distribution, see 'sepCDF/5'.
  """
  @spec model(name::String.t, options::Keyword.t) :: any
  def model(name, options \\ []) do
    params = options[:pars] || nil

    case name do
      "constant" -> %D.Constant{pars: params}
      "uniform" -> %D.Uniform{pars: params}
      "dice" -> %D.Dice{mode: :regular}
      "dice_gk4" -> %D.Dice{mode: :gk4}
      "wald" -> %D.Wald{pars: params}
      "weibull" -> %D.Weibull{pars: params}
      "exponential" -> %D.Exponential{pars: params}
      "frechet" -> %D.Frechet{pars: params}
      "nakagami" -> %D.Nakagami{pars: params}
      "poisson" -> %D.Poisson{pars: params}
      {"poisson", period} when is_number(period) and period>0 -> %D.Poisson{pars: params, period: period}
      "erlang" -> %D.Erlang{pars: params}
      {"erlang", batches} when is_number(batches) and batches>0 -> %D.Erlang{pars: params, batches: batches}
      "normal" -> %D.Normal{pars: params}
      "sep" -> %D.SEP{pars: params, options: options}
      "sep0" -> %D.SEP{pars: params, offset: 0.0, options: options}
      "tw" -> %D.TracyWidom{pars: params, type: 1}
      "tw1" -> %D.TracyWidom{pars: params, type: 1}
      "tw2" -> %D.TracyWidom{pars: params, type: 2}
      "tw4" -> %D.TracyWidom{pars: params, type: 4}
      "wishart:" <> dim -> %D.Wishart{pars: params, dim: to_numbers(dim)}
      "wishart" -> %D.Wishart{pars: params, dim: options[:dim]}
      {"wishart", [m,n]} when is_integer(m)and is_integer(n) -> %D.Wishart{pars: params, dim: [m,n]}
      unknown ->
        raise UnsupportedDistributionError, message: "Unsupported cumulative distribution function '#{inspect unknown}'"
    end
  end

  defp to_number(string) when is_binary(string) do
    case Integer.parse(string) do
      {val, ""} -> val
      _ -> String.to_float(string)
    end
  end
  defp to_numbers(list), do: String.split(list, " ") |> Enum.map(& to_number &1)

  @doc ~S"""

  ## Examples

      iex> ~M(3 4 5)
      %Distribution.Uniform{pars: [3, 4, 5]}

      iex> ~M(3 4 5)u
      %Distribution.Uniform{pars: [3, 4, 5]}

      iex> ~M()d
      %Distribution.Dice{mode: :regular}

      iex> ~M()dgk
      %Distribution.Dice{mode: :gk4}

      iex> ~M(1.2)p
      %Distribution.Poisson{pars: [1.2], period: 1.0}

      iex> ~M(1.2 5.4)w
      %Distribution.Weibull{pars: [1.2, 5.4]}

      iex> ~M(1.2 5.4)wald
      %Distribution.Wald{pars: [1.2, 5.4]}

  """
  def sigil_M(str, ''), do: %D.Uniform{pars: to_numbers(str)}
  def sigil_M(str, [?u|_]), do: %D.Uniform{pars: to_numbers(str)}
  def sigil_M("", 'coin'), do: %D.Coin{}
  def sigil_M(str, [?c|_]), do: %D.Constant{pars: [to_number(str)]}
  def sigil_M("", 'dgk'), do: %D.Dice{mode: :gk4}
  def sigil_M("", [?d|_]), do: %D.Dice{mode: :regular}

  def sigil_M(str, [?b|_]), do: %D.Bernoulli{pars: [to_number(str)]}

  def sigil_M(str, 'erlangb') do
    [rate, batches] = to_numbers(str)
    %D.Erlang{pars: [rate], batches: batches}
  end
  def sigil_M(str, 'erlang'), do: %D.Erlang{pars: to_numbers(str)}
  def sigil_M(str, [?e|_]), do: %D.Exponential{pars: [to_number(str)]}
  def sigil_M(str, 'pp') do
    [rate, period] = to_numbers(str)
    %D.Poisson{pars: [rate], period: period}
  end
  def sigil_M(str, [?p|_]), do: %D.Poisson{pars: [to_number(str)]}

  def sigil_M(str, 'nakagami'), do: %D.Nakagami{pars: to_numbers(str)}
  def sigil_M(str, [?n|_]), do: %D.Normal{pars: to_numbers(str)}

  def sigil_M(str, 'wald'), do: %D.Wald{pars: to_numbers(str)}
  def sigil_M(str, 'wishart') do
    case String.split(str,"|") do
      [dim,pars] ->  %D.Wishart{pars: to_numbers(pars), dim: to_numbers(dim)}
      [dim] -> %D.Wishart{pars: nil, dim: to_numbers(dim)}
      _else -> %D.Wishart{pars: nil, dim: nil}
    end
  end
  def sigil_M("", [?w|_]), do: %D.Weibull{}
  def sigil_M(str, [?w|_]), do: %D.Weibull{pars: to_numbers(str)}

  def sigil_M(str, [?f|_]), do: %D.Frechet{pars: to_numbers(str)}
  def sigil_M(str, 'sz'), do: %D.SEP{pars: to_numbers(str), offset: 0.0}
  def sigil_M(str, [?s|_]), do: %D.SEP{pars: to_numbers(str)}

  def sigil_M("", 'tw'), do: %D.TracyWidom{pars: nil, type: 1}
  def sigil_M(str, 'tw'), do: %D.TracyWidom{pars: to_numbers(str), type: 1}
  def sigil_M("", 'tww'), do: %D.TracyWidom{pars: nil, type: 2}
  def sigil_M(str, 'tww'), do: %D.TracyWidom{pars: to_numbers(str), type: 2}
  def sigil_M("", 'twwww'), do: %D.TracyWidom{pars: nil, type: 4}
  def sigil_M(str, 'twwww'), do: %D.TracyWidom{pars: to_numbers(str), type: 4}
  
  def sigil_M(_term, modifiers) do
    raise UnsupportedDistributionError, message: "Unsupported modifiers #{inspect modifiers}"
  end

  @doc """
  Guesses what distribution is likely to fit the sample data
  """
  @spec guess(sample::[number], n::integer, list::[String.t] | String.t) :: [any]
  def guess(sample,n \\ 100,list \\ ["exponential","poisson","normal","erlang","wald","sep","weibull","frechet","nakagami","tw1","tw2"])
  def guess(sample,n,list) when is_integer(n) and n>0 and is_list(list) do
    {{skewness,err_s},{kurtosis,err_k}} = sample |> cullen_frey(n) |> cullen_frey_point
    list
    |> Enum.flat_map(
      fn
        distrib ->
          r = sample
          |> guess(n,distrib)
          |> Enum.map(fn {s,k}->((skewness-s)/err_s)*((skewness-s)/err_s) + ((kurtosis-k)/err_k)*((kurtosis-k)/err_k) end)
          |> Enum.min
          [{distrib,r}]
      end)
    |> Enum.sort(fn {_,r1},{_,r2} -> r1<r2 end)
  end
  def guess(_sample,n,distrib) when is_integer(n) and n>0 do
    model = model(distrib)
    params = 1..D.size(model)
    1..n
    |> Enum.map(fn _ -> Enum.map(params, fn _ -> 50*:rand.uniform end) end)
    |> Enum.flat_map(fn
      pars ->
        try do
          s = D.skewness(model).(pars)
          k = D.kurtosis(model).(pars)
          [{s,k}]
        rescue
          _error -> []
        end
    end)
  end

end