lib/ex_fuzzywuzzy.ex

defmodule ExFuzzywuzzy do
  # Declares the README as an external resource of this module and binds its path to
  # `readme`, which the module documentation below reads at compile time.
  @external_resource readme = "README.md"
  # The documentation embeds the second chunk of the README, i.e. the text that follows
  # the first `<!--MDOC !-->` marker (`Enum.fetch!/2` raises if that chunk is missing).
  @moduledoc """
  ex_fuzzywuzzy is a fuzzy string matching library that uses a customizable measure
  to calculate a distance ratio

  #{readme |> File.read!() |> String.split("<!--MDOC !-->") |> Enum.fetch!(1)}
  """

  alias ExFuzzywuzzy.Algorithms.PartialMatch

  @typedoc """
  Signature of a similarity function: it receives two strings and returns a float score.

  The ratio functions of this module multiply that score by 100 before rounding it, so the
  score is presumably expected to lie between 0.0 and 1.0.
  """
  @type ratio_calculator :: (String.t(), String.t() -> float())

  @typedoc """
  A single runtime option accepted by the ratio functions:

    * `:similarity_fn` - the function used to score two strings
    * `:case_sensitive` - when falsy, both strings are upcased before being compared
    * `:precision` - the number of decimal digits handed to `Float.round/2`

  An option that is not given is read from the `:ex_fuzzywuzzy` application environment.
  """
  @type fuzzywuzzy_option ::
          {:similarity_fn, ratio_calculator()}
          | {:case_sensitive, boolean()}
          | {:precision, non_neg_integer()}

  @typedoc """
  List of runtime options accepted by the ratio functions
  """
  @type fuzzywuzzy_options :: [fuzzywuzzy_option()]

  @typedoc """
  Ratio methods that match on the full strings
  """
  @type full_match_method :: :standard | :quick | :token_sort | :token_set

  @typedoc """
  Ratio methods that work on the best matching substring
  """
  @type partial_match_method :: :partial | :partial_token_sort | :partial_token_set

  @typedoc """
  Every available ratio method, full or partial
  """
  @type match_method :: full_match_method() | partial_match_method()

  @doc """
  Calculates the standard ratio between two strings, expressed as a percentage.
  The computation is delegated to the chosen similarity measure and the score it
  produces is scaled to a percentage and rounded

  ```elixir
  iex> ratio("this is a test", "this is a test!")
  96.55
  ```
  """
  @spec ratio(String.t(), String.t(), fuzzywuzzy_options()) :: float()
  def ratio(left, right, options \\ []), do: apply_ratio(left, right, &do_ratio/3, options)

  # Plain ratio: hands both strings, untouched, to the similarity function.
  @spec do_ratio(String.t(), String.t(), ratio_calculator()) :: float()
  defp do_ratio(left, right, ratio_fn) do
    ratio_fn.(left, right)
  end

  @doc """
  Like the standard ratio, but computed after stripping every character that is
  neither alphanumeric nor a hyphen; the remaining tokens are compared joined by
  single spaces

  ```elixir
  iex> quick_ratio("this is a test", "this is a test!")
  100.0
  ```
  """
  @spec quick_ratio(String.t(), String.t(), fuzzywuzzy_options()) :: float()
  def quick_ratio(left, right, options \\ []) do
    normalized_left = quick_ratio_normalizer(left)
    normalized_right = quick_ratio_normalizer(right)

    apply_ratio(normalized_left, normalized_right, &do_ratio/3, options)
  end

  # Tokenizes the string and glues the tokens back together with single spaces.
  @spec quick_ratio_normalizer(String.t()) :: String.t()
  defp quick_ratio_normalizer(string) do
    tokens = string_normalizer(string)
    Enum.join(tokens, " ")
  end

  @doc """
  Calculates the partial ratio between two strings, i.e. the highest ratio found
  among their best matching m-length substrings

  ```elixir
  iex> partial_ratio("this is a test", "this is a test!")
  100.0

  iex> partial_ratio("yankees", "new york yankees")
  100.0
  ```
  """
  @spec partial_ratio(String.t(), String.t(), fuzzywuzzy_options()) :: float()
  def partial_ratio(left, right, options \\ []),
    do: apply_ratio(left, right, &do_partial_ratio/3, options)

  # Scores every candidate pair of blocks produced by `PartialMatch.matching_blocks/2`
  # and keeps the highest score.
  # NOTE(review): `Enum.max/1` raises on an empty list, so `matching_blocks/2` is assumed
  # to always return at least one block - confirm against `PartialMatch`.
  @spec do_partial_ratio(String.t(), String.t(), ratio_calculator()) :: float()
  defp do_partial_ratio(left, right, ratio_fn) do
    blocks = PartialMatch.matching_blocks(left, right)

    scores =
      Enum.map(blocks, fn %PartialMatch{left_block: left_part, right_block: right_part} ->
        ratio_fn.(left_part, right_part)
      end)

    Enum.max(scores)
  end

  @doc """
  Calculates the token sort ratio between two strings: each string is tokenized,
  its tokens are sorted alphabetically, and the ratio is computed on the result

  ```elixir
  iex> token_sort_ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear")
  100.0

  iex> token_sort_ratio("fuzzy muzzy was a bear", "wuzzy fuzzy was a bear")
  77.27
  ```
  """
  @spec token_sort_ratio(String.t(), String.t(), fuzzywuzzy_options()) :: float()
  def token_sort_ratio(left, right, options \\ []),
    do: apply_ratio(left, right, &do_token_sort_ratio/3, options)

  # Normalizes both strings into their sorted-token form, then scores the two results.
  @spec do_token_sort_ratio(String.t(), String.t(), ratio_calculator()) :: float()
  defp do_token_sort_ratio(left, right, ratio_fn) do
    sorted_left = token_sort_normalizer(left)
    sorted_right = token_sort_normalizer(right)

    ratio_fn.(sorted_left, sorted_right)
  end

  # Tokenizes the string, sorts the tokens and joins them with single spaces.
  @spec token_sort_normalizer(String.t()) :: String.t()
  defp token_sort_normalizer(string) do
    sorted_tokens = Enum.sort(string_normalizer(string))
    Enum.join(sorted_tokens, " ")
  end

  @doc """
  Like the token sort ratio, except that a partial ratio - rather than a standard
  one - is applied to the sorted tokens

  ```elixir
  iex> partial_token_sort_ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear")
  100.0

  iex> partial_token_sort_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear")
  81.25
  ```
  """
  @spec partial_token_sort_ratio(String.t(), String.t(), fuzzywuzzy_options()) :: float()
  def partial_token_sort_ratio(left, right, options \\ []),
    do: apply_ratio(left, right, &do_partial_token_sort_ratio/3, options)

  # Token sort ratio whose scoring step is a partial ratio.
  #
  # `ratio_fn` is the similarity function selected by the caller; it is forwarded to the
  # nested `partial_ratio/3` call instead of being dropped, so a custom `:similarity_fn`
  # is honoured. This function is reached through `apply_ratio/4`, which has already
  # applied the caller's case handling to both strings, so the nested call runs with
  # `case_sensitive: true` to avoid upcasing input the caller asked to keep as is.
  # `precision: 15` is the largest value `Float.round/2` accepts: it leaves the nested
  # result effectively unrounded so that only the caller's precision applies.
  #
  # Returns a score in the same scale as `ratio_fn` (the nested percentage divided by 100).
  @spec do_partial_token_sort_ratio(String.t(), String.t(), ratio_calculator()) :: float()
  defp do_partial_token_sort_ratio(left, right, ratio_fn) do
    partial_options = [similarity_fn: ratio_fn, case_sensitive: true, precision: 15]

    do_token_sort_ratio(left, right, fn a, b ->
      partial_ratio(a, b, partial_options) / 100
    end)
  end

  @doc """
  Calculates the token set ratio between two strings. Each string is tokenized and
  its tokens are split in two sets (the tokens shared by both strings and the
  remaining ones); the strings rebuilt from those sets, sorted alphabetically, are
  compared and the best ratio is returned

  ```elixir
  iex> token_set_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear")
  100.0

  iex> token_set_ratio("fuzzy was a bear", "muzzy wuzzy was a bear")
  78.95
  ```
  """
  @spec token_set_ratio(String.t(), String.t(), fuzzywuzzy_options()) :: float()
  def token_set_ratio(left, right, options \\ []) do
    apply_ratio(left, right, &do_token_set_ratio/3, options)
  end

  # Builds three strings out of the token sets of the inputs - the shared tokens, and the
  # shared tokens followed by the tokens exclusive to each side - then scores every pair
  # of them and returns the highest score.
  @spec do_token_set_ratio(String.t(), String.t(), ratio_calculator()) :: float()
  defp do_token_set_ratio(left, right, ratio_fn) do
    left_set = token_set_normalizer(left)
    right_set = token_set_normalizer(right)

    shared_tokens = MapSet.intersection(left_set, right_set)
    common = shared_tokens |> Enum.sort() |> Enum.join(" ") |> String.trim()

    common_plus_left = token_set_diff(left_set, right_set, common)
    common_plus_right = token_set_diff(right_set, left_set, common)

    candidates = [
      {common, common_plus_left},
      {common, common_plus_right},
      {common_plus_left, common_plus_right}
    ]

    scores = for {first, second} <- candidates, do: ratio_fn.(first, second)

    Enum.max(scores)
  end

  # Tokenizes the string and collects the tokens into a set, dropping duplicates.
  @spec token_set_normalizer(String.t()) :: MapSet.t()
  defp token_set_normalizer(string), do: MapSet.new(string_normalizer(string))

  # Returns `prefix` followed by the sorted tokens found in `left` but not in `right`,
  # separated by single spaces and trimmed of surrounding whitespace.
  @spec token_set_diff(MapSet.t(), MapSet.t(), String.t()) :: String.t()
  defp token_set_diff(left, right, prefix) do
    exclusive_tokens = MapSet.difference(left, right)
    suffix = exclusive_tokens |> Enum.sort() |> Enum.join(" ")

    String.trim("#{prefix} #{suffix}")
  end

  @doc """
  Like the token set ratio, except that a partial ratio - instead of a full one -
  is applied to the strings built from the token sets

  ```elixir
  iex> partial_token_set_ratio("grizzly was a bear", "a grizzly inside a box")
  100.0

  iex> partial_token_set_ratio("grizzly was a bear", "be what you wear")
  43.75
  ```
  """
  @spec partial_token_set_ratio(String.t(), String.t(), fuzzywuzzy_options()) :: float()
  def partial_token_set_ratio(left, right, options \\ []),
    do: apply_ratio(left, right, &do_partial_token_set_ratio/3, options)

  # Token set ratio whose scoring step is a partial ratio.
  #
  # `ratio_fn` is the similarity function selected by the caller; it is forwarded to the
  # nested `partial_ratio/3` call instead of being dropped, so a custom `:similarity_fn`
  # is honoured. This function is reached through `apply_ratio/4`, which has already
  # applied the caller's case handling to both strings, so the nested call runs with
  # `case_sensitive: true` to avoid upcasing input the caller asked to keep as is.
  # `precision: 15` is the largest value `Float.round/2` accepts: it leaves the nested
  # result effectively unrounded so that only the caller's precision applies.
  #
  # Returns a score in the same scale as `ratio_fn` (the nested percentage divided by 100).
  @spec do_partial_token_set_ratio(String.t(), String.t(), ratio_calculator()) :: float()
  defp do_partial_token_set_ratio(left, right, ratio_fn) do
    partial_options = [similarity_fn: ratio_fn, case_sensitive: true, precision: 15]

    do_token_set_ratio(left, right, fn a, b ->
      partial_ratio(a, b, partial_options) / 100
    end)
  end

  @doc """
  Scores the two strings with several methods and returns the best result as a
  `{method, score}` tuple.

  The full-match methods are always evaluated; the partial ones are added when
  `partial` is truthy. When several methods reach the same best score, the one
  evaluated first is returned
  """
  @spec best_score_ratio(String.t(), String.t(), boolean(), fuzzywuzzy_options()) :: {match_method(), float()}
  def best_score_ratio(left, right, partial \\ false, options \\ []) do
    full_methods = [
      {:standard, &ratio/3},
      {:quick, &quick_ratio/3},
      {:token_sort, &token_sort_ratio/3},
      {:token_set, &token_set_ratio/3}
    ]

    partial_methods =
      if partial do
        [
          {:partial, &partial_ratio/3},
          {:partial_token_sort, &partial_token_sort_ratio/3},
          {:partial_token_set, &partial_token_set_ratio/3}
        ]
      else
        []
      end

    scored =
      for {method, calculator} <- full_methods ++ partial_methods do
        {method, calculator.(left, right, options)}
      end

    Enum.max_by(scored, fn {_method, score} -> score end)
  end

  @doc """
  Weighted ratio. Not implemented yet: calling it always raises
  """
  @spec weighted_ratio(String.t(), String.t(), fuzzywuzzy_options()) :: float()
  def weighted_ratio(_left, _right, _options), do: raise("not_implemented")

  @doc """
  Processes a list of strings, finding the best match for a reference string.
  Not implemented yet: calling it always raises
  """
  @spec process(String.t(), [String.t()], fuzzywuzzy_options()) :: String.t()
  def process(_reference, _candidates, _options), do: raise("not_implemented")

  # Splits the string into tokens on every character that is neither alphanumeric nor a
  # hyphen, discarding the empty tokens produced by consecutive separators.
  # The lowercase `~r` sigil replaces the deprecated `~R`; the pattern is unchanged because
  # `\-` is not an escape sequence that `~r` rewrites.
  @spec string_normalizer(String.t()) :: [String.t()]
  defp string_normalizer(string), do: String.split(string, ~r/[^[:alnum:]\-]/u, trim: true)

  # Shared driver of every public ratio function.
  #
  # An empty string on either side scores 0.0 and two identical strings score 100.0
  # without consulting the options. Otherwise both strings are upcased unless the
  # `:case_sensitive` option is truthy, `ratio_fn` is invoked with the configured
  # similarity function, and its result is turned into a percentage rounded to the
  # configured precision.
  @spec apply_ratio(
          String.t(),
          String.t(),
          (String.t(), String.t(), ratio_calculator() -> float()),
          fuzzywuzzy_options()
        ) ::
          float()
  defp apply_ratio("", _right, _ratio_fn, _options), do: 0.0
  defp apply_ratio(_left, "", _ratio_fn, _options), do: 0.0
  defp apply_ratio(same, same, _ratio_fn, _options), do: 100.0

  defp apply_ratio(left, right, ratio_fn, options) do
    case_sensitive? = get_option(options, :case_sensitive)

    lhs = if case_sensitive?, do: left, else: String.upcase(left)
    rhs = if case_sensitive?, do: right, else: String.upcase(right)

    similarity_fn = get_option(options, :similarity_fn)
    precision = get_option(options, :precision)

    score = ratio_fn.(lhs, rhs, similarity_fn)
    Float.round(100 * score, precision)
  end

  # Looks an option up in the given keyword list, falling back to the value stored under
  # the same key in the `:ex_fuzzywuzzy` application environment (`nil` when unset).
  @spec get_option(fuzzywuzzy_options(), atom()) :: any()
  defp get_option(options, option) do
    fallback = Application.get_env(:ex_fuzzywuzzy, option)
    Keyword.get(options, option, fallback)
  end
end