lib/akin/algorithms/jaro_winkler.ex

defmodule Akin.JaroWinkler do
  @moduledoc """
  Calculates the
  [Jaro-Winkler distance](https://en.wikipedia.org/wiki/Jaro-Winkler_distance)
  between two strings.

  The result is reported as a similarity: `1.0` for identical strings and
  `0.0` when either string is empty.
  """
  @behaviour Akin.Task
  alias Akin.Corpus

  # The prefix bonus inspects at most this many leading characters.
  @max_prefix_length 4

  # The prefix bonus is `prefix_length * (1 - jaro) / @prefix_divisor`.
  @prefix_divisor 10

  @doc """
  Compares the `string` fields of two `Akin.Corpus` structs.

  The options are ignored; the result is the same as `compare/2`.
  """
  @spec compare(%Corpus{}, %Corpus{}, Keyword.t()) :: float() | nil
  def compare(%Corpus{string: left}, %Corpus{string: right}, _opts), do: compare(left, right)

  @doc """
  Calculates the Jaro-Winkler score of two strings.

  Accepts either two `Akin.Corpus` structs (their `string` fields are
  compared) or two binaries. Returns `0.0` when either string is empty, `1.0`
  when both are equal, and `nil` when the values to compare are not binaries.

  ## Examples

      iex> Akin.JaroWinkler.compare(%Akin.Corpus{string: "abc"}, %Akin.Corpus{string: ""})
      0.0
      iex> Akin.JaroWinkler.compare(%Akin.Corpus{string: "abc"}, %Akin.Corpus{string: "xyz"})
      0.0
      iex> Akin.JaroWinkler.compare(%Akin.Corpus{string: "compare me"}, %Akin.Corpus{string: "compare me"})
      1.0
      iex> Akin.JaroWinkler.compare(%Akin.Corpus{string: "natural"}, %Akin.Corpus{string: "nothing"})
      0.5714285714285714
  """
  @spec compare(%Corpus{} | binary(), %Corpus{} | binary()) :: float() | nil
  def compare(%Corpus{string: left}, %Corpus{string: right}), do: compare(left, right)

  def compare(left, right) when is_binary(left) and is_binary(right) do
    cond do
      left == "" or right == "" ->
        0.0

      left == right ->
        1.0

      # The shorter string always goes first so that the prefix scan in
      # `modify_prefix/2` is bounded by the shorter length.
      String.length(left) > String.length(right) ->
        winkler(right, left)

      true ->
        winkler(left, right)
    end
  end

  def compare(_, _), do: nil

  @doc """
  Scores two strings using `String.jaro_distance/2`.

  Returns the integer `0` when either string is empty and `nil` when either
  argument is not a binary.
  """
  @spec score(binary(), binary()) :: float() | 0
  @spec score(term(), term()) :: nil
  def score(left, right) when is_binary(left) and is_binary(right) do
    if left == "" or right == "" do
      0
    else
      String.jaro_distance(left, right)
    end
  end

  def score(_, _), do: nil

  @doc """
  Counts how many leading characters of `left` and `right` are equal.

  At most #{@max_prefix_length} characters, and never more than the length of
  `left`, are examined. The count is what gives a more favorable rating to
  strings that match from the beginning.
  """
  @spec modify_prefix(binary(), binary()) :: non_neg_integer()
  def modify_prefix(left, right) do
    modify_prefix(left, right, 0, min(@max_prefix_length, String.length(left)))
  end

  @doc """
  Continues the common-prefix count from `prefix_length`.

  Stops at the first position where the characters differ or once
  `prefix_length` reaches `last_character`, and returns the count so far.
  """
  @spec modify_prefix(binary(), binary(), integer(), integer()) :: integer()
  def modify_prefix(left, right, prefix_length, last_character) do
    if prefix_length < last_character and
         String.at(left, prefix_length) == String.at(right, prefix_length) do
      modify_prefix(left, right, prefix_length + 1, last_character)
    else
      prefix_length
    end
  end

  # Applies the Winkler prefix bonus on top of the Jaro score. Callers pass
  # the shorter string first.
  defp winkler(shorter, longer) do
    jaro = score(shorter, longer)
    jaro + modify_prefix(shorter, longer) * (1 - jaro) / @prefix_divisor
  end
end