lib/akin/algorithms/levenshtein.ex

defmodule Akin.Levenshtein do
  @moduledoc """
  Compare two strings for their Levenshtein score.
  """
  @behaviour Akin.Task
  alias Akin.Corpus

  @spec compare(list(), list()) :: float()
  @spec compare(%Corpus{}, %Corpus{}) :: float()
  @spec compare(%Corpus{}, %Corpus{}, Keyword.t()) :: float()
  @doc """
  The score is determined by finding the edit
  distance: the minimum number of single-character edits needed to change one word into the other.
  The distance is substracted from 1.0 and then divided by the longest length between the two strings.
  Result is rounded to the tenths.
  """
  def compare(%Corpus{string: left}, %Corpus{string: right}, _opts), do: compare(left, right)

  def compare(string, string), do: 1

  def compare(string, []), do: length(string)

  def compare([], string), do: length(string)

  def compare(left, right)
      when is_list(left) and is_list(right) do
    rec_lev(left, right, :lists.seq(0, length(right)), 1)
  end

  def compare(left, right) when is_binary(left) and is_binary(right) do
    distance = compare(String.graphemes(left), String.graphemes(right))
    1.0 - distance / Enum.max([String.length(left), String.length(right)])
  end

  defp rec_lev([src_head | src_tail], right, distlist, step) do
    rec_lev(src_tail, right, lev_dist(right, distlist, src_head, [step], step), step + 1)
  end

  defp rec_lev([], _right, distlist, _step) do
    List.last(distlist)
  end

  defp lev_dist(
         [right_head | right_tail],
         [distlist_head | distlist_tail],
         left_char,
         new_distlist,
         last_dist
       )
       when distlist_tail > 0 do
    min =
      Enum.min([
        last_dist + 1,
        hd(distlist_tail) + 1,
        distlist_head + delta(right_head, left_char)
      ])

    lev_dist(right_tail, distlist_tail, left_char, new_distlist ++ [min], min)
  end

  defp lev_dist([], _, _, new_distlist, _), do: new_distlist

  defp delta(a, a), do: 0

  defp delta(_a, _b), do: 1
end