defmodule Akin.Overlap do
  @moduledoc """
  Implements the Overlap Similarity Metric.

  Both strings are split into n-grams; the score is the size of the
  intersection of the two n-gram lists divided by the length of the shorter
  n-gram list.
  """
  @behaviour Akin.Task
  import Akin.Util, only: [ngram_tokenize: 2, opts: 2, intersect: 2]
  alias Akin.Corpus

  @doc """
  Compares two values using the Overlap Similarity metric and returns the
  coefficient.

  The third argument is a keyword list of options. The n-gram size is looked
  up in it under the `:ngram_size` key via `Akin.Util.opts/2`, which is also
  where any default comes from.

  Returns `nil` when the n-gram size is not positive or when either string has
  fewer graphemes than the n-gram size. Returns `1.0` when both strings are
  equal. Otherwise returns the computed coefficient as a float.

  ## Examples

      iex> Akin.Overlap.compare(%Akin.Corpus{string: "compare me"}, %Akin.Corpus{string: "to me"}, [])
      0.5
      iex> Akin.Overlap.compare(%Akin.Corpus{string: "compare me"}, %Akin.Corpus{string: "to me"}, [ngram_size: 1])
      0.8
      iex> Akin.Overlap.compare(%Akin.Corpus{string: "or me"}, %Akin.Corpus{string: "me"}, [ngram_size: 1])
      1.0

  """
  @spec compare(%Corpus{}, %Corpus{}, Keyword.t()) :: float() | nil
  def compare(%Corpus{} = left, %Corpus{} = right, opts) do
    perform(left, right, opts(opts, :ngram_size))
  end

  # Computes the coefficient for an already-resolved integer n-gram size `n`.
  # The branch order matters: the size check runs before the equality
  # shortcut, so two identical strings shorter than `n` still yield `nil`.
  defp perform(%Corpus{string: left}, %Corpus{string: right}, n) when is_integer(n) do
    cond do
      # No n-gram can be formed from at least one side, so no score is given.
      n <= 0 || String.length(left) < n || String.length(right) < n ->
        nil

      # Identical strings: skip tokenizing entirely.
      left == right ->
        1.0

      true ->
        tokens_left = ngram_tokenize(left, n)
        tokens_right = ngram_tokenize(right, n)
        shared = tokens_left |> intersect(tokens_right) |> length()

        # `/` always produces a float, including for a whole-number result.
        shared / min(length(tokens_left), length(tokens_right))
    end
  end
end