lib/akin/algorithms/phonetic/word.ex

defmodule Word do
  defstruct(
    original: nil,
    decoded: nil,
    normalized: nil,
    upper: nil,
    length: 0,
    prepad: nil,
    start_index: 0,
    end_index: 0,
    postpad: nil,
    buffer: nil
  )

  def init(input) do
    prepad = "  "
    postpad = "      "
    decoded = if String.valid?(input), do: input, else: nil

    %Word{
      original: input,
      decoded: decoded,
      normalized: normalize(input),
      upper: String.upcase(input),
      length: String.length(input),
      prepad: prepad,
      start_index: String.length(prepad),
      end_index: String.length(prepad) + (String.length(input) - 1),
      postpad: postpad,
      buffer: prepad <> String.upcase(input) <> postpad
    }
  end

  def is_slavo_germanic?(%Word{upper: upper}) do
    String.contains?(upper, "W") or
      String.contains?(upper, "K") or
      String.contains?(upper, "CZ") or
      String.contains?(upper, "WITZ")
  end

  def get_letters(%Word{start_index: start_index, buffer: buffer}, start \\ 0, close \\ nil) do
    close = if is_nil(close), do: start + 1, else: close
    start = start_index + start
    close = start_index + close
    String.slice(buffer, start, close - start)
  end

  def normalize(input) do
    String.normalize(input, :nfd)
    |> String.graphemes()
    |> Enum.reduce([], fn l, acc -> if Unicode.category(l) == "Mn", do: acc, else: [l | acc] end)
    |> Enum.reverse()
  end
end