# lib/markov/text_util.ex

defmodule Markov.TextUtil do
  @moduledoc """
  Helpers for normalizing textual tokens.
  """

  @doc """
  Strips leading and trailing non-word characters from a textual token and
  downcases it.

  Non-binary tokens (for example atoms) are returned unchanged.

  Word characters are letters, digits and underscores. Tokens that are valid
  UTF-8 are matched with Unicode-aware character classes, so non-ASCII letters
  at the edges of a token are kept intact instead of being treated as raw bytes.

  The pattern is multiline: non-word runs at the start or end of *every line*
  inside a multi-line token are stripped, not only those at the very start and
  end of the token.

  ## Examples

      iex> Markov.TextUtil.sanitize_token(:atom)
      :atom

      iex> Markov.TextUtil.sanitize_token("test")
      "test"

      iex> Markov.TextUtil.sanitize_token("  !!!???///    tEsT     >>>")
      "test"

      iex> Markov.TextUtil.sanitize_token("¡¡Été!!")
      "été"
  """
  @spec sanitize_token(token) :: token when token: term()
  def sanitize_token(tok) when is_binary(tok) do
    tok
    |> String.trim()
    |> String.replace(edge_non_word_pattern(tok), "")
    |> String.downcase()
  end

  def sanitize_token(tok), do: tok

  # Picks the regex that matches runs of non-word characters anchored at a line
  # start or line end.
  #
  # The `u` modifier makes `\w` Unicode-aware, but it expects valid Unicode
  # input; binaries that are not valid UTF-8 therefore fall back to the
  # byte-wise pattern so they are still processed rather than rejected.
  defp edge_non_word_pattern(tok) do
    if String.valid?(tok) do
      ~r/(^[^\w]+)|([^\w]+$)/mu
    else
      ~r/(^[^\w]+)|([^\w]+$)/m
    end
  end
end