lib/amino_acid/amino_acid.ex

defmodule Bio.Sequence.AminoAcid do
  @moduledoc """
  A plain amino acid sequence, built on top of `Bio.BaseSequence`.

  ## Examples

      iex>aa = AminoAcid.new("ymabagta")
      ...>"mabag" in aa
      true

      iex>alias Bio.Enum, as: Bnum
      ...>AminoAcid.new("ymabagta")
      ...>|>Bnum.map(&(&1))
      %AminoAcid{sequence: "ymabagta", length: 8}

      iex>alias Bio.Enum, as: Bnum
      ...>AminoAcid.new("ymabagta")
      ...>|>Bnum.slice(2, 2)
      %AminoAcid{sequence: "ab", length: 2}

  ## Conversions

  To define conversions of amino acids into other polymers, see the
  `Bio.Polymer` module, which explains how to create a Conversion module.

  `Bio.Sequence.AminoAcid` implements the `Bio.Polymeric` protocol, so
  conversions from this type to any other type you need can be defined.
  """
  use Bio.BaseSequence

  # Conversion table for this sequence type; its contents come entirely from
  # `use Bio.Convertible`.
  defmodule Conversions do
    @moduledoc false
    use Bio.Convertible
  end

  # Hands back the module that holds the conversions for amino acids.
  @impl Bio.Sequential
  def converter do
    Conversions
  end
end

defimpl Bio.Polymeric, for: Bio.Sequence.AminoAcid do
  alias Bio.Sequence.AminoAcid

  # Splits the sequence into consecutive chunks of `k` elements.
  #
  # Returns `{:ok, chunks, metadata}` when the sequence length is an exact
  # multiple of `k`, where `chunks` is a list of strings and `metadata` is the
  # struct's fields as a plain map without `:sequence`. Returns
  # `{:error, :seq_len_mismatch}` when the length is not divisible by `k`.
  #
  # `k` is expected to be a positive integer: `rem/2` raises `ArithmeticError`
  # for `k == 0`.
  #
  # NOTE(review): enumerating the struct relies on an `Enumerable`
  # implementation that is presumably provided through `Bio.BaseSequence`.
  def kmers(%AminoAcid{} = amino, k) do
    case rem(amino.length, k) do
      0 ->
        chunks =
          amino
          |> Enum.chunk_every(k)
          |> Enum.map(&Enum.join/1)

        metadata =
          amino
          |> Map.from_struct()
          |> Map.drop([:sequence])

        {:ok, chunks, metadata}

      _ ->
        {:error, :seq_len_mismatch}
    end
  end

  # Returns `true` when every character of the sequence occurs in `alphabet`,
  # `false` otherwise.
  #
  # The alphabet is treated as a literal set of characters, matching how
  # `validate/2` checks membership. It is escaped before being placed in the
  # negated character class so that characters such as `-`, `]`, `^` or `\`
  # cannot form ranges or break the pattern.
  #
  # If the pattern still fails to compile, the `{:error, reason}` tuple from
  # `Regex.compile/1` is returned unchanged (a non-matching `with` clause
  # falls through as-is).
  def valid?(%AminoAcid{sequence: seq}, alphabet) do
    with {:ok, regex} <- Regex.compile("[^#{Regex.escape(alphabet)}]") do
      not Regex.match?(regex, seq)
    end
  end

  # Checks every element of the sequence against `alphabet`.
  #
  # Returns `{:ok, amino_acid}` with a freshly built struct (same label and
  # length, the given alphabet, and `:valid?` set to `true`) when all elements
  # are members of the alphabet. Otherwise returns `{:error, errors}`, where
  # `errors` is a list of `{:mismatch_alpha, char, index}` tuples in sequence
  # order.
  #
  # An empty sequence has no mismatches and therefore validates successfully.
  def validate(%AminoAcid{label: label, length: length} = sequence, alphabet) do
    # TODO: this is generalizable
    indexed = Enum.with_index(sequence)

    # The comprehension keeps sequence order, so no append-in-a-loop is needed.
    errors =
      for {char, index} <- indexed,
          not String.contains?(alphabet, char),
          do: {:mismatch_alpha, char, index}

    case errors do
      [] ->
        string = Enum.map_join(indexed, fn {char, _index} -> char end)

        # `Map.put/3` is kept (rather than `%{s | valid?: true}`) because the
        # struct definition is not visible here; the update syntax would raise
        # if `:valid?` is not a declared field.
        {:ok,
         AminoAcid.new(string, label: label, length: length, alphabet: alphabet)
         |> Map.put(:valid?, true)}

      [_ | _] ->
        {:error, errors}
    end
  end
end