# lib/dna/dna_strand.ex

defmodule Bio.Sequence.DnaStrand do
  @moduledoc """
  A single strand of DNA, defined with `use Bio.BaseSequence`.

  `converter/0` names `Bio.Sequence.Dna.Conversions` as the converter module
  for this sequence type.

  ## Examples

      iex>"tagc" in DnaStrand.new("ttagct")
      true

      iex>alias Bio.Enum, as: Bnum
      ...>DnaStrand.new("ttagct")
      ...>|> Bnum.map(&(&1))
      %DnaStrand{sequence: "ttagct", length: 6}

      iex>alias Bio.Enum, as: Bnum
      ...>DnaStrand.new("ttagct")
      ...>|> Bnum.slice(2, 2)
      %DnaStrand{sequence: "ag", length: 2}
  """
  use Bio.BaseSequence

  @doc """
  Returns the converter module for DNA strands,
  `Bio.Sequence.Dna.Conversions`.
  """
  @impl Bio.Sequential
  def converter do
    Bio.Sequence.Dna.Conversions
  end
end

defimpl Bio.Polymeric, for: Bio.Sequence.DnaStrand do
  # `Bio.Polymeric` implementation for `Bio.Sequence.DnaStrand`: k-mer
  # splitting plus two alphabet checks over the strand's elements.
  alias Bio.Sequence.DnaStrand

  # Splits the strand into consecutive, non-overlapping chunks of `k` elements.
  #
  # Returns `{:ok, kmers, meta}` when the strand's `length` is a multiple of
  # `k`: `kmers` is the list of chunks, each joined back into a string, and
  # `meta` is the struct's fields (as a plain map) without `:sequence`.
  # Returns `{:error, :seq_len_mismatch}` when `length` is not divisible by `k`.
  #
  # `k` is expected to be a positive integer; `rem/2` raises
  # `ArithmeticError` for `k == 0`.
  def kmers(%DnaStrand{length: len} = sequence, k) do
    if rem(len, k) == 0 do
      chunks =
        sequence
        |> Enum.chunk_every(k)
        |> Enum.map(&Enum.join/1)

      meta =
        sequence
        |> Map.from_struct()
        |> Map.delete(:sequence)

      {:ok, chunks, meta}
    else
      {:error, :seq_len_mismatch}
    end
  end

  # Returns `true` when every character of the strand's `sequence` occurs in
  # `alphabet`, `false` otherwise.
  #
  # `alphabet` is a string of allowed characters. It is escaped before being
  # placed in the negated character class so that characters such as `-`,
  # `]`, `^` or `\` are matched literally, the same way `validate/2` treats
  # them through `String.contains?/2`.
  #
  # If `Regex.compile/1` fails, its `{:error, reason}` tuple is returned
  # unchanged.
  def valid?(%DnaStrand{sequence: seq}, alphabet) do
    with {:ok, disallowed} <- Regex.compile("[^#{Regex.escape(alphabet)}]") do
      not Regex.match?(disallowed, seq)
    end
  end

  # Checks every element of the strand against `alphabet`.
  #
  # Returns `{:ok, strand}` when all elements are contained in `alphabet`;
  # the strand is rebuilt with `DnaStrand.new/2`, merged with the original
  # struct's remaining fields and marked `valid?: true`. An empty strand has
  # no mismatches and therefore validates successfully.
  #
  # Returns `{:error, errors}` otherwise, where `errors` is a list of
  # `{:mismatch_alpha, char, index}` tuples in ascending index order.
  def validate(%DnaStrand{} = sequence, alphabet) do
    # TODO: this is generalizable
    # Both accumulators are built by prepending and reversed once at the end,
    # which keeps the pass linear while preserving element order.
    {kept_rev, errors_rev} =
      sequence
      |> Enum.with_index()
      |> Enum.reduce({[], []}, fn {char, index}, {kept, errors} ->
        if String.contains?(alphabet, char) do
          {[char | kept], errors}
        else
          {kept, [{:mismatch_alpha, char, index} | errors]}
        end
      end)

    case errors_rev do
      [] ->
        given =
          sequence
          |> Map.from_struct()
          |> Map.drop([:sequence, :alphabet])

        string =
          kept_rev
          |> Enum.reverse()
          |> Enum.join()

        {:ok,
         string
         |> DnaStrand.new(alphabet: alphabet, length: given.length)
         |> Map.merge(given)
         |> Map.put(:valid?, true)}

      [_ | _] ->
        {:error, Enum.reverse(errors_rev)}
    end
  end
end