# lib/rna/rna_strand.ex

defmodule Bio.Sequence.RnaStrand do
  @moduledoc """
  A single RNA strand, represented by the basic sequence which implements the
  `Bio.Polymer` behavior.

  No validation is implemented by this module, because what counts as valid is
  not well defined in every case. Ambiguous nucleotides, for example, may or
  may not be acceptable. Since that depends on the use, validation is left to
  application developers to write.

  ## Examples

      iex>"uagc" in  RnaStrand.new("uuagcu")
      true

      iex>alias Bio.Enum, as: Bnum
      ...>RnaStrand.new("uuagcu")
      ...>|> Bnum.map(&(&1))
      %RnaStrand{sequence: "uuagcu", length: 6}

      iex>alias Bio.Enum, as: Bnum
      ...>RnaStrand.new("uuagcu")
      ...>|> Bnum.slice(2, 2)
      %RnaStrand{sequence: "ag", length: 2}
  """
  use Bio.BaseSequence

  # `Bio.Sequential` callback: names `Bio.Sequence.Rna.Conversions` as the
  # converter module for this sequence type.
  @impl Bio.Sequential
  def converter do
    Bio.Sequence.Rna.Conversions
  end
end

defimpl Bio.Polymeric, for: Bio.Sequence.RnaStrand do
  # Implementation of the `Bio.Polymeric` protocol for `Bio.Sequence.RnaStrand`.

  alias Bio.Sequence.RnaStrand

  # Splits `sequence` into consecutive chunks of `k` elements.
  #
  # Returns `{:ok, kmers, metadata}` when the strand's `:length` is an exact
  # multiple of `k`. `kmers` is the list of chunks, each joined into a single
  # string, and `metadata` is the struct's fields as a plain map with
  # `:sequence` removed. Returns `{:error, :seq_len_mismatch}` otherwise.
  def kmers(%RnaStrand{} = sequence, k) do
    case rem(sequence.length, k) do
      0 ->
        kmers =
          sequence
          |> Enum.chunk_every(k)
          |> Enum.map(&Enum.join/1)

        metadata =
          sequence
          |> Map.from_struct()
          |> Map.drop([:sequence])

        {:ok, kmers, metadata}

      _ ->
        {:error, :seq_len_mismatch}
    end
  end

  # Returns `true` when every character of the strand's sequence is a member
  # of `alphabet` (a string listing the allowed characters), `false` otherwise.
  #
  # An empty alphabet admits no characters, so only the empty sequence is
  # valid. It is handled separately because the character class "[^]" does
  # not compile, which would leak a truthy error tuple out of a predicate.
  def valid?(%RnaStrand{sequence: seq}, ""), do: seq == ""

  def valid?(%RnaStrand{sequence: seq}, alphabet) do
    # The alphabet is escaped so that characters such as "-", "]", "^" or "\"
    # are literal members of the class rather than regex syntax. This keeps
    # membership literal, matching `validate/2`.
    #
    # Should compilation still fail, the `{:error, reason}` tuple is returned
    # unchanged (a `with` without `else` passes non-matching values through).
    with {:ok, regex} <- Regex.compile("[^#{Regex.escape(alphabet)}]") do
      not Regex.match?(regex, seq)
    end
  end

  # Checks every element of `sequence` against `alphabet`.
  #
  # Returns `{:ok, strand}` when all elements are contained in `alphabet`. The
  # strand is rebuilt with `RnaStrand.new/2`, merged with the remaining fields
  # of the given struct, and has `:valid?` set to `true`. An empty sequence has
  # no mismatches and therefore also yields `{:ok, strand}`.
  #
  # Returns `{:error, errors}` when at least one element is not contained in
  # `alphabet`. `errors` holds one `{:mismatch_alpha, char, index}` tuple per
  # offending element, in sequence order.
  def validate(%RnaStrand{} = sequence, alphabet) do
    # TODO: this is generalizable
    {accepted, mismatches} =
      sequence
      |> Enum.with_index()
      |> Enum.reduce({[], []}, fn {char, index}, {chars, errors} ->
        if String.contains?(alphabet, char) do
          {[char | chars], errors}
        else
          {chars, [{:mismatch_alpha, char, index} | errors]}
        end
      end)

    # Both accumulators were built by prepending (O(1) per element), so each
    # is reversed exactly once here to restore sequence order.
    case mismatches do
      [_ | _] ->
        {:error, Enum.reverse(mismatches)}

      [] ->
        string =
          accepted
          |> Enum.reverse()
          |> IO.iodata_to_binary()

        given =
          sequence
          |> Map.from_struct()
          |> Map.drop([:sequence, :alphabet])

        {:ok,
         RnaStrand.new(string, alphabet: alphabet, length: given.length)
         |> Map.merge(given)
         |> Map.put(:valid?, true)}
    end
  end
end