lib/rna/rna.ex

defmodule Bio.Sequence.Rna do
  @moduledoc """
  A module for working with RNA.

  This module doesn't contain a representative struct, as with `Bio.Sequence.Dna`.
  This is because there are multiple ways to interpret a string as RNA. Namely, it
  can either be single or double stranded. This is why the
  `Bio.Sequence.RnaStrand` and `Bio.Sequence.RnaDoubleStrand` modules exist.

  However, this is the interface for dealing with things like `complement/1` and
  `reverse_complement/1`.

  Additionally, this module handles defining default conversions for the DNA
  sequence types into RNA sequence types (`Bio.Sequence.DnaStrand` and
  `Bio.Sequence.DnaDoubleStrand`). Conversions defined here are used by the
  `Bio.Sequence.RnaStrand` and `Bio.Sequence.RnaDoubleStrand` modules.
  The default conversions use conventional nucleotides and map them to their
  relevant DNA nucleotides:

  ```
  a -> a
  u -> t
  g -> g
  c -> c
  ```

  Casing is preserved, so mixed case sequences will not be altered.

  # Example

      iex>RnaStrand.new("uaUUg")
      ...>|> Bio.Polymer.convert(DnaStrand)
      {:ok, %DnaStrand{sequence: "taTTg", length: 5}}

  This is guaranteed, so you may encode these with intention and assume that
  they are preserved across conversions.
  """
  alias Bio.Sequence.{RnaStrand, DnaStrand}
  alias Bio.Enum, as: Bnum
  alias Bio.Sequence.Alphabets.Rna, as: Alpha

  @type complementable :: struct() | String.t()

  @complement %{
    "a" => "u",
    "A" => "U",
    "u" => "a",
    "U" => "A",
    "g" => "c",
    "G" => "C",
    "c" => "g",
    "C" => "G"
  }

  defmodule Conversions do
    @moduledoc false
    use Bio.Convertible do
      def to(DnaStrand), do: {:ok, &to_dna/2, 1}
    end

    defp to_dna({:ok, kmers, data}, module) do
      kmers
      |> Enum.map(fn base ->
        case base do
          "A" -> "A"
          "U" -> "T"
          "G" -> "G"
          "C" -> "C"
          "a" -> "a"
          "u" -> "t"
          "g" -> "g"
          "c" -> "c"
        end
      end)
      |> Enum.join()
      |> new_sequence(data, module)
    end

    defp new_sequence(seq, data, module) do
      apply(module, :new, [seq, Map.to_list(data)])
    end
  end

  @doc """
  Provide the RNA complement to a sequence.

  Given a sequence that is either a binary or a `Bio.Sequence.RnaStrand`,
  returns the RNA complement as defined by the standard nucleotide complements.

  # Examples
      iex>Rna.complement("auugacgu")
      {:ok, "uaacugca"}

      iex>RnaStrand.new("auugacgu")
      ...>|> Rna.complement()
      {:ok, %RnaStrand{sequence: "uaacugca", length: 8, alphabet: Alpha.common()}}
  """
  @spec complement(complementable, keyword() | nil) ::
          {:ok, struct()}
          | {:error, Bio.AcidHelper.mismatch()}
  def complement(sequence, opts \\ [])

  def complement(%RnaStrand{} = sequence, opts) do
    Bio.AcidHelper.complement(Alpha, RnaStrand, sequence, opts)
  end

  def complement(sequence, opts) when is_binary(sequence) do
    Bio.AcidHelper.complement(Alpha, sequence, opts)
  end

  @doc """
  Provide the RNA reverse complement to a sequence.

  Given a sequence that is either a binary or a `Bio.Sequence.RnaStrand`,
  returns the RNA reverse complement as defined by the standard nucleotide
  complements.

  # Examples
      iex>Rna.reverse_complement("auugacgu")
      "acgucaau"

      iex>RnaStrand.new("auugacgu")
      ...>|> Rna.reverse_complement()
      %RnaStrand{sequence: "acgucaau", length: 8}
  """
  @spec reverse_complement(sequence :: complementable) :: complementable
  def reverse_complement(%RnaStrand{} = sequence) do
    sequence
    |> Bnum.map(&Map.get(@complement, &1))
    |> Bnum.reverse()
  end

  def reverse_complement(sequence) when is_binary(sequence) do
    sequence
    |> String.graphemes()
    |> Enum.map(&Map.get(@complement, &1))
    |> Enum.reverse()
    |> Enum.join()
  end
end