lib/base.ex

defmodule Bio.BaseSequence do
  @moduledoc """
  Implementations of the basic sequence functionality.

  Calling `use Bio.BaseSequence` will generate a simple struct in the calling
  module, as well as the implementation for the `Enumerable` protocol.

  Because the `Enum` module makes certain assumptions about the data that it is
  given, we cannot trust that the functions therein will always behave how it
  makes the most sense. As an example, there is no way to ensure that
  `Enum.slide/3` returns anything other than a list. I believe that it makes
  sense for it to return the enumerable type, so you would get e.g. a
  `Bio.Sequence.DnaStrand` back.

  With that said, many of the `Enum` module's functions _shouldn't_ make
  assumptions. This is largely idiosyncratic, and so instead of trying to
  ham-fist the `Enum` functions to work, I just wrapped them up with `Bio.Enum`.

  The implementations in `Bio.Enum` rely on the `Enum` functions to work, but
  they go the extra mile in terms of returning things that seem to make the most
  sense. See the documentation of `Bio.Enum` for more on that.

  This module will also cause `new/2` to be defined. This function takes a
  sequence as well as the keywords `:label`, `:length` and `:alphabet`. When
  `:length` is not given it defaults to `String.length/1` of the sequence. For
  more examples of using `new/2` see `Bio.Sequence.AminoAcid`,
  `Bio.Sequence.DnaStrand`, or `Bio.Sequence.RnaStrand`.
  """

  # Injects the struct definition, the `Bio.Sequential` callbacks and an
  # `Enumerable` implementation into the module that calls `use Bio.BaseSequence`.
  defmacro __using__(_) do
    quote do
      # Captured here because `__MODULE__` inside the `defimpl` below refers to
      # the implementation module, not to the module that called `use`.
      using_module = __MODULE__
      @behaviour Bio.Sequential

      defstruct sequence: "", length: 0, label: nil, alphabet: nil, valid?: false

      # Builds the struct from a binary sequence.
      #
      # Recognised options are `:label`, `:length` and `:alphabet`. A missing (or
      # falsy) `:length` is computed with `String.length/1`; the other two fall
      # back to `nil`. Unknown options are ignored.
      @impl Bio.Sequential
      def new(seq, opts \\ []) when is_binary(seq) do
        defaults = [
          label: fn _ -> nil end,
          length: &String.length/1,
          alphabet: fn _ -> nil end
        ]

        fields =
          Map.new(defaults, fn {key, default} ->
            {key, Keyword.get(opts, key) || default.(seq)}
          end)

        struct!(__MODULE__, Map.put(fields, :sequence, seq))
      end

      # Renders the sequence as a two-line FASTA record: `>label`, then the
      # sequence, each terminated by a newline.
      @impl Bio.Sequential
      def fasta_line(%__MODULE__{sequence: seq, label: label}) when is_binary(seq) do
        ">#{label}\n#{seq}\n"
      end

      defimpl Enumerable, for: using_module do
        # Enumerates the sequence one character at a time, each element being a
        # one-character string.
        @impl true
        def reduce(poly, acc, fun) do
          do_reduce(to_str_list(poly.sequence), acc, fun)
        end

        # Standard list-backed reducer honouring the :halt / :suspend / :cont
        # accumulator tags of the `Enumerable` protocol.
        defp do_reduce(_, {:halt, acc}, _fun) do
          {:halted, acc}
        end

        defp do_reduce(list, {:suspend, acc}, fun) do
          {:suspended, acc, &do_reduce(list, &1, fun)}
        end

        defp do_reduce([], {:cont, acc}, _fun) do
          {:done, acc}
        end

        defp do_reduce([h | t], {:cont, acc}, fun) do
          do_reduce(t, fun.(h, acc), fun)
        end

        # Splits a binary into one-character strings. The `::utf8` modifier is
        # required: a bare `<<codepoint>>` would truncate any code point above
        # 255 to a single byte and yield a corrupted element.
        defp to_str_list(seq) when is_binary(seq) do
          for codepoint <- String.to_charlist(seq), do: <<codepoint::utf8>>
        end

        # Membership is substring containment: a binary `element` is a member
        # when it occurs anywhere inside the sequence (so multi-character
        # sub-sequences are supported, not only single characters).
        @impl true
        def member?(poly, element) when is_binary(element) do
          {:ok, String.contains?(poly.sequence, element)}
        end

        # Anything that is not a binary can never be part of a sequence; answer
        # `false` rather than raising a `FunctionClauseError`.
        def member?(_poly, _element) do
          {:ok, false}
        end

        # Reports the length stored on the struct instead of re-walking the
        # sequence.
        @impl true
        def count(poly) do
          {:ok, poly.length}
        end

        # Slicing via binary pattern matching.
        #
        # The returned function yields `amount` elements beginning at `start`,
        # taking every `step`-th one. Offsets are byte offsets, which coincide
        # with character offsets for ASCII sequences.
        #
        # NOTE(review): the slice is returned as a charlist (integers) whereas
        # `reduce/3` yields one-character strings. This is kept as-is because
        # callers outside this file may rely on it -- confirm before unifying.
        @impl true
        def slice(poly) do
          {:ok, poly.length,
           fn start, amount, step ->
             # Number of contiguous characters that must be read so that the
             # last requested element (at start + (amount - 1) * step) is
             # included.
             span = (amount - 1) * step + 1

             <<_before::binary-size(start), chunk::binary-size(span), _rest::binary>> =
               poly.sequence

             chunk
             |> String.to_charlist()
             |> Enum.take_every(step)
           end}
        end
      end
    end
  end
end