defmodule Mix.Tasks.Bio.Restriction.Build do
@moduledoc """
Module for building out the actual data structure that will be generated in
code.
Each enzyme will be defined as a struct under the `Bio.Restriction.Enzyme`
namespace. This struct will be populated with the minimal data needed for the
use in digestion and sequence validations.
Further additions to the data can be requested by opening an issue.
The current data preserved are:
blunt?: boolean
cut_1: number
cut_2: number
cut_3: number
cut_4: number
name: string
pattern: string
suppliers: [string...]
The value of `suppliers` is a string list of the Supplier Codes. These are
related to company names using the emboss_s file, and can be related with the
`Bio.Restriction.Suppliers` module.
"""
@shortdoc "Task for populating RE data"
use Mix.Task
alias Bio.Rebase.Emboss
import Mix.Tasks.Output
# Switches accepted by the task, parsed strictly: `--cache-dir` takes a string.
@options [cache_dir: :string]
# Short-form switches: `-d` is an alias for `--cache-dir`.
@aliases [d: :cache_dir]
@doc """
Entry point of the task.

Parses `inputs` for the `--cache-dir` (`-d`) switch, falling back to the user
cache directory of the `"RestrictionEx"` application when it is absent. The
three dated `downloads_emboss_{e,r,s}` paths under that directory are handed
to `Emboss.parse/3`, and the parsed result is used to write the enzyme module.
"""
def run(inputs) do
  {opts, _args, _invalid} = OptionParser.parse(inputs, aliases: @aliases, strict: @options)
  put_info("Building restriction data")

  cache_root =
    case opts[:cache_dir] do
      nil -> :filename.basedir(:user_cache, "RestrictionEx")
      dir -> dir
    end

  stamp = Emboss.date_term()
  # All three input files share the same naming scheme and differ by one letter.
  emboss_file = fn kind -> "#{cache_root}/downloads_emboss_#{kind}.#{stamp}" end

  parsed = Emboss.parse(emboss_file.("e"), emboss_file.("r"), emboss_file.("s"))
  write_module(parsed)
end
# Renders the `Bio.Restriction.Enzyme` module source from the parsed data,
# writes it to `lib/enzyme.ex`, then runs the `format` task.
#
# Raises (via `Mix.raise/1`) when there is no enzyme to derive the struct
# fields from, or when the file cannot be written, so that a failed build is
# never reported as a success.
defp write_module(%{enzymes: enzymes, suppliers: %{} = suppliers}) do
  put_info("Writing module...")

  # The first enzyme serves as the template for the struct's field list.
  template = Enum.at(enzymes, 0) || Mix.raise("No enzymes were parsed, nothing to generate")

  case File.write("lib/enzyme.ex", module_source(enzymes, template, suppliers)) do
    :ok ->
      Mix.Task.run("format")
      put_success("Module written, formatted, and ready for release.")

    {:error, reason} ->
      Mix.raise("Could not write lib/enzyme.ex: #{:file.format_error(reason)}")
  end
end

# Builds the full text of the generated module. Layout is only roughly
# indented here because the file is run through the formatter afterwards.
defp module_source(enzymes, template, suppliers) do
  accessors = Enum.map_join(enzymes, &accessor_source/1)

  accessor_atoms =
    Enum.map_join(enzymes, fn enzyme_map -> ":#{accessor_name(enzyme_map)},\n" end)

  ~s"""
  # DO NOT MODIFY THIS FILE DIRECTLY
  # This module is generated using `mix bio.restriction.build`
  # Or with `mix bio.restriction.update`
  # Data herein is derived from the REBASE database monthly data files:
  # http://rebase.neb.com/rebase/rebase.files.html
  defmodule Bio.Restriction.Enzyme do
    @moduledoc \"\"\"
    This module represents the basic data of a restriction enzyme, as well as
    functions for accessing them.
    The core struct contains information regarding the cut sites, bluntness,
    name, and recognition site of the restriction enzyme that it represents.
    The structs declaration is wrapped in a function that is prefixed by an
    `_`, this allows the import of the module without overloading the context
    with a lot of extraneous functions. The primary interface for accessing
    enzymes is the `get/1` function.
    \"\"\"
    @type supplier_code :: atom()
    @type supplier_list :: [supplier_code]
    @type t :: %__MODULE__{
    blunt?: boolean(),
    cut_1: integer(),
    cut_2: integer(),
    cut_3: integer(),
    cut_4: integer(),
    name: String.t(),
    pattern: String.t(),
    suppliers: supplier_list
    }
    @suppliers %#{stringify(suppliers)}
    use Bio.Restriction.Enzyme.Core
    @doc \"\"\"
    The primary struct for interacting with restriction enzymes
    \"\"\"
    defstruct #{to_source(template)}
  #{accessors}
    @all [#{accessor_atoms}
    ]
    def all() do
      @all
      |> Enum.map(&apply(__MODULE__, &1, []))
    end
  end
  """
end

# Source of the hidden zero-arity function that returns one enzyme's struct.
defp accessor_source(enzyme_map) do
  ~s"""
  @doc false
  def #{accessor_name(enzyme_map)} do
    %Bio.Restriction.Enzyme#{stringify(enzyme_map)}
  end
  """
end

# Function name for an enzyme: `_` + its downcased name with `-` turned into `_`.
defp accessor_name(enzyme_map) do
  mangled =
    enzyme_map
    |> Map.get(:name)
    |> String.downcase()
    |> String.replace("-", "_")

  "_" <> mangled
end
@doc """
Builds the field list for the generated `defstruct` from a sample enzyme map.

Every key is kept and every value is replaced by the default that
`sourcify/1` gives for its type; the `key: default` pairs are joined with
commas. An empty map yields an empty string.
"""
@spec to_source(map()) :: String.t()
def to_source(enzyme_map) do
  Enum.map_join(enzyme_map, ",", fn {key, value} -> "#{key}: #{sourcify(value)}" end)
end
@doc """
Renders `obj` as Elixir source text for interpolation into generated code.

  * maps become `{key: value,...}` without a leading `%`, so the caller can
    prefix `%` or a struct name; keys are interpolated verbatim and are
    expected to be atoms that are valid as bare keyword keys
  * lists become `[a,b]`, with every element rendered recursively
  * binaries and atoms (including booleans and `nil`) are rendered with
    `inspect/2`, so quotes, escapes and atoms that need quoting remain valid
    source
  * numbers are rendered with `to_string/1`

Any other kind of value matches no clause.
"""
@spec stringify(map() | list() | binary() | atom() | number()) :: String.t()
def stringify(obj) when is_map(obj) do
  pairs = Enum.map_join(obj, ",", fn {key, value} -> "#{key}: #{stringify(value)}" end)
  "{" <> pairs <> "}"
end

def stringify(obj) when is_list(obj) do
  "[" <> Enum.map_join(obj, ",", &stringify/1) <> "]"
end

def stringify(obj) when is_binary(obj) do
  # No printable limit: a truncated literal would not be valid source.
  inspect(obj, printable_limit: :infinity)
end

# Covers booleans and `nil` as well, which are atoms.
def stringify(obj) when is_atom(obj), do: inspect(obj)

def stringify(obj) when is_number(obj), do: to_string(obj)
@doc """
Maps a sample value to the source text of the default used for its field in
the generated struct: an empty string literal for binaries, `[]` for lists,
`nil` for booleans and `0` for numbers. Any other kind of value matches no
clause.
"""
@spec sourcify(binary() | list() | boolean() | number()) :: String.t()
def sourcify(value) when is_binary(value), do: ~s("")
def sourcify(value) when is_list(value), do: "[]"
def sourcify(value) when is_boolean(value), do: "nil"
def sourcify(value) when is_number(value), do: "0"
end