defmodule Mix.Tasks.Bio.Restriction.Build do
@moduledoc """
Module for building out the actual data structure that will be generated in
code.
Each enzyme will be defined as a function in the `Bio.Restriction.Enzyme`
module that returns a `%Bio.Restriction.Enzyme{}` struct. This struct will be
populated with the minimal data needed for digestion and sequence validation.
Further additions to the data can be requested by opening an issue.
The current data preserved are:
blunt?: boolean
cut_1: number
cut_2: number
cut_3: number
cut_4: number
name: string
pattern: string
"""
@shortdoc "Task for populating RE data"
use Mix.Task
alias Bio.Ansio
alias Bio.Rebase.Emboss
@options [cache_dir: :string]
@aliases [d: :cache_dir]
@doc """
Parses the EMBOSS files found in the cache directory and writes the enzyme
module from the parsed data.

Accepts `--cache-dir` (alias `-d`) to choose the directory that holds the
`downloads_emboss_e`, `downloads_emboss_r` and `downloads_emboss_s` files. When
the option is absent, the `RestrictionEx` user cache directory is used.
"""
def run(inputs) do
  {parsed, _args, _invalid} = OptionParser.parse(inputs, aliases: @aliases, strict: @options)
  Ansio.info("Building restriction data")

  # Fall back to the per-user cache location only when no directory was given.
  cache_root =
    case parsed[:cache_dir] do
      nil -> :filename.basedir(:user_cache, "RestrictionEx")
      custom_dir -> custom_dir
    end

  emboss_e = "#{cache_root}/downloads_emboss_e"
  emboss_r = "#{cache_root}/downloads_emboss_r"
  emboss_s = "#{cache_root}/downloads_emboss_s"

  write_module(Emboss.parse(emboss_e, emboss_r, emboss_s))
end
# Renders the source of `Bio.Restriction.Enzyme` from the parsed enzyme maps,
# writes it to `lib/restriction/enzyme.ex`, then runs `mix format`.
#
# `data` is enumerated as a collection of maps that each carry a `:name`
# string. The first entry supplies the field list for the generated
# `defstruct`; every entry becomes one zero-arity function.
#
# Raises `File.Error` when the file cannot be written, so formatting and the
# success message only happen after the module is actually on disk.
#
# NOTE(review): the generated `get_struct/1` converts caller input with
# `String.to_atom/1`; atoms are never garbage collected, so consider
# `String.to_existing_atom/1` there if `get/1` may receive untrusted names.
defp write_module(data) do
  Ansio.info("Writing module...")

  # Rendered up front so the template below does not need a nested heredoc.
  enzyme_functions = Enum.map_join(data, &enzyme_function_source/1)

  # The template text is kept flat on purpose: it is emitted verbatim and the
  # generated code is laid out afterwards by `mix format`.
  File.write!(
    "lib/restriction/enzyme.ex",
    ~s"""
    # DO NOT MODIFY THIS FILE DIRECTLY
    # This module is generated using `mix bio.restriction.build`
    # Or with `mix bio.restriction.update`
    # Data herein is derived from the REBASE database monthly data files:
    # http://rebase.neb.com/rebase/rebase.files.html
    defmodule Bio.Restriction.Enzyme do
    @moduledoc \"\"\"
    This module houses all of the functions for accessing a struct
    of restriction enzyme data, the `%Bio.Restriction.Enzyme`.
    All functions are the name of the enzyme in lowercase, where any `-`
    characters have been made `_`. By example, "BsmBI" would be `bsmbi` or
    "CviKI-1" would become `cviki_1`.
    \"\"\"
    @doc \"\"\"
    Get an enzyme struct by name, where name is either a binary or atom and
    case insensitive.
    # Examples
    iex>Bio.Restriction.Enzyme.get(:CviRI)
    %Bio.Restriction.Enzyme{
    blunt?: true,
    cut_1: 2,
    cut_2: 2,
    cut_3: 0,
    cut_4: 0,
    name: "CviRI",
    pattern: "tgca"
    }
    iex>Bio.Restriction.Enzyme.get("CviRI")
    %Bio.Restriction.Enzyme{
    blunt?: true,
    cut_1: 2,
    cut_2: 2,
    cut_3: 0,
    cut_4: 0,
    name: "CviRI",
    pattern: "tgca"
    }
    \"\"\"
    def get(name) when is_atom(name) do
    name
    |> Atom.to_string()
    |> get_struct()
    end
    def get(name) when is_binary(name) do
    get_struct(name)
    end
    defp get_struct(name) do
    func_name = name
    |> String.downcase()
    |> String.replace("-", "_")
    try do
    apply(__MODULE__, String.to_atom(func_name), [])
    rescue
    _ in UndefinedFunctionError -> raise "Unknown restriction enzyme \#\{func_name\}"
    end
    end
    @doc \"\"\"
    The primary struct for interacting with restriction enzymes
    \"\"\"
    defstruct #{to_source(Enum.at(data, 0))}
    #{enzyme_functions}
    end
    """
  )

  Mix.Task.run("format")
  Ansio.success("Module written, formatted, and ready for release.")
end

# Source text of the zero-arity function that returns one enzyme's struct. The
# function name is the enzyme name lowercased with `-` replaced by `_`.
defp enzyme_function_source(enzyme_map) do
  function_name =
    enzyme_map
    |> Map.get(:name)
    |> String.downcase()
    |> String.replace("-", "_")

  ~s"""
  @doc false
  def #{function_name} do
  %Bio.Restriction.Enzyme#{stringify(enzyme_map)}
  end
  """
end
@doc """
Renders the body of a `defstruct` declaration from a sample enzyme map.

Every `{key, value}` pair becomes `key: default`, where the default is chosen
by `sourcify/1` from the type of `value`. Pairs are joined with `,` and there
is no trailing comma. An empty map yields an empty string.
"""
@spec to_source(Enumerable.t()) :: String.t()
def to_source(enzyme_map) do
  # Joining with a separator avoids building a trailing comma and then slicing
  # it off, which raised for empty input.
  Enum.map_join(enzyme_map, ",", fn {key, value} ->
    "#{key}: #{sourcify(value)}"
  end)
end
@doc """
Renders a term as Elixir source text for the generated enzyme module.

  * maps become `{key: value,...}` (no leading `%`, so a struct name can be
    prepended by the caller), with a trailing comma after every entry
  * lists become `[el,...]`, with a trailing comma after every element
  * binaries become a quoted, escaped string literal
  * booleans and numbers are rendered as-is

Any other term raises `FunctionClauseError`.
"""
@spec stringify(map() | binary() | list() | boolean() | number()) :: String.t()
def stringify(obj) when is_map(obj) do
  "{" <> Enum.map_join(obj, fn {key, value} -> "#{key}: #{stringify(value)}," end) <> "}"
end

def stringify(obj) when is_binary(obj) do
  # `inspect/2` escapes quotes, backslashes and `\#{`, so the result is always a
  # valid literal; the limits are lifted so long values are never truncated.
  inspect(obj, printable_limit: :infinity, limit: :infinity)
end

def stringify(obj) when is_list(obj) do
  "[" <> Enum.map_join(obj, fn el -> "#{stringify(el)}," end) <> "]"
end

def stringify(obj) when is_boolean(obj) or is_number(obj), do: to_string(obj)
# Maps a sample field value to the source text of the default used for that
# field in the generated struct: numbers default to 0, booleans to nil, lists
# to an empty list and binaries to an empty string.
def sourcify(value) when is_number(value), do: "0"
def sourcify(value) when is_boolean(value), do: "nil"
def sourcify(value) when is_list(value), do: "[]"
def sourcify(value) when is_binary(value), do: ~S("")
end