defmodule Archeometer.Analysis.DSM do
@moduledoc """
Defines algorithms for Design Structure Matrix (DSM) analysis.
A DSM represents a graph of dependencies between components or processes,
in this case, between Elixir modules.
See [DSM Web](https://dsmweb.org/introduction-to-dsm/).
In this implementation, the DSM is represented by the `DSM` struct,
with the following attributes:
- `nodes`. A List of of nodes that besides enumerating the nodes, defines at the same
time the order of rows and columns. They have always the same order.
- `edges`. The edges of the graph, in the form of a map, with a key representing existing
edges within the matrix in the form of `{row, col}`, and values can be one of `:+` or `:*`.
`:+` is used when `row` and `col` have the same value, and `:*` is used whenever the module
represented by `col` has a dependency on module represented by `row`.
- `groups`. Its a map of identified cyclic groups within the graph, every key is just an
identifier for the gruop, and the value is a list of the nodes conforming that gruop. The
value of `groups` is calculated by the algorithm and not expected to be set by the user.
- `n_groups`. This is used for internal use of the algorithms only and not expected to be
of any use to final users.
Functions `gen_dsm/1` and `gen_dsm/4` are used to create a `DSM` respectively from an
adjacency list or from information in an `Archeometer` database.
The only analysis algorith currently implemented is called `triangularization`, and you can
use it with the function `triangularize/1`. It helps you find groups of nodes that form cycles.
"""
alias Archeometer.Analysis.DSM
defstruct nodes: [], edges: %{}, groups: %{}, n_groups: 0
@doc """
Creates a DSM from an adjacency list
## Examples
iex> adj_list = %{
...> 1 => [2, 3],
...> 2 => [3, 4],
...> 3 => [4],
...> 4 => [3]
...> }
%{1 => [2, 3], 2 => [3, 4], 3 => [4], 4 => [3]}
iex> Archeometer.Analysis.DSM.gen_dsm(adj_list)
%Archeometer.Analysis.DSM{
edges: %{
{1, 1} => :+,
{2, 1} => :*,
{2, 2} => :+,
{3, 1} => :*,
{3, 2} => :*,
{3, 3} => :+,
{3, 4} => :*,
{4, 2} => :*,
{4, 3} => :*,
{4, 4} => :+
},
groups: %{},
n_groups: 0,
nodes: [1, 2, 3, 4]
}
"""
def gen_dsm(xrefs) do
edges = gen_edges(xrefs)
nodes = gen_nodes(xrefs)
%__MODULE__{nodes: nodes, edges: edges}
end
@doc """
Generates a DSM using module xref information from an Archeometer DB.
## Parameters
- `app` is an application name, in the case of an umbrella project, it's the name of
one of the child applications.
- `namespace` is a "namespace" to narrow the set of modules considered for createing the
DSM. The term "namespace" is meant very broadly since Elixir doesn't have that concept.
Namespace can be interpreted as a string matching the beginning of the module names to
be considered, such as "Foo" or "Foo.Bar", after which a last part of the name must
be given to create a full module name. For example the "namespace" `Foo` will include
in the analysis modules such as `Foo.Bar` and `Foo.Baz` but not `FooBar` or `Food`.
- `db_name` is the name of the database file to be used.
- `skip_tests` is a boolean indicating if test related modules should be skipped.
## Examples
Using Archeometer provided test database.
iex> Archeometer.Analysis.DSM.gen_dsm(
...> "jissai",
...> "Jissai.Reports",
...> "./test/resources/db/archeometer_jissai.db",
...> true
...>)
{:ok,
%Archeometer.Analysis.DSM{
edges: %{
{21, 21} => :+,
{22, 21} => :*,
{22, 22} => :+,
{23, 21} => :*,
{23, 23} => :+,
{24, 21} => :*,
{24, 24} => :+
},
groups: %{},
n_groups: 0,
nodes: [21, 22, 23, 24]
},
%{
21 => "Jissai.Reports",
22 => "Jissai.Reports.Attendance",
23 => "Jissai.Reports.ClassDynamic",
24 => "Jissai.Reports.Participant"
}}
"""
def gen_dsm(app, namespace, db_name, skip_tests) do
{module_names, xrefs} = __MODULE__.Data.get_data(app, namespace, db_name, skip_tests)
if map_size(module_names) == 0 do
{:error, :no_modules_matched}
else
dsm = gen_dsm(xrefs)
{:ok, dsm, module_names}
end
end
@doc """
Reorders a DSM to find dependency cycles.
The cycles are identified in the `groups` attribute of the resulting DSM.
## Examples
A DSM with one cycle
iex> dsm = %Archeometer.Analysis.DSM{
...> edges: %{
...> {1, 1} => :+,
...> {2, 1} => :*,
...> {2, 2} => :+,
...> {3, 1} => :*,
...> {3, 2} => :*,
...> {3, 3} => :+,
...> {3, 4} => :*,
...> {4, 2} => :*,
...> {4, 3} => :*,
...> {4, 4} => :+
...> },
...> groups: %{},
...> n_groups: 0,
...> nodes: [1, 2, 3, 4]
...> }
iex> Archeometer.Analysis.DSM.triangularize(dsm)
%Archeometer.Analysis.DSM{
edges: %{
{1, 1} => :+,
{2, 1} => :*,
{2, 2} => :+,
{3, 1} => :*,
{3, 2} => :*,
{3, 3} => :+,
{3, 4} => :*,
{4, 2} => :*,
{4, 3} => :*,
{4, 4} => :+
},
groups: %{"g1" => [3, 4]},
n_groups: 1,
nodes: [1, 2, 3, 4]
}
As you can see, it detected the only existing cycle, formed by nodes `[3, 4]`. The nodes
of the DSM are reordered such that the only nodes above the diagonal, are the ones
forming part of a cycle.
Now a DSM with two cycles
iex> dsm = %Archeometer.Analysis.DSM{
...> edges: %{
...> {1, 1} => :+,
...> {2, 1} => :*,
...> {2, 2} => :+,
...> {2, 3} => :*,
...> {3, 1} => :*,
...> {3, 2} => :*,
...> {3, 3} => :+,
...> {4, 4} => :+,
...> {4, 5} => :*,
...> {5, 4} => :*,
...> {5, 5} => :+
...> },
...> groups: %{},
...> n_groups: 0,
...> nodes: [1, 2, 3, 4, 5]
...> }
iex> Archeometer.Analysis.DSM.triangularize(dsm)
%Archeometer.Analysis.DSM{
edges: %{
{1, 1} => :+,
{2, 1} => :*,
{2, 2} => :+,
{2, 3} => :*,
{3, 1} => :*,
{3, 2} => :*,
{3, 3} => :+,
{4, 4} => :+,
{4, 5} => :*,
{5, 4} => :*,
{5, 5} => :+
},
groups: %{"g1" => [2, 3], "g2" => [4, 5]},
n_groups: 2,
nodes: [1, 2, 3, 4, 5]
}
It correctly detects the two cycles: `[2, 3]` and `[4, 5]`.
Now a DSM with a cycle of 3 nodes
iex> dsm = %Archeometer.Analysis.DSM{
...> edges: %{
...> {1, 1} => :+,
...> {2, 1} => :*,
...> {2, 2} => :+,
...> {2, 4} => :*,
...> {3, 2} => :*,
...> {3, 3} => :+,
...> {4, 3} => :*,
...> {4, 4} => :+
...> },
...> groups: %{},
...> n_groups: 0,
...> nodes: [1, 2, 3, 4]
...> }
iex> Archeometer.Analysis.DSM.triangularize(dsm)
%Archeometer.Analysis.DSM{
edges: %{
{1, 1} => :+,
{2, 1} => :*,
{2, 2} => :+,
{2, 4} => :*,
{3, 2} => :*,
{3, 3} => :+,
{4, 3} => :*,
{4, 4} => :+
},
groups: %{"g1" => [2, 3, 4]},
n_groups: 1,
nodes: [1, 2, 3, 4]
}
The cycle is correctly detected: `[2, 3, 4]`.
"""
def triangularize(%__MODULE__{} = mtx) do
mtx
|> triangularize(left: [], right: [])
|> Map.put(:edges, mtx.edges)
|> cyclic_deps_groups()
end
defp gen_edges(xrefs) do
Enum.reduce(xrefs, %{}, fn {module, callees}, edges ->
Enum.reduce(callees, edges, fn callee, edges_acc ->
Map.put(edges_acc, {callee, module}, :*)
end)
|> Map.put({module, module}, :+)
end)
end
defp gen_nodes(xrefs) do
xrefs
|> Enum.flat_map(fn {k, v} -> [k | v] end)
|> Enum.uniq()
|> Enum.sort()
end
defp triangularize(
%__MODULE__{nodes: [], edges: _, groups: groups} = dsm,
left: left_nodes,
right: right_nodes
) do
{node_seq, groups} =
case map_size(groups) do
0 ->
{left_nodes ++ right_nodes, groups}
_ ->
(left_nodes ++ right_nodes) |> expand_groups(groups)
end
%{dsm | nodes: node_seq, groups: groups}
end
defp triangularize(mtx, left: left_nodes, right: right_nodes) do
# IO.inspect(mtx, label: "mtx", charlists: :as_lists)
# IO.inspect(left_nodes, label: "left_nodes", charlists: :as_lists)
# IO.inspect(right_nodes, label: "right_nodes", charlists: :as_lists)
# IO.puts("-------------------------------")
case {roots(mtx), leaves(mtx)} do
{[], []} ->
cycle = find_cycle(mtx)
new_mtx = group_nodes(mtx, cycle)
triangularize(
new_mtx,
left: left_nodes,
right: right_nodes
)
{origins, []} ->
left_nodes = left_nodes ++ origins
nodes = mtx.nodes -- origins
edges = remove_nodes(origins, mtx.edges)
triangularize(
%{mtx | nodes: nodes, edges: edges, groups: mtx.groups},
left: left_nodes,
right: right_nodes
)
{[], destinations} ->
right_nodes = destinations ++ right_nodes
nodes = mtx.nodes -- destinations
edges = remove_nodes(destinations, mtx.edges)
triangularize(
%{mtx | nodes: nodes, edges: edges, groups: mtx.groups},
left: left_nodes,
right: right_nodes
)
{origins, destinations} ->
# Avoid duplication between origins and destinations
destinations = destinations -- origins
left_nodes = left_nodes ++ origins
right_nodes = destinations ++ right_nodes
marked_nodes = origins ++ destinations
nodes = mtx.nodes -- marked_nodes
edges = remove_nodes(marked_nodes, mtx.edges)
triangularize(
%{mtx | nodes: nodes, edges: edges, groups: mtx.groups},
left: left_nodes,
right: right_nodes
)
end
end
defp expand_groups(seq, groups) do
expanded_seq = expand_seq_once(seq, groups)
expanded_groups = expand_groups(groups)
if Enum.any?(expanded_seq, &is_group?/1) do
expand_groups(expanded_seq, expanded_groups)
else
{expanded_seq, expanded_groups}
end
end
defp expand_groups(groups) do
groups
|> Enum.map(fn {g, nodes} -> {g, expand_seq_once(nodes, groups)} end)
|> Enum.into(%{})
end
defp expand_seq_once(seq, groups) do
seq
|> Enum.map(fn n -> if Map.has_key?(groups, n), do: Map.get(groups, n), else: n end)
|> List.flatten()
end
defp is_group?(n), do: is_binary(n) and String.starts_with?(n, "g")
defp group_nodes(mtx, cycle) do
new_group_id = "g" <> to_string(mtx.n_groups + 1)
groups = Map.put(mtx.groups, new_group_id, cycle)
to_substitute =
mtx.edges
|> Enum.filter(fn {{r, c}, _v} -> Enum.member?(cycle, r) or Enum.member?(cycle, c) end)
|> Enum.into(%{})
edges =
to_substitute
|> Map.keys()
|> Enum.reduce(mtx.edges, fn edge, acc -> Map.delete(acc, edge) end)
edges =
to_substitute
|> Enum.map(fn {{r, c}, _v} ->
row = if Enum.member?(cycle, r), do: new_group_id, else: r
col = if Enum.member?(cycle, c), do: new_group_id, else: c
{{row, col}, if(r == c, do: :+, else: :*)}
end)
|> Enum.into(edges)
|> Map.put({new_group_id, new_group_id}, :+)
nodes = [new_group_id | mtx.nodes -- cycle]
%__MODULE__{nodes: nodes, edges: edges, groups: groups, n_groups: mtx.n_groups + 1}
end
def find_cycle(%__MODULE__{nodes: [n | _], edges: _edges} = mtx) do
find_cycle(mtx, [[n]])
end
defp find_cycle(mtx, paths) do
next_paths = Enum.flat_map(paths, &expand_path(mtx, &1))
case Enum.find(next_paths, :none, &contains_cycle?/1) do
:none ->
find_cycle(mtx, next_paths)
path ->
path |> get_cycle() |> tl() |> Enum.reverse()
end
end
defp expand_path(mtx, [n | _] = path) do
mtx.edges
|> Map.keys()
|> Enum.filter(fn {r, c} -> r != c and c == n end)
|> Enum.map(fn {r, _c} -> [r | path] end)
end
defp get_cycle(path) do
anchor =
path
|> Enum.frequencies()
|> Enum.find(fn {_k, v} -> v == 2 end)
|> elem(0)
[from, to] =
path
|> Enum.with_index()
|> Enum.filter(fn {x, _idx} -> x == anchor end)
|> Enum.map(&elem(&1, 1))
Enum.slice(path, Range.new(from, to))
end
defp contains_cycle?(path) do
unique_nodes = Enum.uniq(path)
length(unique_nodes) != length(path)
end
defp remove_nodes(nodes, edges) do
edges
|> Enum.filter(fn {{r, c}, _v} ->
not (Enum.member?(nodes, r) or Enum.member?(nodes, c))
end)
|> Enum.into(%{})
end
@doc """
Finds the root nodes in a DSM.
Root nodes are the ones that no othe node has dependencies upon.
"""
def roots(mtx), do: Enum.filter(mtx.nodes, &(count_ingoing(mtx, &1) == 0))
@doc """
Finds the leave nodes in a DSM.
Leave nodes are the ones that no dependencies upon other nodes.
"""
def leaves(mtx), do: Enum.filter(mtx.nodes, &(count_outgoing(mtx, &1) == 0))
defp count_ingoing(mtx, node) do
Enum.count(mtx.edges, fn {{r, _c}, v} -> r == node and v != :+ end)
end
defp count_outgoing(mtx, node) do
Enum.count(mtx.edges, fn {{_r, c}, v} -> c == node and v != :+ end)
end
@doc """
Gets a list of module groups with cyclic dependencies.
"""
def cyclic_deps_groups(dsm) do
coords = dsm |> DSM.Utils.to_coord_mtx() |> Map.keys()
cyclic_groups =
dsm
|> DSM.Utils.upper_right_group_corners()
|> Enum.map(fn {r, c} -> corner_to_group(coords, dsm.nodes, r, c) end)
|> Enum.with_index(1)
|> Enum.map(fn {group, idx} -> {"g" <> to_string(idx), group} end)
|> Enum.into(%{})
%{dsm | groups: cyclic_groups, n_groups: map_size(cyclic_groups)}
end
defp corner_to_group(mtx_coords, nodes, r, c) do
mtx_coords
|> Enum.filter(fn {mtx_row, mtx_col} ->
mtx_col >= r and mtx_col <= c and mtx_col != mtx_row
end)
|> Enum.map(fn {_r, c} -> c end)
|> Enum.uniq()
|> Enum.map(fn column -> Enum.at(nodes, column) end)
|> Enum.sort()
end
@doc """
Gets the largest group of modules with cyclical dependencies from a
triangularized DSM.
If both arguments are maps, then it is assumed they are the DSM and the
modules.
Otherwise there are treated as the namespace and the database name, and will
perfom an extra query to get the DSM and the modules.
"""
def largest_cyclic_deps_group(a, b)
def largest_cyclic_deps_group(dsm, mod_names) when is_map(dsm) and is_map(mod_names) do
dsm =
dsm
|> triangularize()
|> cyclic_deps_groups()
dsm.groups
|> Map.values()
|> Enum.max_by(&length/1, fn -> [] end)
|> group_to_names(mod_names)
end
def largest_cyclic_deps_group(app, db) do
{_, dsm, mod_names} = gen_dsm(app, "*", db, true)
largest_cyclic_deps_group(dsm, mod_names)
end
def group_to_names(group, mod_names) do
Enum.map(group, fn mod -> Map.get(mod_names, mod) end)
end
def groups_to_names(groups, mod_names) do
Enum.map(groups, &group_to_names(&1, mod_names))
end
end