lib/table.ex

defmodule Table do
  @moduledoc """
  Unified access to tabular data.

  Various data structures have a tabular representation; however,
  accessing that representation usually requires manual conversion.
  On top of that, tabular access itself has two variants, a row-based
  one and a column-based one, each useful under different circumstances.

  The `Table` package provides a thin layer that unifies access to
  tabular data in different formats.

  ## Protocol

  The unified access is enabled for structs implementing the
  `Table.Reader` protocol. Note that a struct may be representable
  as tabular data only in some cases, so the protocol implementation
  may be lax. Consequently, functions in this module will raise when
  given non-tabular data.

  By default the protocol is implemented for lists and maps of certain
  shape.

      # List of matching key-value lists
      data = [
        [{"id", 1}, {"name", "Sherlock"}],
        [{"id", 2}, {"name", "John"}]
      ]

      # List of matching maps
      data = [
        %{"id" => 1, "name" => "Sherlock"},
        %{"id" => 2, "name" => "John"}
      ]

      # List of column tuples
      data = [
        {"id", 1..2},
        {"name", ["Sherlock", "John"]}
      ]

      # Map with column values
      data = %{
        "id" => [1, 2],
        "name" => ["Sherlock", "John"]
      }

  """

  alias Table.Reader

  @typedoc """
  Identifier of a single column.

  Columns are used as keys in the maps returned by `to_rows/2` and
  `to_columns/2`.
  """
  @type column :: term()

  @typedoc """
  Data accepted by the functions in this module.

  This is either a data structure implementing the `Table.Reader`
  protocol, or a reader tuple already obtained from `Table.Reader.init/1`.
  """
  @type tabular :: Reader.t() | Reader.row_reader() | Reader.column_reader()

  @doc """
  Accesses tabular data as a sequence of rows.

  Returns an enumerable that emits each row as a map.

  Raises `ArgumentError` if the given data is not tabular.

  ## Options

    * `:only` - specifies a subset of columns to include in the result

  ## Examples

      iex> data = %{id: [1, 2, 3], name: ["Sherlock", "John", "Mycroft"]}
      iex> data |> Table.to_rows() |> Enum.to_list()
      [%{id: 1, name: "Sherlock"}, %{id: 2, name: "John"}, %{id: 3, name: "Mycroft"}]

      iex> data = [[id: 1, name: "Sherlock"], [id: 2, name: "John"], [id: 3, name: "Mycroft"]]
      iex> data |> Table.to_rows() |> Enum.to_list()
      [%{id: 1, name: "Sherlock"}, %{id: 2, name: "John"}, %{id: 3, name: "Mycroft"}]

  """
  @spec to_rows(tabular(), keyword()) :: Enumerable.t()
  def to_rows(tabular, opts \\ []) do
    only = only_filter(opts)

    tabular
    |> init_reader!()
    |> read_rows(only)
  end

  # TODO: remove in v0.2
  @doc """
  Same as `to_rows/2`, but returns a tuple with the rows and the
  reader metadata.

  Raises `ArgumentError` if the given data is not tabular.
  """
  @deprecated "Use Table.Reader.init/1 to get reader with metadata, then pass the reader to Table.to_rows/2"
  @spec to_rows_with_info(tabular(), keyword()) :: {Enumerable.t(), map()}
  def to_rows_with_info(tabular, opts \\ []) do
    # Going through init_reader!/1 (rather than matching on Reader.init/1
    # directly) gives the same ArgumentError as to_rows/2 for non-tabular
    # data and also accepts an already initialized reader.
    reader = {_, meta, _} = init_reader!(tabular)
    {to_rows(reader, opts), meta}
  end

  # Already initialized readers are passed through untouched, anything
  # else is initialized via the Table.Reader protocol.
  defp init_reader!({:rows, %{}, _} = reader), do: reader
  defp init_reader!({:columns, %{}, _} = reader), do: reader

  defp init_reader!(tabular) do
    # Any result other than :none falls through `with` and is returned as is
    with :none <- Reader.init(tabular) do
      raise ArgumentError, "expected valid tabular data, but got: #{inspect(tabular)}"
    end
  end

  # Builds the row enumerable for either reader variant.
  defp read_rows({:rows, meta, enum}, only) do
    Table.Mapper.map(enum, fn values ->
      build_row(meta.columns, values, only)
    end)
  end

  defp read_rows({:columns, meta, enum}, only) do
    Table.Zipper.zip_with(enum, fn values ->
      build_row(meta.columns, values, only)
    end)
  end

  # Pairs column names with the values of a single row and keeps only
  # the requested columns.
  defp build_row(columns, values, only) do
    for {column, value} <- Enum.zip(columns, values),
        include_column?(only, column),
        into: %{},
        do: {column, value}
  end

  @doc """
  Accesses tabular data as individual columns.

  Returns a map with enumerables as values. When the underlying reader
  is row-based, the columns are materialized into lists in memory.

  Raises `ArgumentError` if the given data is not tabular.

  ## Options

    * `:only` - specifies a subset of columns to include in the result

  ## Examples

      iex> data = [%{id: 1, name: "Sherlock"}, %{id: 2, name: "John"}, %{id: 3, name: "Mycroft"}]
      iex> columns = Table.to_columns(data)
      iex> Enum.to_list(columns.id)
      [1, 2, 3]
      iex> Enum.to_list(columns.name)
      ["Sherlock", "John", "Mycroft"]

  """
  @spec to_columns(tabular(), keyword()) :: %{column() => Enumerable.t()}
  def to_columns(tabular, opts \\ []) do
    only = only_filter(opts)

    tabular
    |> init_reader!()
    |> read_columns(only)
  end

  # TODO: remove in v0.2
  @doc """
  Same as `to_columns/2`, but returns a tuple with the columns and the
  reader metadata.

  Raises `ArgumentError` if the given data is not tabular.
  """
  @deprecated "Use Table.Reader.init/1 to get reader with metadata, then pass the reader to Table.to_columns/2"
  @spec to_columns_with_info(tabular(), keyword()) :: {%{column() => Enumerable.t()}, map()}
  def to_columns_with_info(tabular, opts \\ []) do
    # See to_rows_with_info/2 for why init_reader!/1 is used here
    reader = {_, meta, _} = init_reader!(tabular)
    {to_columns(reader, opts), meta}
  end

  # Column-based reader: the enumerables are returned as they are,
  # only paired with their column names.
  defp read_columns({:columns, meta, enum}, only) do
    for {column, values} <- Enum.zip(meta.columns, enum),
        include_column?(only, column),
        into: %{},
        do: {column, values}
  end

  # Row-based reader: a single pass over the rows accumulates the
  # values of every requested column.
  defp read_columns({:rows, meta, enum}, only) do
    # Each entry is {column, index of the column within a row, reversed values}
    columns =
      for {column, idx} <- Enum.with_index(meta.columns),
          include_column?(only, column),
          do: {column, idx, []}

    # Note: we intentionally materialize the columns into memory,
    # because having a separate stream for each column would be
    # notably less efficient on the consumer side
    columns = Enum.reduce(enum, columns, &row_into_columns/2)

    for {column, _, acc} <- columns,
        into: %{},
        do: {column, Enum.reverse(acc)}
  end

  # Prepends the values of a single row to the matching column accumulators.
  defp row_into_columns(row, columns), do: row_into_columns(row, 0, columns)

  # The repeated `idx` variable makes this clause match only when the
  # current row position is the position of the next requested column.
  defp row_into_columns([value | values], idx, [{column, idx, acc} | columns]) do
    [{column, idx, [value | acc]} | row_into_columns(values, idx + 1, columns)]
  end

  # The value at the current position belongs to a column that was
  # not requested, so it is skipped.
  defp row_into_columns([_value | values], idx, columns) do
    row_into_columns(values, idx + 1, columns)
  end

  defp row_into_columns([], _idx, []), do: []

  # Normalizes the :only option into a MapSet for fast membership checks.
  # A missing option yields nil, which stands for "include every column".
  defp only_filter(opts) do
    only = opts[:only]
    only && MapSet.new(only)
  end

  defp include_column?(nil, _column), do: true
  defp include_column?(only, column), do: MapSet.member?(only, column)
end