defmodule Xlsxplorer do
  @moduledoc """
  `Xlsxplorer`. Load data from your .xlsx file into an `Explorer.DataFrame`.

  Uses `Xlsxir` to parse data from an .xlsx file.
  """

  @doc """
  Loads data from a .xlsx file into an `Explorer.DataFrame`.

  - `path` is a string with the path to the .xlsx file
  - `index` is the sheet number to be read
    from the .xlsx file into the `Explorer.DataFrame`

  The first row of the sheet is treated as the header: its cells become the
  column names, and every following row becomes one row of the data frame.

  Raises `ArgumentError` when the sheet yields no rows at all, since there is
  then no header row to take the column names from.

  ## Example

  ```elixir
  iex(1)> Xlsxplorer.from_xlsx("./test/data/example.xlsx", 0)
  #Explorer.DataFrame<
  Polars[2 x 2]
  number integer [1, 2]
  string string ["hello", "world"]
  >
  ```
  """
  @spec from_xlsx(String.t(), integer()) :: Explorer.DataFrame.t()
  def from_xlsx(path, index) do
    path
    |> read_excel(index)
    |> process_data()
    |> Explorer.DataFrame.new()
  end

  # Asks Xlsxir for the rows of sheet `index` in the workbook at `path`.
  defp read_excel(path, index) do
    Xlsxir.stream_list(path, index)
  end

  # Converts the sheet rows into a list of maps, one per data row, keyed by
  # the cells of the first (header) row.
  # Polars from_xlsx assumes a header, thus we will too.
  defp process_data(xlsx_stream) do
    # Enumerate the source exactly once. Taking the header and dropping it in
    # two separate passes would traverse the enumerable twice, which for a
    # lazy stream means running the underlying read twice.
    case Enum.to_list(xlsx_stream) do
      [] ->
        raise ArgumentError,
              "the worksheet contains no rows; a header row is required to build a data frame"

      [header | rows] ->
        # Pair every cell with its column name; the pairs form the row map.
        Enum.map(rows, &Map.new(Enum.zip(header, &1)))
    end
  end
end