lib/lastfm_archive/transform.ex

defmodule LastfmArchive.Transform do
  @moduledoc """
  Functions for reading downloaded Lastfm data and transforming it into
  tab-delimited (TSV) rows.
  """

  alias LastfmArchive.Utils

  @default_delimiter "\t"
  @tsv_headers "id\tname\tscrobble_date\tscrobble_date_iso\tmbid\turl\tartist\tartist_mbid\tartist_url\talbum\talbum_mbid"

  @doc """
  Transform a downloaded raw JSON page into a list of tab-delimited track data.

  The page is read with `LastfmArchive.Utils.read/2` and decoded with
  `Jason.decode!/1`. On success the result is a list of binaries: the TSV
  header line (see `tsv_headers/0`) followed by one row per scrobble, in page
  order. A leading track flagged as "now playing" is skipped. A page without
  any track yields the header line only.

  Any result of `LastfmArchive.Utils.read/2` other than `{:ok, data}` is
  returned unchanged.

  ### Example
  ```
    # transform a page of scrobbles from the file archive
    LastfmArchive.Transform.transform("a_lastfm_user", "2007/200_1.gz")
  ```

  Each tab-delimited row contains the following columns (a column is left
  empty when the value is absent):

  - `id` auto-generated by the system to uniquely identify a scrobble,
    built as `<user>_<scrobble_date>_<index>`
  - `name` the track name
  - `scrobble_date` Unix timestamp of the scrobble date
  - `scrobble_date_iso` scrobble date in ISO 8601 datetime format
  - `mbid` MusicBrainz identifier for the track
  - `url` web address of the track on Last.fm
  - `artist`
  - `artist_mbid` MusicBrainz identifier for the artist
  - `artist_url` web address of the artist on Last.fm
  - `album`
  - `album_mbid` MusicBrainz identifier for the album

  """
  @spec transform(binary, binary, :tsv) :: list(binary) | {:error, :file.posix()}
  def transform(user, filename, mode \\ :tsv)

  def transform(user, filename, :tsv) do
    case Utils.read(user, filename) do
      {:ok, resp} ->
        recent_tracks = Jason.decode!(resp)["recenttracks"]
        index = initial_index(recent_tracks["@attr"])

        # `List.wrap/1` turns a missing track list into `[]` and a lone track
        # map into a one-element list, so neither case crashes the transform.
        # NOTE(review): Last.fm is reported to return a bare object instead of
        # a list for single-track pages - confirm against real archive data.
        rows =
          recent_tracks["track"]
          |> List.wrap()
          |> drop_now_playing()
          |> to_rows(user, index)

        [@tsv_headers | rows]

      error ->
        error
    end
  end

  @doc """
  Returns the tab-delimited header line that precedes the rows produced by `transform/3`.
  """
  @spec tsv_headers() :: binary
  def tsv_headers(), do: @tsv_headers

  # Drops the first track when it is flagged as "now playing": such an entry
  # is not a completed scrobble and must not consume an index.
  defp drop_now_playing([]), do: []

  defp drop_now_playing([track | rest] = tracks) do
    if track["@attr"]["nowplaying"], do: rest, else: tracks
  end

  # Builds one TSV row per track, numbering the tracks consecutively from `index`.
  defp to_rows(tracks, user, index) do
    tracks
    |> Enum.with_index(index)
    |> Enum.map(fn {track, track_index} -> to_row(user, track, track_index) end)
  end

  # id,name,scrobble_date,date_iso,mbid,url,artist,artist_mbid,artist_url,album,album_mbid
  defp to_row(user, track, index) do
    uts = track["date"]["uts"]
    id = "#{user}_#{uts}_#{index}"
    date_iso = uts |> to_integer() |> DateTime.from_unix!() |> DateTime.to_iso8601()

    track_info = [id, String.trim(track["name"]), uts, date_iso, track["mbid"], track["url"]]
    artist_info = [track["artist"]["name"], track["artist"]["mbid"], track["artist"]["url"]]
    album_info = [track["album"]["#text"], track["album"]["mbid"]]

    # `Enum.join/2` renders `nil` values as empty strings, i.e. empty columns.
    Enum.join(track_info ++ artist_info ++ album_info, @default_delimiter)
  end

  # Index (1-based) of the first track of a page across the whole archive,
  # derived from the page number and page size of the response attributes.
  defp initial_index(info) do
    (to_integer(info["page"]) - 1) * to_integer(info["perPage"]) + 1
  end

  # Numeric fields are handled both as strings and as integers; anything that
  # is not a binary is passed through untouched.
  defp to_integer(value) when is_binary(value), do: String.to_integer(value)
  defp to_integer(value), do: value
end