# lib/naiveical/extractor.ex

defmodule Naiveical.Extractor do
  @moduledoc """
  This module allows the extraction of parts of an iCalendar text.
  """

  alias Naiveical.Helpers

  @type content_line :: {String.t(), String.t(), String.t() | nil}

  @doc """
  Extracts every `BEGIN:<tag>` ... `END:<tag>` section of an iCalendar text, such as all VALARMs.

  Returns the sections in order of appearance, each one running from its `BEGIN:<tag>`
  marker up to and including its `END:<tag>` marker, with line breaks written as `\\r\\n`.
  An empty list is returned when the text contains no such section.

  The n-th `BEGIN:<tag>` is paired with the n-th `END:<tag>`, so sections of the same tag
  nested inside each other are not supported. `tag` is interpolated into a regular
  expression as-is.

  Raises a `RuntimeError` when the number of `BEGIN` and `END` markers differs or when
  an `END` marker precedes the `BEGIN` marker it is paired with.

  ## Examples

      iex> Naiveical.Extractor.extract_sections_by_tag("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nEND:YY\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "XX")
      ["BEGIN:XX\\r\\nBEGIN:YY\\r\\nA:aa\\r\\nB:bb\\r\\nEND:YY\\r\\nBEGIN:YY\\r\\nC:cc\\r\\nD:dd\\r\\nEND:YY\\r\\nEND:XX"]

      iex> Naiveical.Extractor.extract_sections_by_tag("BEGIN:XX\\r\\nBEGIN:YY\\r\\nA:aa\\r\\nB:bb\\r\\nEND:YY\\r\\nBEGIN:YY\\r\\nC:cc\\r\\nD:dd\\r\\nEND:YY\\r\\nEND:XX", "YY")
      ["BEGIN:YY\\r\\nA:aa\\r\\nB:bb\\r\\nEND:YY", "BEGIN:YY\\r\\nC:cc\\r\\nD:dd\\r\\nEND:YY"]

  """
  @spec extract_sections_by_tag(String.t(), String.t()) :: [String.t()]
  def extract_sections_by_tag(ical_text, tag) do
    ical_text = String.replace(ical_text, "\r\n", "\n")
    {:ok, begin_regex} = Regex.compile("BEGIN:#{tag}")
    {:ok, end_regex} = Regex.compile("END:#{tag}")
    startings = Regex.scan(begin_regex, ical_text, return: :index)
    endings = Regex.scan(end_regex, ical_text, return: :index)

    if length(startings) != length(endings),
      do: raise("No correct ical file, no matchin BEGIN/END for #{tag}")

    startings
    |> Enum.zip(endings)
    |> Enum.map(fn {[{start, _start_len}], [{stop, stop_len}]} ->
      size = stop - start + stop_len

      # binary_part/3 interprets a negative size as "bytes before start", which
      # would silently return unrelated text for an END that precedes its BEGIN.
      if size < 0,
        do: raise("No correct ical file, no matchin BEGIN/END for #{tag}")

      # Regex.scan/3 reports *byte* offsets, so the section must be cut out by
      # bytes; String.slice/3 counts graphemes and drifts on non-ASCII text.
      ical_text
      |> binary_part(start, size)
      |> String.replace(~r/\r?\n/, "\r\n")
      |> String.trim()
    end)
  end

  @doc """
  Removes every `BEGIN:<tag>` ... `END:<tag>` section from an iCalendar text, such as
  all VALARMs of a VEVENT.

  This allows the correct extraction of content lines afterwards. If, for example, a
  VEVENT contains a VALARM with a description but has no description of its own,
  `extract_contentline_by_tag/2` would otherwise fetch the description of the VALARM
  instead of returning nil.

  When `tag` does not occur anywhere in the text, the text is returned untouched.
  Otherwise `\\r\\n` line breaks are converted to `\\n`, the sections are cut out, the
  text in front of each removed section is trimmed, and runs of consecutive line breaks
  are collapsed into one.

  The n-th `BEGIN:<tag>` is paired with the n-th `END:<tag>`, so sections of the same tag
  nested inside each other are not supported.

  Raises a `RuntimeError` when the number of `BEGIN` and `END` markers differs.

  ## Examples

      iex> Naiveical.Extractor.remove_sections_by_tag("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nEND:YY\\nEND:XX", "YY")
      "BEGIN:XX\\nEND:XX"

      iex> Naiveical.Extractor.remove_sections_by_tag("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nEND:YY\\naaaa:bbbb\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "NOTEXIST")
      "BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nEND:YY\\naaaa:bbbb\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX"
  """
  @spec remove_sections_by_tag(String.t(), String.t()) :: String.t()
  def remove_sections_by_tag(ical_text, tag) do
    if String.contains?(ical_text, tag) do
      ical_text = String.replace(ical_text, "\r\n", "\n")
      {:ok, begin_regex} = Regex.compile("BEGIN:#{tag}")
      {:ok, end_regex} = Regex.compile("END:#{tag}")

      # Byte offset at which each section starts.
      section_starts =
        begin_regex
        |> Regex.scan(ical_text, return: :index)
        |> Enum.map(fn [{pos, _len}] -> pos end)

      # Byte offset of the first byte after each section.
      section_ends =
        end_regex
        |> Regex.scan(ical_text, return: :index)
        |> Enum.map(fn [{pos, len}] -> pos + len end)

      if length(section_starts) != length(section_ends),
        do: raise("No correct ical file, no matchin BEGIN/END for #{tag}")

      # The text to keep lies in the gaps: from the start of the text (or the end
      # of the previous section) up to the start of the next section.
      gap_starts = [0 | section_ends]

      kept =
        gap_starts
        |> Enum.zip(section_starts)
        |> Enum.reduce("", fn {from, to}, acc ->
          # Offsets are bytes, so cut with binary_part/3 rather than String.slice/3.
          gap = if to > from, do: binary_part(ical_text, from, to - from), else: ""
          String.trim(acc <> gap)
        end)

      # Everything after the last section; the whole text when there is no
      # section at all (the tag only occurred inside some other line).
      tail_start = List.last(gap_starts)
      tail = binary_part(ical_text, tail_start, byte_size(ical_text) - tail_start)

      # Collapse real line breaks only; escaped "\n" sequences inside property
      # values must stay as they are.
      String.replace(kept <> tail, ~r/(\r?\n)+/, "\\1")
    else
      ical_text
    end
  end

  @doc """
  Extracts a single content line from an iCalendar text, split into its parts.

  Returns a tuple `{tag, properties, value}`:

    * `tag` is the upper-cased tag name (the tag is returned as given when the text is nil),
    * `properties` is the raw parameter string between the tag and the value, without the
      leading `;`, or `""` when the line has no parameters,
    * `value` is everything after the first colon that is not inside a double-quoted
      parameter value, with escaped line breaks (a backslash followed by `n`) replaced
      by spaces and surrounding whitespace trimmed. It is nil when the text is nil or
      contains no line for `tag`.

  The text is passed through `Naiveical.Helpers.unfold/1` before matching, and only the
  first matching line is returned. A line matches only when the tag is directly followed
  by `;` or `:`, so a tag never matches a longer property name it is a prefix of.

  ## Examples

      iex> Naiveical.Extractor.extract_contentline_by_tag("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nEND:YY\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "A")
      {"A","","aa"}

      iex> Naiveical.Extractor.extract_contentline_by_tag("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nEND:YY\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "ZZZ")
      {"ZZZ","",nil}
  """
  @spec extract_contentline_by_tag(String.t() | nil, String.t()) :: content_line()
  def extract_contentline_by_tag(nil, tag), do: {tag, "", nil}

  def extract_contentline_by_tag(ical_text, tag) do
    tag = String.upcase(tag)

    # Cheap substring test first so texts without the tag skip unfolding and regex work.
    if String.contains?(ical_text, tag) do
      # Parameters are everything up to the first colon outside double quotes; the
      # value is the rest of the line and may itself contain colons (URLs, mailto:).
      # Line breaks are excluded from the parameter part so a match never spans lines.
      regex = ~r/^#{Regex.escape(tag)}(?:;((?:[^:"\r\n]|"[^"\r\n]*")*))?:(.*)$/m

      case Regex.run(regex, Helpers.unfold(ical_text)) do
        [_line, properties, value] ->
          value = value |> String.replace("\\n", " ") |> String.trim()
          {tag, String.trim(properties), value}

        nil ->
          {tag, "", nil}
      end
    else
      {tag, "", nil}
    end
  end

  @doc """
  Extracts a raw single content line from an iCalendar text.

  Returns the first line whose name is `tag` (upper-cased, directly followed by `;` or
  `:`), with escaped line breaks (a backslash followed by `n`) replaced by spaces and
  surrounding whitespace trimmed. Returns nil when the text is nil or contains no such
  line, including the case where the tag only occurs inside some other line.

  The text is passed through `Naiveical.Helpers.unfold/1` before matching.

  ## Examples

      iex> Naiveical.Extractor.extract_raw_contentline_by_tag("BEGIN:XX\\nBEGIN:YY\\nA;xx:aa\\nB:bb\\nEND:YY\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "A")
      "A;xx:aa"

      iex> Naiveical.Extractor.extract_raw_contentline_by_tag("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nEND:YY\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "ZZZ")
      nil
  """
  @spec extract_raw_contentline_by_tag(String.t() | nil, String.t()) :: String.t() | nil
  def extract_raw_contentline_by_tag(nil, _tag), do: nil

  def extract_raw_contentline_by_tag(ical_text, tag) do
    tag = String.upcase(tag)

    # Cheap substring test first so texts without the tag skip unfolding and regex work.
    if String.contains?(ical_text, tag) do
      # The tag must be followed by its parameters (";...") or directly by the value
      # separator, so it cannot match a longer property name it is a prefix of.
      regex = ~r/^#{Regex.escape(tag)}(?:;.*)?:.*$/m

      case Regex.run(regex, Helpers.unfold(ical_text)) do
        [line] -> line |> String.replace("\\n", " ") |> String.trim()
        # The tag occurred somewhere in the text, but not as the name of a line.
        nil -> nil
      end
    else
      nil
    end
  end

  @doc """
  Extracts the content line for `tag` from an iCalendar text and parses its value as a datetime.

  The line is looked up with `extract_contentline_by_tag/2`. When the line carries a
  `TZID` parameter, the value is handed to `Naiveical.Helpers.parse_datetime/2` together
  with that timezone id; otherwise it is handed to `Naiveical.Helpers.parse_datetime/1`.
  The result of that helper is returned unchanged.

  ## Examples

      iex> Naiveical.Extractor.extract_datetime_contentline_by_tag("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nDTSTART;TZID=Europe/Berlin:20210422T150000\\nEND:YY\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "DTSTART")

  """
  @spec extract_datetime_contentline_by_tag(String.t(), String.t()) ::
          {:ok, DateTime.t()} | {:error, term()} | nil
  def extract_datetime_contentline_by_tag(ical_text, tag) do
    {_name, params, raw_value} = extract_contentline_by_tag(ical_text, tag)

    case extract_attribute(params, "TZID") do
      nil -> Helpers.parse_datetime(raw_value)
      timezone -> Helpers.parse_datetime(raw_value, timezone)
    end
  end

  @doc """
  Extracts the content line for `tag` from an iCalendar text and parses its value as a
  datetime, using the bang variants of the parsing helpers.

  The line is looked up with `extract_contentline_by_tag/2`. When the line carries a
  `TZID` parameter, the value is handed to `Naiveical.Helpers.parse_datetime!/2` together
  with that timezone id; otherwise it is handed to `Naiveical.Helpers.parse_datetime!/1`.
  The result of that helper is returned unchanged.

  ## Examples

      iex> Naiveical.Extractor.extract_datetime_contentline_by_tag!("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nDTSTART;TZID=Europe/Berlin:20210422T150000\\nEND:YY\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "DTSTART")

  """
  @spec extract_datetime_contentline_by_tag!(String.t(), String.t()) :: DateTime.t()
  def extract_datetime_contentline_by_tag!(ical_text, tag) do
    {_name, params, raw_value} = extract_contentline_by_tag(ical_text, tag)

    case extract_attribute(params, "TZID") do
      nil -> Helpers.parse_datetime!(raw_value)
      timezone -> Helpers.parse_datetime!(raw_value, timezone)
    end
  end

  @doc """
  Extracts the content line for `tag` from an iCalendar text and parses its value as a date.

  The line is looked up with `extract_contentline_by_tag/2`; its value is handed to
  `Naiveical.Helpers.parse_date!/1` and the result of that helper is returned unchanged.
  Parameters of the line, such as `TZID`, are ignored.

  ## Examples

      iex> Naiveical.Extractor.extract_date_contentline_by_tag!("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nDTSTART;TZID=Europe/Berlin:20210422\\nEND:YY\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "DTSTART")

  """
  @spec extract_date_contentline_by_tag!(String.t(), String.t()) :: NaiveDateTime.t()
  def extract_date_contentline_by_tag!(ical_text, tag) do
    ical_text
    |> extract_contentline_by_tag(tag)
    |> elem(2)
    |> Helpers.parse_date!()
  end

  @doc """
  Extracts the content line for `tag` from an iCalendar text and parses its value as a date.

  The line is looked up with `extract_contentline_by_tag/2`; its value is handed to
  `Naiveical.Helpers.parse_date/1` and the result of that helper is returned unchanged.
  Parameters of the line, such as `TZID`, are ignored.
  """
  @spec extract_date_contentline_by_tag(String.t(), String.t()) :: {:ok, Date.t()} | {:error, term()}
  def extract_date_contentline_by_tag(ical_text, tag) do
    ical_text
    |> extract_contentline_by_tag(tag)
    |> elem(2)
    |> Helpers.parse_date()
  end

  @doc """
  Extracts the value of a specific attribute from a `;`-separated list of `NAME=value` attributes.

  Returns the value of the first attribute whose name equals `attr` exactly, or nil when
  there is none. Only the first `=` of an entry separates name and value, so values may
  themselves contain `=`. Entries without `=` are skipped.

  The list is split on every `;`; quoted values that contain a `;` are not supported.

  ## Examples

      iex> Naiveical.Extractor.extract_attribute("VALUE=DATE-TIME;TZID=Europe/Berlin", "TZID")
      "Europe/Berlin"

      iex> Naiveical.Extractor.extract_attribute("VALUE=DATE-TIME", "TZID")
      nil

  """
  @spec extract_attribute(String.t(), String.t()) :: String.t() | nil
  def extract_attribute(attribute_list_str, attr) do
    attribute_list_str
    |> String.split(";")
    |> Enum.find_value(fn entry ->
      # parts: 2 keeps any further "=" inside the value; the pin requires an exact
      # name match, so `attr` occurring inside another name or value is ignored.
      case String.split(entry, "=", parts: 2) do
        [^attr, value] -> value
        _other -> nil
      end
    end)
  end

  @doc """
  Detects the component type from iCalendar data.

  Returns an atom representing the component type found in the iCalendar data.
  A component counts as present when the data contains its `BEGIN:<NAME>` marker.
  Priority order: :vfreebusy > :vtodo > :vjournal > :vevent (default)

  `:vevent` is also returned when none of the known components is present.

  ## Examples

      iex> Naiveical.Extractor.detect_component_type("BEGIN:VCALENDAR\\nBEGIN:VEVENT\\nEND:VEVENT\\nEND:VCALENDAR")
      :vevent

      iex> Naiveical.Extractor.detect_component_type("BEGIN:VCALENDAR\\nBEGIN:VTODO\\nEND:VTODO\\nEND:VCALENDAR")
      :vtodo

      iex> Naiveical.Extractor.detect_component_type("BEGIN:VCALENDAR\\nEND:VCALENDAR")
      :vevent

  """
  @spec detect_component_type(String.t()) :: atom()
  def detect_component_type(ical_data) do
    # Checked in priority order. VEVENT needs no check of its own because it is
    # also the fallback.
    Enum.find_value(
      [{"VFREEBUSY", :vfreebusy}, {"VTODO", :vtodo}, {"VJOURNAL", :vjournal}],
      :vevent,
      fn {tag, type} -> if component_present?(ical_data, tag), do: type end
    )
  end

  # Tells whether the data contains a BEGIN marker for the given component.
  #
  # A plain substring test is enough: extracting the sections yields a non-empty
  # list exactly when a BEGIN marker exists, and malformed data (unbalanced or
  # misordered BEGIN/END markers) is still classified instead of raising.
  defp component_present?(ical_data, tag) do
    String.contains?(ical_data, "BEGIN:" <> tag)
  end
end