defmodule Naiveical.Extractor do
  @moduledoc """
  This module allows the extraction of parts of an iCalendar text:
  whole `BEGIN:`/`END:` sections, single content lines, and single
  parameters of a content line.
  """

  alias Naiveical.Helpers

  # Everything that follows the property name on a content line:
  # group 1 is the (possibly empty) parameter part including its leading ";",
  # group 2 is the value. Parameter text may contain double-quoted strings,
  # inside which ":" does not terminate the parameter part. Because the name
  # must be followed directly by ";" or ":", a tag never matches a longer
  # property name that merely starts with it.
  @contentline_tail ~S{((?:;(?:[^":\r\n]|"[^"\r\n]*")*)?):(.*)$}

  @doc """
  Extract parts of an iCalendar text, such as all VALARMs.

  Returns one string per `BEGIN:tag` ... `END:tag` section, in order of
  appearance, with line endings normalised to CRLF. The n-th `BEGIN:tag` is
  paired with the n-th `END:tag`.

  Raises a `RuntimeError` when the number of `BEGIN:tag` and `END:tag` markers
  differs or when an `END:tag` precedes the `BEGIN:tag` it is paired with.

  ## Examples:

      iex> Naiveical.Extractor.extract_sections_by_tag("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nEND:YY\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "XX")
      ["BEGIN:XX\\r\\nBEGIN:YY\\r\\nA:aa\\r\\nB:bb\\r\\nEND:YY\\r\\nBEGIN:YY\\r\\nC:cc\\r\\nD:dd\\r\\nEND:YY\\r\\nEND:XX"]

      iex> Naiveical.Extractor.extract_sections_by_tag("BEGIN:XX\\r\\nBEGIN:YY\\r\\nA:aa\\r\\nB:bb\\r\\nEND:YY\\r\\nBEGIN:YY\\r\\nC:cc\\r\\nD:dd\\r\\nEND:YY\\r\\nEND:XX", "YY")
      ["BEGIN:YY\\r\\nA:aa\\r\\nB:bb\\r\\nEND:YY", "BEGIN:YY\\r\\nC:cc\\r\\nD:dd\\r\\nEND:YY"]

  """
  @spec extract_sections_by_tag(String.t(), String.t()) :: [String.t()]
  def extract_sections_by_tag(ical_text, tag) do
    ical_text = String.replace(ical_text, "\r\n", "\n")
    {startings, endings} = scan_section_markers(ical_text, tag)

    startings
    |> Enum.zip(endings)
    |> Enum.map(fn {[{s, _begin_len}], [{e, end_len}]} ->
      if e < s, do: raise("No correct ical file, no matchin BEGIN/END for #{tag}")

      # Regex indexes are byte offsets, so the section has to be cut with
      # binary_part/3; String.slice/3 counts graphemes and would drift as soon
      # as a multi-byte character precedes the section.
      ical_text
      |> binary_part(s, e - s + end_len)
      |> String.replace(~r/\r?\n/, "\r\n")
      |> String.trim()
    end)
  end

  @doc """
  Remove sections of an iCalendar text, such as remove all VALARMs from a VEVENT.

  The reason of this is to allow the correct extraction of the content lines.
  If, for example, a VEVENT also contains a VALARM with a description, but the
  VEVENT does not contain a description, the function
  `extract_contentline_by_tag/2` would fetch the description of the VALARM
  instead of returning nil.

  When no `BEGIN:tag` section is present the text is returned unchanged.
  Otherwise the result uses LF line endings, runs of line breaks are collapsed
  into one, and leading whitespace is dropped.

  Raises a `RuntimeError` when the number of `BEGIN:tag` and `END:tag` markers
  differs.

  ## Examples:

      iex> Naiveical.Extractor.remove_sections_by_tag("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nEND:YY\\nEND:XX", "YY")
      "BEGIN:XX\\nEND:XX"

      iex> Naiveical.Extractor.remove_sections_by_tag("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nEND:YY\\naaaa:bbbb\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "NOTEXIST")
      "BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nEND:YY\\naaaa:bbbb\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX"

  """
  @spec remove_sections_by_tag(String.t(), String.t()) :: String.t()
  def remove_sections_by_tag(ical_text, tag) do
    # Cheap pre-check: without the tag text there cannot be a section.
    if String.contains?(ical_text, tag) do
      normalized = String.replace(ical_text, "\r\n", "\n")

      case scan_section_markers(normalized, tag) do
        # The tag text occurs (e.g. inside a value) but not as a section.
        {[], []} -> ical_text
        {startings, endings} -> drop_sections(normalized, startings, endings)
      end
    else
      ical_text
    end
  end

  @doc """
  Extract a single content line from an iCalendar text split into tag,
  properties, and values. It returns a tuple with `{tag-name, properties, value}`.

  The tag is upcased before matching and must be the complete property name:
  it has to be followed directly by `;` (parameters) or `:` (value). The
  parameter part ends at the first `:` outside double quotes, so values that
  contain colons (URLs, `mailto:` addresses, times) are returned intact.
  Escaped newlines in the value are replaced by a space. The value is `nil`
  when no matching line exists or the text is `nil`.

  ## Examples:

      iex> Naiveical.Extractor.extract_contentline_by_tag("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nEND:YY\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "A")
      {"A","","aa"}

      iex> Naiveical.Extractor.extract_contentline_by_tag("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nEND:YY\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "ZZZ")
      {"ZZZ","",nil}

  """
  @spec extract_contentline_by_tag(String.t() | nil, String.t()) ::
          {String.t(), String.t(), String.t() | nil}
  def extract_contentline_by_tag(nil, tag), do: {tag, "", nil}

  def extract_contentline_by_tag(ical_text, tag) do
    tag = String.upcase(tag)

    if String.contains?(ical_text, tag) do
      ical_text = Helpers.unfold(ical_text)
      {:ok, regex} = Regex.compile("^#{tag}" <> @contentline_tail, [:multiline])

      case Regex.run(regex, ical_text) do
        [_line, raw_properties, raw_value] ->
          properties =
            raw_properties
            |> String.replace_prefix(";", "")
            |> String.trim()

          value =
            raw_value
            |> String.replace("\\n", " ")
            |> String.trim()

          {tag, properties, value}

        nil ->
          {tag, "", nil}
      end
    else
      {tag, "", nil}
    end
  end

  @doc """
  Extract a raw single content line from an iCalendar text.

  The tag is upcased before matching and must be the complete property name
  (followed directly by `;` or `:`). Escaped newlines in the line are replaced
  by a space. Returns `nil` when no matching line exists or the text is `nil`.

  ## Examples:

      iex> Naiveical.Extractor.extract_raw_contentline_by_tag("BEGIN:XX\\nBEGIN:YY\\nA;xx:aa\\nB:bb\\nEND:YY\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "A")
      "A;xx:aa"

      iex> Naiveical.Extractor.extract_raw_contentline_by_tag("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nEND:YY\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "ZZZ")
      nil

  """
  @spec extract_raw_contentline_by_tag(String.t() | nil, String.t()) :: String.t() | nil
  def extract_raw_contentline_by_tag(nil, _tag), do: nil

  def extract_raw_contentline_by_tag(ical_text, tag) do
    tag = String.upcase(tag)

    if String.contains?(ical_text, tag) do
      ical_text = Helpers.unfold(ical_text)
      {:ok, regex} = Regex.compile("^#{tag}(?:;.*)?:.*$", [:multiline])

      # The tag text may occur somewhere in the calendar without there being a
      # content line of that name, so a failed match has to yield nil.
      case Regex.run(regex, ical_text) do
        [line] -> line |> String.replace("\\n", " ") |> String.trim()
        nil -> nil
      end
    else
      nil
    end
  end

  @doc """
  Extract a single datetime content line from an iCalendar text.

  The value of the content line is handed to
  `Naiveical.Helpers.parse_datetime/1`, or to
  `Naiveical.Helpers.parse_datetime/2` together with the `TZID` parameter when
  the line carries one; the result of that call is returned as is.

  ## Examples:

      iex> Naiveical.Extractor.extract_datetime_contentline_by_tag("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nDTSTART;TZID=Europe/Berlin:20210422T150000\\nEND:YY\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "DTSTART")

  """
  def extract_datetime_contentline_by_tag(ical_text, tag) do
    {_tag, attrs, datetime_str} = extract_contentline_by_tag(ical_text, tag)

    case extract_attribute(attrs, "TZID") do
      nil -> Helpers.parse_datetime(datetime_str)
      tzid -> Helpers.parse_datetime(datetime_str, tzid)
    end
  end

  @doc """
  Extract a single datetime content line from an iCalendar text.

  Same as `extract_datetime_contentline_by_tag/2`, but delegates to
  `Naiveical.Helpers.parse_datetime!/1` and
  `Naiveical.Helpers.parse_datetime!/2` (presumably the raising variants of
  the parser - see `Naiveical.Helpers`).

  ## Examples:

      iex> Naiveical.Extractor.extract_datetime_contentline_by_tag!("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nDTSTART;TZID=Europe/Berlin:20210422T150000\\nEND:YY\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "DTSTART")

  """
  def extract_datetime_contentline_by_tag!(ical_text, tag) do
    {_tag, attrs, datetime_str} = extract_contentline_by_tag(ical_text, tag)

    case extract_attribute(attrs, "TZID") do
      nil -> Helpers.parse_datetime!(datetime_str)
      tzid -> Helpers.parse_datetime!(datetime_str, tzid)
    end
  end

  @doc """
  Extract a single date content line from an iCalendar text.

  The value of the content line is handed to `Naiveical.Helpers.parse_date!/1`
  and the result of that call is returned as is. Parameters of the line, such
  as `TZID`, are ignored.

  ## Examples:

      iex> Naiveical.Extractor.extract_date_contentline_by_tag!("BEGIN:XX\\nBEGIN:YY\\nA:aa\\nB:bb\\nDTSTART;TZID=Europe/Berlin:20210422\\nEND:YY\\nBEGIN:YY\\nC:cc\\nD:dd\\nEND:YY\\nEND:XX", "DTSTART")

  """
  def extract_date_contentline_by_tag!(ical_text, tag) do
    {_tag, _attrs, date_str} = extract_contentline_by_tag(ical_text, tag)
    Helpers.parse_date!(date_str)
  end

  @doc """
  Extract a single date content line from an iCalendar text.

  Same as `extract_date_contentline_by_tag!/2`, but delegates to
  `Naiveical.Helpers.parse_date/1`.
  """
  def extract_date_contentline_by_tag(ical_text, tag) do
    {_tag, _attrs, date_str} = extract_contentline_by_tag(ical_text, tag)
    Helpers.parse_date(date_str)
  end

  @doc """
  Extracts a specific attribute from a list of attributes.

  `attribute_list_str` is the parameter part of a content line, i.e.
  `name=value` pairs separated by `;`. Returns the value of the first pair
  whose name equals `attr` exactly, or `nil` when there is none. Everything
  after the first `=` of a pair belongs to its value; entries without `=` are
  skipped.

  ## Examples:

      iex> Naiveical.Extractor.extract_attribute("VALUE=DATE-TIME;TZID=Europe/Berlin", "TZID")
      "Europe/Berlin"

      iex> Naiveical.Extractor.extract_attribute("VALUE=DATE-TIME", "TZID")
      nil

  """
  @spec extract_attribute(String.t(), String.t()) :: String.t() | nil
  def extract_attribute(attribute_list_str, attr) do
    attribute_list_str
    |> String.split(";")
    |> Enum.find_value(fn pair ->
      case String.split(pair, "=", parts: 2) do
        [^attr, value] -> value
        _other -> nil
      end
    end)
  end

  # Locates every BEGIN:tag and END:tag marker in `ical_text` and returns the
  # two `Regex.scan/3` index lists (byte offsets). Raises when the marker
  # counts differ.
  defp scan_section_markers(ical_text, tag) do
    {:ok, begin_regex} = Regex.compile("BEGIN:#{tag}")
    {:ok, end_regex} = Regex.compile("END:#{tag}")
    startings = Regex.scan(begin_regex, ical_text, return: :index)
    endings = Regex.scan(end_regex, ical_text, return: :index)

    if length(startings) != length(endings),
      do: raise("No correct ical file, no matchin BEGIN/END for #{tag}")

    {startings, endings}
  end

  # Rebuilds `text` from the pieces that lie outside the BEGIN/END pairs.
  # All offsets are byte offsets as reported by Regex.scan/3.
  defp drop_sections(text, startings, endings) do
    {kept, tail_from} =
      startings
      |> Enum.zip(endings)
      |> Enum.reduce({"", 0}, fn {[{s, _begin_len}], [{e, end_len}]}, {acc, from} ->
        # A BEGIN located before the previous END (nested section of the same
        # tag) contributes nothing; the enclosing section already covers it.
        chunk = binary_part(text, from, max(s - from, 0))
        {String.trim(acc <> chunk), e + end_len}
      end)

    tail = binary_part(text, tail_from, byte_size(text) - tail_from)

    # Only real line breaks are collapsed; escaped newlines inside values
    # (backslash followed by "n") are content and must stay untouched.
    (kept <> tail)
    |> String.replace(~r/\n+/, "\n")
    |> String.trim_leading()
  end
end