defmodule Timex.Parse.ZoneInfo.Parser do
@moduledoc """
This module is responsible for parsing binary zoneinfo files,
such as those found in /usr/local/zoneinfo.
"""
# See https://tools.ietf.org/id/draft-murchison-tzdist-tzif-00.html for details
defmodule Zone do
@moduledoc """
Represents the data retrieved from a binary tzfile.
"""
# Maximum version encountered
defstruct version: nil,
# Transition times
transitions: [],
# Leap second adjustments
leaps: [],
# POSIX-TZ rule that describes the zone for future dates
rule: nil
end
defmodule Header do
@moduledoc false
# Six big-endian 4-8 byte integers
# count of UTC/local indicators
defstruct utc_count: 0,
# count of standard/wall indicators
wall_count: 0,
# number of leap seconds
leap_count: 0,
# number of transition times
transition_count: 0,
# number of local time types (never zero)
type_count: 0,
# total number of characters of the zone abbreviations string
abbrev_length: 0
end
defmodule TransitionInfo do
@moduledoc false
# total ISO 8601 offset (std + dst)
defstruct gmt_offset: 0,
# The time at which this transition starts
starts_at: 0,
# Is this transition in daylight savings time
is_dst?: false,
# The lookup index of the abbreviation
abbrev_index: 0,
# The zone abbreviation
abbreviation: "N/A",
# Whether transitions are standard or wall
is_std?: true,
# Whether transitions are UTC or local
is_utc?: false
end
defmodule LeapSecond do
@moduledoc false
# The time at which this leap second occurs
defstruct epoch: 0,
# The number of leap seconds to be applied to UTC on/after epoch
correction: 0
end
defmodule Rule do
@moduledoc false
defstruct std_abbr: nil,
std_offset: 0,
dst_abbr: nil,
dst_offset: 0,
start_time: nil,
end_time: nil
end
defguardp is_digit(c) when c >= ?0 and c <= ?9
defguardp is_alphabetic(c) when (c >= ?A and c <= ?Z) or (c >= ?a and c <= ?z)
##############
# Macros defining common bitstring modifier combinations in zoneinfo files
defmacrop char() do
quote do: size(1) - unit(8) - integer
end
defmacrop bytes(size) do
quote do: binary - size(unquote(size)) - unit(8)
end
defmacrop integer_32bit_be do
quote do: big - size(4) - unit(8) - integer
end
defmacrop integer_64bit_be do
quote do: big - size(8) - unit(8) - integer
end
defmacrop signed_char_be do
quote do: big - size(1) - unit(8) - signed - integer
end
defmacrop unsigned_char_be do
quote do: big - size(1) - unit(8) - unsigned - integer
end
@doc """
Parses a binary representing a valid zoneinfo file.
Parses the timezone information inside, and returns it as a Zone struct.
"""
@spec parse(binary) :: {:ok, Zone.t()} | {:error, binary}
def parse(<<?T, ?Z, ?i, ?f, version::bytes(1), _reserved::bytes(15), rest::binary>>) do
version =
case version do
<<0>> ->
1
<<?2>> ->
2
<<?3>> ->
3
byte ->
{:error, {:invalid_tzfile_version, byte}, rest}
end
with v when is_integer(v) <- version,
{:ok, zoneinfo, _} <- parse_versioned_content(v, rest) do
{:ok, zoneinfo}
else
{:error, reason, _} ->
{:error, reason}
end
end
def parse(_) do
{:error, :invalid_zoneinfo_content}
end
@doc """
Like `parse/1`, but expects a file path to parse.
"""
def parse_file(path) when is_binary(path) do
if path |> File.exists?() do
path |> File.read!() |> parse()
else
{:error, "No zoneinfo file at #{path}"}
end
end
# Parses the content of a tzinfo file based on the version format
defp parse_versioned_content(version, data)
defp parse_versioned_content(1, data) do
with {:ok, zone, rest} <- parse_content(1, data, %Zone{version: 1}) do
transitions = Enum.sort_by(zone.transitions, fn tx -> tx.starts_at end)
leaps = Enum.sort_by(zone.leaps, fn leap -> leap.epoch end)
{:ok, %Zone{zone | transitions: transitions, leaps: leaps}, rest}
end
end
defp parse_versioned_content(version, data) do
expected_version =
case version do
1 -> <<0>>
2 -> <<?2>>
3 -> <<?3>>
end
with {:ok, zone1, rest} <- parse_content(1, data, %Zone{version: 1}),
{:header, <<?T, ?Z, ?i, ?f, ^expected_version::bytes(1), _::bytes(15), rest::binary>>} <-
{:header, rest},
{:ok, zone2, rest} <- parse_content(version, rest, %Zone{version: version}) do
# Append the second set of zone info to the first set
transitions =
zone1.transitions
|> Enum.concat(zone2.transitions)
|> Enum.sort_by(fn tx -> tx.starts_at end)
leaps =
zone1.leaps
|> Enum.concat(zone2.leaps)
|> Enum.sort_by(fn leap -> leap.epoch end)
zone = %Zone{
version: zone2.version,
transitions: transitions,
leaps: leaps,
rule: zone2.rule
}
{:ok, zone, rest}
else
{:header, bytes} ->
{:error, {:invalid_version_header, version}, bytes}
end
end
# Parsing the content of a tzinfo file starting with the header
#
# ## Header Format
#
# +---------------+---+
# | magic (4) | <-+-- version (1)
# +---------------+---+---------------------------------------+
# | [unused - reserved for future use] (15) |
# +---------------+---------------+---------------+-----------+
# | isutccnt (4) | isstdcnt (4) | leapcnt (4) |
# +---------------+---------------+---------------+
# | timecnt (4) | typecnt (4) | charcnt (4) |
# ---
#
# ## 32-bit Body Format
#
# | transition times (timecnt x 4) ...
# +-----------------------------------------------+
# | transition time index (timecnt) ...
# +-----------------------------------------------+
# | local time type records (typecnt x 6) ...
# +-----------------------------------------------+
# | time zone designations (charcnt) ...
# +-----------------------------------------------+
# | leap second records (leapcnt x 8) ...
# +-----------------------------------------------+
# | standard/wall indicators (isstdcnt) ...
# +-----------------------------------------------+
# | UTC/local indicators (isutccnt) ...
# +-----------------------------------------------+
#
# ## 64-bit Body Format
#
# | transition times (timecnt x 8) ...
# +-----------------------------------------------+
# | transition time index (timecnt) ...
# +-----------------------------------------------+
# | local time type records (typecnt x 6) ...
# +-----------------------------------------------+
# | time zone designations (charcnt) ...
# +-----------------------------------------------+
# | leap second records (leapcnt x 12) ...
# +-----------------------------------------------+
# | standard/wall indicators (isstdcnt) ...
# +-----------------------------------------------+
# | UTC/local indicators (isutccnt) ...
# +---+---------------------------------------+---+
# | NL| POSIX TZ string (0...) |NL |
# +---+---------------------------------------+---+
defp parse_content(version, <<header_raw::bytes(24), rest::binary>>, zone) do
{utc_count, header_raw} = parse_i32(header_raw)
{wall_count, header_raw} = parse_i32(header_raw)
{leap_count, header_raw} = parse_i32(header_raw)
{tx_count, header_raw} = parse_i32(header_raw)
{type_count, header_raw} = parse_i32(header_raw)
{abbrev_length, _} = parse_i32(header_raw)
header = %Header{
utc_count: utc_count,
wall_count: wall_count,
leap_count: leap_count,
transition_count: tx_count,
type_count: type_count,
abbrev_length: abbrev_length
}
parse_transition_times(version, rest, header, zone)
end
# Parse the number of transition times in this zone
defp parse_transition_times(version, data, %Header{transition_count: tx_count} = header, zone) do
{times, rest} = parse_array(data, tx_count, &parse_int(version, &1))
parse_transition_info(version, rest, header, %Zone{zone | transitions: times})
end
# Parse transition time info for this zone
defp parse_transition_info(
version,
data,
%Header{transition_count: tx_count, type_count: type_count} = header,
%Zone{transitions: transitions} = zone
) do
{indices, rest} = parse_array(data, tx_count, &parse_uchar/1)
{txinfos, rest} =
parse_array(rest, type_count, fn data ->
{gmt_offset, next} = parse_i32(data)
{is_dst, next} = parse_char(next)
{abbrev_index, next} = parse_uchar(next)
info = %TransitionInfo{
gmt_offset: gmt_offset,
is_dst?: is_dst == 1,
abbrev_index: abbrev_index
}
{info, next}
end)
txs =
indices
|> Enum.map(&Enum.at(txinfos, &1))
|> Enum.zip(transitions)
|> Enum.map(fn {info, time} ->
Map.put(info, :starts_at, time)
end)
parse_abbreviations(version, rest, header, %Zone{zone | transitions: txs})
end
# Parses zone abbreviations for this zone
defp parse_abbreviations(
version,
data,
%Header{abbrev_length: len} = header,
%Zone{transitions: transitions} = zone
) do
<<abbrevs::bytes(len), rest::binary>> = data
txinfos =
Enum.map(transitions, fn %TransitionInfo{abbrev_index: idx} = tx ->
{:ok, abbrev, _} = parse_null_terminated_str(:binary.part(abbrevs, idx, len - idx))
%{tx | :abbreviation => abbrev}
end)
parse_leap_seconds(version, rest, header, %Zone{zone | transitions: txinfos})
end
# Parses leap second information for this zone
defp parse_leap_seconds(version, data, %Header{leap_count: count} = header, zone) do
{leaps, rest} =
parse_array(data, count, fn data ->
{epoch, next} = parse_int(version, data)
{correction, next} = parse_i32(next)
leap = %LeapSecond{
epoch: epoch,
correction: correction
}
{leap, next}
end)
parse_flags(version, rest, header, %Zone{zone | leaps: leaps})
end
# Parses the trailing flags in the zoneinfo binary
defp parse_flags(version, data, %Header{utc_count: utc_count, wall_count: wall_count}, zone) do
{is_std_indicators, rest} = parse_array(data, wall_count, &parse_char/1)
{is_utc_indicators, rest} = parse_array(rest, utc_count, &parse_char/1)
transitions =
zone.transitions
|> Enum.with_index()
|> Enum.map(fn {tx, i} ->
is_std? = Enum.at(is_std_indicators, i) == 1
is_utc? = Enum.at(is_utc_indicators, i) == 1
%{tx | :is_std? => is_std?, :is_utc? => is_utc?}
end)
if version > 1 do
parse_posixtz_string(version, rest, %Zone{zone | transitions: transitions})
else
{:ok, %Zone{zone | transitions: transitions}, rest}
end
end
# stdoffset[dst[offset][,start[/time],end[/time]]]
defp parse_posixtz_string(_version, <<?\n, rest::binary>>, zone) do
with {:ok, format_str, rest} <- parse_newline_terminated_str(rest),
{:ok, rule, format_rest} <- parse_tz(format_str) do
{:ok, %Zone{zone | rule: rule}, format_rest <> rest}
end
end
defp parse_posixtz_string(_version, rest, _zone) do
{:error, {:invalid_format, "expected newline to follow set of utc/local indicators"}, rest}
end
defp parse_tz(""), do: {:ok, nil, ""}
defp parse_tz(str), do: parse_tz(:std_abbr, str, %Rule{})
defp parse_tz(:std_abbr, str, rule) do
with {:ok, abbr, rest} <- parse_abbrev(str) do
parse_tz(:std_offset, rest, %Rule{rule | std_abbr: abbr, dst_abbr: abbr})
end
end
defp parse_tz(:std_offset, str, rule) do
with {:ok, offset, rest} <- parse_offset(str) do
parse_tz(:dst_abbr, rest, %Rule{rule | std_offset: offset, dst_offset: offset})
else
{:error, nil, ""} ->
{:ok, rule, ""}
{:error, nil, rest} ->
parse_tz(:dst_abbr, rest, rule)
{:error, _, _} = err ->
err
end
end
# dst[offset][,...]
defp parse_tz(:dst_abbr, str, rule) do
with {:ok, abbr, rest} <- parse_abbrev(str),
rule = %Rule{rule | dst_abbr: abbr} do
# dst_offset is optional, and may or may not be followed by a comma and start/end rule
# if the offset is not present.
case rest do
<<>> ->
{:ok, rule, ""}
<<?,, rest::binary>> ->
parse_tz(:rule_period, rest, rule)
_ ->
parse_tz(:dst_offset, rest, rule)
end
end
end
# offset[,...]
defp parse_tz(:dst_offset, str, rule) do
with {:ok, offset, rest} <- parse_offset(str),
rule = %Rule{rule | dst_offset: offset} do
case rest do
<<>> ->
{:ok, rule, ""}
<<?,, rest::binary>> ->
parse_tz(:rule_period, rest, rule)
_ ->
{:error, :invalid_tz_rule_format}
end
else
{:error, nil, ""} ->
{:ok, rule, ""}
{:error, nil, <<?,, rest::binary>>} ->
parse_tz(:rule_period, rest, rule)
{:error, _, _} = err ->
err
end
end
defp parse_tz(:rule_period, str, rule) do
case String.split(str, ",", parts: 2, trim: false) do
[start_dt, end_dt] ->
with {:ok, start_time, _} <- parse_posixtz_datetime(start_dt),
{:ok, end_time, rest} <- parse_posixtz_datetime(end_dt) do
{:ok, %Rule{rule | start_time: start_time, end_time: end_time}, rest}
else
{:ok, _, rest} ->
{:error, :expected_comma, rest}
{:error, _, _} = err ->
err
end
_ ->
{:error, :expected_datetime_range, str}
end
end
defp parse_posixtz_datetime(str) do
result =
case str do
<<?M, rest::binary>> -> parse_month_week_day(rest)
<<?J, rest::binary>> -> parse_julian_day(rest, allow_leap_days: false)
_ -> parse_julian_day(str, allow_leap_days: true)
end
with {:ok, date, rest} <- result do
case rest do
<<?/, rest::binary>> ->
with {:ok, time, rest} <- parse_time(rest) do
{:ok, {date, time}, rest}
end
_ ->
{:ok, {date, Timex.Time.new!(2, 0, 0, 0)}, rest}
end
end
end
defp parse_month_week_day(str) do
case String.split(str, ".", parts: 3, trim: false) do
[m, n, rest] ->
case Integer.parse(rest) do
{d, rest} ->
with {:ok, date} <- parse_month_week_day(m, n, d) do
{:ok, date, rest}
else
{:error, reason} ->
{:error, reason, str}
end
:error ->
{:error, :expected_day_number, str}
end
_ ->
{:error, :invalid_month_week_day, str}
end
end
defp parse_month_week_day(m, n, d) do
with {:ok, m} <- to_integer(m),
{:ok, n} <- to_integer(n),
{:ok, d} <- to_integer(d) do
cond do
m < 1 or m > 12 ->
{:error, :invalid_month}
n < 1 or n > 5 ->
{:error, :invalid_week_of_month}
d < 0 or d > 6 ->
{:error, :invalid_week_day}
:else ->
{:ok, {:mwd, {m, n, d}}}
end
else
:error ->
{:error, :invalid_number}
end
end
defp parse_julian_day(str, opts) do
with {:ok, day, rest} <- parse_integer_unsigned(str) do
allow_leaps? = Keyword.get(opts, :allow_leap_days, true)
cond do
# Day of year including Feb 29
allow_leaps? and day >= 0 and day <= 365 ->
{:ok, {:julian, day, opts}, rest}
allow_leaps? ->
{:error, {:invalid_julian_day, day}, str}
# Day of year without Feb 29, i.e. day 59 is Feb 28, and day 60 is Mar 1
day >= 1 and day <= 365 ->
{:ok, {:julian, day, opts}, rest}
:else ->
{:error, {:invalid_julian_day, day}, str}
end
end
end
defp parse_abbrev(<<?<, rest::binary>>), do: parse_quoted_abbrev(rest)
defp parse_abbrev(str), do: parse_unquoted_abbrev(str)
defp parse_quoted_abbrev(str, acc \\ "")
defp parse_quoted_abbrev(<<?>, rest::binary>>, acc) when byte_size(acc) < 3,
do: {:error, {:invalid_quoted_abbreviation, acc}, rest}
defp parse_quoted_abbrev(<<?>, rest::binary>>, acc),
do: {:ok, acc, rest}
defp parse_quoted_abbrev(<<c::char(), rest::binary>>, acc),
do: parse_quoted_abbrev(rest, acc <> <<c::char()>>)
defp parse_quoted_abbrev(<<>>, acc),
do: {:error, :unclosed_quoted_abbreviation, acc}
defp parse_unquoted_abbrev(str, acc \\ "")
defp parse_unquoted_abbrev(<<c::char(), rest::binary>>, acc) when is_alphabetic(c),
do: parse_unquoted_abbrev(rest, acc <> <<c::char()>>)
defp parse_unquoted_abbrev(rest, acc) when byte_size(acc) < 3,
do: {:error, {:invalid_unquoted_abbreviation, acc}, rest}
defp parse_unquoted_abbrev(rest, acc),
do: {:ok, acc, rest}
defp parse_offset(<<sign::char(), rest::binary>>) when sign in [?+, ?-],
do: parse_offset(rest, sign)
defp parse_offset(str) when is_binary(str),
do: parse_offset(str, ?+)
defp parse_offset(str, sign) do
sign = if sign == ?+, do: 1, else: -1
with {:ok, time, rest} <- parse_time(str),
{seconds, _} <- Timex.Time.to_seconds_after_midnight(time) do
{:ok, sign * seconds, rest}
end
end
defp parse_time(str) do
case parse_integer_unsigned(str) do
{:ok, hh, <<?:, rest::binary>>} when hh >= 0 and hh <= 24 ->
case parse_integer_unsigned(rest) do
{:ok, mm, <<?:, rest::binary>>} when mm >= 0 and mm < 60 ->
case parse_integer_unsigned(rest) do
{:ok, ss, rest} when ss >= 0 and ss < 60 ->
{:ok, Timex.Time.new!(hh, mm, ss, 0), rest}
_ ->
{:ok, Timex.Time.new!(hh, mm, 0, 0), rest}
end
{:ok, mm, rest} when mm >= 0 and mm < 60 ->
{:ok, Timex.Time.new!(hh, mm, 0, 0), rest}
_ ->
{:ok, Timex.Time.new!(hh, 0, 0, 0), rest}
end
{:ok, hh, rest} when hh >= 0 and hh <= 24 ->
{:ok, Timex.Time.new!(hh, 0, 0, 0), rest}
{:ok, _, _} ->
{:error, :invalid_hour, str}
{:error, _, _} = err ->
err
end
end
defp to_integer(n) when is_integer(n) and n >= 0, do: {:ok, n}
defp to_integer(s) when is_binary(s) do
with {:ok, n, _} <- parse_integer_signed(s) do
{:ok, n}
end
end
defp parse_integer_signed(str) do
case Integer.parse(str) do
{value, rest} ->
{:ok, value, rest}
_ ->
{:error, :invalid_number, str}
end
end
defp parse_integer_unsigned(str, acc \\ "")
defp parse_integer_unsigned(<<c::char(), rest::binary>>, acc) when is_digit(c) do
parse_integer_unsigned(rest, acc <> <<c::char()>>)
end
defp parse_integer_unsigned(rest, acc) when byte_size(acc) > 0 do
{:ok, String.to_integer(acc), rest}
end
defp parse_integer_unsigned(rest, _) do
{:error, :invalid_number, rest}
end
################
# Parses an array of a primitive type, ex:
# parse_array(<<"test">>, 2, &parse_uchar/1) => [?t, ?e]
###
defp parse_array(data, 0, _parser), do: {[], data}
defp parse_array(data, count, parser) when is_binary(data) and is_function(parser) do
{results, rest} = do_parse_array(data, count, parser, [])
{results, rest}
end
defp do_parse_array(data, 0, _, acc), do: {Enum.reverse(acc), data}
defp do_parse_array(data, count, parser, acc) do
{item, next} = parser.(data)
do_parse_array(next, count - 1, parser, [item | acc])
end
#################
# Data Type Parsers
defp parse_int(1, bin), do: parse_i32(bin)
defp parse_int(_, bin), do: parse_i64(bin)
defp parse_i32(<<val::integer_32bit_be, rest::binary>>), do: {val, rest}
defp parse_i64(<<val::integer_64bit_be, rest::binary>>), do: {val, rest}
defp parse_char(<<val::signed_char_be, rest::binary>>), do: {val, rest}
defp parse_uchar(<<val::unsigned_char_be, rest::binary>>), do: {val, rest}
defp parse_null_terminated_str(bin), do: parse_null_terminated_str(bin, <<>>)
defp parse_null_terminated_str(<<>>, acc), do: {:ok, acc, ""}
defp parse_null_terminated_str(<<0, rest::binary>>, acc), do: {:ok, acc, rest}
defp parse_null_terminated_str(<<c::char(), rest::binary>>, acc) do
parse_null_terminated_str(rest, acc <> <<c::char()>>)
end
defp parse_newline_terminated_str(bin), do: parse_newline_terminated_str(bin, <<>>)
defp parse_newline_terminated_str(<<>>, acc), do: {:ok, acc, ""}
defp parse_newline_terminated_str(<<?\n, rest::binary>>, acc), do: {:ok, acc, rest}
defp parse_newline_terminated_str(<<c::char(), rest::binary>>, acc) do
parse_newline_terminated_str(rest, acc <> <<c::char()>>)
end
end