defmodule Timex.Parse.Timezones.Posix do
@moduledoc """
Parses POSIX-style timezones:
## Format
POSIX-style timezones are of the format: `stdoffset[dst[offset][,start[/time],end[/time]]]`
Where `std`/`dst` are dates in one of the following formats:
The `Mm.n.d` format, where:
- `Mm` (1-12) for 12 months
- `n` (1-5) 1 for the first week and 5 for the last week in the month
- `d` (0-6) 0 for Sunday and 6 for Saturday
The `Jn` format, where `n` is the julian day and leap days are excluded.
Or the `n` format, where `n` is the julian day, and leap days are included.
Offsets are optional, except for the `std` offset, and can be preceded by a sign. The offset indicates
the time added to the local time to obtain UTC time. The offsets may be hours; hours and minutes;
and hours, minutes, and seconds - colon separated between components. NOTE: The sign of the offset is
opposite the usual expectation, positive numbers are west of GMT, and negative numbers are east of GMT,
this is because the offset is the time added to _local_ time to arrive at UTC, rather than the other way
around.
For more info, see: https://pubs.opengroup.org/onlinepubs/9699919799/
## Example
TZ = `CST6CDT,M3.2.0/2:00:00,M11.1.0/2:00:00`
This would represents a change to daylight saving time at 2:00 AM on the second Sunday
in March and change back at 2:00 AM on the first Sunday in November, and keep 6 hours time
offset from GMT every year. The breakdown of the string is:
- `CST6CDT` is the timezone name (constructed by concatenating the abbreviation and offset of std/dst)
- `CST` is the standard abbreviation
- `6` is the offset from `CST` to get `UTC`
- `CDT` is the DST abbreviation
- There is no offset from `CDT`, so the standard assumes the offset is one hour ahead of `CST`, or `5`
- `,M3` is the third month
- `.2` is second week of the month
- `.0` is the day of the week (Sunday in this case)
- `/2:00:00` is the time at which `CST` changes to `CDT`; defaults to `2:00:00` if not specified
- `,M11` is the eleventh month
- `.1` is the first week of the month
- `.0` is the day of the week
- `/2:00:00` is the time at which `CDT` changes back to `CST`; defaults to `2:00:00` if not specified
"""
alias Timex.PosixTimezone, as: TZ
defguardp is_digit(c) when c >= ?0 and c <= ?9
defguardp is_alphabetic(c) when (c >= ?A and c <= ?Z) or (c >= ?a and c <= ?z)
defmacrop char() do
quote do: size(1) - unit(8) - integer
end
def parse(s) when is_binary(s) do
with {:ok, format_str, rest} <- parse_newline_terminated_str(s),
{:ok, tz, format_rest} <- parse_tz(format_str) do
{:ok, finalize(tz), format_rest <> rest}
end
end
defp finalize(%TZ{std_abbr: std, std_offset: soffs, dst_abbr: dst, dst_offset: nil} = tz)
when is_binary(dst) do
# DST exists, but offset is unset, so the standard dictates that this means an hour ahead of standard
%TZ{tz | name: "#{std}#{to_offset(soffs)}#{dst}", dst_offset: soffs + 3600}
end
defp finalize(%TZ{std_abbr: std, std_offset: soffs, dst_abbr: nil, dst_offset: nil} = tz) do
# No DST, so set the abbreviation to STD and set the offset to the same
%TZ{tz | name: "#{std}#{soffs}", dst_abbr: std, dst_offset: soffs}
end
defp finalize(
%TZ{name: nil, std_abbr: std, std_offset: soffs, dst_abbr: dst, dst_offset: doffs} = tz
) do
# Construct the full name for this zone
if diff(soffs, doffs) == 3600 do
# The DST offset is one hour ahead of the STD offset, so we can omit it
%TZ{tz | name: "#{std}#{to_offset(soffs)}#{dst}"}
else
%TZ{tz | name: "#{std}#{to_offset(soffs)}#{dst}#{to_offset(doffs)}"}
end
end
defp finalize(nil), do: nil
defp diff(std, dst), do: std - dst
defp to_offset(0), do: "0"
defp to_offset(n) do
n = n * -1
hours = div(n, 3600)
minutes = div(rem(n, 3600), 60)
seconds = rem(minutes, 60)
cond do
seconds == 0 and minutes == 0 ->
"#{hours}"
seconds == 0 ->
"#{hours}:#{String.pad_leading(minutes, 2, "0")}"
:else ->
"#{hours}:#{String.pad_leading(minutes, 2, "0")}:#{String.pad_leading(seconds, 2, "0")}"
end
end
defp parse_tz(""), do: {:ok, nil, ""}
defp parse_tz(str), do: parse_tz(:std_abbr, str, %TZ{})
defp parse_tz(:std_abbr, str, rule) do
with {:ok, abbr, rest} <- parse_abbrev(str) do
parse_tz(:std_offset, rest, %TZ{rule | std_abbr: abbr})
end
end
defp parse_tz(:std_offset, str, rule) do
with {:ok, offset, rest} <- parse_offset(str) do
parse_tz(:dst_abbr, rest, %TZ{rule | std_offset: offset})
else
{:error, nil, ""} ->
{:error, :invalid_offset, ""}
{:error, nil, rest} ->
parse_tz(:dst_abbr, rest, rule)
{:error, _, _} = err ->
err
end
end
# dst[offset][,...]
defp parse_tz(:dst_abbr, str, rule) do
with {:ok, abbr, rest} <- parse_abbrev(str),
rule = %TZ{rule | dst_abbr: abbr} do
# dst_offset is optional, and may or may not be followed by a comma and start/end rule
# if the offset is not present.
case rest do
<<>> ->
{:ok, rule, ""}
<<?,, rest::binary>> ->
parse_tz(:rule_period, rest, rule)
_ ->
parse_tz(:dst_offset, rest, rule)
end
end
end
# offset[,...]
defp parse_tz(:dst_offset, str, rule) do
with {:ok, offset, rest} <- parse_offset(str),
rule = %TZ{rule | dst_offset: offset} do
case rest do
<<>> ->
{:ok, rule, ""}
<<?,, rest::binary>> ->
parse_tz(:rule_period, rest, rule)
_ ->
{:error, :invalid_tz_rule_format}
end
else
{:error, nil, ""} ->
{:ok, rule, ""}
{:error, nil, <<?,, rest::binary>>} ->
parse_tz(:rule_period, rest, rule)
{:error, _, _} = err ->
err
end
end
defp parse_tz(:rule_period, str, rule) do
case String.split(str, ",", parts: 2, trim: false) do
[start_dt, end_dt] ->
with {:ok, dst_start, _} <- parse_posixtz_datetime(start_dt),
{:ok, dst_end, rest} <- parse_posixtz_datetime(end_dt) do
{:ok, %TZ{rule | dst_start: dst_start, dst_end: dst_end}, rest}
else
{:ok, _, rest} ->
{:error, :expected_comma, rest}
{:error, _, _} = err ->
err
end
_ ->
{:error, :expected_datetime_range, str}
end
end
defp parse_posixtz_datetime(str) do
result =
case str do
<<?M, rest::binary>> -> parse_month_week_day(rest)
<<?J, rest::binary>> -> parse_julian_day(rest, allow_leap_days: false)
_ -> parse_julian_day(str, allow_leap_days: true)
end
with {:ok, date, rest} <- result do
case rest do
<<?/, rest::binary>> ->
with {:ok, time, rest} <- parse_time(rest) do
{:ok, {date, time}, rest}
end
_ ->
{:ok, {date, Timex.Time.new!(2, 0, 0, 0)}, rest}
end
end
end
defp parse_month_week_day(str) do
case String.split(str, ".", parts: 3, trim: false) do
[m, n, rest] ->
case Integer.parse(rest) do
{d, rest} ->
with {:ok, date} <- parse_month_week_day(m, n, d) do
{:ok, date, rest}
else
{:error, reason} ->
{:error, reason, str}
end
:error ->
{:error, :expected_day_number, str}
end
_ ->
{:error, :invalid_month_week_day, str}
end
end
defp parse_month_week_day(m, n, d) do
with {:ok, m} <- to_integer(m),
{:ok, n} <- to_integer(n),
{:ok, d} <- to_integer(d) do
cond do
m < 1 or m > 12 ->
{:error, :invalid_month}
n < 1 or n > 5 ->
{:error, :invalid_week_of_month}
d < 0 or d > 6 ->
{:error, :invalid_week_day}
:else ->
{:ok, {:mwd, {m, n, d}}}
end
else
:error ->
{:error, :invalid_number}
end
end
defp parse_julian_day(str, opts) do
with {:ok, day, rest} <- parse_integer_unsigned(str) do
allow_leaps? = Keyword.get(opts, :allow_leap_days, true)
cond do
# Day of year including Feb 29
allow_leaps? and day >= 0 and day <= 365 ->
{:ok, {:julian_leap, day}, rest}
allow_leaps? ->
{:error, {:invalid_julian_day, day}, str}
# Day of year without Feb 29, i.e. day 59 is Feb 28, and day 60 is Mar 1
day >= 1 and day <= 365 ->
{:ok, {:julian, day}, rest}
:else ->
{:error, {:invalid_julian_day, day}, str}
end
end
end
defp parse_abbrev(<<?<, rest::binary>>), do: parse_quoted_abbrev(rest)
defp parse_abbrev(str), do: parse_unquoted_abbrev(str)
defp parse_quoted_abbrev(str, acc \\ "")
defp parse_quoted_abbrev(<<?>, rest::binary>>, acc) when byte_size(acc) < 3,
do: {:error, {:invalid_quoted_abbreviation, acc}, rest}
defp parse_quoted_abbrev(<<?>, rest::binary>>, acc),
do: {:ok, acc, rest}
defp parse_quoted_abbrev(<<c::char(), rest::binary>>, acc),
do: parse_quoted_abbrev(rest, acc <> <<c::char()>>)
defp parse_quoted_abbrev(<<>>, acc),
do: {:error, :unclosed_quoted_abbreviation, acc}
defp parse_unquoted_abbrev(str, acc \\ "")
defp parse_unquoted_abbrev(<<c::char(), rest::binary>>, acc) when is_alphabetic(c),
do: parse_unquoted_abbrev(rest, acc <> <<c::char()>>)
defp parse_unquoted_abbrev(rest, acc) when byte_size(acc) < 3,
do: {:error, {:invalid_unquoted_abbreviation, acc}, rest}
defp parse_unquoted_abbrev(rest, acc),
do: {:ok, acc, rest}
defp parse_offset(<<sign::char(), rest::binary>>) when sign in [?+, ?-],
do: parse_offset(rest, sign)
defp parse_offset(str) when is_binary(str),
do: parse_offset(str, ?+)
defp parse_offset(str, sign) do
sign = if sign == ?+, do: -1, else: 1
with {:ok, time, rest} <- parse_time(str),
{seconds, _} <- Timex.Time.to_seconds_after_midnight(time) do
{:ok, sign * seconds, rest}
end
end
defp parse_time(str) do
case parse_integer_unsigned(str) do
{:ok, hh, <<?:, rest::binary>>} when hh >= 0 and hh <= 24 ->
case parse_integer_unsigned(rest) do
{:ok, mm, <<?:, rest::binary>>} when mm >= 0 and mm < 60 ->
case parse_integer_unsigned(rest) do
{:ok, ss, rest} when ss >= 0 and ss < 60 ->
{:ok, Timex.Time.new!(hh, mm, ss), rest}
_ ->
{:ok, Timex.Time.new!(hh, mm, 0), rest}
end
{:ok, mm, rest} when mm >= 0 and mm < 60 ->
{:ok, Timex.Time.new!(hh, mm, 0), rest}
_ ->
{:ok, Timex.Time.new!(hh, 0, 0), rest}
end
{:ok, hh, rest} when hh >= 0 and hh <= 24 ->
{:ok, Timex.Time.new!(hh, 0, 0), rest}
{:ok, _, _} ->
{:error, :invalid_hour, str}
{:error, :invalid_number, str} ->
{:error, nil, str}
end
end
defp to_integer(n) when is_integer(n) and n >= 0, do: {:ok, n}
defp to_integer(s) when is_binary(s) do
with {:ok, n, _} <- parse_integer_signed(s) do
{:ok, n}
end
end
defp parse_integer_signed(str) do
case Integer.parse(str) do
{value, rest} ->
{:ok, value, rest}
_ ->
{:error, :invalid_number, str}
end
end
defp parse_integer_unsigned(str, acc \\ "")
defp parse_integer_unsigned(<<c::char(), rest::binary>>, acc) when is_digit(c) do
parse_integer_unsigned(rest, acc <> <<c::char()>>)
end
defp parse_integer_unsigned(rest, acc) when byte_size(acc) > 0 do
{:ok, String.to_integer(acc), rest}
end
defp parse_integer_unsigned(rest, _) do
{:error, :invalid_number, rest}
end
defp parse_newline_terminated_str(bin), do: parse_newline_terminated_str(bin, <<>>)
defp parse_newline_terminated_str(<<>>, acc), do: {:ok, acc, ""}
defp parse_newline_terminated_str(<<?\n, rest::binary>>, acc), do: {:ok, acc, rest}
defp parse_newline_terminated_str(<<c::char(), rest::binary>>, acc) do
parse_newline_terminated_str(rest, acc <> <<c::char()>>)
end
end