lib/expo/parser/mo.ex

defmodule Expo.Parser.Mo do
  @moduledoc """
  `.mo` file parser
  """

  @behaviour Expo.Parser

  alias Expo.Parser.Util
  alias Expo.Translation
  alias Expo.Translations

  @doc """
  Parse `.mo` file

  ### Examples

      iex> Expo.Parser.Mo.parse(<<0xDE120495::size(4)-unit(8),
      ...>   0::little-unsigned-integer-size(2)-unit(8),
      ...>   0::little-unsigned-integer-size(2)-unit(8),
      ...>   0::little-unsigned-integer-size(4)-unit(8),
      ...>   28::little-unsigned-integer-size(4)-unit(8),
      ...>   28::little-unsigned-integer-size(4)-unit(8),
      ...>   28::little-unsigned-integer-size(4)-unit(8),
      ...>   0::little-unsigned-integer-size(4)-unit(8)>>)
      {:ok, %Expo.Translations{headers: [], translations: []}}

  """
  @impl Expo.Parser
  def parse(content)

  def parse(content) when byte_size(content) >= 28 do
    with {:ok, {endianness, header}} <- parse_header(binary_part(content, 0, 28)),
         :ok <-
           check_version(header.file_format_revision_major, header.file_format_revision_minor),
         translations <- parse_translations(endianness, header, content),
         {headers, top_comments, translations} <- Util.extract_meta_headers(translations) do
      {:ok,
       %Translations{translations: translations, headers: headers, top_comments: top_comments}}
    end
  end

  def parse(_content), do: {:error, :invalid_file}

  defp parse_header(header_binary)

  defp parse_header(
         <<0xDE120495::size(4)-unit(8),
           file_format_revision_major::little-unsigned-integer-size(2)-unit(8),
           file_format_revision_minor::little-unsigned-integer-size(2)-unit(8),
           number_of_strings::little-unsigned-integer-size(4)-unit(8),
           offset_of_table_with_original_strings::little-unsigned-integer-size(4)-unit(8),
           offset_of_table_with_translation_strings::little-unsigned-integer-size(4)-unit(8),
           _size_of_hashing_table::little-unsigned-integer-size(4)-unit(8),
           _offset_of_hashing_table::little-unsigned-integer-size(4)-unit(8)>>
       ),
       do:
         {:ok,
          {:little,
           %{
             file_format_revision_major: file_format_revision_major,
             file_format_revision_minor: file_format_revision_minor,
             number_of_strings: number_of_strings,
             offset_of_table_with_original_strings: offset_of_table_with_original_strings,
             offset_of_table_with_translation_strings: offset_of_table_with_translation_strings
           }}}

  defp parse_header(
         <<0x950412DE::size(4)-unit(8),
           file_format_revision_major::big-unsigned-integer-size(2)-unit(8),
           file_format_revision_minor::big-unsigned-integer-size(2)-unit(8),
           number_of_strings::big-unsigned-integer-size(4)-unit(8),
           offset_of_table_with_original_strings::big-unsigned-integer-size(4)-unit(8),
           offset_of_table_with_translation_strings::big-unsigned-integer-size(4)-unit(8),
           _size_of_hashing_table::big-unsigned-integer-size(4)-unit(8),
           _offset_of_hashing_table::big-unsigned-integer-size(4)-unit(8)>>
       ),
       do:
         {:ok,
          {:big,
           %{
             file_format_revision_major: file_format_revision_major,
             file_format_revision_minor: file_format_revision_minor,
             number_of_strings: number_of_strings,
             offset_of_table_with_original_strings: offset_of_table_with_original_strings,
             offset_of_table_with_translation_strings: offset_of_table_with_translation_strings
           }}}

  defp parse_header(_header_binary), do: {:error, :invalid_header}

  defp check_version(major, minor)
  # Not checking minor since they must be BC compatible
  defp check_version(0, _minor), do: :ok
  defp check_version(major, minor), do: {:error, {:unsupported_version, major, minor}}

  defp parse_translations(endianness, header, content) do
    [
      header.offset_of_table_with_original_strings,
      header.offset_of_table_with_translation_strings
    ]
    |> Enum.map(&read_table(endianness, content, &1, header.number_of_strings))
    |> zip_with(&to_translation/1)
  end

  defp read_table(endianness, content, start_offset, number_of_elements),
    do:
      endianness
      |> read_table_headers(binary_part(content, start_offset, number_of_elements * 2 * 4), [])
      |> Enum.map(&read_table_cell(content, &1))

  defp read_table_headers(endianness, table_header, acc)

  defp read_table_headers(
         :big,
         <<cell_length::big-unsigned-integer-size(4)-unit(8),
           cell_offset::big-unsigned-integer-size(4)-unit(8), rest::binary>>,
         acc
       ),
       do: read_table_headers(:big, rest, [{cell_offset, cell_length} | acc])

  defp read_table_headers(
         :little,
         <<cell_length::little-unsigned-integer-size(4)-unit(8),
           cell_offset::little-unsigned-integer-size(4)-unit(8), rest::binary>>,
         acc
       ),
       do: read_table_headers(:little, rest, [{cell_offset, cell_length} | acc])

  defp read_table_headers(_endianness, <<>>, acc), do: Enum.reverse(acc)

  defp read_table_cell(content, position)
  defp read_table_cell(content, {offset, length}), do: binary_part(content, offset, length)

  defp to_translation([msgid, msgstr]) do
    {attrs, translation_type} = msg_id_to_translation_attrs(msgid)

    attrs =
      case translation_type do
        Translation.Singular ->
          Map.merge(attrs, %{msgstr: [msgstr]})

        Translation.Plural ->
          msgstr =
            for {msgstr, index} <- Enum.with_index(String.split(msgstr, <<0>>)),
                into: %{},
                do: {index, [msgstr]}

          Map.merge(attrs, %{msgstr: msgstr})
      end

    struct!(translation_type, attrs)
  end

  defp msg_id_to_translation_attrs(msgid) do
    {attrs, msgid} =
      case String.split(msgid, <<4::utf8>>, parts: 2) do
        [msgid] -> {%{}, msgid}
        [msgctxt, msgid] -> {%{msgctxt: msgctxt}, msgid}
      end

    case String.split(msgid, <<0>>, parts: 2) do
      [msgid] ->
        {Map.merge(attrs, %{msgid: [msgid]}), Translation.Singular}

      [msgid, msgid_plural] ->
        {Map.merge(attrs, %{msgid: [msgid], msgid_plural: [msgid_plural]}), Translation.Plural}
    end
  end

  # TODO: Remove when requiring at least Elixir 1.12
  if function_exported?(Enum, :zip_with, 2) do
    defp zip_with(lists, mapper), do: Enum.zip_with(lists, mapper)
  else
    defp zip_with(lists, mapper),
      do: lists |> Enum.zip() |> Enum.map(fn {left, right} -> mapper.([left, right]) end)
  end
end