src/locus_mmdb_metadata.erl

%% Copyright (c) 2017-2022 Guilherme Andrade
%%
%% Permission is hereby granted, free of charge, to any person obtaining a
%% copy  of this software and associated documentation files (the "Software"),
%% to deal in the Software without restriction, including without limitation
%% the rights to use, copy, modify, merge, publish, distribute, sublicense,
%% and/or sell copies of the Software, and to permit persons to whom the
%% Software is furnished to do so, subject to the following conditions:
%%
%% The above copyright notice and this permission notice shall be included in
%% all copies or substantial portions of the Software.
%%
%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
%% DEALINGS IN THE SOFTWARE.
%%
%% locus is an independent project and has not been authorized, sponsored,
%% or otherwise approved by MaxMind.

%% @reference <a target="_parent" href="https://maxmind.github.io/MaxMind-DB/">
%% MaxMind DB File Format Specification</a>

%% @doc API for working with MMDB - metadata
-module(locus_mmdb_metadata).

%% ------------------------------------------------------------------
%% API Function Exports
%% ------------------------------------------------------------------

-export([parse_and_validate/1]).

%% ------------------------------------------------------------------
%% Macro Definitions
%% ------------------------------------------------------------------

%% Byte sequence that precedes the encoded metadata within a database;
%% `parse_and_validate/1' splits the database at its last occurrence.
-define(METADATA_MARKER, "\xab\xcd\xefMaxMind.com").

%% ------------------------------------------------------------------
%% Type Definitions
%% ------------------------------------------------------------------

%% Validated metadata, as returned by `parse_and_validate/1'.
%%
%% Keys declared with `:=' are mandatory, whereas `languages' and
%% `description' are optional. Properties that are not recognized
%% are kept, in their raw form, under a dedicated 1-tuple key
%% (the last association below).
-type t()
    :: #{ binary_format_version := binary_format_version(),
          node_count := locus_mmdb_data:uint32(),
          record_size := locus_mmdb_data:uint16(),
          ip_version := 4 | 6,
          database_type := locus_mmdb_data:utf8_string(),
          languages => [locus_mmdb_data:utf8_string()],
          build_epoch := locus_mmdb_data:uint64(), % UNIX timestamp
          description => #{locus_mmdb_data:utf8_string() => locus_mmdb_data:utf8_string()},
          {'$_unrecognized_properties'} => #{locus_mmdb_data:utf8_string()
                                             := locus_mmdb_data_raw:value()}
        }.
-export_type([t/0]).

%% Only major version 2 passes validation; see `validate_version/1'.
-type binary_format_version() :: {2, locus_mmdb_data:uint16()}. % {Major, Minor}
-export_type([binary_format_version/0]).

%% Reasons for which `parse_and_validate/1' may return `{error, Reason}'.
%%
%% Every error tuple produced by the validation functions of this module
%% is listed here, including the ones concerning `build_epoch'.
-type parse_or_validation_error()
    :: {marker_not_found, bitstring()}
    |  {atom(), term(), list()} % {Class, Reason, Stacktrace} caught while decoding
    |  {not_a_map, term()}
    |  {incompatible_binary_format_version, {locus_mmdb_data_raw:uint16(),
                                             locus_mmdb_data_raw:uint16()}}
    |  {invalid_binary_format_minor_version, term()}
    |  {invalid_binary_format_major_version, term()}
    |  {missing_metadata_keys, [unicode:unicode_binary(), ...]}
    |  {invalid_node_count, locus_mmdb_data_raw:value()}
    |  {missing_node_count, locus_mmdb_data_raw:map_()}
    |  {invalid_record_size, locus_mmdb_data_raw:value()}
    |  {missing_record_size, locus_mmdb_data_raw:map_()}
    |  {invalid_ip_version, locus_mmdb_data_raw:value()}
    |  {missing_ip_version, locus_mmdb_data_raw:map_()}
    |  {invalid_database_type, locus_mmdb_data_raw:value()}
    |  {missing_database_type, locus_mmdb_data_raw:map_()}
    |  {languages_not_an_array, locus_mmdb_data_raw:value()}
    |  {bad_languages, {language_number, pos_integer(), not_an_utf8_string,
                        locus_mmdb_data_raw:value()}}
    |  {invalid_build_epoch, locus_mmdb_data_raw:value()}
    |  {missing_build_epoch, locus_mmdb_data_raw:map_()}
    |  {description_not_a_map, locus_mmdb_data_raw:value()}
    |  {bad_description, {for_language_code, unicode:unicode_binary(),
                          {not_an_utf8_string, locus_mmdb_data_raw:value()}}}.

-export_type([parse_or_validation_error/0]).

%% ------------------------------------------------------------------
%% API Function Definitions
%% ------------------------------------------------------------------

%% @doc Locates the metadata within `EncodedDatabase', then parses and
%% validates it
%%
%% The metadata is taken to start right after the last occurrence of the
%% metadata marker; every byte that comes before that occurrence is returned
%% untouched as `TreeAndDataSection'.
%%
%% To proceed further with `TreeAndDataSection',
%% see {@link locus_mmdb:unpack_tree_data_and_data_section/2}.
%%
-spec parse_and_validate(EncodedDatabase) -> {ok, Metadata, TreeAndDataSection}
                                             | {error, Reason}
    when EncodedDatabase :: binary(),
         Metadata :: t(),
         TreeAndDataSection :: binary(),
         Reason :: parse_or_validation_error().
parse_and_validate(EncodedDatabase) ->
    Marker = <<?METADATA_MARKER>>,
    case binary:matches(EncodedDatabase, Marker) of
        [] ->
            {error, {marker_not_found, Marker}};

        Occurrences ->
            % Earlier occurrences are regarded as part of the preceding sections.
            {MarkerPosition, _MarkerLength} = lists:last(Occurrences),
            {TreeAndDataSection, MarkerAndMetadata}
                = split_binary(EncodedDatabase, MarkerPosition),
            <<?METADATA_MARKER, EncodedMetadata/bytes>> = MarkerAndMetadata,

            parse_and_validate(EncodedMetadata, TreeAndDataSection)
    end.

%% ------------------------------------------------------------------
%% Internal Function Definitions
%% ------------------------------------------------------------------

%% Decodes the value found at index 0 of `EncodedMetadata' and hands it over
%% to `validate/2'.
%%
%% Any exception raised by the codec is converted into
%% `{error, {Class, Reason, Stacktrace}}'.
parse_and_validate(EncodedMetadata, TreeAndDataSection) ->
    % Validation runs in the `of' section, hence only decoding is protected.
    try locus_mmdb_data_codec:parse_on_index(0, EncodedMetadata, _RawMode = true) of
        {DecodedMetadata, _TrailingData} ->
            validate(DecodedMetadata, TreeAndDataSection)
    catch
        ExcClass:ExcReason:ExcStacktrace ->
            {error, {ExcClass, ExcReason, ExcStacktrace}}
    end.

%% Checks that the decoded metadata is a map and validates its properties.
%%
%% On success, `TreeAndDataSection' is passed along with the metadata.
validate({map, Properties}, TreeAndDataSection) ->
    case validate_version(Properties) of
        {error, _} = Failure ->
            Failure;
        {ok, Metadata} ->
            {ok, Metadata, TreeAndDataSection}
    end;
validate(Unexpected, _TreeAndDataSection) ->
    {error, {not_a_map, Unexpected}}.

%% Validates the binary format version and, when it is supported (major
%% version 2), goes on to validate the remaining properties.
%%
%% Both version properties have to be present, or else the names of the
%% missing ones are returned within a `missing_metadata_keys' error.
validate_version(#{<<"binary_format_major_version">> := Major,
                   <<"binary_format_minor_version">> := Minor
                  } = MetadataMap)
->
    case {Major, Minor} of
        {{uint16, 2}, {uint16, MinorValue}} when MinorValue >= 0 ->
            VersionKeys = [<<"binary_format_major_version">>,
                           <<"binary_format_minor_version">>],
            OtherProperties = maps:without(VersionKeys, MetadataMap),
            % Properties are validated in this very order.
            PendingProperties = [node_count, record_size, ip_version,
                                 database_type, languages, build_epoch, description],
            InitialAcc = [{binary_format_version, {2, MinorValue}}],
            validate_recur(PendingProperties, OtherProperties, InitialAcc);
        {{uint16, MajorValue}, {uint16, MinorValue}} ->
            {error, {incompatible_binary_format_version, {MajorValue, MinorValue}}};
        {{uint16, _}, _} ->
            {error, {invalid_binary_format_minor_version, Minor}};
        {_, _} ->
            {error, {invalid_binary_format_major_version, Major}}
    end;
validate_version(MetadataMap) ->
    VersionKeys = [<<"binary_format_major_version">>, <<"binary_format_minor_version">>],
    AbsentKeys = [Key || Key <- VersionKeys, not maps:is_key(Key, MetadataMap)],
    {error, {missing_metadata_keys, AbsentKeys}}.

%% Validates the known metadata properties one at a time, following the
%% order of the list received as first argument.
%%
%% Each property that is found gets removed from `MetadataMap' and its
%% unwrapped value is pushed onto `Acc'. `languages' and `description' are
%% optional and skipped when absent; any other property that is missing or
%% of the wrong type aborts validation with `{error, Reason}'.
%%
%% Once no properties are left to validate, `Acc' is turned into the
%% resulting map. Whatever remains in `MetadataMap' by then is added to that
%% map, in raw form, under the `$_unrecognized_properties' 1-tuple key.
validate_recur([node_count | Next], MetadataMap, Acc) ->
    case maps:take(<<"node_count">>, MetadataMap) of
        {{uint32, NodeCountValue}, RemainingMetadataMap} ->
            UpdatedAcc = [{node_count, NodeCountValue} | Acc],
            validate_recur(Next, RemainingMetadataMap, UpdatedAcc);
        {InvalidNodeCount, _} ->
            {error, {invalid_node_count, InvalidNodeCount}};
        error ->
            {error, {missing_node_count, MetadataMap}}
    end;
validate_recur([record_size | Next], MetadataMap, Acc) ->
    case maps:take(<<"record_size">>, MetadataMap) of
        {{uint16, RecordSizeValue}, RemainingMetadataMap} ->
            UpdatedAcc = [{record_size, RecordSizeValue} | Acc],
            validate_recur(Next, RemainingMetadataMap, UpdatedAcc);
        {InvalidRecordSize, _} ->
            {error, {invalid_record_size, InvalidRecordSize}};
        error ->
            {error, {missing_record_size, MetadataMap}}
    end;
validate_recur([ip_version | Next], MetadataMap, Acc) ->
    case maps:take(<<"ip_version">>, MetadataMap) of
        {{uint16, IpVersionValue}, RemainingMetadataMap}
          when IpVersionValue =:= 6; IpVersionValue =:= 4 ->
            UpdatedAcc = [{ip_version, IpVersionValue} | Acc],
            validate_recur(Next, RemainingMetadataMap, UpdatedAcc);
        {InvalidIpVersion, _} ->
            {error, {invalid_ip_version, InvalidIpVersion}};
        error ->
            {error, {missing_ip_version, MetadataMap}}
    end;
validate_recur([database_type | Next], MetadataMap, Acc) ->
    case maps:take(<<"database_type">>, MetadataMap) of
        {{utf8_string, DatabaseTypeValue}, RemainingMetadataMap} ->
            UpdatedAcc = [{database_type, DatabaseTypeValue} | Acc],
            validate_recur(Next, RemainingMetadataMap, UpdatedAcc);
        {InvalidDatabaseType, _} ->
            {error, {invalid_database_type, InvalidDatabaseType}};
        error ->
            {error, {missing_database_type, MetadataMap}}
    end;
validate_recur([languages | Next], MetadataMap, Acc) ->
    case validate_languages(MetadataMap) of
        {ok, LanguageValues, RemainingMetadataMap} ->
            UpdatedAcc = [{languages, LanguageValues} | Acc],
            validate_recur(Next, RemainingMetadataMap, UpdatedAcc);
        {error, _} = Error ->
            Error;
        skip ->
            % `languages' is optional: nothing to add nor to remove
            validate_recur(Next, MetadataMap, Acc)
    end;
validate_recur([build_epoch | Next], MetadataMap, Acc) ->
    case maps:take(<<"build_epoch">>, MetadataMap) of
        {{uint64, BuildEpochValue}, RemainingMetadataMap} ->
            UpdatedAcc = [{build_epoch, BuildEpochValue} | Acc],
            validate_recur(Next, RemainingMetadataMap, UpdatedAcc);
        {InvalidBuildEpoch, _} ->
            {error, {invalid_build_epoch, InvalidBuildEpoch}};
        error ->
            {error, {missing_build_epoch, MetadataMap}}
    end;
validate_recur([description | Next], MetadataMap, Acc) ->
    case validate_description(MetadataMap) of
        {ok, DescriptionValue, RemainingMetadataMap} ->
            UpdatedAcc = [{description, DescriptionValue} | Acc],
            validate_recur(Next, RemainingMetadataMap, UpdatedAcc);
        {error, _} = Error ->
            Error;
        skip ->
            % `description' is optional: nothing to add nor to remove
            validate_recur(Next, MetadataMap, Acc)
    end;
validate_recur([], MetadataMap, Acc) ->
    case map_size(MetadataMap) =:= 0 of
        true ->
            Metadata = maps:from_list(Acc),
            {ok, Metadata};
        false ->
            % The leftovers are added on top of the validated properties;
            % building the result out of the leftovers alone would discard
            % everything that was accumulated in `Acc'.
            UpdatedAcc = [{{'$_unrecognized_properties'}, MetadataMap} | Acc],
            Metadata = maps:from_list(UpdatedAcc),
            {ok, Metadata}
    end.

%% Validates the optional `languages' property.
%%
%% Returns `{ok, Languages, OtherProperties}' when it is an array made of
%% UTF-8 strings only, `{error, Reason}' when it is malformed, or `skip'
%% when the property is absent.
validate_languages(MetadataMap) ->
    case MetadataMap of
        #{<<"languages">> := {array, Elements}} ->
            OtherProperties = maps:remove(<<"languages">>, MetadataMap),
            validate_languages_recur(Elements, OtherProperties, []);
        #{<<"languages">> := NotAnArray} ->
            {error, {languages_not_an_array, NotAnArray}};
        #{} ->
            % `languages' is optional
            skip
    end.

%% Unwraps the elements of the `languages' array, keeping their order.
%%
%% Stops at the first element that is not an UTF-8 string, reporting its
%% 1-based position within the array.
validate_languages_recur([], OtherProperties, ReversedLanguages) ->
    {ok, lists:reverse(ReversedLanguages), OtherProperties};
validate_languages_recur([{utf8_string, Language} | Rest],
                         OtherProperties, ReversedLanguages) ->
    validate_languages_recur(Rest, OtherProperties, [Language | ReversedLanguages]);
validate_languages_recur([Invalid | _Rest],
                         _OtherProperties, ReversedLanguages) ->
    % As many languages were accepted as there are elements before this one.
    Position = 1 + length(ReversedLanguages),
    {error, {bad_languages, {language_number, Position, not_an_utf8_string, Invalid}}}.

%% Validates the optional `description' property.
%%
%% Returns `{ok, Description, OtherProperties}' when it is a map whose values
%% are all UTF-8 strings, `{error, Reason}' when it is malformed, or `skip'
%% when the property is absent.
validate_description(MetadataMap) ->
    case MetadataMap of
        #{<<"description">> := {map, RawDescription}} ->
            OtherProperties = maps:remove(<<"description">>, MetadataMap),
            Pairs = maps:to_list(RawDescription),
            validate_description_recur(Pairs, OtherProperties, []);
        #{<<"description">> := NotAMap} ->
            {error, {description_not_a_map, NotAMap}};
        #{} ->
            % `description' is optional
            skip
    end.

%% Unwraps each localized description, which are keyed by language code.
%%
%% Stops at the first value that is not an UTF-8 string. Only binary
%% language codes are handled.
validate_description_recur([], OtherProperties, Pairs) ->
    {ok, maps:from_list(Pairs), OtherProperties};
validate_description_recur([{Code, {utf8_string, Text}} | Rest],
                           OtherProperties, Pairs)
  when is_binary(Code) ->
    validate_description_recur(Rest, OtherProperties, [{Code, Text} | Pairs]);
validate_description_recur([{Code, Invalid} | _Rest],
                           _OtherProperties, _Pairs)
  when is_binary(Code) ->
    {error, {bad_description, {for_language_code, Code,
                               {not_an_utf8_string, Invalid}}}}.