%% Copyright (c) 2017-2022 Guilherme Andrade
%%
%% Permission is hereby granted, free of charge, to any person obtaining a
%% copy of this software and associated documentation files (the "Software"),
%% to deal in the Software without restriction, including without limitation
%% the rights to use, copy, modify, merge, publish, distribute, sublicense,
%% and/or sell copies of the Software, and to permit persons to whom the
%% Software is furnished to do so, subject to the following conditions:
%%
%% The above copyright notice and this permission notice shall be included in
%% all copies or substantial portions of the Software.
%%
%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
%% DEALINGS IN THE SOFTWARE.
%%
%% locus is an independent project and has not been authorized, sponsored,
%% or otherwise approved by MaxMind.
%% @reference <a target="_parent" href="https://maxmind.github.io/MaxMind-DB/">
%% MaxMind DB File Format Specification</a>
%% @doc API for working with MMDB - data codec
-module(locus_mmdb_data_codec).
-hank([{unnecessary_function_arguments, [{just_the_value, 2, 1}]}]).
%% ------------------------------------------------------------------
%% API Function Exports
%% ------------------------------------------------------------------
-export([parse_on_index/3]).
%% ------------------------------------------------------------------
%% "Private" API Function Exports
%% ------------------------------------------------------------------
-export([validate_indices_in_tree/7]).
%% ------------------------------------------------------------------
%% Debug API Function Exports
%% ------------------------------------------------------------------
-export([parse_all/2]).
%% ------------------------------------------------------------------
%% Macro Definitions
%% ------------------------------------------------------------------
-define(pointer, 1).
-define(utf8_string, 2).
-define(double, 3).
-define(bytes, 4).
-define(uint16, 5).
-define(uint32, 6).
-define(map, 7).
-define(extended_int32, 1).
-define(extended_uint64, 2).
-define(extended_uint128, 3).
-define(extended_array, 4).
-define(extended_data_cache_container, 5). % Unimplemented
-define(extended_end_marker, 6).
-define(extended_boolean, 7).
-define(extended_float, 8).
%% ------------------------------------------------------------------
%% API Record and Type Definitions
%% ------------------------------------------------------------------
-type index() :: non_neg_integer().
-export_type([index/0]).
%% ------------------------------------------------------------------
%% Internal Record and Type Definitions
%% ------------------------------------------------------------------
-record(parse_opts, {
wrapping_fun :: fun ((atom(), locus_mmdb_data:value())
-> locus_mmdb_data_raw:value()
| locus_mmdb_data:value())
}).
-record(validation_aux, {
indices_in_tree :: locus_shared_bitarray:t(),
visited :: locus_shared_bitarray:t(),
valid_map_keys :: locus_shared_bitarray:t(),
batch_size :: pos_integer(),
data :: binary(),
journal :: locus_mmdb_check_journal:t()
}).
%% ------------------------------------------------------------------
%% API Function Definitions
%% ------------------------------------------------------------------
%% @doc Attempts to parse either the `Value' or `RawValue'
%% (depending on the `Raw' flag) at `Index' in `DataSection'.
%%
%% Will crash upon invalid/unrecognized data, invalid pointers
%% or cyclic pointer chasing (i.e. loops.)
%%
-spec parse_on_index(Index, DataSection, Raw)
-> {Value | RawValue, RemainingData}
when Index :: index(),
DataSection :: binary(),
Raw :: boolean(),
Value :: locus_mmdb_data:value(),
RawValue :: locus_mmdb_data_raw:value(),
RemainingData :: binary().
parse_on_index(Index, FullData, Raw) ->
WrappingFun = parser_wrapping_fun(Raw),
Opts = #parse_opts{wrapping_fun = WrappingFun},
parse_on_index(Index, FullData, Opts, []).
%% ------------------------------------------------------------------
%% "Private" API Function Definitions
%% ------------------------------------------------------------------
-spec validate_indices_in_tree(locus_shared_bitarray:t(),
locus_shared_bitarray:t(),
locus_shared_bitarray:t(),
pos_integer(), non_neg_integer(),
binary(),
locus_mmdb_check_journal:t()) -> ok.
%% @private
validate_indices_in_tree(BitArray, VisitedBitArray, MapKeysBitArray,
BatchSize, BatchOffset, Data, Journal) ->
Aux = #validation_aux{indices_in_tree = BitArray,
visited = VisitedBitArray,
valid_map_keys = MapKeysBitArray,
batch_size = BatchSize,
data = Data,
journal = Journal},
validate_positions_in_tree_recur(Aux, _CurrentOffset = BatchOffset).
%% ------------------------------------------------------------------
%% Debug API Function Definitions
%% ------------------------------------------------------------------
-spec parse_all(binary(), boolean()) -> [locus_mmdb_data:value()
| locus_mmdb_data_raw:value()].
%% @private
parse_all(FullData, Raw) ->
WrappingFun = parser_wrapping_fun(Raw),
Opts = #parse_opts{wrapping_fun = WrappingFun},
parse_all_recur(_Chunk = FullData, FullData, Opts, _Acc = []).
%% ------------------------------------------------------------------
%% Internal Function Definitions - Parsing
%% ------------------------------------------------------------------
parse_on_index(Index, FullData, Opts, Path) ->
UpdatedPath = [Index | Path],
case lists:member(Index, Path) orelse FullData of
true ->
error({circular_path, UpdatedPath});
<<_:Index/bytes, Chunk/bytes>> ->
parse_chunk(Chunk, FullData, Opts, UpdatedPath);
<<_InsufficientData/bytes>> ->
error({invalid_index, Index, #{under_path => Path,
full_data_size => byte_size(FullData)}})
end.
parse_chunk(Chunk, FullData, Opts, Path) ->
case parse_chunk_head(Chunk) of
{pointer, Pointer, RemainingData} ->
{MaybeWrapped, _} = parse_on_index(Pointer, FullData, Opts, Path),
{MaybeWrapped, RemainingData};
{utf8_string, Bytes, RemainingData} ->
Text = decode_utf8_string(Bytes, Path),
MaybeWrapped = maybe_wrap(utf8_string, Text, Opts),
{MaybeWrapped, RemainingData};
{bytes, Bytes, RemainingData} ->
Blob = binary:copy(Bytes),
MaybeWrapped = maybe_wrap(bytes, Blob, Opts),
{MaybeWrapped, RemainingData};
{map, Count, RemainingData} ->
parse_map(Count, RemainingData, FullData, Opts, Path);
{array, Count, RemainingData} ->
parse_array(Count, RemainingData, FullData, Opts, Path);
{Type, Value, RemainingData} ->
MaybeWrapped = maybe_wrap(Type, Value, Opts),
{MaybeWrapped, RemainingData};
{error, Reason} ->
error({failed_to_parse_chunk, #{why => Reason,
under_path => Path}})
end.
decode_utf8_string(Bytes, Path) ->
Copy = binary:copy(Bytes),
case unicode:characters_to_binary(Copy) of
<<_/bytes>> ->
Copy;
_ ->
error({not_utf8_text, #{bytes => Copy,
under_path => Path}})
end.
parse_map(Count, Chunk, FullData, Opts, Path) ->
parse_map_recur(Count, Chunk, FullData, Opts, Path, []).
parse_map_recur(Count, Chunk, FullData, Opts, Path, KvAcc)
when Count > 0 ->
{Key, RemainingData} = parse_map_key(Chunk, FullData, Path),
{MaybeWrappedValue, RemainingData2} = parse_chunk(RemainingData, FullData, Opts, Path),
UpdatedKvAcc = [{Key, MaybeWrappedValue} | KvAcc],
parse_map_recur(Count - 1, RemainingData2, FullData, Opts, Path, UpdatedKvAcc);
parse_map_recur(0, RemainingData, _, Opts, _, KvAcc) ->
Map = maps:from_list(KvAcc),
MaybeWrapped = maybe_wrap(map, Map, Opts),
{MaybeWrapped, RemainingData}.
parse_map_key(Chunk, FullData, Path) ->
case parse_chunk_head(Chunk) of
{pointer, Pointer, RemainingData} ->
{Text, _} = parse_map_key_on_index(Pointer, FullData, Path),
{Text, RemainingData};
{utf8_string, Bytes, RemainingData} ->
Text = decode_utf8_string(Bytes, Path),
{Text, RemainingData};
{KeyType, KeyValue, _} ->
error({unexpected_map_key, #{type => KeyType,
value => KeyValue,
under_path => Path}});
{error, Reason} ->
error({failed_to_parse_map_key, #{why => Reason,
under_path => Path}})
end.
parse_map_key_on_index(Index, FullData, Path) ->
UpdatedPath = [Index | Path],
case lists:member(Index, Path) of
true ->
error({circular_path, UpdatedPath});
_ ->
<<_:Index/bytes, Chunk/bytes>> = FullData,
parse_map_key(Chunk, FullData, UpdatedPath)
end.
parse_array(Count, Chunk, FullData, Opts, Path) ->
parse_array_recur(Count, Chunk, FullData, Opts, Path, []).
parse_array_recur(Count, Chunk, FullData, Opts, Path, Acc)
when Count > 0 ->
{MaybeWrapped, RemainingData} = parse_chunk(Chunk, FullData, Opts, Path),
UpdatedAcc = [MaybeWrapped | Acc],
parse_array_recur(Count - 1, RemainingData, FullData, Opts, Path, UpdatedAcc);
parse_array_recur(0, RemainingData, _, Opts, _, Acc) ->
List = lists:reverse(Acc),
MaybeWrapped = maybe_wrap(array, List, Opts),
{MaybeWrapped, RemainingData}.
-spec parse_chunk_head(binary())
-> {locus_mmdb_data_raw:value_tag(), locus_mmdb_data:value(), binary()}
| {error, term()}.
parse_chunk_head(Data) ->
case Data of
<<?pointer:3, 0:2, Pointer:11, RemainingData/bytes>> ->
{pointer, Pointer, RemainingData};
<<?pointer:3, 1:2, BasePointer:19, RemainingData/bytes>> ->
{pointer, BasePointer + 2048, RemainingData};
<<?pointer:3, 2:2, BasePointer:27, RemainingData/bytes>> ->
{pointer, BasePointer + 526336, RemainingData};
<<?pointer:3, _:5, Pointer:32, RemainingData/bytes>> ->
{pointer, Pointer, RemainingData};
<<?utf8_string:3, Size:5, Bytes:Size/bytes, RemainingData/bytes>>
when Size < 29 ->
{utf8_string, Bytes, RemainingData};
<<?utf8_string:3, 29:5, BaseSize, Tail/bytes>> ->
Size = BaseSize + 29,
<<Bytes:Size/bytes, RemainingData/bytes>> = Tail,
{utf8_string, Bytes, RemainingData};
<<?utf8_string:3, 30:5, BaseSize:16, Tail/bytes>> ->
Size = BaseSize + 285,
<<Bytes:Size/bytes, RemainingData/bytes>> = Tail,
{utf8_string, Bytes, RemainingData};
<<?utf8_string:3, _:5, BaseSize:24, Tail/bytes>> ->
Size = BaseSize + 65821,
<<Bytes:Size/bytes, RemainingData/bytes>> = Tail,
{utf8_string, Bytes, RemainingData};
<<?double:3, 8:5, Value:64/float, RemainingData/bytes>> ->
{double, Value, RemainingData};
<<?double:3, 8:5, Signal:1, Exponent:11, Mantissa:52, RemainingData/bytes>>
when Signal =:= 0, Exponent =:= ((1 bsl 11) - 1), Mantissa =:= 0 ->
{double, '#Inf', RemainingData};
<<?double:3, 8:5, Signal:1, Exponent:11, Mantissa:52, RemainingData/bytes>>
when Signal =:= 1, Exponent =:= ((1 bsl 11) - 1), Mantissa =:= 0 ->
{double, '#-Inf', RemainingData};
<<?bytes:3, Size:5, Bytes:Size/bytes, RemainingData/bytes>>
when Size < 29 ->
{bytes, Bytes, RemainingData};
<<?bytes:3, 29:5, BaseSize, Tail/bytes>> ->
Size = BaseSize + 29,
<<Bytes:Size/bytes, RemainingData/bytes>> = Tail,
{bytes, Bytes, RemainingData};
<<?bytes:3, 30:5, BaseSize:16, Tail/bytes>> ->
Size = BaseSize + 285,
<<Bytes:Size/bytes, RemainingData/bytes>> = Tail,
{bytes, Bytes, RemainingData};
<<?bytes:3, _:5, BaseSize:24, Tail/bytes>> ->
Size = BaseSize + 65821,
<<Bytes:Size/bytes, RemainingData/bytes>> = Tail,
{bytes, Bytes, RemainingData};
<<?uint16:3, Size:5, Value:Size/integer-unit:8, RemainingData/bytes>>
when Size =< 2 ->
{uint16, Value, RemainingData};
<<?uint32:3, Size:5, Value:Size/integer-unit:8, RemainingData/bytes>>
when Size =< 4 ->
{uint32, Value, RemainingData};
<<?map:3, Count:5, RemainingData/bytes>>
when Count < 29 ->
{map, Count, RemainingData};
<<?map:3, 29:5, BaseCount, RemainingData/bytes>> ->
Count = BaseCount + 29,
{map, Count, RemainingData};
<<?map:3, 30:5, BaseCount:16, RemainingData/bytes>> ->
Count = BaseCount + 285,
{map, Count, RemainingData};
<<?map:3, _:5, BaseCount:24, RemainingData/bytes>> ->
Count = BaseCount + 65821,
{map, Count, RemainingData};
<<0:3, Size:5, ?extended_int32, Value:Size/signed-integer-unit:8, RemainingData/bytes>>
when Size =:= 4 ->
{int32, Value, RemainingData};
<<0:3, Size:5, ?extended_int32, Value:Size/integer-unit:8, RemainingData/bytes>>
when Size < 4 ->
% As per the spec:
% "When storing a signed integer, fields shorter than the maximum byte length
% are always positive. When the field is the maximum length, e.g., 4 bytes
% for 32-bit integers, the left-most bit is the sign.
% A 1 is negative and a 0 is positive."
%
{int32, Value, RemainingData};
<<0:3, Size:5, ?extended_uint64, Value:Size/integer-unit:8, RemainingData/bytes>>
when Size =< 8 ->
{uint64, Value, RemainingData};
<<0:3, Size:5, ?extended_uint128, Value:Size/integer-unit:8, RemainingData/bytes>>
when Size =< 16 ->
{uint128, Value, RemainingData};
<<0:3, Count:5, ?extended_array, RemainingData/bytes>> when Count < 29 ->
{array, Count, RemainingData};
<<0:3, 29:5, ?extended_array, BaseCount, RemainingData/bytes>> ->
Count = BaseCount + 29,
{array, Count, RemainingData};
<<0:3, 30:5, ?extended_array, BaseCount:16, RemainingData/bytes>> ->
Count = BaseCount + 285,
{array, Count, RemainingData};
<<0:3, _:5, ?extended_array, BaseCount:24, RemainingData/bytes>> ->
Count = BaseCount + 65821,
{array, Count, RemainingData};
<<0:3, _:5, ?extended_data_cache_container, _RemainingData/bytes>> ->
{error, '`data cache container` type not yet supported'};
<<0:3, 0:5, ?extended_end_marker>> ->
{error, {finished, extended_end_marker}};
<<0:3, 0:5, ?extended_boolean, RemainingData/bytes>> ->
{boolean, false, RemainingData};
<<0:3, 1:5, ?extended_boolean, RemainingData/bytes>> ->
{boolean, true, RemainingData};
<<0:3, 4:5, ?extended_float, Value:32/float, RemainingData/bytes>> ->
{float, Value, RemainingData};
<<0:3, 4:5, ?extended_float, Signal:1, Exponent:8, Mantissa:23, RemainingData/bytes>>
when Signal =:= 0, Exponent =:= ((1 bsl 8) - 1), Mantissa =:= 0 ->
{float, '#Inf', RemainingData};
<<0:3, 4:5, ?extended_float, Signal:1, Exponent:8, Mantissa:23, RemainingData/bytes>>
when Signal =:= 1, Exponent =:= ((1 bsl 8) - 1), Mantissa =:= 0 ->
{float, '#-Inf', RemainingData};
<<>> ->
{error, {finished, no_more_data}};
_Invalid ->
fail_to_parse_chunk_head(Data)
end.
fail_to_parse_chunk_head(Data) ->
case Data of
<<?pointer:3, 0:2, InsufficientData/bits>> ->
{error, {insufficient_data, pointer_0, bitstring_copy(InsufficientData)}};
<<?pointer:3, 1:2, InsufficientData/bits>> ->
{error, {insufficient_data, pointer_1, bitstring_copy(InsufficientData)}};
<<?pointer:3, 2:2, InsufficientData/bits>> ->
{error, {insufficient_data, pointer_2, bitstring_copy(InsufficientData)}};
<<?pointer:3, _:5, InsufficientData/bits>> ->
{error, {insufficient_data, pointer_3, bitstring_copy(InsufficientData)}};
<<?utf8_string:3, Size:5, InsufficientData/bytes>>
when Size < 29 ->
{error, {insufficient_data, utf8_string_1, {Size, bytes},
binary:copy(InsufficientData)}};
<<?utf8_string:3, 29:5, InsufficientData/bytes>> ->
{error, {insufficient_data_for_size, utf8_string_2, binary:copy(InsufficientData)}};
<<?utf8_string:3, 30:5, InsufficientData/bytes>> ->
{error, {insufficient_data_for_size, utf8_string_3, binary:copy(InsufficientData)}};
<<?utf8_string:3, 31:5, InsufficientData/bytes>> ->
{error, {insufficient_data_for_size, utf8_string_4, binary:copy(InsufficientData)}};
<<?double:3, 8:5, InvalidDouble:64/bits, _RemainingData/bytes>> ->
{error, {invalid_double, binary:copy(InvalidDouble)}};
<<?double:3, 8:5, InsufficientData/bytes>> ->
{error, {insufficient_data, double, binary:copy(InsufficientData)}};
<<?double:3, SizeTag:5, _RemainingData/bytes>> ->
{error, {invalid_double_size_tag, SizeTag}};
<<?bytes:3, Size:5, InsufficientData/bytes>>
when Size < 29 ->
{error, {insufficient_data, bytes_1, {Size, bytes}, binary:copy(InsufficientData)}};
<<?bytes:3, 29:5, InsufficientData/bytes>> ->
{error, {insufficient_data_for_size, bytes_2, binary:copy(InsufficientData)}};
<<?bytes:3, 30:5, InsufficientData/bytes>> ->
{error, {insufficient_data_for_size, bytes_3, binary:copy(InsufficientData)}};
<<?bytes:3, _:5, InsufficientData/bytes>> ->
{error, {insufficient_data_for_size, bytes_4, binary:copy(InsufficientData)}};
<<?uint16:3, Size:5, _RemainingData/bytes>>
when Size > 2 ->
{error, {too_large, uint16, {Size, bytes}}};
<<?uint16:3, Size:5, InsufficientData/bytes>> ->
{error, {insufficient_data, uint16, {Size, bytes}, binary:copy(InsufficientData)}};
<<?uint32:3, Size:5, _RemainingData/bytes>>
when Size > 4 ->
{error, {too_large, uint32, {Size, bytes}}};
<<?uint32:3, Size:5, InsufficientData/bytes>> ->
{error, {insufficient_data, uint32, {Size, bytes}, binary:copy(InsufficientData)}};
<<?map:3, 29:5, InsufficientData/bits>> ->
{error, {insufficient_data_for_size, map_2, bitstring_copy(InsufficientData)}};
<<?map:3, 30:5, InsufficientData/bits>> ->
{error, {insufficient_data_for_size, map_3, bitstring_copy(InsufficientData)}};
<<?map:3, 31:5, InsufficientData/bits>> ->
{error, {insufficient_data_for_size, map_4, bitstring_copy(InsufficientData)}};
<<0:3, Size:5, ?extended_int32, _RemainingData/bytes>>
when Size > 4 ->
{error, {too_large, int32, {Size, bytes}}};
<<0:3, Size:5, ?extended_int32, InsufficientData/bytes>> ->
{error, {insufficient_data, int32, {Size, bytes}, binary:copy(InsufficientData)}};
<<0:3, Size:5, ?extended_uint64, _RemainingData/bytes>>
when Size > 8 ->
{error, {too_large, uint64, {Size, bytes}}};
<<0:3, Size:5, ?extended_uint64, InsufficientData/bytes>> ->
{error, {insufficient_data, uint64, {Size, bytes}, binary:copy(InsufficientData)}};
<<0:3, Size:5, ?extended_uint128, _RemainingData/bytes>>
when Size > 16 ->
{error, {too_large, uint128, {Size, bytes}}};
<<0:3, Size:5, ?extended_uint128, InsufficientData/bytes>> ->
{error, {insufficient_data, uint128, {Size, bytes}, binary:copy(InsufficientData)}};
<<0:3, 29:5, ?extended_array, InsufficientData/bytes>> ->
{error, {insufficient_data_for_size, array_2, binary:copy(InsufficientData)}};
<<0:3, 30:5, ?extended_array, InsufficientData/bytes>> ->
{error, {insufficient_data_for_size, array_3, binary:copy(InsufficientData)}};
<<0:3, 31:5, ?extended_array, InsufficientData/bytes>> ->
{error, {insufficient_data_for_size, array_4, binary:copy(InsufficientData)}};
<<0:3, 0:5, ?extended_end_marker, ExcessiveData/bytes>> ->
{error, {data_beyond_end_marker,
locus_util:purge_term_of_very_large_binaries(ExcessiveData)}};
<<0:3, 4:5, ?extended_float, InvalidFloat:32/bits, _RemainingData/bytes>> ->
{error, {invalid_float, binary:copy(InvalidFloat)}};
<<0:3, 4:5, ?extended_float, InsufficientData/bytes>> ->
{error, {insufficient_data, float, binary:copy(InsufficientData)}};
<<0:3, SizeTag:5, ?extended_float, _RemainingData/bytes>> ->
{error, {invalid_float_size_tag, SizeTag}};
<<0:3, _SizeTag:5, UnknownExtendedType, _RemainingData/bytes>> ->
{error, {unknown_extended_type, UnknownExtendedType}};
<<0:3, _SizeTag:5, InsufficientData/bytes>> ->
{error, {insufficient_data, binary:copy(InsufficientData)}}
end.
bitstring_copy(Bits) ->
SuffixSize = bit_size(Bits) rem 8,
PrefixSize = bit_size(Bits) - SuffixSize,
<<Prefix:PrefixSize/bits, Suffix:SuffixSize/bits>> = Bits,
CopiedPrefix = binary:copy(Prefix),
<<CopiedPrefix/bytes, Suffix/bits>>.
parser_wrapping_fun(Raw) ->
maps:get(Raw, #{true => fun tagged_value/2,
false => fun just_the_value/2}).
tagged_value(Tag, Value) ->
{Tag, Value}.
just_the_value(_Tag, Value) ->
Value.
maybe_wrap(Tag, Value, Opts) ->
(Opts#parse_opts.wrapping_fun)(Tag, Value).
%% ------------------------------------------------------------------
%% Internal Function Definitions - Validation
%% ------------------------------------------------------------------
validate_positions_in_tree_recur(Aux, CurrentOffset) ->
try locus_shared_bitarray:get_positions_set_at_cell(Aux#validation_aux.indices_in_tree,
CurrentOffset) of
Positions ->
validate_positions_batch_in_tree_recur(Aux, Positions, CurrentOffset)
catch
throw:{index_out_of_bounds, CurrentOffset} ->
% All done
ok
end.
validate_positions_batch_in_tree_recur(Aux, [Position | Next], CurrentOffset) ->
try validate_position(Aux, Position) of
ok ->
validate_positions_batch_in_tree_recur(Aux, Next, CurrentOffset)
catch
throw:controlled_validation_error ->
validate_positions_batch_in_tree_recur(Aux, Next, CurrentOffset)
end;
validate_positions_batch_in_tree_recur(Aux, [], CurrentOffset) ->
NextOffset = CurrentOffset + Aux#validation_aux.batch_size,
validate_positions_in_tree_recur(Aux, NextOffset).
validate_position(Aux, Position) ->
validate_position_if_unvisited(Aux, Position, _Path = []).
validate_position_if_unvisited(Aux, Position, Path) ->
try locus_shared_bitarray:is_set(Aux#validation_aux.visited, Position)
orelse locus_shared_bitarray:is_set(Aux#validation_aux.valid_map_keys, Position)
of
true ->
ok;
false ->
validate_position_if_not_in_loop(Aux, Position, Path)
catch
throw:{position_out_of_bounds, Position} ->
locus_mmdb_check_journal:invalid_position_in_data_section(
Aux#validation_aux.journal, Position, lists:reverse(Path)
),
throw(controlled_validation_error)
end.
validate_position_if_not_in_loop(Aux, Position, Path) ->
case lists:keymember(Position, 1, Path) of
false ->
validate_position_if_its_a_sound_location(Aux, Position, Path);
true ->
locus_mmdb_check_journal:loop_in_data_section(Aux#validation_aux.journal,
lists:reverse(Path)),
throw(controlled_validation_error)
end.
validate_position_if_its_a_sound_location(Aux, Position, Path) ->
case Aux#validation_aux.data of
<<_:Position/bytes, Chunk/bytes>> ->
validate_chunk_in_position(Aux, Position, Path, Chunk);
<<_/bytes>> ->
locus_mmdb_check_journal:invalid_position_in_data_section(
Aux#validation_aux.journal, Position, lists:reverse(Path)
),
throw(controlled_validation_error)
end.
validate_chunk_in_position(Aux, Position, Path, Chunk) ->
{ok, _RemainingData} = validate_chunk(Aux, Position, Path, Chunk),
ok.
validate_chunk(Aux, Position, Path, Chunk) ->
ParseResult = parse_chunk_head(Chunk),
validate_parsed_chunk(Aux, Position, Path, ParseResult).
validate_parsed_chunk(Aux, Position, Path, ParseResult) ->
locus_shared_bitarray:set(Aux#validation_aux.visited, Position),
case ParseResult of
{pointer, Pointer, RemainingData} ->
UpdatedPath = [{Position, {pointer, Pointer}} | Path],
validate_pointer(Aux, Pointer, RemainingData, UpdatedPath);
{utf8_string, Bytes, RemainingData} ->
validate_utf8_string(Aux, Position, Bytes, RemainingData, Path);
{map, Count, RemainingData} ->
UpdatedPath = [{Position, {map_with, Count, elements}} | Path],
validate_map(Aux, Count, RemainingData, UpdatedPath);
{array, Count, RemainingData} ->
UpdatedPath = [{Position, {array_with, Count, elements}} | Path],
validate_array(Aux, Count, RemainingData, UpdatedPath);
{_, _, RemainingData} ->
{ok, RemainingData};
{error, Reason} ->
locus_mmdb_check_journal:bad_chunk_in_data_section(
Aux#validation_aux.journal, Position, Reason, lists:reverse(Path)
),
throw(controlled_validation_error)
end.
validate_pointer(Aux, Pointer, RemainingData, Path) ->
validate_position_if_unvisited(Aux, _Position = Pointer, Path),
{ok, RemainingData}.
validate_utf8_string(Aux, Position, Bytes, DataAfter, Path) ->
case unicode:characters_to_binary(Bytes) of
<<_/bytes>> ->
_ = validate_utf8_string_printability(Aux, Position, Bytes, Path),
{ok, DataAfter};
Error ->
locus_mmdb_check_journal:invalid_utf8_string_in_data_section(
Aux#validation_aux.journal, Position,
_OriginalData = binary:copy(Bytes),
Error, lists:reverse(Path)
),
throw(controlled_validation_error)
end.
validate_utf8_string_printability(Aux, Position, Bytes, Path) ->
List = unicode:characters_to_list(Bytes),
case io_lib:printable_unicode_list(List) of
true ->
ok;
false ->
locus_mmdb_check_journal:unprintable_utf8_string_in_data_section(
Aux#validation_aux.journal, Position,
_Value = binary:copy(Bytes), lists:reverse(Path)
)
end.
validate_map(Aux, Count, RemainingData, Path) ->
validate_map_recur(Aux, Count, RemainingData, Path).
validate_map_recur(Aux, Count, RemainingData, Path)
when Count > 0 ->
DataAfterPair = validate_map_pair(Aux, RemainingData, Path),
validate_map_recur(Aux, Count - 1, DataAfterPair, Path);
validate_map_recur(_Aux, Count, RemainingData, _Path)
when Count =:= 0 ->
{ok, RemainingData}.
validate_map_pair(Aux, RemainingData, Path) ->
{ok, DataAfterKey} = validate_map_key(Aux, RemainingData, Path),
Position = byte_size(Aux#validation_aux.data) - byte_size(DataAfterKey),
validate_map_value(Aux, Position, DataAfterKey, Path).
validate_map_key(Aux, RemainingData, Path) ->
Position = byte_size(Aux#validation_aux.data) - byte_size(RemainingData),
case parse_chunk_head(RemainingData) of
{pointer, Pointer, DataAfterKey} ->
UpdatedPath = [{Position, {pointer, Pointer}} | Path],
validate_indirect_map_key_if_not_validated_before(Aux, Pointer, DataAfterKey,
UpdatedPath);
{utf8_string, Bytes, DataAfterKey} ->
validate_utf8_string(Aux, Position, Bytes, DataAfterKey, Path);
{KeyType, KeyValue, _} ->
SaferKeyValue = locus_util:purge_term_of_very_large_binaries(KeyValue),
locus_mmdb_check_journal:map_key_of_wrong_type_in_data_section(
Aux#validation_aux.journal, Position, {KeyType, SaferKeyValue},
lists:reverse(Path)
),
throw(controlled_validation_error);
{error, Reason} ->
locus_mmdb_check_journal:bad_chunk_in_data_section(Aux#validation_aux.journal,
Position, Reason,
lists:reverse(Path)),
throw(controlled_validation_error)
end.
validate_indirect_map_key_if_not_validated_before(Aux, Pointer, DataAfterKey, Path) ->
try locus_shared_bitarray:is_set(Aux#validation_aux.valid_map_keys, _Position = Pointer)
orelse validate_indirect_map_key_if_not_in_loop(Aux, Pointer, DataAfterKey, Path)
of
true ->
{ok, DataAfterKey};
{ok, _} = Success ->
locus_shared_bitarray:set(Aux#validation_aux.visited, Pointer),
locus_shared_bitarray:set(Aux#validation_aux.valid_map_keys, Pointer),
Success
catch
throw:{position_out_of_bounds, Position} ->
locus_mmdb_check_journal:invalid_position_in_data_section(
Aux#validation_aux.journal, Position, lists:reverse(Path)
),
throw(controlled_validation_error)
end.
validate_indirect_map_key_if_not_in_loop(Aux, Pointer, DataAfterKey, Path) ->
case lists:keymember(Pointer, 1, Path) of
false ->
validate_indirect_map_key_if_its_a_sound_location(Aux, Pointer, DataAfterKey, Path);
true ->
locus_mmdb_check_journal:loop_in_data_section(Aux#validation_aux.journal,
lists:reverse(Path)),
throw(controlled_validation_error)
end.
validate_indirect_map_key_if_its_a_sound_location(Aux, Pointer, DataAfterKey, Path) ->
case Aux#validation_aux.data of
<<_:Pointer/bytes, Chunk/bytes>> ->
validate_indirect_map_key_chunk(Aux, Pointer, Chunk, DataAfterKey, Path);
<<_/bytes>> ->
locus_mmdb_check_journal:invalid_position_in_data_section(
Aux#validation_aux.journal, _Position = Pointer,
lists:reverse(Path)
),
throw(controlled_validation_error)
end.
validate_indirect_map_key_chunk(Aux, Pointer, Chunk, DataAfterKey, Path) ->
case parse_chunk_head(Chunk) of
{utf8_string, Bytes, _} ->
Position = byte_size(Aux#validation_aux.data) - byte_size(Chunk),
validate_utf8_string(Aux, Position, Bytes, DataAfterKey, Path);
{pointer, NewPointer, _} ->
UpdatedPath = [{_Position = Pointer, {pointer, NewPointer}} | Path],
validate_indirect_map_key_if_not_validated_before(Aux, NewPointer,
DataAfterKey, UpdatedPath);
{KeyType, KeyValue, _} ->
Position = byte_size(Aux#validation_aux.data) - byte_size(Chunk),
SaferKeyValue = locus_util:purge_term_of_very_large_binaries(KeyValue),
locus_mmdb_check_journal:map_key_of_wrong_type_in_data_section(
Aux#validation_aux.journal, Position, {KeyType, SaferKeyValue},
lists:reverse(Path)
),
throw(controlled_validation_error);
{error, Reason} ->
Position = byte_size(Aux#validation_aux.data) - byte_size(Chunk),
locus_mmdb_check_journal:bad_chunk_in_data_section(Aux#validation_aux.journal,
Position, Reason,
lists:reverse(Path)),
throw(controlled_validation_error)
end.
validate_map_value(Aux, Position, DataAfterKey, Path) ->
case parse_chunk_head(DataAfterKey) of
{pointer, Pointer, DataAfterValue} ->
UpdatedPath = [{Position, {pointer, Pointer}} | Path],
validate_indirect_map_value(Aux, Pointer, DataAfterValue, UpdatedPath);
ParseResult ->
validate_direct_map_value(Aux, Position, ParseResult, Path)
end.
validate_indirect_map_value(Aux, Pointer, DataAfterValue, Path) ->
ok = validate_position_if_unvisited(Aux, _Position = Pointer, Path),
DataAfterValue.
validate_direct_map_value(Aux, Position, {Type, _, DataAfterValue} = ParseResult, Path) ->
CanSkip = not is_map_key(Type, #{array => [], map => []}),
try (CanSkip andalso locus_shared_bitarray:is_set(Aux#validation_aux.visited, Position))
orelse validate_parsed_chunk(Aux, Position, Path, ParseResult)
of
true ->
DataAfterValue;
{ok, RemainingData} ->
RemainingData
catch
throw:{position_out_of_bounds, Position} ->
locus_mmdb_check_journal:invalid_position_in_data_section(
Aux#validation_aux.journal, Position, lists:reverse(Path)
),
throw(controlled_validation_error)
end;
validate_direct_map_value(Aux, Position, {error, Reason}, Path) ->
locus_mmdb_check_journal:bad_chunk_in_data_section(Aux#validation_aux.journal,
Position, Reason,
lists:reverse(Path)),
throw(controlled_validation_error).
validate_array(Aux, Count, RemainingData, Path) ->
validate_array_recur(Aux, Count, RemainingData, Path).
validate_array_recur(Aux, Count, RemainingData, Path)
when Count > 0 ->
case validate_array_value(Aux, RemainingData, Path) of
{ok, DataAfterValue} ->
validate_array_recur(Aux, Count - 1, DataAfterValue, Path)
end;
validate_array_recur(_Aux, Count, RemainingData, _Path)
when Count =:= 0 ->
{ok, RemainingData}.
validate_array_value(Aux, RemainingData, Path) ->
Position = byte_size(Aux#validation_aux.data) - byte_size(RemainingData),
case parse_chunk_head(RemainingData) of
{pointer, Pointer, DataAfterValue} ->
UpdatedPath = [{Position, {pointer, Pointer}} | Path],
validate_indirect_array_value(Aux, Pointer, DataAfterValue, UpdatedPath);
ParseResult ->
validate_direct_array_value(Aux, Position, ParseResult, Path)
end.
validate_indirect_array_value(Aux, Pointer, DataAfterValue, Path) ->
ok = validate_position_if_unvisited(Aux, _Position = Pointer, Path),
{ok, DataAfterValue}.
validate_direct_array_value(Aux, Position, {Type, _, DataAfterValue} = ParseResult, Path) ->
CanSkip = not is_map_key(Type, #{array => [], map => []}),
try (CanSkip andalso locus_shared_bitarray:is_set(Aux#validation_aux.visited, Position))
orelse validate_parsed_chunk(Aux, Position, Path, ParseResult)
of
true ->
{ok, DataAfterValue};
{ok, RemainingData} ->
{ok, RemainingData}
catch
throw:{position_out_of_bounds, Position} ->
locus_mmdb_check_journal:invalid_position_in_data_section(
Aux#validation_aux.journal, Position, lists:reverse(Path)
),
throw(controlled_validation_error)
end;
validate_direct_array_value(Aux, Position, {error, Reason}, Path) ->
locus_mmdb_check_journal:bad_chunk_in_data_section(Aux#validation_aux.journal,
Position, Reason,
lists:reverse(Path)),
throw(controlled_validation_error).
%% ------------------------------------------------------------------
%% Internal Function Definitions - Debugging
%% ------------------------------------------------------------------
parse_all_recur(<<>>, _FullData, _Opts, Acc) ->
lists:reverse(Acc);
parse_all_recur(Chunk, FullData, Opts, Acc) ->
try parse_chunk(Chunk, FullData, Opts, _Path = []) of
{Value, RemainingData} ->
UpdatedAcc = [Value | Acc],
parse_all_recur(RemainingData, FullData, Opts, UpdatedAcc)
catch
Class:Reason:Stacktrace ->
SaferReason = locus_util:purge_term_of_very_large_binaries(Reason),
SaferStacktrace = locus_util:purge_term_of_very_large_binaries(Stacktrace),
erlang:raise(Class, SaferReason, SaferStacktrace)
end.