%%%----------------------------------------------------------------------------
%%% @doc Fast decoding of JSON using simdjson C++ library.
%%%
%%% By default JSON decoder uses the atom `null' to represent JSON nulls.
%%% To modify this behavior, set the following configuration option to another
%%% atom value (e.g. `nil' for Elixir):
%%% ```
%%% {simdjsone, [{null, nil}]}.
%%% '''
%%%
%%% See also [https://github.com/simdjson/simdjson]
%%% @end
%%%----------------------------------------------------------------------------
-module(simdjson).
-export([decode/1, decode/2, parse/1, get/2, get/3, minify/1, encode/1, encode/2]).
-export([int_to_bin/1]).
-compile({no_auto_import, [get/2]}).
-on_load(init/0).
-define(LIBNAME, simdjsone).
-define(NOT_LOADED_ERROR,
erlang:nif_error({not_loaded, [{module, ?MODULE}, {line, ?LINE}]})).
-type decode_opt() ::
return_maps |
object_as_tuple |
dedupe_keys |
use_nil |
{null_term, atom()}.
-type decode_opts() :: [decode_opt()].
%% Decode options (accepted by `decode/2' and `get/3'):
%% <ul>
%% <li>`return_maps' - decode JSON object as map</li>
%% <li>`object_as_tuple' - decode JSON object as a proplist wrapped in a tuple</li>
%% <li>`dedupe_keys' - eliminate duplicate keys from a JSON object</li>
%% <li>`use_nil' - decode JSON "null" as `nil'</li>
%% <li>`{null_term, V}' - use the given value `V' for a JSON "null"</li>
%% </ul>
-type encode_opt() ::
uescape |
pretty |
force_utf8 |
use_nil |
escape_forward_slashes |
{bytes_per_red, non_neg_integer()}.
-type encode_opts() :: [encode_opt()].
%% Encode options (accepted by `encode/2'):
%% <ul>
%% <li>`uescape' - escape UTF-8 sequences to produce a 7-bit clean output</li>
%% <li>`pretty' - return JSON using two-space indentation</li>
%% <li>`force_utf8' - when encoding fails because of an invalid string or an
%% invalid object member key, repair the input with `simdjson_utf8:fix/1' and
%% retry the encoding without this option</li>
%% <li>`use_nil' - encode the atom `nil' as `null'</li>
%% <li>`escape_forward_slashes' - escape the `/' character (useful when encoding
%% URLs). NOTE(review): this entry used to be documented as `escape_fwd_slash'
%% while the type above declares `escape_forward_slashes' - confirm which
%% spelling the NIF actually accepts</li>
%% <li>`{bytes_per_red, N}' - where `N' >= 0 - This controls the number of bytes
%% that the encoder will process as an equivalent to a reduction. Each 20
%% reductions we consume 1% of our allocated time slice for the current process.
%% When the Erlang VM indicates that we need to return from the NIF, encoding is
%% suspended and continued by a follow-up NIF call (see the `iter' result
%% handled in `encode_loop/2').</li>
%% </ul>
-export_type([decode_opts/0, encode_opts/0]).
-ifdef(TEST).
-export([benchmark/1, benchmark/2]).
-include_lib("eunit/include/eunit.hrl").
-endif.
%% @private
%% `-on_load' hook: loads the NIF shared object and hands it the atom that
%% represents JSON null (application env `simdjsone'/`null', default `null').
%% Raises an error when the configured value is not an atom.
init() ->
    NullTerm = application:get_env(simdjsone, null, null),
    is_atom(NullTerm) orelse erlang:error("Option simdjsone/null must be an atom"),
    erlang:load_nif(nif_so_path(code:priv_dir(?LIBNAME)), [{null, NullTerm}]).

%% Build the path of the NIF library from the result of `code:priv_dir/1'.
%% When the application directory is unknown, fall back to the `priv'
%% directory next to the directory holding this module's beam file, and as a
%% last resort to the relative `../priv'.
nif_so_path({error, bad_name}) ->
    case code:which(?MODULE) of
        BeamFile when is_list(BeamFile) ->
            AppDir = filename:dirname(filename:dirname(BeamFile)),
            filename:join([AppDir, "priv", ?LIBNAME]);
        _ ->
            filename:join("../priv", ?LIBNAME)
    end;
nif_so_path(PrivDir) ->
    filename:join(PrivDir, ?LIBNAME).
%% @doc Decode JSON into an Erlang term using the default options.
%% The input is a JSON document given as a binary or a string; the spec also
%% admits a reference (presumably one returned by `parse/1' - TODO confirm).
%% This Erlang body is only a placeholder that is replaced when the NIF loads.
-spec decode(binary() | list() | reference()) -> term().
decode(_Input) ->
    ?NOT_LOADED_ERROR.
%% @doc Decode JSON into an Erlang term, honouring the given `decode_opts()'.
%% Takes the same input as `decode/1'.
%% This Erlang body is only a placeholder that is replaced when the NIF loads.
-spec decode(binary() | list() | reference(), decode_opts()) -> term().
decode(_Input, _DecodeOpts) ->
    ?NOT_LOADED_ERROR.
%% @doc Parse a JSON string or binary and save it in a resource for later
%% access by `get/2' and `get/3'.
%% Returns a resource reference owned by the calling pid.
%%
%% The spec admits strings as well as binaries: the doc above and the module's
%% own tests both use string input, matching what `decode/1' declares.
%% This Erlang body is only a placeholder that is replaced when the NIF loads.
-spec parse(binary() | list()) -> reference().
parse(_BinOrStr) ->
    ?NOT_LOADED_ERROR.
%% @doc Find a given `Path' (which must start with a slash) in the JSON resource.
%% The resource reference must have been previously created by calling
%% `parse/1'. `Path' may be a binary or a string (the module's tests use
%% string paths such as `"/c/c"').
%%
%% The placeholder no longer guards on `is_binary(Path)': the guard only
%% affected the not-loaded fallback and contradicted the accepted input.
-spec get(reference(), binary() | list()) -> term().
get(_Ref, _Path) ->
    ?NOT_LOADED_ERROR.
%% @doc Find a given `Path' (which must start with a slash) in the JSON resource
%% and decode the value found there using the given `decode_opts()'.
%% The resource reference must have been previously created by calling
%% `parse/1'. `Path' may be a binary or a string, as for `get/2'.
%%
%% The placeholder no longer guards on `is_binary(Path)': the guard only
%% affected the not-loaded fallback and contradicted the accepted input.
-spec get(reference(), binary() | list(), decode_opts()) -> term().
get(_Ref, _Path, _Opts) ->
    ?NOT_LOADED_ERROR.
%% @doc Strip insignificant whitespace from a JSON document given as a string
%% or binary. Returns `{ok, Minified}' or `{error, Reason}'.
%% This Erlang body is only a placeholder that is replaced when the NIF loads.
-spec minify(binary() | list()) -> {ok, binary()} | {error, binary()}.
minify(_Json) ->
    ?NOT_LOADED_ERROR.
%% @doc Encode an Erlang term as JSON using no options.
%% Shorthand for `encode(Data, [])'.
-spec encode(term()) -> iodata().
encode(Term) ->
    encode(Term, []).
%% @doc Encode an Erlang term as JSON, honouring the given `encode_opts()'.
%% Raises an error with the encoder's reason when the term cannot be encoded.
-spec encode(term(), encode_opts()) -> iodata().
encode(Data, Opts) ->
    RetryOnBadUTF8 = lists:member(force_utf8, Opts),
    %% The first step starts the NIF encoder; encode_loop/2 drives it from there.
    FirstStep = fun() -> encode_init(Data, Opts) end,
    encode_loop(FirstStep, {RetryOnBadUTF8, Data, Opts}).
%% Run one encoder step (`Step' is a zero-arity fun) and act on its result:
%%   * invalid string / object key with `force_utf8' set: repair the input
%%     with simdjson_utf8:fix/1 and restart without `force_utf8';
%%   * any other `{error, Reason}': raise `Reason';
%%   * `{partial, Pieces}': finalise the pieces with finish_encode/2;
%%   * `{iter, ...}': the encoder wants to continue, so loop with encode_iter/3;
%%   * a single-binary list: return the binary itself;
%%   * any other list: it is in reverse order, so reverse it.
encode_loop(Step, {ForceUTF8, Data, Opts} = State) ->
    case Step() of
        {error, {Kind, _}} when ForceUTF8, Kind =:= invalid_string;
                                ForceUTF8, Kind =:= invalid_object_member_key ->
            encode(simdjson_utf8:fix(Data), Opts -- [force_utf8]);
        {error, Reason} ->
            error(Reason);
        {partial, Pieces} ->
            finish_encode(Pieces, []);
        {iter, {Encoder, Stack, IOBuf}} ->
            Resume = fun() -> encode_iter(Encoder, Stack, IOBuf) end,
            encode_loop(Resume, State);
        [Bin] when is_binary(Bin) ->
            Bin;
        Reversed when is_list(Reversed) ->
            lists:reverse(Reversed)
    end.
%% NIF placeholder: starts encoding of a term with the given options.
%% Its possible results are the ones handled by encode_loop/2.
encode_init(_Term, _EncodeOpts) ->
    ?NOT_LOADED_ERROR.
%% NIF placeholder: continues an encoding that previously returned
%% `{iter, {Encoder, Stack, IOBuf}}' (see encode_loop/2).
encode_iter(_EncoderState, _PendingStack, _OutBuf) ->
    ?NOT_LOADED_ERROR.
%% Finalise a `{partial, Pieces}' encoder result.
%% Binaries are kept as they are and integers are rendered as decimal text.
%% The pieces arrive in reverse order, so pushing each one onto the
%% accumulator restores the intended order and no final reverse is needed.
%% Raises `{invalid_ejson, Piece}' for any other piece, and `invalid_ejson'
%% when the input is not a proper list.
finish_encode([], Done) ->
    Done;
finish_encode([Piece | Rest], Done) when is_binary(Piece) ->
    finish_encode(Rest, [Piece | Done]);
finish_encode([Int | Rest], Done) when is_integer(Int) ->
    finish_encode(Rest, [integer_to_binary(Int) | Done]);
finish_encode([Other | _], _Done) ->
    error({invalid_ejson, Other});
finish_encode(_NotAList, _Done) ->
    error(invalid_ejson).
%% @doc Convert an integer to its decimal text representation as a binary.
%% NIF-backed fast path; the module's tests compare it against
%% `integer_to_binary/1' for random values up to `1 bsl 60'.
%% This Erlang body is only a placeholder that is replaced when the NIF loads.
-spec int_to_bin(integer()) -> binary().
int_to_bin(_Integer) ->
    ?NOT_LOADED_ERROR.
%%%----------------------------------------------------------------------------
%%% TEST
%%%----------------------------------------------------------------------------
-ifdef(EUNIT).
%% Encoder smoke tests: null handling (with and without `use_nil') followed by
%% maps and lists.
encode_test_() ->
    NullCases = [
        ?_assertEqual(<<"null">>, encode(null)),
        ?_assertEqual(<<"null">>, encode(null, [use_nil])),
        ?_assertEqual(<<"null">>, encode(nil, [use_nil]))
    ],
    ContainerCases = [
        ?_assertEqual(<<"{\"a\":1}">>, encode(#{a => 1})),
        ?_assertEqual(<<"[1000,\"a\"]">>, encode([1000, <<"a">>])),
        ?_assertEqual(<<"[]">>, encode([]))
    ],
    %% The trailing no-op test is kept from the original test list.
    NullCases ++ ContainerCases ++ [fun() -> ok end].
%% Decoder tests: scalars, big integers, containers, duplicate-key handling and
%% the different representations of JSON null.
decode_test_() ->
[
%% Scalars and the empty array
?_assertEqual([], decode("[]")),
?_assertEqual(null, decode("null")),
?_assertEqual(1, decode("1")),
?_assertEqual(1.0, decode("1.0")),
?_assertEqual(<<"abc">>, decode("\"abc\"")),
%% Integers that do not fit into 64 bits (the second input contains a line
%% break between the two array elements)
?_assertEqual(12345678901234567890123, decode("12345678901234567890123")),
?_assertEqual([12312345123412341341234134,
234542345243524524352435243], simdjson:decode("[12312345123412341341234134,
234542345243524524352435243]")),
%% Small Big integer decoding tests
?_assertEqual(binary_to_integer(binary:copy(<<"1">>, 615)),
decode(binary:copy(<<"1">>, 615))),
?_assertEqual(binary_to_integer(<<"-", (binary:copy(<<"1">>, 615))/binary>>),
decode(<<"-", (binary:copy(<<"1">>, 615))/binary>>)),
%% Large Big integer decoding tests
?_assertEqual(binary_to_integer(binary:copy(<<"9">>, 6000)),
decode(binary:copy(<<"9">>, 6000))),
%% Arrays and objects: objects decode to maps unless `object_as_tuple' is given
?_assertEqual([1,2,3], decode("[1,2,3]")),
?_assertEqual(#{<<"a">> => 1,<<"b">> => 2}, decode("{\"a\": 1, \"b\": 2}")),
?_assertEqual({[{<<"a">>, 1},{<<"b">>, 2}]}, decode("{\"a\": 1, \"b\": 2}", [object_as_tuple])),
%% Duplicate keys: kept in the proplist form, reduced to the first occurrence
%% with `dedupe_keys', and an error for the default map form
?_assertEqual({[{<<"a">>, 1},{<<"a">>, 2}]}, decode("{\"a\": 1, \"a\": 2}", [object_as_tuple])),
?_assertEqual({[{<<"a">>, 1}]}, decode("{\"a\": 1, \"a\": 2}", [object_as_tuple, dedupe_keys])),
?_assertEqual(#{<<"a">> => 1}, decode("{\"a\": 1, \"a\": 2}", [dedupe_keys])),
?_assertException(error, dup_keys_found, decode("{\"a\": 1, \"a\": 2}")),
%% JSON null: default atom, `use_nil' and a custom `null_term'
?_assertEqual(null, decode("null")),
?_assertEqual(nil, decode("null", [use_nil])),
?_assertEqual(null_atom, decode("null", [{null_term, null_atom}]))
].
%% Parse a document once in the fixture's setup and look up nested values in
%% the resulting resource by path.
cached_decode_test_() ->
    Setup =
        fun() ->
            parse("{\"a\": 1, \"b\": 2, \"c\": {\"a\": 1, \"b\": 2, \"c\": [1,2,3]}}")
        end,
    Lookups =
        fun(Doc) ->
            [
                ?_assertEqual([1,2,3], get(Doc, "/c/c")),
                ?_assertEqual(1, get(Doc, "/c/c/0"))
            ]
        end,
    {setup, Setup, Lookups}.
%% The setup parses a document, then waits for a handshake with a helper
%% process before handing the resource to the test, which looks up a path in it.
%% An alternative expectation that the lookup fails with `badarg'
%% (`?_assertException(error, badarg, get(Ref, "/c/c"))') is currently disabled.
cached_decode_ownership_test_() ->
    Setup =
        fun() ->
            Peer = spawn(fun() -> receive {ready, From} -> From ! done end end),
            Doc = parse("{\"a\": 1, \"b\": 2, \"c\": {\"a\": 1, \"b\": 2, \"c\": [1,2,3]}}"),
            Peer ! {ready, self()},
            receive
                done -> Doc
            end
        end,
    Lookup =
        fun(Doc) ->
            ?_assertEqual([1,2,3], get(Doc, "/c/c"))
        end,
    {setup, Setup, Lookup}.
%% Table-driven minifier tests: each row is {ExpectedOutput, Input}.
minify_test_() ->
    Cases = [
        {<<"[1,2,3]">>, "[ 1, 2, 3 ]"},
        {<<"[{\"a\":true,\"b\":false},2,3]">>, "[ {\"a\": true, \"b\": false}, 2, 3 ]"}
    ],
    [?_assertEqual({ok, Expected}, minify(Input)) || {Expected, Input} <- Cases].
%% Cross-check int_to_bin/1 against integer_to_binary/1 on one million random
%% integers in 1..(1 bsl 60); the list of mismatching values must be empty.
int_to_bin_test_() ->
    [
        ?_assertEqual([], int_to_bin_mismatches(1000000, []))
    ].

%% Collect the random samples for which both conversions disagree.
int_to_bin_mismatches(0, Mismatches) ->
    Mismatches;
int_to_bin_mismatches(Remaining, Mismatches) ->
    Sample = rand:uniform(1 bsl 60),
    Expected = integer_to_binary(Sample),
    case int_to_bin(Sample) of
        Expected -> int_to_bin_mismatches(Remaining - 1, Mismatches);
        _ -> int_to_bin_mismatches(Remaining - 1, [Sample | Mismatches])
    end.
%% Run the benchmark as part of the test suite, except when the environment
%% variable MIX_ENV is "test", in which case a trivial passing test is returned.
benchmark_test_() ->
    benchmark_case(os:getenv("MIX_ENV")).

benchmark_case("test") ->
    ?_assert(true);
benchmark_case(_Other) ->
    ?_assertEqual(ok, benchmark([])).
%% Benchmark all decoders against the three sample documents located under
%% `test/data' of the current working directory. `NameFuns' is a list of extra
%% `{Name, Fun}' decoders to include. Returns `ok'.
benchmark(NameFuns) ->
    {ok, Cwd} = file:get_cwd(),
    Samples = ["test/data/twitter.json", "test/data/esad.json", "test/data/small.json"],
    lists:foreach(
        fun(RelPath) -> benchmark(filename:join(Cwd, RelPath), NameFuns) end,
        Samples),
    ok.
%% Benchmark all decoders on the contents of `File', running each decoder
%% 100 times. Crashes with a badmatch when the file cannot be read.
benchmark(File, NameFuns) ->
    Iterations = 100,
    {ok, Contents} = file:read_file(File),
    benchmark(Iterations, Contents, NameFuns).
%% Run every decoder `N' times over `Bin', each in its own process, and print
%% the average time per call sorted from fastest to slowest.
%% `NameFuns' is a list of additional `{Name, fun((binary()) -> term())}'
%% decoders. Returns `ok'; crashes with a badmatch when not every decoder
%% reported a result within 15 seconds.
benchmark(N, Bin, NameFuns) ->
    %% NOTE(review): presumably re-parents this process's I/O so that the report
    %% is not swallowed by EUnit's output capture - confirm.
    erlang:group_leader(whereis(init), self()),
    io:format("\n=== Benchmark (file size: ~.1fK) ===\n", [byte_size(Bin) / 1024]),
    Parent = self(),
    %% Results are tagged with a fresh reference so that unrelated messages in
    %% the mailbox (or late results of an earlier, timed-out run) are never
    %% mistaken for a result of this run.
    Tag = make_ref(),
    Decoders = [
        {"simdjsone", fun(B) -> simdjson:decode(B) end},
        {"jiffy", fun(B) -> jiffy:decode(B, [return_maps]) end},
        {"json", fun(B) -> json:decode(B) end},
        {"thoas", fun(B) -> {ok, R} = thoas:decode(B), R end},
        {"euneus", fun(B) -> {ok, R} = euneus:decode(B), R end}
    ] ++ NameFuns,
    lists:foreach(
        fun({Name, Fun}) ->
            spawn(fun() -> Parent ! {Tag, Name, tc(N, fun() -> Fun(Bin) end)} end)
        end,
        Decoders),
    K = length(Decoders),
    Replies = [receive
                   {Tag, Name, Timing} -> {ok, {Name, Timing}}
               after 15000 ->
                   {error, timeout}
               end || _ <- Decoders],
    Sorted = lists:sort(fun({_, {T1, _}}, {_, {T2, _}}) -> T1 =< T2 end,
                        [X || {ok, X} <- Replies]),
    _ = [print(Nm, {T, S}) || {Nm, {T, S}} <- Sorted],
    %% Every decoder must have reported before the timeout.
    K = length(Sorted),
    ok.
%% Print one benchmark line: the decoder name and its average time in
%% microseconds. Unless the environment variable DEBUG is unset or "0", the
%% first 60 characters of the decoder's printed result are appended.
print(Fmt, {T, R}) ->
    print_line(os:getenv("DEBUG"), Fmt, T, R).

print_line(Debug, Name, Micros, _Sample) when Debug == false; Debug == "0" ->
    io:format("~12s: ~10.3fus\n", [Name, Micros]);
print_line(_Debug, Name, Micros, Sample) ->
    Rendered = lists:flatten(io_lib:format("~1024p", [Sample])),
    io:format("~12s: ~10.3fus | Sample output: ~s\n",
              [Name, Micros, string:substr(Rendered, 1, 60)]).
%% Call `F' `N' times in a fresh process and return `{AvgMicros, LastResult}',
%% where `AvgMicros' is the average duration of one call in microseconds
%% (a float) and `LastResult' is the outcome of the final call.
%% `N' must be a positive integer.
%%
%% Durations are taken from the monotonic clock, because the wall clock
%% (`erlang:system_time/1') may jump while the measurement is running.
tc(N, F) when is_integer(N), N > 0 ->
    time_it(fun() -> exit(call(N, N, F, erlang:monotonic_time(microsecond))) end).

%% Run `F' in a separate process with a large initial heap and return the
%% reason that process exits with. The monitor is created atomically with the
%% process, so the exit reason cannot be lost to a `noproc' notification when
%% the process finishes before a separate monitor call would have run.
time_it(F) ->
    {_Pid, MRef} = spawn_opt(F, [monitor, {min_heap_size, 16384}]),
    receive
        {'DOWN', MRef, process, _, Result} -> Result
    end.

%% Invoke `F' the remaining number of times; `X' is the total call count and
%% `Time1' the start timestamp. Failures of `F' are caught on purpose so that
%% a decoder that crashes (for instance because its library is missing) is
%% still timed and its failure shows up as the sample output.
call(1, X, F, Time1) ->
    Res = (catch F()),
    return(X, Res, Time1, erlang:monotonic_time(microsecond));
call(N, X, F, Time1) ->
    _ = (catch F()),
    call(N - 1, X, F, Time1).

%% Turn the elapsed time into the average per call.
return(N, Res, Time1, Time2) ->
    Elapsed = Time2 - Time1,
    {Elapsed / N, Res}.
-endif.