/*
* murmur_nif -- Erlang NIF wrapper around MurmurHash3 (x64_128) and the
* Cassandra-compatible signed-byte variant used by Cassandra/Scylla's
* partitioner.
*
* API:
* murmur_nif:murmur3_x64_128(Binary) -> <<H1:64, H2:64>>.
* murmur_nif:murmur3_cassandra_x64_128(Binary) -> <<H1:64, H2:64>>.
*
* Both functions return a fixed 16-byte binary. The hash uses seed 0
* (Cassandra's default; not configurable here -- callers that need a
* different seed can vendor murmur3 themselves).
*
* Implementation notes:
* - Raw NIF (no rustler / no enif wrapper crate), same approach as
* lz4_nif and knot. Hash dispatch is the entire cost; rustler adds
* ~20 ns per call we don't want.
* - Inputs over MURMUR_NIF_DIRTY_THRESHOLD bytes (20 KB) are dispatched
* to a dirty CPU scheduler. In practice hash inputs are small (token
* routing on partition keys, generally tens to hundreds of bytes),
* but the threshold is there for safety on larger inputs.
* - Inline path reports its cost to the scheduler via
* enif_consume_timeslice, proportional to bytes processed. Cost model:
* ~500 bytes/reduction (calibrated for ~5 GB/s hash throughput),
* 4000-reduction timeslice. Conservative; actual rate is higher.
*
* License: MIT (wrapper). The vendored MurmurHash3 in c_src/murmur3/
* is public domain by Austin Appleby.
*/
#include <erl_nif.h>
#include <limits.h>
#include <string.h>

#include "murmur3/murmur3.h"
#define MURMUR_NIF_DIRTY_THRESHOLD (20 * 1024)
#define MURMUR_NIF_BYTES_PER_REDUCTION 500
#define MURMUR_NIF_REDUCTION_COUNT 4000
static ERL_NIF_TERM atom_badarg;
/* NIF load hook: runs once when the module is loaded. No NIF-private
 * state is allocated, so priv_data is left untouched. Returns 0 to tell
 * the VM the load succeeded. */
static int load(ErlNifEnv *env, void **priv_data, ERL_NIF_TERM load_info) {
    (void)load_info;
    (void)priv_data;
    /* NOTE(review): atom_badarg is created here but no call path in this
     * file reads it (errors go through enif_make_badarg, which builds its
     * own exception term). Kept as-is to preserve behavior; consider
     * removing the global. */
    atom_badarg = enif_make_atom(env, "badarg");
    return 0;
}
/* Convert an input size into the percentage (1..100) of the scheduler
 * timeslice this call consumed, for enif_consume_timeslice(). Cost model:
 * one reduction per MURMUR_NIF_BYTES_PER_REDUCTION bytes, measured
 * against a MURMUR_NIF_REDUCTION_COUNT-reduction timeslice. */
static inline int
timeslice_percent(size_t bytes) {
    size_t reductions = bytes / MURMUR_NIF_BYTES_PER_REDUCTION;
    long percent = (long)((reductions * 100) / MURMUR_NIF_REDUCTION_COUNT);
    if (percent < 1) {
        return 1; /* enif_consume_timeslice requires a value in 1..100 */
    }
    return (percent > 100) ? 100 : (int)percent;
}
/* ---- standard MurmurHash3 x64_128 ----------------------------------- */
/* Hash the binary in argv[0] with standard MurmurHash3 x64_128, seed 0,
 * and return the 16-byte digest as a freshly allocated binary term.
 * Runs either inline on a regular scheduler or as a dirty-CPU job (see
 * nif_murmur3_x64_128); it re-inspects its argument in both cases, so it
 * is self-contained. Raises badarg if argv[0] is not a binary or is too
 * large to hash. */
static ERL_NIF_TERM
murmur3_x64_128_impl(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
    (void)argc;
    ErlNifBinary in;
    if (!enif_inspect_binary(env, argv[0], &in)) {
        return enif_make_badarg(env);
    }
    /* MurmurHash3_x64_128 takes the length as int; a binary larger than
     * INT_MAX bytes would overflow the cast below (undefined behavior /
     * wrong hash). Reject it explicitly instead. */
    if (in.size > (size_t)INT_MAX) {
        return enif_make_badarg(env);
    }
    ERL_NIF_TERM out_term;
    unsigned char *out = enif_make_new_binary(env, 16, &out_term);
    MurmurHash3_x64_128(in.data, (int)in.size, 0, out);
    /* Report the work done so the scheduler can account for the cost. */
    enif_consume_timeslice(env, timeslice_percent(in.size));
    return out_term;
}
/* Entry point for murmur_nif:murmur3_x64_128/1. Small inputs are hashed
 * inline; inputs above MURMUR_NIF_DIRTY_THRESHOLD bytes are re-scheduled
 * onto a dirty CPU scheduler so they cannot stall a normal scheduler. */
static ERL_NIF_TERM
nif_murmur3_x64_128(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
    ErlNifBinary bin;
    if (!enif_inspect_binary(env, argv[0], &bin)) {
        return enif_make_badarg(env);
    }
    if (bin.size <= MURMUR_NIF_DIRTY_THRESHOLD) {
        /* Common case: token-routing keys are tiny; hash right here. */
        return murmur3_x64_128_impl(env, argc, argv);
    }
    return enif_schedule_nif(env, "murmur3_x64_128_dirty",
                             ERL_NIF_DIRTY_JOB_CPU_BOUND,
                             murmur3_x64_128_impl, argc, argv);
}
/* ---- Cassandra-compatible variant ----------------------------------- */
/* Hash the binary in argv[0] with the Cassandra-compatible MurmurHash3
 * x64_128 variant (signed-byte arithmetic, seed 0) and return the
 * 16-byte digest as a freshly allocated binary term. Runs either inline
 * or as a dirty-CPU job (see nif_murmur3_cassandra_x64_128); it
 * re-inspects its argument in both cases. Raises badarg if argv[0] is
 * not a binary or is too large to hash. */
static ERL_NIF_TERM
murmur3_cassandra_x64_128_impl(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
    (void)argc;
    ErlNifBinary in;
    if (!enif_inspect_binary(env, argv[0], &in)) {
        return enif_make_badarg(env);
    }
    /* The hash takes the length as int; a binary larger than INT_MAX
     * bytes would overflow the cast below (undefined behavior / wrong
     * hash). Reject it explicitly instead. */
    if (in.size > (size_t)INT_MAX) {
        return enif_make_badarg(env);
    }
    ERL_NIF_TERM out_term;
    unsigned char *out = enif_make_new_binary(env, 16, &out_term);
    MurmurHash3_Cassandra_x64_128(in.data, (int)in.size, 0, out);
    /* Report the work done so the scheduler can account for the cost. */
    enif_consume_timeslice(env, timeslice_percent(in.size));
    return out_term;
}
/* Entry point for murmur_nif:murmur3_cassandra_x64_128/1. Small inputs
 * are hashed inline; inputs above MURMUR_NIF_DIRTY_THRESHOLD bytes are
 * re-scheduled onto a dirty CPU scheduler. */
static ERL_NIF_TERM
nif_murmur3_cassandra_x64_128(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
    ErlNifBinary bin;
    if (!enif_inspect_binary(env, argv[0], &bin)) {
        return enif_make_badarg(env);
    }
    if (bin.size <= MURMUR_NIF_DIRTY_THRESHOLD) {
        /* Common case: partition-key inputs are tiny; hash right here. */
        return murmur3_cassandra_x64_128_impl(env, argc, argv);
    }
    return enif_schedule_nif(env, "murmur3_cassandra_x64_128_dirty",
                             ERL_NIF_DIRTY_JOB_CPU_BOUND,
                             murmur3_cassandra_x64_128_impl, argc, argv);
}
/* ---- entry table ---------------------------------------------------- */
/* Exported NIFs: {erlang_name, arity, c_function, flags}. Flags are 0
 * (regular scheduler) on purpose: the wrappers re-dispatch oversized
 * inputs to a dirty scheduler themselves, so the common small-input case
 * avoids the dirty-scheduler hop entirely. */
static ErlNifFunc nif_functions[] = {
{"murmur3_x64_128", 1, nif_murmur3_x64_128, 0},
{"murmur3_cassandra_x64_128", 1, nif_murmur3_cassandra_x64_128, 0}
};
/* Register module murmur_nif with only a load hook; upgrade/unload/reload
 * are NULL since there is no NIF-private state to migrate or free. */
ERL_NIF_INIT(murmur_nif, nif_functions, load, NULL, NULL, NULL)