/* c_src/murmur_nif.c */

/*
 * murmur_nif -- Erlang NIF wrapper around MurmurHash3 (x64_128) and the
 * Cassandra-compatible signed-byte variant used by Cassandra/Scylla's
 * partitioner.
 *
 * API:
 *   murmur_nif:murmur3_x64_128(Binary)             -> <<H1:64, H2:64>>.
 *   murmur_nif:murmur3_cassandra_x64_128(Binary)   -> <<H1:64, H2:64>>.
 *
 * Both functions return a fixed 16-byte binary. The hash uses seed 0
 * (Cassandra's default; not configurable here -- callers that need a
 * different seed can vendor murmur3 themselves).
 *
 * Implementation notes:
 * - Raw NIF (no rustler / no enif wrapper crate), same approach as
 *   lz4_nif and knot. Hash dispatch is the entire cost; rustler adds
 *   ~20 ns per call we don't want.
 * - Inputs over MURMUR_NIF_DIRTY_THRESHOLD bytes (20 KB) are dispatched
 *   to a dirty CPU scheduler. In practice hash inputs are small (token
 *   routing on partition keys, generally tens to hundreds of bytes),
 *   but the threshold is there for safety on larger inputs.
 * - Inline path reports its cost to the scheduler via
 *   enif_consume_timeslice, proportional to bytes processed. Cost model:
 *   ~500 bytes/reduction (calibrated for ~5 GB/s hash throughput),
 *   4000-reduction timeslice. Conservative; actual rate is higher.
 *
 * License: MIT (wrapper). The vendored MurmurHash3 in c_src/murmur3/
 * is public domain by Austin Appleby.
 */

#include <erl_nif.h>
#include <limits.h>
#include <string.h>

#include "murmur3/murmur3.h"

/* Inputs strictly larger than this many bytes (20 KB) are rescheduled onto
 * a dirty CPU scheduler by the nif_* entry points instead of being hashed
 * inline. */
#define MURMUR_NIF_DIRTY_THRESHOLD     (20 * 1024)
/* Cost model used by timeslice_percent(): bytes hashed per reduction. */
#define MURMUR_NIF_BYTES_PER_REDUCTION 500
/* Cost model used by timeslice_percent(): number of reductions that is
 * treated as 100% of a timeslice. */
#define MURMUR_NIF_REDUCTION_COUNT     4000

/* The 'badarg' atom, created once in load(). No other use of it is visible
 * in this file; error returns go through enif_make_badarg instead. */
static ERL_NIF_TERM atom_badarg;

/*
 * NIF load callback (registered through ERL_NIF_INIT).
 *
 * Creates the 'badarg' atom and stores it in atom_badarg. No private data
 * is allocated and load_info is not consulted.
 *
 * Always returns 0, which the erl_nif API treats as a successful load.
 */
static int load(ErlNifEnv *env, void **priv_data, ERL_NIF_TERM load_info) {
    atom_badarg = enif_make_atom(env, "badarg");

    /* Both remaining arguments are intentionally unused. */
    (void)load_info;
    (void)priv_data;

    return 0;
}

/*
 * Convert a number of hashed bytes into the percent argument expected by
 * enif_consume_timeslice.
 *
 * The byte count is converted to reductions at
 * MURMUR_NIF_BYTES_PER_REDUCTION bytes each, then expressed as a share of
 * MURMUR_NIF_REDUCTION_COUNT reductions. The result is clamped to the
 * inclusive range 1..100.
 *
 * With the current constants, an input of MURMUR_NIF_DIRTY_THRESHOLD bytes
 * (20480 -> 40 reductions -> 1%) or fewer always yields 1.
 */
static inline int
timeslice_percent(size_t bytes) {
    const size_t reductions = bytes / MURMUR_NIF_BYTES_PER_REDUCTION;
    const long share = (long)((reductions * 100) / MURMUR_NIF_REDUCTION_COUNT);

    /* Clamp to [1, 100]: zero-cost calls still report the minimum of 1. */
    return (int)(share < 1 ? 1 : (share > 100 ? 100 : share));
}

/* ---- standard MurmurHash3 x64_128 ----------------------------------- */

/*
 * Worker for murmur3_x64_128/1: hashes argv[0] with MurmurHash3_x64_128
 * (seed 0) and returns the 16-byte digest as a new binary.
 *
 * Called directly by nif_murmur3_x64_128 for small inputs, and also used as
 * the function handed to enif_schedule_nif for large ones, so it validates
 * its argument itself rather than relying on the caller.
 *
 * Returns a badarg exception when argv[0] is not a binary, or when the
 * binary is larger than INT_MAX bytes (see below).
 */
static ERL_NIF_TERM
murmur3_x64_128_impl(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
    (void)argc;
    ErlNifBinary in;
    if (!enif_inspect_binary(env, argv[0], &in)) {
        return enif_make_badarg(env);
    }

    /* The length is handed to the hash as an int. A size_t above INT_MAX
     * would be truncated (or turn negative) by that conversion, producing a
     * hash of the wrong byte range or an out-of-bounds read, so reject it. */
    if (in.size > (size_t)INT_MAX) {
        return enif_make_badarg(env);
    }

    ERL_NIF_TERM out_term;
    unsigned char *out = enif_make_new_binary(env, 16, &out_term);
    MurmurHash3_x64_128(in.data, (int)in.size, 0, out);

    /* Report the work done so the scheduler can account for it. */
    enif_consume_timeslice(env, timeslice_percent(in.size));
    return out_term;
}

/*
 * Entry point for murmur_nif:murmur3_x64_128/1.
 *
 * Rejects non-binary arguments with badarg. Binaries of at most
 * MURMUR_NIF_DIRTY_THRESHOLD bytes are hashed inline by calling
 * murmur3_x64_128_impl directly; larger ones are rescheduled so the same
 * worker runs as a CPU-bound dirty job.
 */
static ERL_NIF_TERM
nif_murmur3_x64_128(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
    ErlNifBinary payload;

    if (!enif_inspect_binary(env, argv[0], &payload)) {
        return enif_make_badarg(env);
    }

    /* Small input: cheap enough to hash on the calling scheduler. */
    if (payload.size <= MURMUR_NIF_DIRTY_THRESHOLD) {
        return murmur3_x64_128_impl(env, argc, argv);
    }

    /* Large input: hand the original arguments to the dirty-scheduled worker. */
    return enif_schedule_nif(env,
                             "murmur3_x64_128_dirty",
                             ERL_NIF_DIRTY_JOB_CPU_BOUND,
                             murmur3_x64_128_impl,
                             argc,
                             argv);
}

/* ---- Cassandra-compatible variant ----------------------------------- */

/*
 * Worker for murmur3_cassandra_x64_128/1: hashes argv[0] with
 * MurmurHash3_Cassandra_x64_128 (seed 0) and returns the 16-byte digest as
 * a new binary.
 *
 * Called directly by nif_murmur3_cassandra_x64_128 for small inputs, and
 * also used as the function handed to enif_schedule_nif for large ones, so
 * it validates its argument itself rather than relying on the caller.
 *
 * Returns a badarg exception when argv[0] is not a binary, or when the
 * binary is larger than INT_MAX bytes (see below).
 */
static ERL_NIF_TERM
murmur3_cassandra_x64_128_impl(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
    (void)argc;
    ErlNifBinary in;
    if (!enif_inspect_binary(env, argv[0], &in)) {
        return enif_make_badarg(env);
    }

    /* The length is handed to the hash as an int. A size_t above INT_MAX
     * would be truncated (or turn negative) by that conversion, producing a
     * hash of the wrong byte range or an out-of-bounds read, so reject it. */
    if (in.size > (size_t)INT_MAX) {
        return enif_make_badarg(env);
    }

    ERL_NIF_TERM out_term;
    unsigned char *out = enif_make_new_binary(env, 16, &out_term);
    MurmurHash3_Cassandra_x64_128(in.data, (int)in.size, 0, out);

    /* Report the work done so the scheduler can account for it. */
    enif_consume_timeslice(env, timeslice_percent(in.size));
    return out_term;
}

/*
 * Entry point for murmur_nif:murmur3_cassandra_x64_128/1.
 *
 * Rejects non-binary arguments with badarg. Binaries of at most
 * MURMUR_NIF_DIRTY_THRESHOLD bytes are hashed inline by calling
 * murmur3_cassandra_x64_128_impl directly; larger ones are rescheduled so
 * the same worker runs as a CPU-bound dirty job.
 */
static ERL_NIF_TERM
nif_murmur3_cassandra_x64_128(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
    ErlNifBinary payload;

    if (!enif_inspect_binary(env, argv[0], &payload)) {
        return enif_make_badarg(env);
    }

    /* Small input: cheap enough to hash on the calling scheduler. */
    if (payload.size <= MURMUR_NIF_DIRTY_THRESHOLD) {
        return murmur3_cassandra_x64_128_impl(env, argc, argv);
    }

    /* Large input: hand the original arguments to the dirty-scheduled worker. */
    return enif_schedule_nif(env,
                             "murmur3_cassandra_x64_128_dirty",
                             ERL_NIF_DIRTY_JOB_CPU_BOUND,
                             murmur3_cassandra_x64_128_impl,
                             argc,
                             argv);
}

/* ---- entry table ---------------------------------------------------- */

static ErlNifFunc nif_functions[] = {
    {"murmur3_x64_128",           1, nif_murmur3_x64_128,           0},
    {"murmur3_cassandra_x64_128", 1, nif_murmur3_cassandra_x64_128, 0}
};

/* Register the NIF library for module murmur_nif: nif_functions is the
 * export table and load() is the load callback; the remaining three
 * callback slots are left NULL. */
ERL_NIF_INIT(murmur_nif, nif_functions, load, NULL, NULL, NULL)