Skip to main content

lib/billdog_eng/murmur.ex

defmodule BilldogEng.Murmur do
  @moduledoc """
  murmurhash3 (32-bit, x86, seed-able) — ported verbatim from the canonical web
  reference `packages/ab-testing/src/ABTest.ts`.

  This is the cross-platform bucketing primitive for local feature-flag
  evaluation. The exact same algorithm runs on web / iOS / Android / every
  server SDK so that `murmurhash3("{key}.{distinctId}") % 100` produces an
  identical bucket everywhere — a user is deterministically in or out of a
  rollout regardless of which platform evaluated the flag.

  Implementation notes (must not change without re-pinning the vectors):

    * UTF-8 bytes (Elixir binaries are already UTF-8)
    * all arithmetic is masked to 32 bits (`&&& 0xFFFFFFFF`)
    * the result is the unsigned 32-bit integer
  """

  import Bitwise

  @mask 0xFFFFFFFF
  @c1 0xCC9E2D51
  @c2 0x1B873593

  @doc """
  Compute the unsigned 32-bit murmurhash3 of `key` with the given `seed`
  (default `0`).
  """
  @spec hash(binary(), non_neg_integer()) :: non_neg_integer()
  def hash(key, seed \\ 0) when is_binary(key) do
    len = byte_size(key)
    blocks = div(len, 4)

    h1 = hash_blocks(key, blocks, 0, seed)
    h1 = hash_tail(key, blocks * 4, rem(len, 4), h1)

    h1
    |> bxor(len)
    |> fmix()
  end

  @doc """
  Deterministic `0..99` bucket for a `"{key}.{distinctId}"` style seed.
  Convenience wrapper around `hash/2`.
  """
  @spec bucket_of(binary()) :: non_neg_integer()
  def bucket_of(seed) when is_binary(seed) do
    rem(hash(seed, 0), 100)
  end

  # ── Internals ──────────────────────────────────────────────────────────────

  defp hash_blocks(_key, blocks, i, h1) when i >= blocks, do: h1

  defp hash_blocks(key, blocks, i, h1) do
    offset = i * 4
    <<_::binary-size(^offset), b0, b1, b2, b3, _::binary>> = key

    k1 =
      b0
      |> bor(b1 <<< 8)
      |> bor(b2 <<< 16)
      |> bor(b3 <<< 24)

    k1 = mul32(k1, @c1)
    k1 = rotl32(k1, 15)
    k1 = mul32(k1, @c2)

    h1 = bxor(h1, k1)
    h1 = rotl32(h1, 13)
    h1 = (mul32(h1, 5) + 0xE6546B64) &&& @mask

    hash_blocks(key, blocks, i + 1, h1)
  end

  defp hash_tail(_key, _offset, 0, h1), do: h1

  defp hash_tail(key, offset, remainder, h1) do
    k1 = tail_k1(key, offset, remainder, 0)
    k1 = mul32(k1, @c1)
    k1 = rotl32(k1, 15)
    k1 = mul32(k1, @c2)
    bxor(h1, k1)
  end

  defp tail_k1(key, offset, 3, k1) do
    <<_::binary-size(^offset), _b0, _b1, b2, _::binary>> = key
    k1 = bxor(k1, b2 <<< 16)
    tail_k1(key, offset, 2, k1)
  end

  defp tail_k1(key, offset, 2, k1) do
    <<_::binary-size(^offset), _b0, b1, _::binary>> = key
    k1 = bxor(k1, b1 <<< 8)
    tail_k1(key, offset, 1, k1)
  end

  defp tail_k1(key, offset, 1, k1) do
    <<_::binary-size(^offset), b0, _::binary>> = key
    bxor(k1, b0)
  end

  # 32-bit unsigned multiply (mirrors Math.imul semantics for non-negative h1).
  defp mul32(a, b), do: a * b &&& @mask

  # Rotate-left within 32 bits.
  defp rotl32(x, r) do
    ((x <<< r) ||| (x >>> (32 - r))) &&& @mask
  end

  # Final avalanche mix; returns the unsigned 32-bit result.
  defp fmix(h) do
    h = bxor(h, h >>> 16)
    h = mul32(h, 0x85EBCA6B)
    h = bxor(h, h >>> 13)
    h = mul32(h, 0xC2B2AE35)
    bxor(h, h >>> 16)
  end
end