/**
* This file uses jsdoc to annotate types.
* These types can be checked using the typescript compiler with "checkjs" option.
*/
import { isEqual, Result$Error, Result$Ok } from "./gleam.mjs";
// -- HASH --------------------------------------------------------------------
const referenceMap = /* @__PURE__ */ new WeakMap();
const tempDataView = /* @__PURE__ */ new DataView(
/* @__PURE__ */ new ArrayBuffer(8),
);
let referenceUID = 0;
/**
* hash the object by reference using a weak map and incrementing uid
* @param {any} o
* @returns {number}
*/
function hashByReference(o) {
const known = referenceMap.get(o);
if (known !== undefined) {
return known;
}
const hash = referenceUID++;
if (referenceUID === 0x7fffffff) {
referenceUID = 0;
}
referenceMap.set(o, hash);
return hash;
}
/**
* merge two hashes in an order sensitive way
* @param {number} a
* @param {number} b
* @returns {number}
*/
function hashMerge(a, b) {
return (a ^ (b + 0x9e3779b9 + (a << 6) + (a >> 2))) | 0;
}
/**
* standard string hash popularised by Java
* @param {string} s
* @returns {number}
*/
function hashString(s) {
let hash = 0;
const len = s.length;
for (let i = 0; i < len; i++) {
hash = (Math.imul(31, hash) + s.charCodeAt(i)) | 0;
}
return hash;
}
/**
* hash a number by converting to two integers and do some jumbling
* @param {number} n
* @returns {number}
*/
function hashNumber(n) {
tempDataView.setFloat64(0, n);
const i = tempDataView.getInt32(0);
const j = tempDataView.getInt32(4);
return Math.imul(0x45d9f3b, (i >> 16) ^ i) ^ j;
}
/**
* hash a BigInt by converting it to a string and hashing that
* @param {BigInt} n
* @returns {number}
*/
function hashBigInt(n) {
return hashString(n.toString());
}
/**
* hash any js object
* @param {any} o
* @returns {number}
*/
function hashObject(o) {
const proto = Object.getPrototypeOf(o);
if (proto !== null && typeof proto.hashCode === "function") {
try {
const code = o.hashCode(o);
if (typeof code === "number") {
return code;
}
} catch {}
}
if (o instanceof Promise || o instanceof WeakSet || o instanceof WeakMap) {
return hashByReference(o);
}
if (o instanceof Date) {
return hashNumber(o.getTime());
}
let h = 0;
if (o instanceof ArrayBuffer) {
o = new Uint8Array(o);
}
if (Array.isArray(o) || o instanceof Uint8Array) {
for (let i = 0; i < o.length; i++) {
h = (Math.imul(31, h) + getHash(o[i])) | 0;
}
} else if (o instanceof Set) {
o.forEach((v) => {
h = (h + getHash(v)) | 0;
});
} else if (o instanceof Map) {
o.forEach((v, k) => {
h = (h + hashMerge(getHash(v), getHash(k))) | 0;
});
} else {
const keys = Object.keys(o);
for (let i = 0; i < keys.length; i++) {
const k = keys[i];
const v = o[k];
h = (h + hashMerge(getHash(v), hashString(k))) | 0;
}
}
return h;
}
/**
* hash any js value
* @param {any} u
* @returns {number}
*/
export function getHash(u) {
if (u === null) return 0x42108422;
if (u === undefined) return 0x42108423;
if (u === true) return 0x42108421;
if (u === false) return 0x42108420;
switch (typeof u) {
case "number":
return hashNumber(u);
case "string":
return hashString(u);
case "bigint":
return hashBigInt(u);
case "object":
return hashObject(u);
case "symbol":
return hashByReference(u);
case "function":
return hashByReference(u);
default:
return 0; // should be unreachable
}
}
// -- DICT --------------------------------------------------------------------
/**
* An implementation of the CHAMP data structure, an optimised HAMT.
*
* See: M. J. Steindorfer, J.J. Vinju (2015). Optimizing Hash-Array Mapped Tries for
Fast and Lean Immutable JVM Collections. Available: https://michael.steindorfer.name/publications/oopsla15.pdf
*/
export default class Dict {
constructor(size, root) {
this.size = size;
this.root = root;
}
}
/// The power-of-2 branching factor for the dict. For example, a value of `5` indicates a 32-ary tree.
const bits = 5;
const mask = (1 << bits) - 1;
/// This symbol is used internally to avoid constructing results.
const noElementMarker = Symbol();
/// This symbol is used to store the "generation" on a node.
/// Using a symbol makes the property not enumerable, which means the generation
/// will be ignored during equality checks.
const generationKey = Symbol();
// Some commonly used constants throughout the code.
const emptyNode = /* @__PURE__ */ newNode(0);
const emptyDict = /* @__PURE__ */ new Dict(0, emptyNode);
const errorNil = /* @__PURE__ */ Result$Error(undefined);
function makeNode(generation, datamap, nodemap, data) {
// The order of fields is important, as they define the order `isEqual` will
// compare our fields. Putting the bitmaps first means that equality can
// early-out if the bitmaps are not equal.
return {
// A node is a high-arity (32 in practice) hybrid tree node.
// Hybrid means that it stores data directly as well as pointers to child nodes.
//
// Each node contains 2 bitmaps:
// - The datamap has a bit set if that slot in the node contains direct data
// - The nodemap has a bit set if that slot in the node contains another node.
//
// Both are exclusive to on another, so datamap & nodemap == 0.
//
// Every key/hash value directly correlates to a specific bit by using a trie
// suffix (least significant bits first) encoding.
// For example, if the last 5 bits of the hash are 1101, the bit to check for
// that value is the 13th bit.
datamap,
nodemap,
// The slots itself are stored in a single contiguous array that contains
// both direct k/v-pairs and child nodes.
//
// The direct children come first, followed by the child nodes in _reverse order_:
//
// 7654321
// datamap: 1000100
// nodemap: 10011
// data: [key3, value3, key7, value7, child5, child2, child1]
// -------------------------> <---------------------
// datamap nodemap
//
// Every `1` bit in the datamap corresponds to a pair of [key, value] entries,
// and every `1` bit in the nodemap corresponds to a child node entry.
//
// Children are stored in reverse order to avoid having to store or calculate an
// "offset" value to skip over the direct children.
data,
// The generation is used to track which nodes need to be copied during transient updates.
// Using a symbol here makes `isEqual` ignore this field.
[generationKey]: generation,
};
}
function newNode(generation) {
return makeNode(generation, 0, 0, []);
}
/**
* Copies a node and its data array if it's from another generation, making it safe
* to mutate the node.
*/
function copyNode(node, generation) {
if (node[generationKey] === generation) {
return node;
}
const newData = node.data.slice(0);
return makeNode(generation, node.datamap, node.nodemap, newData);
}
/**
* Copies a node if needed and sets a new value.
*/
function copyAndSet(node, generation, idx, val) {
if (node.data[idx] === val) {
return node;
}
// Using copyNode is faster than a specialised implementation.
node = copyNode(node, generation);
node.data[idx] = val;
return node;
}
/**
* Copies a node if needed, and then inserts a new key-value pair.
*/
function copyAndInsertPair(node, generation, bit, idx, key, val) {
const data = node.data;
const length = data.length;
// the fastest way to insert a pair is to always copy.
const newData = new Array(length + 2);
let readIndex = 0;
let writeIndex = 0;
while (readIndex < idx) newData[writeIndex++] = data[readIndex++];
newData[writeIndex++] = key;
newData[writeIndex++] = val;
while (readIndex < length) newData[writeIndex++] = data[readIndex++];
return makeNode(generation, node.datamap | bit, node.nodemap, newData);
}
function copyAndRemovePair(node, generation, bit, idx) {
node = copyNode(node, generation);
const data = node.data;
const length = data.length;
for (let w = idx, r = idx + 2; r < length; ++r, ++w) {
data[w] = data[r];
}
data.pop();
data.pop();
node.datamap ^= bit;
return node;
}
export function make() {
return emptyDict;
}
export function from(iterable) {
let transient = toTransient(emptyDict);
for (const [key, value] of iterable) {
transient = destructiveTransientInsert(key, value, transient);
}
return fromTransient(transient);
}
export function size(dict) {
return dict.size;
}
export function get(dict, key) {
const result = lookup(dict.root, key, getHash(key));
return result !== noElementMarker ? Result$Ok(result) : errorNil;
}
export function has(dict, key) {
return lookup(dict.root, key, getHash(key)) !== noElementMarker;
}
function lookup(node, key, hash) {
for (let shift = 0; shift < 32; shift += bits) {
const data = node.data;
const bit = hashbit(hash, shift);
if (node.nodemap & bit) {
// we found our hash inside the nodemap, so we can continue our search there.
node = data[data.length - 1 - index(node.nodemap, bit)];
} else if (node.datamap & bit) {
// we store this hash directly!
//
// this also means that there are no other values with the same
// hash prefix in this dict.
//
// We still need to check if the key matches, but if it does we know for
// sure that this is the correct value, and if it doesn't that we don't
// contain the value in question.
const dataidx = Math.imul(index(node.datamap, bit), 2);
return isEqual(key, data[dataidx]) ? data[dataidx + 1] : noElementMarker;
} else {
// if the hash bit is not set in neither bitmaps, we immediately know that
// this key cannot be inside this dict.
return noElementMarker;
}
}
// our shift has exceeded 32 bits. Everything that follows is
// implicitely an overflow node and only contains direct children.
const overflow = node.data;
for (let i = 0; i < overflow.length; i += 2) {
if (isEqual(key, overflow[i])) {
return overflow[i + 1];
}
}
return noElementMarker;
}
/**
* We use "transient" values to allow for safer internal mutations of the data
* structure. This is an optimisation only. No mutable API is exposed to the user.
*
* Transients are to be treated as having a linear (single-use, think rust) type.
* A transient value becomes invalid as soon as it's passed to one of the functions.
*
* Internally, we track a "generation" value on each node. If the generation
* doesn't match the one for the current transient, we have to copy - the node
* could still be referenced by another dict instance!
* After that, no other references than the transient one exists, so it's safe
* to mutate in place.
*/
export function toTransient(dict) {
return {
generation: nextGeneration(dict),
root: dict.root,
size: dict.size,
dict: dict,
};
}
/**
* Consume a transient, producing a normal Dict again.
*/
export function fromTransient(transient) {
if (transient.root === transient.dict.root) {
return transient.dict;
}
return new Dict(transient.size, transient.root);
}
/**
* Find and allocate the next generation id.
*
* @template K,V
* @param {Dict<K,V>} dict
* @returns {number}
*/
function nextGeneration(dict) {
const root = dict.root;
if (root[generationKey] < Number.MAX_SAFE_INTEGER) {
return root[generationKey] + 1;
}
// we have reached MAX_SAFE_INTEGER generations -
// at this point, we have to walk the dictionary once to reset the counter
// on every node. This is safe since it's part of the contract for transient
// that only one of them exists at any given time.
//
const queue = [root];
while (queue.length) {
// order doesn't matter, so we can use push/pop for faster array usage.
const node = queue.pop();
// reset the generation to 0
node[generationKey] = 0;
// queue all other referenced nodes
// We need to query the length from the nodemap, as we don't know if this
// is an overflow node or not! if it is, it will never have datamap set!
const nodeStart = data.length - popcount(node.nodemap);
for (let i = nodeStart; i < node.data.length; ++i) {
queue.push(node.data[i]);
}
}
return 1;
}
/// Insert is the second-most performance-sensitive operation.
/// We use a global "transient" value here to avoid doing a memory allocation.
const globalTransient = /* @__PURE__ */ toTransient(emptyDict);
export function insert(dict, key, value) {
globalTransient.generation = nextGeneration(dict);
globalTransient.size = dict.size;
const hash = getHash(key);
const root = insertIntoNode(globalTransient, dict.root, key, value, hash, 0);
if (root === dict.root) {
return dict;
}
return new Dict(globalTransient.size, root);
}
/**
* Consume a transient, writing a new key/value pair into the dictionary it
* represents. If the key already exists, it will be overwritten.
*
* Returns a new transient.
*/
export function destructiveTransientInsert(key, value, transient) {
const hash = getHash(key);
transient.root = insertIntoNode(transient, transient.root, key, value, hash, 0);
return transient;
}
/**
* Consume a transient, writing a new key/value pair if the key doesn't exist or updating
* the existing value with a function if it does.
*
* Returns a new transient.
*/
export function destructiveTransientUpdateWith(key, fun, value, transient) {
const hash = getHash(key);
const existing = lookup(transient.root, key, hash);
if (existing !== noElementMarker) {
value = fun(existing);
}
transient.root = insertIntoNode(transient, transient.root, key, value, hash, 0);
return transient;
}
function insertIntoNode(transient, node, key, value, hash, shift) {
const data = node.data;
const generation = transient.generation;
// 1. Overflow Node
// overflow nodes only contain key/value-pairs. we walk the data linearly trying to find a match.
if (shift > 32) {
for (let i = 0; i < data.length; i += 2) {
if (isEqual(key, data[i])) {
return copyAndSet(node, generation, i + 1, value);
}
}
transient.size += 1;
return copyAndInsertPair(node, generation, 0, data.length, key, value);
}
const bit = hashbit(hash, shift);
// 2. Child Node
// We have to check first if there is already a child node we have to traverse to.
if (node.nodemap & bit) {
const nodeidx = data.length - 1 - index(node.nodemap, bit);
let child = data[nodeidx];
child = insertIntoNode(transient, child, key, value, hash, shift + bits);
return copyAndSet(node, generation, nodeidx, child);
}
// 3. New Data Node
// No child node and no data node exists yet, so we can potentially just insert a new value.
const dataidx = Math.imul(index(node.datamap, bit), 2);
if ((node.datamap & bit) === 0) {
transient.size += 1;
return copyAndInsertPair(node, generation, bit, dataidx, key, value);
}
// 4. Existing Data Node
// We have a match that we can update, or remove.
if (isEqual(key, data[dataidx])) {
return copyAndSet(node, generation, dataidx + 1, value);
}
// 5. Collision
// There is no child node, but a data node with the same hash, but with a different key.
// To resolve this, we push both nodes down one level.
const childShift = shift + bits;
let child = emptyNode;
child = insertIntoNode(transient, child, key, value, hash, childShift);
const key2 = data[dataidx];
const value2 = data[dataidx + 1];
const hash2 = getHash(key2);
child = insertIntoNode(transient, child, key2, value2, hash2, childShift);
// we inserted 2 elements, but implicitely deleted the one we pushed down from the datamap.
transient.size -= 1;
// remove the old data pair, and insert the new child node.
const length = data.length;
const nodeidx = length - 1 - index(node.nodemap, bit);
// writing these loops in javascript instead of a combination of splices
// turns out to be faster. Copying always turned out to be faster.
const newData = new Array(length - 1);
let readIndex = 0;
let writeIndex = 0;
// [0..dataidx, skip 2 elements, ..nodeidx, newChild, ..rest]
while (readIndex < dataidx) newData[writeIndex++] = data[readIndex++];
readIndex += 2;
while (readIndex <= nodeidx) newData[writeIndex++] = data[readIndex++];
newData[writeIndex++] = child;
while (readIndex < length) newData[writeIndex++] = data[readIndex++];
return makeNode(generation, node.datamap ^ bit, node.nodemap | bit, newData);
}
/**
* Consume a transient, removing a key if it exists.
* Returns a new transient.
*/
export function destructiveTransientDelete(key, transient) {
const hash = getHash(key);
transient.root = deleteFromNode(transient, transient.root, key, hash, 0);
return transient;
}
function deleteFromNode(transient, node, key, hash, shift) {
const data = node.data;
const generation = transient.generation;
// 1. Overflow Node
// overflow nodes only contain key/value-pairs. we walk the data linearly trying to find a match.
if (shift > 32) {
for (let i = 0; i < data.length; i += 2) {
if (isEqual(key, data[i])) {
transient.size -= 1;
return copyAndRemovePair(node, generation, 0, i);
}
}
return node;
}
const bit = hashbit(hash, shift);
const dataidx = Math.imul(index(node.datamap, bit), 2);
// 2. Child Node
// We have to check first if there is already a child node we have to traverse to.
if ((node.nodemap & bit) !== 0) {
const nodeidx = data.length - 1 - index(node.nodemap, bit);
let child = data[nodeidx];
child = deleteFromNode(transient, child, key, hash, shift + bits);
// the node did change, so let's copy to incorporate that change.
if (child.nodemap !== 0 || child.data.length > 2) {
return copyAndSet(node, generation, nodeidx, child);
}
// this node only has a single data (k/v-pair) child.
// to restore the CHAMP invariant, we "pull" that pair up into ourselves.
// this ensures that every tree stays in its single optimal representation,
// and allows dicts to be structurally compared.
const length = data.length;
const newData = new Array(length + 1);
let readIndex = 0;
let writeIndex = 0;
while (readIndex < dataidx) newData[writeIndex++] = data[readIndex++];
newData[writeIndex++] = child.data[0];
newData[writeIndex++] = child.data[1];
while (readIndex < nodeidx) newData[writeIndex++] = data[readIndex++];
readIndex++;
while (readIndex < length) newData[writeIndex++] = data[readIndex++];
return makeNode(generation, node.datamap | bit, node.nodemap ^ bit, newData);
}
// 3. Data Node
// There is no data entry here, or it is a prefix for a different key
if ((node.datamap & bit) === 0 || !isEqual(key, data[dataidx])) {
return node;
}
// we found a data entry that we can delete.
transient.size -= 1;
return copyAndRemovePair(node, generation, bit, dataidx);
}
export function map(dict, fun) {
// map can never modify the structure, so we can walk the dictionary directly,
// but still move to a new generation to make sure we get a new copy of every node.
const generation = nextGeneration(dict);
const root = copyNode(dict.root, generation);
const queue = [root];
while (queue.length) {
// order doesn't matter, so we can use push/pop for faster array usage.
const node = queue.pop();
const data = node.data;
// every node contains popcount(datamap) direct entries
// We need to query the length from the nodemap, as we don't know if this
// is an overflow node or not! if it is, it will never have datamap set!
const edgesStart = data.length - popcount(node.nodemap);
for (let i = 0; i < edgesStart; i += 2) {
// we copied the node while queueing it, so direct mutation here is safe.
data[i + 1] = fun(data[i], data[i + 1]);
}
// the remaining entries are other nodes we can queue
for (let i = edgesStart; i < data.length; ++i) {
// copy the node first to make it safe to mutate
data[i] = copyNode(data[i], generation);
queue.push(data[i]);
}
}
return new Dict(dict.size, root);
}
export function fold(dict, state, fun) {
const queue = [dict.root];
while (queue.length) {
// order doesn't matter, so we can use push/pop for faster array usage.
const node = queue.pop();
const data = node.data;
// every node contains popcount(datamap) direct entries
// We need to query the length from the nodemap, as we don't know if this
// is an overflow node or not! if it is, it will never have datamap set!
const edgesStart = data.length - popcount(node.nodemap);
for (let i = 0; i < edgesStart; i += 2) {
state = fun(state, data[i], data[i + 1]);
}
// the remaining entries are child nodes we can queue.
for (let i = edgesStart; i < data.length; ++i) {
queue.push(data[i]);
}
}
return state;
}
/**
* How many `1` bits are set in a 32-bit integer.
*/
function popcount(n) {
n -= (n >>> 1) & 0x55555555;
n = (n & 0x33333333) + ((n >>> 2) & 0x33333333);
return Math.imul((n + (n >>> 4)) & 0x0f0f0f0f, 0x01010101) >>> 24;
}
/**
* Given a population bitmap and a bit selected from that map, returns
* how many less significant 1 bits there are.
*
* For example, index(10101, 100) returns 1, since there is a single less
* significant `1` bit. This translates to the 0-based "index" of that bit.
*/
function index(bitmap, bit) {
return popcount(bitmap & (bit - 1));
}
/**
* Extracts a single slice of the hash, and returns a bitmask for the resulting value.
* For example, if the slice returns 5, this function returns 10000 = 1 << 5.
*/
function hashbit(hash, shift) {
return 1 << ((hash >>> shift) & mask);
}