# c_src/Makefile

# Directory layout. CURDIR is taken from the shell, and BASEDIR resolves to
# the parent of that directory.
CURDIR   := $(shell pwd)
BASEDIR  := $(abspath $(dir $(CURDIR)))

# Tool and output locations; every `?=` value can be overridden by the caller.
# PRIV_DIR is rooted at REBAR_BARE_COMPILER_OUTPUT_DIR when that variable is
# non-empty, and at BASEDIR otherwise.
CXX      ?= g++
DEBUG    ?= 0
OBJ_DIR  ?= $(BASEDIR)/obj
PRIV_DIR ?= $(or $(REBAR_BARE_COMPILER_OUTPUT_DIR),$(BASEDIR))/priv

# The NIF is a single translation unit; its object file lives in OBJ_DIR.
SRC      := glazer_nif.cpp
OBJ      := $(OBJ_DIR)/$(SRC:.cpp=.o)

# erlang:load_nif/2 adds the platform-specific extension on its own, so the
# file built here has to carry the matching one: .so by default, .dll when
# `uname -s` reports a MINGW or MSYS environment (Windows).
EXT := .so
ifneq (,$(findstring MINGW,$(shell uname -s))$(findstring MSYS,$(shell uname -s)))
  EXT := .dll
endif

# The NIF library produced by this Makefile.
TARGET           := $(PRIV_DIR)/glazer$(EXT)

# Header and library locations are queried from the `erl` found on PATH.
#   ERL_ERTS_INCLUDE — <root>/erts-<version>/include
#   ERL_EI_INCLUDE   — include directory of the erl_interface application
#   ERL_EI_LIB       — lib directory of the erl_interface application
# The erl_interface paths are assembled with filename:join/2 on top of
# code:lib_dir/1, because the two-argument code:lib_dir/2 is deprecated and
# scheduled for removal from OTP.
ERL_ERTS_INCLUDE := $(shell erl -noshell -noinput \
  -eval "io:format(\"~ts/erts-~ts/include\",[code:root_dir(),erlang:system_info(version)]),halt(0).")
ERL_EI_INCLUDE   := $(shell erl -noshell -noinput \
  -eval "io:format(\"~ts\",[filename:join(code:lib_dir(erl_interface),\"include\")]),halt(0).")
ERL_EI_LIB       := $(shell erl -noshell -noinput \
  -eval "io:format(\"~ts\",[filename:join(code:lib_dir(erl_interface),\"lib\")]),halt(0).")

# Baseline compiler flags shared by every build mode: language standard,
# position-independent code, warnings, hidden default symbol visibility,
# followed by the include search path (ERTS, erl_interface, this directory).
CXXFLAGS := -std=c++2b -fPIC -Wall -finline-functions -fvisibility=hidden
CXXFLAGS += -I$(ERL_ERTS_INCLUDE) -I$(ERL_EI_INCLUDE) -I.

# Pick LTO flags from the first line of `$(CXX) --version`:
#   * AppleClang    — thin LTO; ld64 handles it natively, so no -fuse-ld=lld.
#   * upstream Clang — thin LTO; the emitted LLVM bitcode requires lld.
#   * anything else (GCC) — -flto=auto with GNU ld/gold.
CXX_ID := $(shell $(CXX) --version 2>&1 | head -1)

# Start from the AppleClang settings and adjust for the other two families.
LTO_FLAGS  := -flto=thin
LINK_FLAGS :=
ifeq ($(findstring Apple,$(CXX_ID)),)
  ifneq ($(findstring clang,$(CXX_ID)),)
    LINK_FLAGS := -fuse-ld=lld
  else
    LTO_FLAGS  := -flto=auto -fno-fat-lto-objects
  endif
endif

# Optional jq support (glazer:json_query/2). pkg-config is tried first in
# case a jq.pc file is installed; since most distros don't ship one, the
# fallback is a compile+link probe of a tiny program against -ljq.
#
# The probe source spells the leading hash of the include directive as the
# printf octal escape \043. GNU make 4.3 changed how a backslash-escaped hash
# inside a function call is expanded: the backslash is now kept, so the old
# escaped spelling reached the shell verbatim, the probe never compiled and
# jq support was silently switched off. The octal escape behaves the same on
# every make version.
HAVE_JQ := $(shell pkg-config --exists jq 2>/dev/null && echo 1 || \
  (printf '\043include <jq.h>\nint main(){jq_init();return 0;}\n' | \
   $(CXX) -xc++ - -ljq -o /dev/null >/dev/null 2>&1 && echo 1 || echo 0))
ifeq ($(HAVE_JQ),1)
  # Enable the jq code path in the NIF and link against libjq.
  CXXFLAGS += -DGLAZER_HAVE_JQ
  LDFLAGS  += -ljq
  $(info "==> Building glazer with jq support")
endif

# Profile-Guided Optimisation (PGO) feeds measured branch frequencies and
# call counts back into the compiler, which uses them to lay out hot paths,
# inline more aggressively and reorder basic blocks for better
# instruction-cache usage.  Typical gain on a JSON-heavy workload is
# 5–15 % over plain -O3.
#
# Workflow — three steps, or simply `make pgo` from the project root:
#
#   1. make PGO=generate   — instrumented build; the resulting .so writes
#                            profile data (*.gcda) into obj/pgo/ at runtime
#   2. run a representative workload (e.g. `rebar3 eunit`, a benchmark)
#   3. make PGO=use        — optimised rebuild driven by the collected profile
#
# The profile data is kept in $(OBJ_DIR)/pgo/ and deleted by `make clean`.
# -fprofile-correction makes the build tolerate minor counter mismatches
# (e.g. an interrupted workload) rather than abort.
#
# ASAN=1 selects the AddressSanitizer build instead of the optimised one.
ASAN     ?= 0
PGO      ?= 0
PGO_DIR  := $(OBJ_DIR)/pgo

# Build metadata exposed at runtime via glazer:info/0 — see nif_info() in
# glazer_nif.cpp. GLAZER_VERSION is the git describe of the checkout used to
# build this .so; GLAZER_APP_VERSION is the `vsn` from glazer.app.src;
# GLAZER_OPT_LEVEL/GLAZER_PGO reflect the flags chosen below.
#
# git describe prints <tag>-<n>-g<hash>[-dirty]. The first sed expression
# removes only the "g" in front of the hash (anchored to the end of the
# string, so a "g" inside the tag name is left alone); the second turns a
# trailing "-dirty" into "*".
GLAZER_VERSION     := $(shell git -C $(BASEDIR) describe --tags --dirty --always --abbrev=6 2>/dev/null | \
  sed -E 's/-([0-9]+)-g([0-9a-f]+)(-dirty)?$$/-\1-\2\3/; s/-dirty$$/*/')
# First `{vsn, "..."}` value found in the application resource file(s).
GLAZER_APP_VERSION := $(shell sed -nE 's/.*\{vsn, *"([^"]+)".*/\1/p' $(BASEDIR)/src/*.app.src | head -n1)
# Both values are passed to the compiler as C string literals.
CXXFLAGS += -DGLAZER_VERSION='"$(GLAZER_VERSION)"' -DGLAZER_APP_VERSION='"$(GLAZER_APP_VERSION)"'

# Build-mode selection. Exactly one branch applies, checked in this order:
#   DEBUG=1  — unoptimised build with debug info
#   ASAN=1   — AddressSanitizer instrumentation at -O1
#   default  — -O3 tuned for the build host, with LTO and optional PGO
ifeq ($(DEBUG),1)
  CXXFLAGS += -O0 -g -DGLAZER_OPT_LEVEL='"none"'
else ifeq ($(ASAN),1)
  # Compile with ASan instrumentation but do NOT link -fsanitize=address.
  # Linking it would embed or reference the runtime inside the NIF .so; when
  # the Erlang VM dlopen's that .so the runtime loads too late and ASan aborts.
  # Instead all __asan_* symbols stay undefined in the .so and are resolved
  # from the runtime preloaded via LD_PRELOAD (Linux only — see Makefile).
  CXXFLAGS += -O1 -g -fsanitize=address -fno-omit-frame-pointer -DGLAZER_OPT_LEVEL='"O1"'
else
  CXXFLAGS += -O3 -DNDEBUG -march=native -mtune=native $(LTO_FLAGS) -DGLAZER_OPT_LEVEL='"O3"'
  # The LTO flags are repeated for the link step: GCC's documentation asks
  # for -flto at the final link as well as at compile time. LINK_FLAGS
  # carries the linker selection for the detected compiler family.
  LDFLAGS  += $(LTO_FLAGS) $(LINK_FLAGS)
  ifeq ($(PGO),generate)
    # Instrumented build: profile data is written below PGO_DIR at runtime.
    CXXFLAGS += -fprofile-generate=$(PGO_DIR)
    LDFLAGS  += -fprofile-generate=$(PGO_DIR)
  else ifeq ($(PGO),use)
    # Optimised build driven by the profile collected in PGO_DIR.
    CXXFLAGS += -fprofile-use=$(PGO_DIR) -fprofile-correction -Wno-missing-profile -DGLAZER_PGO=1
    LDFLAGS  += -fprofile-use=$(PGO_DIR)
  endif
endif

# Link as a shared object against erl_interface's libei.
LDFLAGS += -shared
LDFLAGS += -L$(ERL_EI_LIB) -lei
# Darwin only: let the linker accept symbols that the library itself does not
# define (presumably the NIF API supplied by the VM at load time — confirm).
ifeq ($(shell uname -s),Darwin)
  LDFLAGS += -undefined dynamic_lookup
endif

# None of these names is a file. `force` has no recipe of its own; rules list
# it as a prerequisite when they must be re-run unconditionally.
.PHONY: all clean force

# Top-level goal: the NIF shared library.
all: $(TARGET)

# Link step. $^ expands to the normal prerequisites only (the object file);
# the order-only PRIV_DIR is not part of it.
$(TARGET): $(OBJ) | $(PRIV_DIR)
	$(CXX) $^ $(LDFLAGS) -o $@

# Compile the NIF source. Normally the object is rebuilt only when the source
# or one of the local headers changes, but make cannot see a change of flags:
#   * PGO=generate — an object left over from a plain build looks up to date,
#     so no instrumented library would be produced and no profile collected;
#   * PGO=use      — the object looks up to date although it must be
#     recompiled against the newly collected profile data.
# Both PGO phases therefore add the phony `force` prerequisite to make the
# recompilation unconditional.
# Note: GCC writes .gcda files under a path derived from the source file's
# absolute path, so the layout inside $(PGO_DIR) is nested.  This is harmless:
# GCC searches $(PGO_DIR) recursively on -fprofile-use, and clean wipes the tree.
$(OBJ): $(SRC) $(wildcard *.hpp) $(if $(filter generate use,$(PGO)),force) \
        | $(OBJ_DIR) $(if $(filter generate use,$(PGO)),$(PGO_DIR))
	$(CXX) $(CXXFLAGS) -c $< -o $@

# Output directories, created on demand. Other rules reference them as
# order-only prerequisites.
$(PRIV_DIR) $(OBJ_DIR) $(PGO_DIR):
	@mkdir -p $@

# Remove the collected profile data, the object file and the built library.
clean:
	rm -rf $(PGO_DIR)
	rm -f $(OBJ) $(TARGET)