Select File:
AGENTS.md
CHANGELOG.md
LICENSE
README.md
ROADMAP.md
UPDATE_LLAMA.md
c_src/CMake/FindErlang.cmake
c_src/CMakeLists.txt
c_src/crc32c.c
c_src/crc32c.h
c_src/erllama_nif.c
c_src/erllama_safe.cpp
c_src/llama.cpp/CMakeLists.txt
c_src/llama.cpp/LICENSE
c_src/llama.cpp/cmake/arm64-apple-clang.cmake
c_src/llama.cpp/cmake/arm64-linux-clang.cmake
c_src/llama.cpp/cmake/arm64-windows-llvm.cmake
c_src/llama.cpp/cmake/build-info.cmake
c_src/llama.cpp/cmake/common.cmake
c_src/llama.cpp/cmake/download-models.cmake
c_src/llama.cpp/cmake/git-vars.cmake
c_src/llama.cpp/cmake/license.cmake
c_src/llama.cpp/cmake/llama-config.cmake.in
c_src/llama.cpp/cmake/llama.pc.in
c_src/llama.cpp/cmake/riscv64-spacemit-linux-gnu-gcc.cmake
c_src/llama.cpp/cmake/x64-windows-llvm.cmake
c_src/llama.cpp/ggml/CMakeLists.txt
c_src/llama.cpp/ggml/cmake/FindNCCL.cmake
c_src/llama.cpp/ggml/cmake/GitVars.cmake
c_src/llama.cpp/ggml/cmake/common.cmake
c_src/llama.cpp/ggml/cmake/ggml-config.cmake.in
c_src/llama.cpp/ggml/include/ggml-alloc.h
c_src/llama.cpp/ggml/include/ggml-backend.h
c_src/llama.cpp/ggml/include/ggml-blas.h
c_src/llama.cpp/ggml/include/ggml-cann.h
c_src/llama.cpp/ggml/include/ggml-cpp.h
c_src/llama.cpp/ggml/include/ggml-cpu.h
c_src/llama.cpp/ggml/include/ggml-cuda.h
c_src/llama.cpp/ggml/include/ggml-hexagon.h
c_src/llama.cpp/ggml/include/ggml-metal.h
c_src/llama.cpp/ggml/include/ggml-opencl.h
c_src/llama.cpp/ggml/include/ggml-openvino.h
c_src/llama.cpp/ggml/include/ggml-opt.h
c_src/llama.cpp/ggml/include/ggml-rpc.h
c_src/llama.cpp/ggml/include/ggml-sycl.h
c_src/llama.cpp/ggml/include/ggml-virtgpu.h
c_src/llama.cpp/ggml/include/ggml-vulkan.h
c_src/llama.cpp/ggml/include/ggml-webgpu.h
c_src/llama.cpp/ggml/include/ggml-zdnn.h
c_src/llama.cpp/ggml/include/ggml-zendnn.h
c_src/llama.cpp/ggml/include/ggml.h
c_src/llama.cpp/ggml/include/gguf.h
c_src/llama.cpp/ggml/src/CMakeLists.txt
c_src/llama.cpp/ggml/src/ggml-alloc.c
c_src/llama.cpp/ggml/src/ggml-backend-dl.cpp
c_src/llama.cpp/ggml/src/ggml-backend-dl.h
c_src/llama.cpp/ggml/src/ggml-backend-impl.h
c_src/llama.cpp/ggml/src/ggml-backend-meta.cpp
c_src/llama.cpp/ggml/src/ggml-backend-reg.cpp
c_src/llama.cpp/ggml/src/ggml-backend.cpp
c_src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt
c_src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp
c_src/llama.cpp/ggml/src/ggml-common.h
c_src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
c_src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h
c_src/llama.cpp/ggml/src/ggml-cpu/amx/common.h
c_src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h
c_src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h
c_src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c
c_src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c
c_src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c
c_src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c
c_src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c
c_src/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c
c_src/llama.cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c
c_src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h
c_src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake
c_src/llama.cpp/ggml/src/ggml-cpu/common.h
c_src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h
c_src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c
c_src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/hbm.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/hbm.h
c_src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h
c_src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h
c_src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h
c_src/llama.cpp/ggml/src/ggml-cpu/ops.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/ops.h
c_src/llama.cpp/ggml/src/ggml-cpu/quants.c
c_src/llama.cpp/ggml/src/ggml-cpu/quants.h
c_src/llama.cpp/ggml/src/ggml-cpu/repack.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/repack.h
c_src/llama.cpp/ggml/src/ggml-cpu/simd-gemm.h
c_src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h
c_src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.h
c_src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h
c_src/llama.cpp/ggml/src/ggml-cpu/traits.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/traits.h
c_src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h
c_src/llama.cpp/ggml/src/ggml-cpu/vec.cpp
c_src/llama.cpp/ggml/src/ggml-cpu/vec.h
c_src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt
c_src/llama.cpp/ggml/src/ggml-cuda/acc.cu
c_src/llama.cpp/ggml/src/ggml-cuda/acc.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/add-id.cu
c_src/llama.cpp/ggml/src/ggml-cuda/add-id.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/arange.cu
c_src/llama.cpp/ggml/src/ggml-cuda/arange.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/argmax.cu
c_src/llama.cpp/ggml/src/ggml-cuda/argmax.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/argsort.cu
c_src/llama.cpp/ggml/src/ggml-cuda/argsort.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/binbcast.cu
c_src/llama.cpp/ggml/src/ggml-cuda/binbcast.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/clamp.cu
c_src/llama.cpp/ggml/src/ggml-cuda/clamp.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/common.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/concat.cu
c_src/llama.cpp/ggml/src/ggml-cuda/concat.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cu
c_src/llama.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu
c_src/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu
c_src/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/conv2d.cu
c_src/llama.cpp/ggml/src/ggml-cuda/conv2d.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/convert.cu
c_src/llama.cpp/ggml/src/ggml-cuda/convert.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/count-equal.cu
c_src/llama.cpp/ggml/src/ggml-cuda/count-equal.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/cp-async.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/cpy-utils.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/cpy.cu
c_src/llama.cpp/ggml/src/ggml-cuda/cpy.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu
c_src/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/cumsum.cu
c_src/llama.cpp/ggml/src/ggml-cuda/cumsum.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/dequantize.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/diag.cu
c_src/llama.cpp/ggml/src/ggml-cuda/diag.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/diagmask.cu
c_src/llama.cpp/ggml/src/ggml-cuda/diagmask.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/fattn-tile.cu
c_src/llama.cpp/ggml/src/ggml-cuda/fattn-tile.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/fattn-vec.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/fattn.cu
c_src/llama.cpp/ggml/src/ggml-cuda/fattn.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/fill.cu
c_src/llama.cpp/ggml/src/ggml-cuda/fill.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/gated_delta_net.cu
c_src/llama.cpp/ggml/src/ggml-cuda/gated_delta_net.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/getrows.cu
c_src/llama.cpp/ggml/src/ggml-cuda/getrows.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu
c_src/llama.cpp/ggml/src/ggml-cuda/gla.cu
c_src/llama.cpp/ggml/src/ggml-cuda/gla.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/im2col.cu
c_src/llama.cpp/ggml/src/ggml-cuda/im2col.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/mean.cu
c_src/llama.cpp/ggml/src/ggml-cuda/mean.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/mma.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/mmf.cu
c_src/llama.cpp/ggml/src/ggml-cuda/mmf.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/mmid.cu
c_src/llama.cpp/ggml/src/ggml-cuda/mmid.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/mmq.cu
c_src/llama.cpp/ggml/src/ggml-cuda/mmq.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/mmvf.cu
c_src/llama.cpp/ggml/src/ggml-cuda/mmvf.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/mmvq.cu
c_src/llama.cpp/ggml/src/ggml-cuda/mmvq.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/norm.cu
c_src/llama.cpp/ggml/src/ggml-cuda/norm.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/opt-step-adamw.cu
c_src/llama.cpp/ggml/src/ggml-cuda/opt-step-adamw.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/opt-step-sgd.cu
c_src/llama.cpp/ggml/src/ggml-cuda/opt-step-sgd.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/out-prod.cu
c_src/llama.cpp/ggml/src/ggml-cuda/out-prod.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/pad.cu
c_src/llama.cpp/ggml/src/ggml-cuda/pad.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/pad_reflect_1d.cu
c_src/llama.cpp/ggml/src/ggml-cuda/pad_reflect_1d.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/pool2d.cu
c_src/llama.cpp/ggml/src/ggml-cuda/pool2d.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/quantize.cu
c_src/llama.cpp/ggml/src/ggml-cuda/quantize.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/reduce_rows.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/roll.cu
c_src/llama.cpp/ggml/src/ggml-cuda/roll.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/rope.cu
c_src/llama.cpp/ggml/src/ggml-cuda/rope.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/scale.cu
c_src/llama.cpp/ggml/src/ggml-cuda/scale.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/set-rows.cu
c_src/llama.cpp/ggml/src/ggml-cuda/set-rows.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/set.cu
c_src/llama.cpp/ggml/src/ggml-cuda/set.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/snake.cu
c_src/llama.cpp/ggml/src/ggml-cuda/snake.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/softcap.cu
c_src/llama.cpp/ggml/src/ggml-cuda/softcap.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/softmax.cu
c_src/llama.cpp/ggml/src/ggml-cuda/softmax.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/solve_tri.cu
c_src/llama.cpp/ggml/src/ggml-cuda/solve_tri.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu
c_src/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu
c_src/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/sum.cu
c_src/llama.cpp/ggml/src/ggml-cuda/sum.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/sumrows.cu
c_src/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq192-dv128.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq320-dv256.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq512-dv512.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-bf16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-f16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q8_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-bf16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-bf16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-bf16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-bf16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-bf16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-bf16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-nvfp4.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q1_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu
c_src/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu
c_src/llama.cpp/ggml/src/ggml-cuda/top-k.cu
c_src/llama.cpp/ggml/src/ggml-cuda/top-k.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/topk-moe.cu
c_src/llama.cpp/ggml/src/ggml-cuda/topk-moe.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/tri.cu
c_src/llama.cpp/ggml/src/ggml-cuda/tri.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/tsembd.cu
c_src/llama.cpp/ggml/src/ggml-cuda/tsembd.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/unary.cu
c_src/llama.cpp/ggml/src/ggml-cuda/unary.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/upscale.cu
c_src/llama.cpp/ggml/src/ggml-cuda/upscale.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/vecdotq.cuh
c_src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h
c_src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h
c_src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h
c_src/llama.cpp/ggml/src/ggml-cuda/wkv.cu
c_src/llama.cpp/ggml/src/ggml-cuda/wkv.cuh
c_src/llama.cpp/ggml/src/ggml-impl.h
c_src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt
c_src/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.cpp
c_src/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h
c_src/llama.cpp/ggml/src/ggml-metal/ggml-metal-context.h
c_src/llama.cpp/ggml/src/ggml-metal/ggml-metal-context.m
c_src/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.cpp
c_src/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.h
c_src/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.m
c_src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h
c_src/llama.cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp
c_src/llama.cpp/ggml/src/ggml-metal/ggml-metal-ops.h
c_src/llama.cpp/ggml/src/ggml-metal/ggml-metal.cpp
c_src/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal
c_src/llama.cpp/ggml/src/ggml-opt.cpp
c_src/llama.cpp/ggml/src/ggml-quants.c
c_src/llama.cpp/ggml/src/ggml-quants.h
c_src/llama.cpp/ggml/src/ggml-threading.cpp
c_src/llama.cpp/ggml/src/ggml-threading.h
c_src/llama.cpp/ggml/src/ggml.c
c_src/llama.cpp/ggml/src/ggml.cpp
c_src/llama.cpp/ggml/src/gguf.cpp
c_src/llama.cpp/include/llama-cpp.h
c_src/llama.cpp/include/llama.h
c_src/llama.cpp/src/CMakeLists.txt
c_src/llama.cpp/src/llama-adapter.cpp
c_src/llama.cpp/src/llama-adapter.h
c_src/llama.cpp/src/llama-arch.cpp
c_src/llama.cpp/src/llama-arch.h
c_src/llama.cpp/src/llama-batch.cpp
c_src/llama.cpp/src/llama-batch.h
c_src/llama.cpp/src/llama-chat.cpp
c_src/llama.cpp/src/llama-chat.h
c_src/llama.cpp/src/llama-context.cpp
c_src/llama.cpp/src/llama-context.h
c_src/llama.cpp/src/llama-cparams.cpp
c_src/llama.cpp/src/llama-cparams.h
c_src/llama.cpp/src/llama-ext.h
c_src/llama.cpp/src/llama-grammar.cpp
c_src/llama.cpp/src/llama-grammar.h
c_src/llama.cpp/src/llama-graph.cpp
c_src/llama.cpp/src/llama-graph.h
c_src/llama.cpp/src/llama-hparams.cpp
c_src/llama.cpp/src/llama-hparams.h
c_src/llama.cpp/src/llama-impl.cpp
c_src/llama.cpp/src/llama-impl.h
c_src/llama.cpp/src/llama-io.cpp
c_src/llama.cpp/src/llama-io.h
c_src/llama.cpp/src/llama-kv-cache-iswa.cpp
c_src/llama.cpp/src/llama-kv-cache-iswa.h
c_src/llama.cpp/src/llama-kv-cache.cpp
c_src/llama.cpp/src/llama-kv-cache.h
c_src/llama.cpp/src/llama-kv-cells.h
c_src/llama.cpp/src/llama-memory-hybrid-iswa.cpp
c_src/llama.cpp/src/llama-memory-hybrid-iswa.h
c_src/llama.cpp/src/llama-memory-hybrid.cpp
c_src/llama.cpp/src/llama-memory-hybrid.h
c_src/llama.cpp/src/llama-memory-recurrent.cpp
c_src/llama.cpp/src/llama-memory-recurrent.h
c_src/llama.cpp/src/llama-memory.cpp
c_src/llama.cpp/src/llama-memory.h
c_src/llama.cpp/src/llama-mmap.cpp
c_src/llama.cpp/src/llama-mmap.h
c_src/llama.cpp/src/llama-model-loader.cpp
c_src/llama.cpp/src/llama-model-loader.h
c_src/llama.cpp/src/llama-model-saver.cpp
c_src/llama.cpp/src/llama-model-saver.h
c_src/llama.cpp/src/llama-model.cpp
c_src/llama.cpp/src/llama-model.h
c_src/llama.cpp/src/llama-quant.cpp
c_src/llama.cpp/src/llama-quant.h
c_src/llama.cpp/src/llama-sampler.cpp
c_src/llama.cpp/src/llama-sampler.h
c_src/llama.cpp/src/llama-vocab.cpp
c_src/llama.cpp/src/llama-vocab.h
c_src/llama.cpp/src/llama.cpp
c_src/llama.cpp/src/models/afmoe.cpp
c_src/llama.cpp/src/models/apertus.cpp
c_src/llama.cpp/src/models/arcee.cpp
c_src/llama.cpp/src/models/arctic.cpp
c_src/llama.cpp/src/models/arwkv7.cpp
c_src/llama.cpp/src/models/baichuan.cpp
c_src/llama.cpp/src/models/bailingmoe.cpp
c_src/llama.cpp/src/models/bailingmoe2.cpp
c_src/llama.cpp/src/models/bert.cpp
c_src/llama.cpp/src/models/bitnet.cpp
c_src/llama.cpp/src/models/bloom.cpp
c_src/llama.cpp/src/models/chameleon.cpp
c_src/llama.cpp/src/models/chatglm.cpp
c_src/llama.cpp/src/models/codeshell.cpp
c_src/llama.cpp/src/models/cogvlm.cpp
c_src/llama.cpp/src/models/cohere2.cpp
c_src/llama.cpp/src/models/command-r.cpp
c_src/llama.cpp/src/models/dbrx.cpp
c_src/llama.cpp/src/models/deci.cpp
c_src/llama.cpp/src/models/deepseek.cpp
c_src/llama.cpp/src/models/deepseek2.cpp
c_src/llama.cpp/src/models/deepseek2ocr.cpp
c_src/llama.cpp/src/models/delta-net-base.cpp
c_src/llama.cpp/src/models/dots1.cpp
c_src/llama.cpp/src/models/dream.cpp
c_src/llama.cpp/src/models/ernie4-5-moe.cpp
c_src/llama.cpp/src/models/ernie4-5.cpp
c_src/llama.cpp/src/models/eurobert.cpp
c_src/llama.cpp/src/models/exaone-moe.cpp
c_src/llama.cpp/src/models/exaone.cpp
c_src/llama.cpp/src/models/exaone4.cpp
c_src/llama.cpp/src/models/falcon-h1.cpp
c_src/llama.cpp/src/models/falcon.cpp
c_src/llama.cpp/src/models/gemma-embedding.cpp
c_src/llama.cpp/src/models/gemma.cpp
c_src/llama.cpp/src/models/gemma2.cpp
c_src/llama.cpp/src/models/gemma3.cpp
c_src/llama.cpp/src/models/gemma3n.cpp
c_src/llama.cpp/src/models/gemma4.cpp
c_src/llama.cpp/src/models/glm-dsa.cpp
c_src/llama.cpp/src/models/glm4-moe.cpp
c_src/llama.cpp/src/models/glm4.cpp
c_src/llama.cpp/src/models/gpt2.cpp
c_src/llama.cpp/src/models/gptneox.cpp
c_src/llama.cpp/src/models/granite-hybrid.cpp
c_src/llama.cpp/src/models/granite-moe.cpp
c_src/llama.cpp/src/models/granite.cpp
c_src/llama.cpp/src/models/grok.cpp
c_src/llama.cpp/src/models/grovemoe.cpp
c_src/llama.cpp/src/models/hunyuan-dense.cpp
c_src/llama.cpp/src/models/hunyuan-moe.cpp
c_src/llama.cpp/src/models/hunyuan-vl.cpp
c_src/llama.cpp/src/models/internlm2.cpp
c_src/llama.cpp/src/models/jais.cpp
c_src/llama.cpp/src/models/jais2.cpp
c_src/llama.cpp/src/models/jamba.cpp
c_src/llama.cpp/src/models/jina-bert-v2.cpp
c_src/llama.cpp/src/models/jina-bert-v3.cpp
c_src/llama.cpp/src/models/kimi-linear.cpp
c_src/llama.cpp/src/models/lfm2.cpp
c_src/llama.cpp/src/models/lfm2moe.cpp
c_src/llama.cpp/src/models/llada-moe.cpp
c_src/llama.cpp/src/models/llada.cpp
c_src/llama.cpp/src/models/llama-embed.cpp
c_src/llama.cpp/src/models/llama.cpp
c_src/llama.cpp/src/models/llama4.cpp
c_src/llama.cpp/src/models/maincoder.cpp
c_src/llama.cpp/src/models/mamba-base.cpp
c_src/llama.cpp/src/models/mamba.cpp
c_src/llama.cpp/src/models/mamba2.cpp
c_src/llama.cpp/src/models/mimo2.cpp
c_src/llama.cpp/src/models/minicpm.cpp
c_src/llama.cpp/src/models/minicpm3.cpp
c_src/llama.cpp/src/models/minimax-m2.cpp
c_src/llama.cpp/src/models/mistral3.cpp
c_src/llama.cpp/src/models/mistral4.cpp
c_src/llama.cpp/src/models/models.h
c_src/llama.cpp/src/models/modern-bert.cpp
c_src/llama.cpp/src/models/mpt.cpp
c_src/llama.cpp/src/models/nemotron-h-moe.cpp
c_src/llama.cpp/src/models/nemotron-h.cpp
c_src/llama.cpp/src/models/nemotron.cpp
c_src/llama.cpp/src/models/neo-bert.cpp
c_src/llama.cpp/src/models/nomic-bert-moe.cpp
c_src/llama.cpp/src/models/nomic-bert.cpp
c_src/llama.cpp/src/models/olmo.cpp
c_src/llama.cpp/src/models/olmo2.cpp
c_src/llama.cpp/src/models/olmoe.cpp
c_src/llama.cpp/src/models/openai-moe.cpp
c_src/llama.cpp/src/models/openelm.cpp
c_src/llama.cpp/src/models/orion.cpp
c_src/llama.cpp/src/models/paddleocr.cpp
c_src/llama.cpp/src/models/pangu-embed.cpp
c_src/llama.cpp/src/models/phi2.cpp
c_src/llama.cpp/src/models/phi3.cpp
c_src/llama.cpp/src/models/phimoe.cpp
c_src/llama.cpp/src/models/plamo.cpp
c_src/llama.cpp/src/models/plamo2.cpp
c_src/llama.cpp/src/models/plamo3.cpp
c_src/llama.cpp/src/models/plm.cpp
c_src/llama.cpp/src/models/qwen.cpp
c_src/llama.cpp/src/models/qwen2.cpp
c_src/llama.cpp/src/models/qwen2moe.cpp
c_src/llama.cpp/src/models/qwen2vl.cpp
c_src/llama.cpp/src/models/qwen3.cpp
c_src/llama.cpp/src/models/qwen35.cpp
c_src/llama.cpp/src/models/qwen35moe.cpp
c_src/llama.cpp/src/models/qwen3moe.cpp
c_src/llama.cpp/src/models/qwen3next.cpp
c_src/llama.cpp/src/models/qwen3vl.cpp
c_src/llama.cpp/src/models/qwen3vlmoe.cpp
c_src/llama.cpp/src/models/refact.cpp
c_src/llama.cpp/src/models/rnd1.cpp
c_src/llama.cpp/src/models/rwkv6-base.cpp
c_src/llama.cpp/src/models/rwkv6.cpp
c_src/llama.cpp/src/models/rwkv6qwen2.cpp
c_src/llama.cpp/src/models/rwkv7-base.cpp
c_src/llama.cpp/src/models/rwkv7.cpp
c_src/llama.cpp/src/models/seed-oss.cpp
c_src/llama.cpp/src/models/smallthinker.cpp
c_src/llama.cpp/src/models/smollm3.cpp
c_src/llama.cpp/src/models/stablelm.cpp
c_src/llama.cpp/src/models/starcoder.cpp
c_src/llama.cpp/src/models/starcoder2.cpp
c_src/llama.cpp/src/models/step35.cpp
c_src/llama.cpp/src/models/t5.cpp
c_src/llama.cpp/src/models/t5encoder.cpp
c_src/llama.cpp/src/models/wavtokenizer-dec.cpp
c_src/llama.cpp/src/models/xverse.cpp
c_src/llama.cpp/src/unicode-data.cpp
c_src/llama.cpp/src/unicode-data.h
c_src/llama.cpp/src/unicode.cpp
c_src/llama.cpp/src/unicode.h
config/sys.config
do_cmake.sh
do_llama.sh
guides/building.md
guides/caching.md
guides/configuration.md
guides/examples.md
guides/loading.md
include/erllama_cache.hrl
internals/cache-design.md
internals/nif-safety.md
internals/publish-protocol.md
rebar.config
rebar.lock
src/erllama.app.src
src/erllama.erl
src/erllama_app.erl
src/erllama_cache.erl
src/erllama_cache_bench.erl
src/erllama_cache_counters.erl
src/erllama_cache_disk_srv.erl
src/erllama_cache_key.erl
src/erllama_cache_kvc.erl
src/erllama_cache_meta_srv.erl
src/erllama_cache_policy.erl
src/erllama_cache_ram.erl
src/erllama_cache_ramfile_srv.erl
src/erllama_cache_sup.erl
src/erllama_cache_writer.erl
src/erllama_inflight.erl
src/erllama_model.erl
src/erllama_model_backend.erl
src/erllama_model_llama.erl
src/erllama_model_stub.erl
src/erllama_model_sup.erl
src/erllama_nif.erl
src/erllama_pressure.erl
src/erllama_pressure_nvidia_smi.erl
src/erllama_pressure_system.erl
src/erllama_registry.erl
src/erllama_scheduler.erl
src/erllama_sup.erl
rebar.lock
[].
About Hex
About
Blog
Sponsors
GitHub
Twitter
Help
Documentation
Specifications
Report Client Issue
Report General Issue
Contact Support
Policies and Terms
Code of Conduct
Terms of Service
Privacy Policy
Copyright Policy
Dispute Policy
2026 © Six Colors AB.
Powered by the
Erlang VM
and the
Elixir programming language
.