Telosnex
diff --git a/‎example/macos/Podfile.lock
Lines changed: 3 additions & 3 deletions b/‎example/macos/Podfile.lock
Lines changed: 3 additions & 3 deletions
diff --git a/‎example/macos/Runner.xcodeproj/xcshareddata/xcschemes/Runner.xcscheme
Lines changed: 1 addition & 0 deletions b/‎example/macos/Runner.xcodeproj/xcshareddata/xcschemes/Runner.xcscheme
Lines changed: 1 addition & 0 deletions
diff --git a/‎macos/llama.cpp/.devops/cpu.Dockerfile
Lines changed: 12 additions & 1 deletion b/‎macos/llama.cpp/.devops/cpu.Dockerfile
Lines changed: 12 additions & 1 deletion
diff --git a/‎macos/llama.cpp/.devops/cuda.Dockerfile
Lines changed: 1 addition & 1 deletion b/‎macos/llama.cpp/.devops/cuda.Dockerfile
Lines changed: 1 addition & 1 deletion
diff --git a/‎macos/llama.cpp/.devops/llama-cpp-cuda.srpm.spec
Lines changed: 2 additions & 2 deletions b/‎macos/llama.cpp/.devops/llama-cpp-cuda.srpm.spec
Lines changed: 2 additions & 2 deletions
diff --git a/‎macos/llama.cpp/.devops/llama-cpp.srpm.spec
Lines changed: 2 additions & 2 deletions b/‎macos/llama.cpp/.devops/llama-cpp.srpm.spec
Lines changed: 2 additions & 2 deletions
diff --git a/‎macos/llama.cpp/.devops/musa.Dockerfile
Lines changed: 1 addition & 1 deletion b/‎macos/llama.cpp/.devops/musa.Dockerfile
Lines changed: 1 addition & 1 deletion
diff --git a/‎macos/llama.cpp/.devops/nix/package.nix
Lines changed: 3 additions & 3 deletions b/‎macos/llama.cpp/.devops/nix/package.nix
Lines changed: 3 additions & 3 deletions
diff --git a/‎macos/llama.cpp/.devops/rocm.Dockerfile
Lines changed: 1 addition & 1 deletion b/‎macos/llama.cpp/.devops/rocm.Dockerfile
Lines changed: 1 addition & 1 deletion
diff --git a/‎macos/llama.cpp/.devops/tools.sh
Lines changed: 9 additions & 1 deletion b/‎macos/llama.cpp/.devops/tools.sh
Lines changed: 9 additions & 1 deletion
diff --git a/‎macos/llama.cpp/.devops/vulkan.Dockerfile
Lines changed: 6 additions & 5 deletions b/‎macos/llama.cpp/.devops/vulkan.Dockerfile
Lines changed: 6 additions & 5 deletions
diff --git a/‎macos/llama.cpp/.editorconfig
Lines changed: 8 additions & 0 deletions b/‎macos/llama.cpp/.editorconfig
Lines changed: 8 additions & 0 deletions
diff --git a/‎macos/llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml
Lines changed: 3 additions & 3 deletions b/‎macos/llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml
Lines changed: 3 additions & 3 deletions
diff --git a/‎macos/llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml
Lines changed: 1 addition & 1 deletion b/‎macos/llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎macos/llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml
Lines changed: 2 additions & 2 deletions b/‎macos/llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml
Lines changed: 2 additions & 2 deletions
diff --git a/‎macos/llama.cpp/.github/ISSUE_TEMPLATE/config.yml
Lines changed: 3 additions & 3 deletions b/‎macos/llama.cpp/.github/ISSUE_TEMPLATE/config.yml
Lines changed: 3 additions & 3 deletions
diff --git a/‎macos/llama.cpp/.github/pull_request_template.md
Lines changed: 1 addition & 1 deletion b/‎macos/llama.cpp/.github/pull_request_template.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎macos/llama.cpp/.github/workflows/bench.yml.disabled
Lines changed: 1 addition & 11 deletions b/‎macos/llama.cpp/.github/workflows/bench.yml.disabled
Lines changed: 1 addition & 11 deletions
@@ -25,10 +25,10 @@ EXTERNAL SOURCES:
     :path: Flutter/ephemeral/.symlinks/plugins/shared_preferences_foundation/darwin
 
 SPEC CHECKSUMS:
-  file_selector_macos: 54fdab7caa3ac3fc43c9fac4d7d8d231277f8cf2
-  fllama: 26c3b6cb0e6466f5d492ce5996717c7222f22bd9
+  file_selector_macos: 585232b688707857504f9cb5f985a7c97fe4dd30
+  fllama: 70eecc4bce57bde96fc985659224585564a2bfa2
   FlutterMacOS: 8f6f14fa908a6fb3fba0cd85dbd81ec4b251fb24
-  shared_preferences_foundation: fcdcbc04712aee1108ac7fda236f363274528f78
+  shared_preferences_foundation: 9e1978ff2562383bd5676f64ec4e9aa8fa06a6f7
 
 PODFILE CHECKSUM: 236401fc2c932af29a9fcf0e97baeeb2d750d367
 
 
@@ -59,6 +59,7 @@
       ignoresPersistentStateOnLaunch = "NO"
       debugDocumentVersioning = "YES"
       debugServiceExtension = "internal"
+      enableGPUValidationMode = "1"
       allowLocationSimulation = "YES">
       <BuildableProductRunnable
          runnableDebuggingMode = "0">
 
@@ -2,14 +2,25 @@ ARG UBUNTU_VERSION=22.04
 
 FROM ubuntu:$UBUNTU_VERSION AS build
 
+ARG TARGETARCH
+
+ARG GGML_CPU_ARM_ARCH=armv8-a
+
 RUN apt-get update && \
     apt-get install -y build-essential git cmake libcurl4-openssl-dev
 
 WORKDIR /app
 
 COPY . .
 
-RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
+RUN if [ "$TARGETARCH" = "amd64" ]; then \
+        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
+    elif [ "$TARGETARCH" = "arm64" ]; then \
+        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
+    else \
+        echo "Unsupported architecture"; \
+        exit 1; \
+    fi && \
     cmake --build build -j $(nproc)
 
 RUN mkdir -p /app/lib && \
 
@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=22.04
 # This needs to generally match the container host's environment.
-ARG CUDA_VERSION=12.6.0
+ARG CUDA_VERSION=12.4.0
 # Target the CUDA build image
 ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
 
 
@@ -17,10 +17,10 @@ Version:        %( date "+%%Y%%m%%d" )
 Release:        1%{?dist}
 Summary:        CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
 License:        MIT
-Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
+Source0:        https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz
 BuildRequires:  coreutils make gcc-c++ git cuda-toolkit
 Requires:       cuda-toolkit
-URL:            https://github.com/ggerganov/llama.cpp
+URL:            https://github.com/ggml-org/llama.cpp
 
 %define debug_package %{nil}
 %define source_date_epoch_from_changelog 0
 
@@ -18,10 +18,10 @@ Version:        %( date "+%%Y%%m%%d" )
 Release:        1%{?dist}
 Summary:        CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
 License:        MIT
-Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
+Source0:        https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz
 BuildRequires:  coreutils make gcc-c++ git libstdc++-devel
 Requires:       libstdc++
-URL:            https://github.com/ggerganov/llama.cpp
+URL:            https://github.com/ggml-org/llama.cpp
 
 %define debug_package %{nil}
 %define source_date_epoch_from_changelog 0
 
@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=22.04
 # This needs to generally match the container host's environment.
-ARG MUSA_VERSION=rc3.1.0
+ARG MUSA_VERSION=rc3.1.1
 # Target the MUSA build image
 ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
 
 
@@ -133,12 +133,12 @@ effectiveStdenv.mkDerivation (finalAttrs: {
       --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
   '';
 
-  # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
+  # With PR#6015 https://github.com/ggml-org/llama.cpp/pull/6015,
   # `default.metallib` may be compiled with Metal compiler from XCode
   # and we need to escape sandbox on MacOS to access Metal compiler.
   # `xcrun` is used to find the path of the Metal compiler, which is variable
   # and not on $PATH
-  # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
+  # see https://github.com/ggml-org/llama.cpp/pull/6118 for discussion
   __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
 
   nativeBuildInputs =
@@ -220,7 +220,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
     broken = (useMetalKit && !effectiveStdenv.isDarwin);
 
     description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
-    homepage = "https://github.com/ggerganov/llama.cpp/";
+    homepage = "https://github.com/ggml-org/llama.cpp/";
     license = lib.licenses.mit;
 
     # Accommodates `nix run` and `lib.getExe`
 
@@ -11,7 +11,7 @@ ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-co
 FROM ${BASE_ROCM_DEV_CONTAINER} AS build
 
 # Unless otherwise specified, we make a fat build.
-# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
+# List from https://github.com/ggml-org/llama.cpp/pull/1087#issuecomment-1682807878
 # This is mostly tied to rocBLAS supported archs.
 # gfx803, gfx900, gfx1032, gfx1101, gfx1102, not officially supported
 # gfx906 is deprecated
 
@@ -13,9 +13,13 @@ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
     exec ./llama-quantize "$@"
 elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
     exec ./llama-cli "$@"
+elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
+    exec ./llama-bench "$@"
+elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
+    exec ./llama-perplexity "$@"
 elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
     echo "Converting PTH to GGML..."
-    for i in `ls $1/$2/ggml-model-f16.bin*`; do
+    for i in $(ls $1/$2/ggml-model-f16.bin*); do
         if [ -f "${i/f16/q4_0}" ]; then
             echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
         else
@@ -30,6 +34,10 @@ else
     echo "Available commands: "
     echo "  --run (-r): Run a model previously converted into ggml"
     echo "              ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
+    echo "  --bench (-b): Benchmark the performance of the inference for various parameters."
+    echo "              ex: -m model.gguf"
+    echo "  --perplexity (-p): Measure the perplexity of a model over a given text."
+    echo "              ex: -m model.gguf -f file.txt"
     echo "  --convert (-c): Convert a llama model into ggml"
     echo "              ex: --outtype f16 \"/models/7B/\" "
     echo "  --quantize (-q): Optimize with quantization process ggml"
 
@@ -1,4 +1,4 @@
-ARG UBUNTU_VERSION=jammy
+ARG UBUNTU_VERSION=24.04
 
 FROM ubuntu:$UBUNTU_VERSION AS build
 
@@ -7,7 +7,7 @@ RUN apt update && apt install -y git build-essential cmake wget
 
 # Install Vulkan SDK and cURL
 RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
-    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
     apt update -y && \
     apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
 
@@ -34,7 +34,7 @@ RUN mkdir -p /app/full \
 FROM ubuntu:$UBUNTU_VERSION AS base
 
 RUN apt-get update \
-    && apt-get install -y libgomp1 curl\
+    && apt-get install -y libgomp1 curl libvulkan-dev \
     && apt autoremove -y \
     && apt clean -y \
     && rm -rf /tmp/* /var/tmp/* \
@@ -55,8 +55,9 @@ RUN apt-get update \
     git \
     python3 \
     python3-pip \
-    && pip install --upgrade pip setuptools wheel \
-    && pip install -r requirements.txt \
+    python3-wheel \
+    && pip install --break-system-packages --upgrade setuptools \
+    && pip install --break-system-packages -r requirements.txt \
     && apt autoremove -y \
     && apt clean -y \
     && rm -rf /tmp/* /var/tmp/* \
 
@@ -40,3 +40,11 @@ indent_style = tab
 [examples/cvector-generator/*.txt]
 trim_trailing_whitespace = unset
 insert_final_newline = unset
+
+[models/templates/*.jinja]
+indent_style = unset
+indent_size = unset
+end_of_line = unset
+charset = unset
+trim_trailing_whitespace = unset
+insert_final_newline = unset
@@ -6,7 +6,7 @@ body:
   - type: markdown
     attributes:
       value: |
-        [Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed needs to be implemented.](https://github.com/ggerganov/llama.cpp/discussions/categories/ideas)
+        [Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed needs to be implemented.](https://github.com/ggml-org/llama.cpp/discussions/categories/ideas)
 
   - type: checkboxes
     id: prerequisites
@@ -16,11 +16,11 @@ body:
       options:
         - label: I am running the latest code. Mention the version if possible as well.
           required: true
-        - label: I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md).
+        - label: I carefully followed the [README.md](https://github.com/ggml-org/llama.cpp/blob/master/README.md).
           required: true
         - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
           required: true
-        - label: I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new and useful enhancement to share.
+        - label: I reviewed the [Discussions](https://github.com/ggml-org/llama.cpp/discussions), and have a new and useful enhancement to share.
           required: true
 
   - type: textarea
 
@@ -6,7 +6,7 @@ body:
   - type: markdown
     attributes:
       value: |
-        Don't forget to check for any [duplicate research issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22)
+        Don't forget to check for any [duplicate research issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22)
 
   - type: checkboxes
     id: research-stage
 
@@ -6,8 +6,8 @@ body:
   - type: markdown
     attributes:
       value: |
-        Don't forget to [check for existing refactor issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
-        Also you may want to check [Pull request refactor label as well](https://github.com/ggerganov/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates too.
+        Don't forget to [check for existing refactor issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
+        Also you may want to check [Pull request refactor label as well](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates too.
 
   - type: textarea
     id: background-description
 
@@ -1,11 +1,11 @@
 blank_issues_enabled: true
 contact_links:
   - name: Got an idea?
-    url: https://github.com/ggerganov/llama.cpp/discussions/categories/ideas
+    url: https://github.com/ggml-org/llama.cpp/discussions/categories/ideas
     about: Pop it there. It may then become an enhancement ticket.
   - name: Got a question?
-    url: https://github.com/ggerganov/llama.cpp/discussions/categories/q-a
+    url: https://github.com/ggml-org/llama.cpp/discussions/categories/q-a
     about: Ask a question there!
   - name: Want to contribute?
-    url: https://github.com/ggerganov/llama.cpp/wiki/contribute
+    url: https://github.com/ggml-org/llama.cpp/wiki/contribute
     about: Head to the contribution guide page of the wiki for areas you can help with
@@ -1 +1 @@
-*Make sure to read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*
+*Make sure to read the [contributing guidelines](https://github.com/ggml-org/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*
@@ -1,5 +1,5 @@
 # TODO: there have been some issues with the workflow, so disabling for now
-#       https://github.com/ggerganov/llama.cpp/issues/7893
+#       https://github.com/ggml-org/llama.cpp/issues/7893
 #
 # Benchmark
 name: Benchmark
@@ -57,17 +57,7 @@ jobs:
 
     if: |
       inputs.gpu-series == 'Standard_NC4as_T4_v3'
-      || (
-        github.event_name == 'schedule'
-        && github.ref_name == 'master'
-        && github.repository_owner == 'ggerganov'
-      )
       || github.event_name == 'pull_request_target'
-      || (
-        github.event_name == 'push'
-        && github.event.ref == 'refs/heads/master'
-        && github.repository_owner == 'ggerganov'
-      )
     steps:
       - name: Clone
         id: checkout
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-Make sure to read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR`
	`1`	`+Make sure to read the [contributing guidelines](https://github.com/ggml-org/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR`