Skip to content

Using ramalama run --rag makes the model unable to answer things outside the RAG #1260

Open
@jlebon

Description

@jlebon

Issue Description

When running with a RAG,

  1. The prompt changes (instead of the seal emoji, it's just >)
  2. The model becomes very slow
  3. But more importantly, the model seems to know nothing about things outside the RAG

Steps to reproduce the issue

Without RAG:

$ ramalama run qwen2.5-coder:14b
🦭 > how do I reverse a list in Python?
In Python, you can reverse a list in several ways. Here are three common methods:

...

With RAG:

$ echo "Jonathan's favourite food is pizza." > out.md
$ ramalama --image quay.io/ramalama/ramalama-rag rag out.md localhost/myrag:0.1

Building localhost/myrag:0.1...
adding vectordb...
138bd1bdce699c0e73dd52603b409762a680da4d9372a733ddfa84060e78fabb
$ ramalama run --rag localhost/myrag:0.1 qwen2.5-coder:14b
> What's Jonathan's favourite food?
Pizza
> how do I reverse a list in Python?
!['I don't know']

Describe the results you received

The model can no longer answer questions that it answered correctly when run without the RAG.

Describe the results you expected

The model can still answer the questions that it answers correctly when run without the RAG.

ramalama info output

{
    "Accelerator": "none",
    "Engine": {
        "Info": {
            "host": {
                "arch": "amd64",
                "buildahVersion": "1.39.2",
                "cgroupControllers": [
                    "cpu",
                    "io",
                    "memory",
                    "pids"
                ],
                "cgroupManager": "systemd",
                "cgroupVersion": "v2",
                "conmon": {
                    "package": "conmon-2.1.13-1.fc41.x86_64",
                    "path": "/usr/bin/conmon",
                    "version": "conmon version 2.1.13, commit: "
                },
                "cpuUtilization": {
                    "idlePercent": 98.76,
                    "systemPercent": 0.27,
                    "userPercent": 0.98
                },
                "cpus": 16,
                "databaseBackend": "sqlite",
                "distribution": {
                    "distribution": "fedora",
                    "variant": "silverblue",
                    "version": "41"
                },
                "eventLogger": "journald",
                "freeLocks": 2043,
                "hostname": "flux",
                "idMappings": {
                    "gidmap": [
                        {
                            "container_id": 0,
                            "host_id": 1000,
                            "size": 1
                        },
                        {
                            "container_id": 1,
                            "host_id": 100000,
                            "size": 65536
                        }
                    ],
                    "uidmap": [
                        {
                            "container_id": 0,
                            "host_id": 1000,
                            "size": 1
                        },
                        {
                            "container_id": 1,
                            "host_id": 100000,
                            "size": 65536
                        }
                    ]
                },
                "kernel": "6.13.9-200.fc41.x86_64",
                "linkmode": "dynamic",
                "logDriver": "journald",
                "memFree": 13267943424,
                "memTotal": 67100880896,
                "networkBackend": "netavark",
                "networkBackendInfo": {
                    "backend": "netavark",
                    "dns": {
                        "package": "aardvark-dns-1.14.0-1.fc41.x86_64",
                        "path": "/usr/libexec/podman/aardvark-dns",
                        "version": "aardvark-dns 1.14.0"
                    },
                    "package": "netavark-1.14.1-1.fc41.x86_64",
                    "path": "/usr/libexec/podman/netavark",
                    "version": "netavark 1.14.1"
                },
                "ociRuntime": {
                    "name": "crun",
                    "package": "crun-1.20-2.fc41.x86_64",
                    "path": "/usr/bin/crun",
                    "version": "crun version 1.20\ncommit: 9c9a76ac11994701dd666c4f0b869ceffb599a66\nrundir: /run/user/1000/crun\nspec: 1.0.0\n+SYSTEMD +SELINUX +APPARMOR +CAP +SECCOMP +EBPF +CRIU +LIBKRUN +WASM:wasmedge +YAJL"
                },
                "os": "linux",
                "pasta": {
                    "executable": "/usr/bin/pasta",
                    "package": "passt-0^20250320.g32f6212-2.fc41.x86_64",
                    "version": ""
                },
                "remoteSocket": {
                    "exists": true,
                    "path": "/run/user/1000/podman/podman.sock"
                },
                "rootlessNetworkCmd": "pasta",
                "security": {
                    "apparmorEnabled": false,
                    "capabilities": "CAP_CHOWN,CAP_DAC_OVERRIDE,CAP_FOWNER,CAP_FSETID,CAP_KILL,CAP_NET_BIND_SERVICE,CAP_SETFCAP,CAP_SETGID,CAP_SETPCAP,CAP_SETUID,CAP_SYS_CHROOT",
                    "rootless": true,
                    "seccompEnabled": true,
                    "seccompProfilePath": "/usr/share/containers/seccomp.json",
                    "selinuxEnabled": true
                },
                "serviceIsRemote": false,
                "slirp4netns": {
                    "executable": "/usr/bin/slirp4netns",
                    "package": "slirp4netns-1.3.1-1.fc41.x86_64",
                    "version": "slirp4netns version 1.3.1\ncommit: e5e368c4f5db6ae75c2fce786e31eef9da6bf236\nlibslirp: 4.8.0\nSLIRP_CONFIG_VERSION_MAX: 5\nlibseccomp: 2.5.5"
                },
                "swapFree": 5491326976,
                "swapTotal": 8589930496,
                "uptime": "201h 32m 52.00s (Approximately 8.38 days)",
                "variant": ""
            },
            "plugins": {
                "authorization": null,
                "log": [
                    "k8s-file",
                    "none",
                    "passthrough",
                    "journald"
                ],
                "network": [
                    "bridge",
                    "macvlan",
                    "ipvlan"
                ],
                "volume": [
                    "local"
                ]
            },
            "registries": {
                "search": [
                    "registry.fedoraproject.org",
                    "registry.access.redhat.com",
                    "docker.io"
                ]
            },
            "store": {
                "configFile": "/var/home/jlebon/.config/containers/storage.conf",
                "containerStore": {
                    "number": 3,
                    "paused": 0,
                    "running": 1,
                    "stopped": 2
                },
                "graphDriverName": "overlay",
                "graphOptions": {},
                "graphRoot": "/var/home/jlebon/.local/share/containers/storage",
                "graphRootAllocated": 1022488477696,
                "graphRootUsed": 908609159168,
                "graphStatus": {
                    "Backing Filesystem": "btrfs",
                    "Native Overlay Diff": "true",
                    "Supports d_type": "true",
                    "Supports shifting": "false",
                    "Supports volatile": "true",
                    "Using metacopy": "false"
                },
                "imageCopyTmpDir": "/var/tmp",
                "imageStore": {
                    "number": 263
                },
                "runRoot": "/run/user/1000/containers",
                "transientStore": false,
                "volumePath": "/var/home/jlebon/.local/share/containers/storage/volumes"
            },
            "version": {
                "APIVersion": "5.4.1",
                "BuildOrigin": "Fedora Project",
                "Built": 1741651200,
                "BuiltTime": "Mon Mar 10 20:00:00 2025",
                "GitCommit": "b79bc8afe796cba51dd906270a7e1056ccdfcf9e",
                "GoVersion": "go1.23.7",
                "Os": "linux",
                "OsArch": "linux/amd64",
                "Version": "5.4.1"
            }
        },
        "Name": "podman"
    },
    "Image": "quay.io/ramalama/ramalama:0.7",
    "Runtime": "llama.cpp",
    "Store": "/var/home/jlebon/.local/share/ramalama",
    "UseContainer": true,
    "Version": "0.7.4"
}

Upstream Latest Release

No

Additional environment details

No response

Additional information

Unrelated: you need to update the following line, which still points to the Podman releases page:

description: Have you tried running the [latest upstream release](https://github.com/containers/podman/releases/latest)

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions