Skip to content

Don't use jinja in the multimodal case #1435

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit on
May 24, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 29 additions & 22 deletions ramalama/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,12 +322,16 @@ def setup_mounts(self, model_path, args):
if self.store is not None:
_, tag, _ = self.extract_model_identifiers()
ref_file = self.store.get_ref_file(tag)
if ref_file is not None and ref_file.chat_template_name != "":
chat_template_path = self.store.get_snapshot_file_path(ref_file.hash, ref_file.chat_template_name)
self.engine.add([f"--mount=type=bind,src={chat_template_path},destination={MNT_CHAT_TEMPLATE_FILE},ro"])
if ref_file is not None and ref_file.mmproj_name != "":
mmproj_path = self.store.get_snapshot_file_path(ref_file.hash, ref_file.mmproj_name)
self.engine.add([f"--mount=type=bind,src={mmproj_path},destination={MNT_MMPROJ_FILE},ro"])
if ref_file is not None:
if ref_file.chat_template_name != "":
chat_template_path = self.store.get_snapshot_file_path(ref_file.hash, ref_file.chat_template_name)
self.engine.add(
[f"--mount=type=bind,src={chat_template_path},destination={MNT_CHAT_TEMPLATE_FILE},ro"]
)

if ref_file.mmproj_name != "":
mmproj_path = self.store.get_snapshot_file_path(ref_file.hash, ref_file.mmproj_name)
self.engine.add([f"--mount=type=bind,src={mmproj_path},destination={MNT_MMPROJ_FILE},ro"])

def handle_rag_mode(self, args, cmd_args):
# force accel_image to use -rag version. Drop TAG if it exists
Expand Down Expand Up @@ -523,19 +527,23 @@ def build_exec_args_serve(self, args, exec_model_path, chat_template_path="", mm
exec_args += ["llama-server", "--port", args.port, "--model", exec_model_path]
if mmproj_path:
exec_args += ["--mmproj", mmproj_path]
else:
exec_args += ["--jinja"]

exec_args += [
"--alias",
self.model,
"--ctx-size",
f"{args.context}",
"--temp",
f"{args.temp}",
"--jinja",
"--cache-reuse",
"256",
] + args.runtime_args

if draft_model_path:
exec_args += ['--model_draft', draft_model_path]

# Placeholder for clustering, it might be kept for override
rpc_nodes = os.getenv("RAMALAMA_LLAMACPP_RPC_NODES")
if rpc_nodes:
Expand Down Expand Up @@ -609,35 +617,34 @@ def execute_command(self, model_path, exec_args, args):
def serve(self, args, quiet=False):
self.validate_args(args)
args.port = compute_serving_port(args.port, args.debug, quiet)

model_path = self.get_model_path(args)
if is_split_file_model(model_path):
mnt_file = MNT_DIR + '/' + self.mnt_path
else:
mnt_file = MNT_FILE

exec_model_path = mnt_file if args.container or args.generate else model_path

chat_template_path = ""
mmproj_path = ""
if self.store is not None:
_, tag, _ = self.extract_model_identifiers()
ref_file = self.store.get_ref_file(tag)
if ref_file is not None and ref_file.chat_template_name != "":
chat_template_path = (
MNT_CHAT_TEMPLATE_FILE
if args.container or args.generate
else self.store.get_snapshot_file_path(ref_file.hash, ref_file.chat_template_name)
)
if ref_file is not None and ref_file.mmproj_name != "":
mmproj_path = (
MNT_MMPROJ_FILE
if args.container or args.generate
else self.store.get_snapshot_file_path(ref_file.hash, ref_file.mmproj_name)
)
if ref_file is not None:
if ref_file.chat_template_name != "":
chat_template_path = (
MNT_CHAT_TEMPLATE_FILE
if args.container or args.generate
else self.store.get_snapshot_file_path(ref_file.hash, ref_file.chat_template_name)
)

if ref_file.mmproj_name != "":
mmproj_path = (
MNT_MMPROJ_FILE
if args.container or args.generate
else self.store.get_snapshot_file_path(ref_file.hash, ref_file.mmproj_name)
)

exec_args = self.build_exec_args_serve(args, exec_model_path, chat_template_path, mmproj_path)

exec_args = self.handle_runtime(args, exec_args, exec_model_path)
if self.generate_container_config(model_path, chat_template_path, args, exec_args):
return
Expand Down
Loading