You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Hi,
I am trying to build an agentic chatbot using RAG. I have gone through the documentation linked below, but I’m still encountering some errors and have a few questions:
How many past conversations are shared with the model?
Is it possible to control the number of conversations sent to the model?
import os
import asyncio
from openai import OpenAI
from typing import List
from autogen_agentchat.ui import Console
from autogen_agentchat.agents import AssistantAgent
from autogen_core.memory import MemoryContent, MemoryMimeType
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_core.memory import Memory, MemoryContent, MemoryMimeType
from autogen_ext.memory.chromadb import ChromaDBVectorMemory, PersistentChromaDBVectorMemoryConfig
from chromadb.api.types import Documents, EmbeddingFunction, Embeddings
from typing import Literal, Callable, Any, Dict
from pydantic import Field, BaseModel
from file_extraction import raw_text_from_file
from prompt import SYSTEM_PROMPT
from config import load_config_data
CHROMA_PERSIST_DIR = load_config_data.get("VECTOR_DB_PATH", "./Vector DB")
CHROMA_COLLECTION_NAME = "collection_1"
class BaseEmbeddingFunctionConfig(BaseModel):
"""Base configuration for embedding functions."""
function_type: Literal["default", "sentence_transformer", "openai", "custom"]
class CustomEmbeddingFunctionConfig(BaseEmbeddingFunctionConfig):
"""Configuration for custom embedding functions."""
function_type: Literal["default", "sentence_transformer", "openai", "custom"] = "custom"
function: Callable[..., Any] = Field(description="A factory function that returns a ChromaDB-compatible embedding function instance.")
params: Dict[str, Any] = Field(default_factory=dict, description="Parameters to pass to the factory function.")
class PersistentChromaDBVectorMemoryConfig(BaseModel):
embedding_function_config: BaseEmbeddingFunctionConfig = Field(
default_factory=dict,
description="Configuration for the embedding function"
)
def create_custom_embedding_function(openai_client: OpenAI, model_name: str) -> EmbeddingFunction:
"""
This is the factory function. It takes the necessary components and returns
a fully configured instance of our custom embedding function class.
"""
class MyCustomEmbeddingFunction(EmbeddingFunction):
def __init__(self, openai_client: OpenAI, model_name: str):
self.client = openai_client
self.model_name = model_name
def __call__(self, input_texts: Documents) -> Embeddings:
"""
Embeds a list of texts using the specified OpenAI-compatible client and model.
"""
if not input_texts:
return []
input_texts = [text if text.strip() else " " for text in input_texts]
response = self.client.embeddings.create(input=input_texts, model=self.model_name)
embeddings = [item.embedding for item in response.data]
return embeddings
return MyCustomEmbeddingFunction(openai_client=openai_client, model_name=model_name)
embd_client = OpenAI(api_key=MISTRAL_API_KEY, base_url=MISTRAL_BASE_URL )
custom_embedding_config = CustomEmbeddingFunctionConfig(
function=create_custom_embedding_function,
params={"openai_client": embd_client, "model_name": MISTRAL_EMBEDDING_MODEL_NAME}
)
model_client = OpenAIChatCompletionClient(
model = chat_model,
api_key = huggingface_api_key,
base_url = huggingface_base_url,
temperature = 0.3,
max_tokens = 4096,
model_info = {
"vision": False,
"function_calling": True,
"json_output": False,
"family": "unknown",
"multiple_system_messages": True
},
)
chroma_user_memory = ChromaDBVectorMemory(
config=PersistentChromaDBVectorMemoryConfig(
embedding_function_config=custom_embedding_config,
collection_name=CHROMA_COLLECTION_NAME,
persistence_path=os.path.join(CHROMA_PERSIST_DIR, CHROMA_COLLECTION_NAME),
k=3,
score_threshold=0.4,
)
)
# chroma_user_memory.clear() # Clear existing memory
class SimpleDocumentIndexer:
"""Basic document indexer for AutoGen Memory."""
def __init__(self, memory: Memory, chunk_size: int = 1500) -> None:
self.memory = memory
self.chunk_size = chunk_size
async def _fetch_content(self, file_path: str) -> str:
"""Fetch content from URL or file."""
text = raw_text_from_file(file_path)
return text
def _split_text(self, text: str) -> List[str]:
"""Split text into fixed-size chunks."""
chunks: list[str] = []
for i in range(0, len(text), self.chunk_size):
chunk = text[i : i + self.chunk_size]
chunks.append(chunk.strip())
return chunks
async def index_documents(self, sources: List[str]) -> int:
"""Index documents into memory."""
total_chunks = 0
for source in sources:
try:
content = await self._fetch_content(source)
chunks = self._split_text(content)
for i, chunk in enumerate(chunks):
await self.memory.add(MemoryContent(content=chunk, mime_type=MemoryMimeType.TEXT, metadata={"source": os.path.basename(source), "chunk_index": i}))
total_chunks += len(chunks)
except Exception as e:
print(f"Error indexing {source}: {str(e)}")
return total_chunks
async def data_injection_to_db(files:List[str]):
"""
Main function to set up and run the RAG agent.
"""
indexer = SimpleDocumentIndexer(memory=chroma_user_memory)
chunks: int = await indexer.index_documents(files)
return f"Successfully injected {chunks} chunks to the DB"
file_list = ["/home/Downloads/Stuff/Agentic AI.txt", "/home/Downloads/Stuff/Note.txt"]
print(asyncio.run(data_injection_to_db(file_list)))
Below is the error
Failed to initialize ChromaDB client: 'PersistentChromaDBVectorMemoryConfig' object has no attribute 'allow_reset'
Error indexing /home/Downloads/Stuff/Agentic AI.txt: 'PersistentChromaDBVectorMemoryConfig' object has no attribute 'allow_reset'
Failed to initialize ChromaDB client: 'PersistentChromaDBVectorMemoryConfig' object has no attribute 'allow_reset'
Error indexing /home/Downloads/Stuff/Note.txt: 'PersistentChromaDBVectorMemoryConfig' object has no attribute 'allow_reset'
Could someone please guide me on where the issue might be?
reacted with thumbs up emoji reacted with thumbs down emoji reacted with laugh emoji reacted with hooray emoji reacted with confused emoji reacted with heart emoji reacted with rocket emoji reacted with eyes emoji
Uh oh!
There was an error while loading. Please reload this page.
-
Hi,
I am trying to build an agentic chatbot using RAG. I have gone through the documentation linked below, but I’m still encountering some errors and have a few questions:
memory
custom embedding
Below is the code i am using
Below is the error
Could someone please guide me on where the issue might be?
Thank you in advance.
Beta Was this translation helpful? Give feedback.
All reactions