Skip to content

Feature/Add ability to create new doc store on upsert #3965

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions packages/api-documentation/src/yml/swagger.yml
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,11 @@ paths:
type: string
format: binary
description: Files to be uploaded
docId:
type: string
nullable: true
example: '603a7b51-ae7c-4b0a-8865-e454ed2f6766'
description: Document ID to use existing configuration
loader:
type: string
nullable: true
Expand All @@ -704,6 +709,32 @@ paths:
nullable: true
example: '{"name":"postgresRecordManager"}'
description: Record Manager configurations
metadata:
type: object
nullable: true
description: Metadata associated with the document
example: { 'foo': 'bar' }
replaceExisting:
type: boolean
nullable: true
description: Whether to replace the existing document loader with the newly upserted chunks. Note that this does not delete the existing embeddings in the vector store
createNewDocStore:
type: boolean
nullable: true
description: Whether to create a new document store
docStore:
type: object
nullable: true
description: Only when createNewDocStore is true, pass in the new document store configuration
properties:
name:
type: string
example: plainText
description: Name of the new document store to be created
description:
type: string
example: plainText
description: Description of the new document store to be created
required:
- files
required: true
Expand Down Expand Up @@ -2350,16 +2381,37 @@ components:
docId:
type: string
format: uuid
nullable: true
description: Document ID within the store. If provided, existing configuration from the document will be used for the new document
metadata:
type: object
nullable: true
description: Metadata associated with the document
example: { 'foo': 'bar' }
replaceExisting:
type: boolean
nullable: true
description: Whether to replace the existing document loader with the newly upserted chunks. Note that this does not delete the existing embeddings in the vector store
createNewDocStore:
type: boolean
nullable: true
description: Whether to create a new document store
docStore:
type: object
nullable: true
description: Only when createNewDocStore is true, pass in the new document store configuration
properties:
name:
type: string
example: plainText
description: Name of the new document store to be created
description:
type: string
example: plainText
description: Description of the new document store to be created
loader:
type: object
nullable: true
properties:
name:
type: string
Expand All @@ -2370,6 +2422,7 @@ components:
description: Configuration for the loader
splitter:
type: object
nullable: true
properties:
name:
type: string
Expand All @@ -2380,6 +2433,7 @@ components:
description: Configuration for the text splitter
embedding:
type: object
nullable: true
properties:
name:
type: string
Expand All @@ -2390,6 +2444,7 @@ components:
description: Configuration for the embedding generator
vectorStore:
type: object
nullable: true
properties:
name:
type: string
Expand All @@ -2400,6 +2455,7 @@ components:
description: Configuration for the vector store
recordManager:
type: object
nullable: true
properties:
name:
type: string
Expand Down
38 changes: 37 additions & 1 deletion packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ class FireCrawl_DocumentLoaders implements INode {
this.name = 'fireCrawl'
this.type = 'Document'
this.icon = 'firecrawl.png'
this.version = 2.0
this.version = 2.1
this.category = 'Document Loaders'
this.description = 'Load data from URL using FireCrawl'
this.baseClasses = [this.type]
Expand Down Expand Up @@ -307,6 +307,42 @@ class FireCrawl_DocumentLoaders implements INode {
}
],
default: 'crawl'
},
{
// maxCrawlPages
label: 'Max Crawl Pages',
name: 'maxCrawlPages',
type: 'string',
description: 'Maximum number of pages to crawl',
optional: true,
additionalParams: true
},
{
// generateImgAltText
label: 'Generate Image Alt Text',
name: 'generateImgAltText',
type: 'boolean',
description: 'Generate alt text for images',
optional: true,
additionalParams: true
},
{
// returnOnlyUrls
label: 'Return Only URLs',
name: 'returnOnlyUrls',
type: 'boolean',
description: 'Return only URLs of the crawled pages',
optional: true,
additionalParams: true
},
{
// onlyMainContent
label: 'Only Main Content',
name: 'onlyMainContent',
type: 'boolean',
description: 'Extract only the main content of the page',
optional: true,
additionalParams: true
}
// ... (other input parameters)
]
Expand Down
2 changes: 2 additions & 0 deletions packages/server/src/Interface.DocumentStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ export interface IDocumentStoreUpsertData {
docId: string
metadata?: string | object
replaceExisting?: boolean
createNewDocStore?: boolean
docStore?: IDocumentStore
loader?: {
name: string
config: ICommonObject
Expand Down
14 changes: 13 additions & 1 deletion packages/server/src/services/documentstore/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ import {
INodeData,
MODE,
IOverrideConfig,
IExecutePreviewLoader
IExecutePreviewLoader,
DocumentStoreDTO
} from '../../Interface'
import { DocumentStoreFileChunk } from '../../database/entities/DocumentStoreFileChunk'
import { v4 as uuidv4 } from 'uuid'
Expand Down Expand Up @@ -1464,6 +1465,7 @@ const upsertDocStore = async (
}
}
const replaceExisting = data.replaceExisting ?? false
const createNewDocStore = data.createNewDocStore ?? false
const newLoader = typeof data.loader === 'string' ? JSON.parse(data.loader) : data.loader
const newSplitter = typeof data.splitter === 'string' ? JSON.parse(data.splitter) : data.splitter
const newVectorStore = typeof data.vectorStore === 'string' ? JSON.parse(data.vectorStore) : data.vectorStore
Expand Down Expand Up @@ -1533,6 +1535,15 @@ const upsertDocStore = async (
recordManagerConfig = JSON.parse(entity.recordManagerConfig || '{}')?.config
}

if (createNewDocStore) {
const docStoreBody = typeof data.docStore === 'string' ? JSON.parse(data.docStore) : data.docStore
const newDocumentStore = docStoreBody ?? { name: `Document Store ${Date.now().toString()}` }
const docStore = DocumentStoreDTO.toEntity(newDocumentStore)
const documentStore = appDataSource.getRepository(DocumentStore).create(docStore)
const dbResponse = await appDataSource.getRepository(DocumentStore).save(documentStore)
storeId = dbResponse.id
}

// Step 2: Replace with new values
loaderName = newLoader?.name ? getComponentLabelFromName(newLoader?.name) : loaderName
loaderId = newLoader?.name || loaderId
Expand Down Expand Up @@ -1687,6 +1698,7 @@ const upsertDocStore = async (
isVectorStoreInsert: true
})
res.docId = newDocId
if (createNewDocStore) res.storeId = storeId

return res
} catch (error) {
Expand Down
15 changes: 14 additions & 1 deletion packages/ui/src/views/docstore/DocStoreAPIDialog.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,13 @@ body_data = {
"docId": "${dialogProps.loaderId}",
"metadata": {}, # Add additional metadata to the document chunks
"replaceExisting": True, # Replace existing document with the new upserted chunks
"createNewDocStore": False, # Create a new document store
"splitter": json.dumps({"config":{"chunkSize":20000}}) # Override existing configuration
# "loader": "",
# "vectorStore": "",
# "embedding": "",
# "recordManager": "",
# "docStore": ""
}

headers = {
Expand All @@ -71,11 +73,14 @@ formData.append("splitter", JSON.stringify({"config":{"chunkSize":20000}}));
formData.append("metadata", "{}");
// Replace existing document with the new upserted chunks
formData.append("replaceExisting", "true");
// Create a new document store
formData.append("createNewDocStore", "false");
// Override existing configuration
// formData.append("loader", "");
// formData.append("embedding", "");
// formData.append("vectorStore", "");
// formData.append("recordManager", "");
// formData.append("docStore", "");

async function query(formData) {
const response = await fetch(
Expand Down Expand Up @@ -105,11 +110,13 @@ curl -X POST http://localhost:3000/api/v1/document-store/upsert/${dialogProps.st
-F 'splitter={"config":{"chunkSize":20000}}' \\
-F "metadata={}" \\
-F "replaceExisting=true" \\
-F "createNewDocStore=false" \\
# Override existing configuration:
# -F "loader=" \\
# -F "embedding=" \\
# -F "vectorStore=" \\
# -F "recordManager="
# -F "recordManager=" \\
# -F "docStore="
\`\`\`
`
}
Expand All @@ -135,6 +142,7 @@ output = query({
"docId": "${dialogProps.loaderId}",
"metadata": "{}", # Add additional metadata to the document chunks
"replaceExisting": True, # Replace existing document with the new upserted chunks
"createNewDocStore": False, # Create a new document store
# Override existing configuration
"loader": {
"config": {
Expand All @@ -149,6 +157,7 @@ output = query({
# embedding: {},
# vectorStore: {},
# recordManager: {}
# docStore: {}
})
print(output)
\`\`\`
Expand All @@ -174,6 +183,7 @@ query({
"docId": "${dialogProps.loaderId}",
"metadata": "{}", // Add additional metadata to the document chunks
"replaceExisting": true, // Replace existing document with the new upserted chunks
"createNewDocStore": false, // Create a new document store
// Override existing configuration
"loader": {
"config": {
Expand All @@ -188,6 +198,7 @@ query({
// embedding: {},
// vectorStore: {},
// recordManager: {}
// docStore: {}
}).then((response) => {
console.log(response);
});
Expand All @@ -201,6 +212,7 @@ curl -X POST http://localhost:3000/api/v1/document-store/upsert/${dialogProps.st
"docId": "${dialogProps.loaderId}",
"metadata": "{}",
"replaceExisting": true,
"createNewDocStore": false,
"loader": {
"config": {
"text": "This is a new text"
Expand All @@ -215,6 +227,7 @@ curl -X POST http://localhost:3000/api/v1/document-store/upsert/${dialogProps.st
// "embedding": {},
// "vectorStore": {},
// "recordManager": {}
// "docStore": {}
}'

\`\`\`
Expand Down