Skip to content

Commit 33c7dad

Browse files
committed
feat: add indexing script
1 parent f29a942 commit 33c7dad

File tree

3 files changed

+305
-1
lines changed

3 files changed

+305
-1
lines changed

core/components/modai/bin/indexer.php

Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
#!/usr/bin/env php
2+
<?php
3+
/**
4+
* CLI script to index MODX resources and elements with ability to resume after interruption
5+
* Usage: php indexer.php [--reset] [--resources] [--elements] [--chunks] [--snippets] [--templates]
6+
*
7+
* Examples:
8+
* - Index everything: php indexer.php
9+
* - Index all resources: php indexer.php --resources
10+
* - Index chunks and templates: php indexer.php --chunks --templates
11+
* - Index all elements: php indexer.php --elements
12+
* - Reset and index all types: php indexer.php --reset
13+
*
14+
* - If no type is specified, everything will be indexed
15+
*/
16+
17+
use MODX\Revolution\modX;
18+
use MODX\Revolution\modChunk;
19+
use MODX\Revolution\modSnippet;
20+
use MODX\Revolution\modTemplate;
21+
use xPDO\xPDO;
22+
23+
// This script impersonates a sudo user below, so it must never be reachable over HTTP.
if (PHP_SAPI !== 'cli') {
    exit('This script can only be run from the command line.');
}

// Report a fatal bootstrap problem on STDERR and exit with a non-zero status,
// so shells, cron jobs and CI pipelines can detect the failure
// (a plain die('...') prints to STDOUT and exits with status 0).
$abort = static function (string $message): void {
    fwrite(STDERR, $message . PHP_EOL);
    exit(1);
};

define('MODX_API_MODE', true);
require_once dirname(__DIR__, 4) . '/config.core.php';

if (!(require_once MODX_CORE_PATH . 'vendor/autoload.php')) {
    $abort('Failed to load vendor/autoload.');
}

$modx = modX::getInstance(null, [xPDO::OPT_CONN_INIT => [xPDO::OPT_CONN_MUTABLE => true]]);
if (!($modx instanceof modX)) {
    $abort('Failed to init MODX');
}

$modx->initialize('mgr');
$modx->setLogLevel(modX::LOG_LEVEL_INFO);
$modx->setLogTarget('ECHO');

// Run as an in-memory sudo user (never saved) with id 1.
$fakeSudo = $modx->newObject('modUser');
$fakeSudo->set('id', 1);
$fakeSudo->setSudo(true);
$modx->user = $fakeSudo;

if (!$modx->services->has('modai')) {
    $abort('modAI not found');
}

/** @var \modAI\modAI | null $modAI */
$modAI = $modx->services->get('modai');

if ($modAI === null) {
    $abort('Failed to init modAI');
}
54+
55+
// Types selectable on the command line. Each one is enabled with "--<type>s";
// "element" is a shortcut for chunk + snippet + template.
$availableTypes = [
    'resource',
    'element',
    'chunk',
    'snippet',
    'template',
];

// $argv only exists in the global scope; fall back to $_SERVER['argv'] otherwise.
// The first entry is the script name and is not an option.
$cliArgs = array_slice($argv ?? $_SERVER['argv'] ?? [], 1);

// Reject anything we do not understand. Otherwise a typo such as "--chunk"
// would be ignored, no type would be selected, and EVERYTHING would be indexed.
$knownOptions = ['--reset'];
foreach ($availableTypes as $option) {
    $knownOptions[] = '--' . $option . 's';
}

$unknownOptions = array_diff($cliArgs, $knownOptions);
if (!empty($unknownOptions)) {
    fwrite(
        STDERR,
        'Unknown option(s): ' . implode(', ', $unknownOptions) . PHP_EOL
        . 'Allowed options: ' . implode(' ', $knownOptions) . PHP_EOL
    );
    exit(1);
}

$reset = in_array('--reset', $cliArgs, true);

// Collected as type => type so that "--elements --chunks" does not produce duplicates.
$typesToIndex = [];
foreach ($availableTypes as $option) {
    if (!in_array('--' . $option . 's', $cliArgs, true)) {
        continue;
    }

    if ($option === 'element') {
        $typesToIndex['chunk'] = 'chunk';
        $typesToIndex['snippet'] = 'snippet';
        $typesToIndex['template'] = 'template';
        continue;
    }

    $typesToIndex[$option] = $option;
}

// No explicit type selected: index everything.
if (empty($typesToIndex)) {
    $typesToIndex = [
        'resource' => 'resource',
        'chunk' => 'chunk',
        'snippet' => 'snippet',
        'template' => 'template',
    ];
}
87+
88+
/**
 * Indexes MODX resources, chunks, snippets and templates into the configured
 * context providers, persisting progress to a JSON state file so that an
 * interrupted run can be resumed.
 */
class Indexer {
    /** @var object MODX instance read from the modAI service (`$modAI->modx`). */
    private $modx;

    /** @var object modAI service handed to the constructor. */
    private $modAI;

    /** @var array Progress state: 'processed' and 'failed' ID lists per type, plus 'lastRun' and 'completed'. */
    private $state;

    /** @var int Number of items handled between two writes of the state file. */
    private $batchSize = 10;

    /** @var string Path of the JSON file that stores progress between runs. */
    private $stateFile = __DIR__ . '/indexer_state.json';

    /** @var array Options: 'reset' (bool) and 'typesToIndex' (string[]). */
    private $options;

    /**
     * Context provider instances keyed by item type.
     *
     * @var \modAI\ContextProviders\Pinecone[]
     */
    private $contextProviders = [];

    /**
     * Resolves a context provider for every requested type and loads the saved state.
     *
     * @param object $modAI   modAI service; its `modx` property is used for all queries.
     * @param array  $options 'reset' => bool, 'typesToIndex' => string[].
     *
     * @throws \Exception When none of the requested types has an enabled provider.
     */
    public function __construct($modAI, $options) {
        $this->modAI = $modAI;
        $this->modx = $this->modAI->modx;
        $this->options = $options;

        foreach ($this->options['typesToIndex'] as $type) {
            // The provider name per type comes from the "modai.contexts.<type>s.name" setting.
            $contextName = $this->modx->getOption("modai.contexts.{$type}s.name");
            if (empty($contextName)) {
                continue;
            }

            /** @var \modAI\Model\ContextProvider $provider */
            $provider = $this->modx->getObject(
                \modAI\Model\ContextProvider::class,
                ['enabled' => true, 'name' => $contextName, 'class' => \modAI\ContextProviders\Pinecone::class]
            );
            if (!$provider) {
                continue;
            }

            $this->contextProviders[$type] = $provider->getContextProviderInstance();
        }

        if (count($this->contextProviders) === 0) {
            throw new \Exception('No context providers, check out system settings: "modai.contexts.type.name".');
        }

        $this->loadState();
    }

    /**
     * Returns an empty state structure.
     *
     * @return array
     */
    private function emptyState(): array {
        return [
            'processed' => ['resource' => [], 'chunk' => [], 'snippet' => [], 'template' => []],
            'failed' => ['resource' => [], 'chunk' => [], 'snippet' => [], 'template' => []],
            'lastRun' => null,
            'completed' => false,
        ];
    }

    /**
     * Loads the state file, or starts from an empty state when a reset was
     * requested or the file is missing, unreadable, or not a JSON object/array.
     */
    private function loadState(): void {
        if ($this->options['reset'] || !is_file($this->stateFile)) {
            $this->initializeState();
            return;
        }

        $content = file_get_contents($this->stateFile);
        $loaded = $content === false ? null : json_decode($content, true);

        // Syntactically valid JSON such as "null" or "3" is still not a usable state.
        if (!is_array($loaded)) {
            $this->initializeState();
            return;
        }

        $this->state = $this->normalizeState($loaded);
    }

    /**
     * Copies a decoded state onto the expected structure so that every
     * bucket/type key exists and every ID collection is a plain list.
     *
     * @param array $loaded Decoded content of the state file.
     *
     * @return array
     */
    private function normalizeState(array $loaded): array {
        $state = $this->emptyState();

        foreach (['processed', 'failed'] as $bucket) {
            $source = (isset($loaded[$bucket]) && is_array($loaded[$bucket])) ? $loaded[$bucket] : [];

            foreach (array_keys($state[$bucket]) as $type) {
                if (isset($source[$type]) && is_array($source[$type])) {
                    // array_values() also repairs lists that were saved with gaps in their keys.
                    $state[$bucket][$type] = array_values($source[$type]);
                }
            }
        }

        $state['lastRun'] = $loaded['lastRun'] ?? null;
        $state['completed'] = ($loaded['completed'] ?? false) === true;

        return $state;
    }

    /**
     * Resets the in-memory state and writes it to disk.
     */
    private function initializeState(): void {
        $this->state = $this->emptyState();
        $this->saveState();
    }

    /**
     * Writes the current state to the state file.
     *
     * Failures are logged instead of thrown so that a read-only directory does
     * not abort an otherwise working indexing run.
     */
    private function saveState(): void {
        $json = json_encode($this->state, JSON_PRETTY_PRINT);
        if ($json === false) {
            $this->log(xPDO::LOG_LEVEL_ERROR, 'Could not encode indexer state: ' . json_last_error_msg());
            return;
        }

        if (file_put_contents($this->stateFile, $json, LOCK_EX) === false) {
            $this->log(xPDO::LOG_LEVEL_ERROR, 'Could not write indexer state to ' . $this->stateFile);
        }
    }

    /**
     * Prints a timestamped log line.
     *
     * @param int    $level   One of the xPDO::LOG_LEVEL_* constants; any other value is labelled FATAL.
     * @param string $message Text to print.
     */
    private function log(int $level, string $message): void {
        $labels = [
            xPDO::LOG_LEVEL_DEBUG => 'DEBUG',
            xPDO::LOG_LEVEL_INFO => 'INFO',
            xPDO::LOG_LEVEL_WARN => 'WARN',
            xPDO::LOG_LEVEL_ERROR => 'ERROR',
        ];
        $levelText = $labels[$level] ?? 'FATAL';

        echo '[' . date('Y-m-d H:i:s') . '] ' . $levelText . ': ' . $message . "\n";
    }

    /**
     * Indexes every requested type, saving progress every `$batchSize` items,
     * and prints a summary. The run counts as completed only when no requested
     * type has failed items left.
     */
    public function run() {
        $this->log(modX::LOG_LEVEL_INFO, 'Starting indexing process...');

        if ($this->state['completed'] === true) {
            $this->log(modX::LOG_LEVEL_INFO, 'Previous indexing completed successfully. Starting over...');
            $this->initializeState();
        }

        $completed = true;
        foreach ($this->options['typesToIndex'] as $type) {
            if (isset($this->contextProviders[$type])) {
                $this->indexType($type);
            } else {
                // Without a provider nothing can be indexed; do not query and walk every item for nothing.
                $this->log(modX::LOG_LEVEL_WARN, "No enabled context provider for {$type}s, skipping.");
            }

            $completed = $completed && empty($this->state['failed'][$type]);
        }

        $this->state['completed'] = $completed;
        $this->state['lastRun'] = date('Y-m-d H:i:s');
        $this->saveState();

        if ($this->state['completed']) {
            $this->log(modX::LOG_LEVEL_INFO, 'Indexing completed successfully!');
            return;
        }

        foreach ($this->options['typesToIndex'] as $type) {
            $failed = $this->state['failed'][$type];
            if (empty($failed)) {
                continue;
            }

            $this->log(
                modX::LOG_LEVEL_INFO,
                'Indexing completed with ' . count($failed) . " failed {$type}s."
            );
            $this->log(modX::LOG_LEVEL_INFO, 'Failed IDs: ' . implode(', ', $failed));
        }
    }

    /**
     * Indexes all pending items of one type, persisting the state after every batch.
     *
     * @param string $type Item type ('resource', 'chunk', 'snippet' or 'template').
     */
    private function indexType(string $type): void {
        $sinceLastSave = 0;

        foreach ($this->getItemsToProcess($type) as $item) {
            $this->indexResource($type, $item);

            if (++$sinceLastSave >= $this->batchSize) {
                $sinceLastSave = 0;
                $this->saveState();
            }
        }
    }

    /**
     * Returns an iterator over the items of the given type that are not yet
     * recorded as processed, and logs how many there are.
     *
     * @param string $type Item type ('resource', 'chunk', 'snippet' or 'template').
     *
     * @return iterable
     */
    private function getItemsToProcess(string $type) {
        // Fully qualified on purpose: the file's imports do not include modResource.
        $classMap = [
            'resource' => \MODX\Revolution\modResource::class,
            'chunk' => \MODX\Revolution\modChunk::class,
            'snippet' => \MODX\Revolution\modSnippet::class,
            'template' => \MODX\Revolution\modTemplate::class,
        ];

        $where = [];
        if (!empty($this->state['processed'][$type])) {
            $where['id:NOT IN'] = $this->state['processed'][$type];
        }

        $count = $this->modx->getCount($classMap[$type], $where);
        $this->log(modX::LOG_LEVEL_INFO, 'Found ' . $count . " {$type}s to index.");

        return $this->modx->getIterator($classMap[$type], $where);
    }

    /**
     * Sends one item to the context provider of its type and records the outcome.
     *
     * Array field values are JSON-encoded, and every value is stripped of HTML/PHP
     * tags before it is handed to the provider.
     *
     * @param string     $type Item type; nothing happens when no provider exists for it.
     * @param xPDOObject $item
     * @return void
     */
    private function indexResource($type, $item) {
        if (!isset($this->contextProviders[$type])) {
            return;
        }

        $id = $item->get('id');

        try {
            $data = [];
            foreach ($item->toArray() as $key => $value) {
                if (is_array($value)) {
                    $value = json_encode($value);
                }

                // Cast first: passing null to strip_tags() is deprecated since PHP 8.1.
                $data[$key] = strip_tags((string) $value);
            }

            $this->contextProviders[$type]->index($type, $id, $data);

            $this->moveId($type, $id, 'failed', 'processed');
            $this->log(modX::LOG_LEVEL_INFO, "Indexed $type #{$id} successfully");
        } catch (\Exception $e) {
            $this->log(modX::LOG_LEVEL_ERROR, "Indexing failed for $type #{$id}. Error: " . $e->getMessage());

            $this->moveId($type, $id, 'processed', 'failed');
        }
    }

    /**
     * Removes an ID from one state bucket and records it exactly once in the other.
     *
     * array_diff() preserves keys, so both lists are re-indexed; otherwise
     * json_encode() would write them as objects instead of lists. Removing the
     * ID from the target list before appending keeps repeated failures of the
     * same item from piling up as duplicates.
     *
     * @param string $type Item type.
     * @param mixed  $id   Item ID.
     * @param string $from Bucket to remove the ID from ('processed' or 'failed').
     * @param string $to   Bucket to record the ID in ('processed' or 'failed').
     */
    private function moveId(string $type, $id, string $from, string $to): void {
        $this->state[$from][$type] = array_values(array_diff($this->state[$from][$type], [$id]));

        $target = array_values(array_diff($this->state[$to][$type], [$id]));
        $target[] = $id;
        $this->state[$to][$type] = $target;
    }
}
275+
276+
// Entry point: build the indexer from the parsed CLI options and run it.
try {
    $indexer = new Indexer($modAI, ['reset' => $reset, 'typesToIndex' => $typesToIndex]);
    $indexer->run();
} catch (Exception $e) {
    // die('...') would print to STDOUT and exit with status 0; report the
    // failure on STDERR with a non-zero status so callers can detect it.
    fwrite(STDERR, 'Indexing error: ' . $e->getMessage() . PHP_EOL);
    exit(1);
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{
2+
"processed": {
3+
"resource": [
4+
1,
5+
2,
6+
3,
7+
4,
8+
5,
9+
6,
10+
7
11+
],
12+
"chunk": [],
13+
"snippet": [],
14+
"template": []
15+
},
16+
"failed": {
17+
"resource": [],
18+
"chunk": [],
19+
"snippet": [],
20+
"template": []
21+
},
22+
"lastRun": "2025-04-16 14:27:39",
23+
"completed": true
24+
}

core/components/modai/src/Elements/Events/OnDocFormSave.php

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ public function run()
3131
/** @var \modAI\ContextProviders\Pinecone $instance */
3232
$instance = $provider->getContextProviderInstance();
3333

34-
3534
$data = $resource->toArray();
3635
foreach ($data as $key => $value) {
3736
if (is_array($value)) {

0 commit comments

Comments
 (0)