Skip to content

Commit 4e4d720

Browse files
committed
prepare for separation of html-parser, move html-tree from api space, move parseData to lib
1 parent 81fdf93 commit 4e4d720

File tree

12 files changed

+331
-286
lines changed

12 files changed

+331
-286
lines changed

app.js

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ global.opts = loadOptions();
1818

1919
// Arguments parse */
2020
commander
21-
.option('-l, --log [string]', 'Log level (default: ' + global.opts.core.common.defaultLogLevel + ')', global.opts.core.common.defaultLogLevel)
22-
.option('-p, --port [number]', 'Server port (default: ' + global.opts.core.common.port + ')', global.opts.core.common.port)
23-
.option('--html', 'Turn on HTML parser on app start')
21+
.option('-l, --log [string]', 'Log level (default: ' + global.opts.core.common.defaultLogLevel + ').', global.opts.core.common.defaultLogLevel)
22+
.option('-p, --port [number]', 'Server port (default: ' + global.opts.core.common.port + ').', global.opts.core.common.port)
23+
.option('--html', 'Turn on HTML parser on app start (requires installed and enabled parser).')
2424
.parse(process.argv);
2525

2626
global.commander = commander;
@@ -150,7 +150,13 @@ require('./core/routes');
150150
require('./core/api');
151151

152152
global.app.use('/api/options', function(req, res){
153-
res.jsonp(loadOptions().assets);
153+
var options = loadOptions();
154+
var assetsOptions = options.assets;
155+
156+
// TODO: https://github.com/sourcejs/Source/issues/142
157+
assetsOptions.plugins = options.plugins;
158+
159+
res.jsonp(assetsOptions);
154160
});
155161

156162
// User extensions

assets/js/modules/sectionsParser.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
*
77
* */
88

9-
// TODO: wrap as requirejs module, and combine with phantom-runner.js
9+
// TODO: wrap as requirejs module, and combine with phantomRunner.js
1010

1111
function SourceGetSections() {
1212
// Defining strict inside func, because PhantomJS stops evaluating this script if it's on top

core/api/index.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
var express = require('express');
44
var path = require('path');
5-
var parseData = require('./parseData');
5+
var parseData = require(path.join(global.pathToApp, 'core/lib/parseData'));
66
var pathToApp = path.dirname(require.main.filename);
77
var deepExtend = require('deep-extend');
8-
var parseHTML = require(path.join(global.pathToApp, 'core/api/parseHTML'));
8+
var htmlTree = require(path.join(global.pathToApp, 'core/html-tree'));
99
var unflatten = require(path.join(global.pathToApp,'core/unflat'));
1010

1111
var config = {
@@ -145,7 +145,7 @@ var postHTML = function (req, res, dataPath) {
145145
data = unflatten(data, { delimiter: '/', overwrite: 'root' });
146146
}
147147

148-
parseHTML.writeDataFile(data, true, dataPath, function(err, finalData){
148+
htmlTree.writeDataFile(data, true, dataPath, function(err, finalData){
149149
if (err || !finalData) {
150150
res.status(config.statusCodes.error).json({
151151
message: err
@@ -171,7 +171,7 @@ var deleteHTML = function (req, res, dataPath) {
171171
var body = req.body;
172172
var reqID = body.id || req.query.id;
173173

174-
parseHTML.deleteFromDataFile(dataPath, reqID, function(err, finalData){
174+
htmlTree.deleteFromDataFile(dataPath, reqID, function(err, finalData){
175175
if (err || !finalData) {
176176
res.status(config.statusCodes.error).json({
177177
message: err

core/html-tree/html-parser/index.js

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
'use strict';
2+
3+
var path = require('path');
4+
var async = require('async');
5+
var deepExtend = require('deep-extend');
6+
var ParseData = require(path.join(global.pathToApp,'core/lib/parseData'));
7+
var phantom = require('phantomjs');
8+
var unflatten = require(path.join(global.pathToApp,'core/unflat'));
9+
var childProcess = require('child_process');
10+
var htmlTree = require(path.join(global.pathToApp,'core/html-tree'));
11+
12+
var processFlagNotExec = true;
13+
14+
var config = {
15+
enabled: true,
16+
17+
// Run HTML parser on app start
18+
onStart: false,
19+
cron: false,
20+
cronProd: true,
21+
cronRepeatTime: 600000,
22+
23+
// PhantomJS retry limit
24+
errorLimit: 2,
25+
asyncPhantomCallLimit: 5,
26+
27+
specsFilter: {
28+
filterOut: {
29+
cats: ['docs'],
30+
tags: ['parse-problems']
31+
}
32+
},
33+
34+
// Path to HTML data otput
35+
pathToSpecs: path.join(global.pathToApp, global.opts.core.api.specsData)
36+
};
37+
38+
// Overwriting base options
39+
if (global.opts.core.parseHTML) deepExtend(config, global.opts.core.parseHTML); // Legacy support
40+
if (global.opts.plugins && global.opts.plugins.htmlParser) deepExtend(config, global.opts.plugins.htmlParser);
41+
42+
/**
43+
* Get list of specs for parsing with PhantomJS
44+
*
45+
* @returns {Array} Returns array with spec URLs
46+
*/
47+
var getSpecsList = function() {
48+
var parseSpecs = new ParseData({
49+
scope: 'specs',
50+
path: require.resolve(config.pathToSpecs)
51+
});
52+
53+
var specs = parseSpecs.getFilteredData(config.specsFilter, true);
54+
55+
// Preparing data for specs iteration
56+
specs = specs.map(function(item){
57+
return item.url.substring(1);
58+
});
59+
60+
return specs;
61+
};
62+
63+
/**
64+
* PhantomJS async runner, calls writeDataFile on finish
65+
*
66+
* @param {Array} specs - array with URL list, that will be passed to PhantomJS
67+
*
68+
* @param {Function} [callback] - callback function
69+
* @param {Object} callback.err - Error object
70+
* @param {Object} callback.outputData - Passes output data to callback
71+
*/
72+
var processSpecs = module.exports.processSpecs = function(specs, callback){
73+
callback = typeof callback === 'function' ? callback : function(){};
74+
75+
if (!config.enabled) {
76+
callback('HTML parser disabled.');
77+
78+
return;
79+
}
80+
81+
if (processFlagNotExec) {
82+
global.log.info('HTML API update started');
83+
84+
var _specs = specs || getSpecsList();
85+
var specsLeft = _specs.slice(0);
86+
var PhantomPath = phantom.path;
87+
var outputHTML = {};
88+
var errorCounter = {};
89+
var specLength = _specs.length;
90+
var doneCounter = 0;
91+
var phExecCommand = PhantomPath + " " + path.join(global.pathToApp, 'core/html-tree/html-parser/phantomRunner.js');
92+
93+
processFlagNotExec = false;
94+
95+
global.log.trace('Processing ' + specLength + ' specs.');
96+
97+
async.mapLimit(_specs, config.asyncPhantomCallLimit, function (spec, next) {
98+
var n = _specs.indexOf(spec) + 1;
99+
100+
global.log.trace('Starts...' + n, spec);
101+
102+
childProcess.exec(phExecCommand + " " + spec + " " + global.opts.core.common.port, function (error, stdout, stderr) {
103+
handler(error, stdout, stderr, spec);
104+
next();
105+
});
106+
});
107+
108+
var handler = function(error, stdout, stderr, spec) {
109+
if (error) {
110+
if (typeof errorCounter[spec] !== 'number') {
111+
errorCounter[spec] = 0;
112+
}
113+
114+
errorCounter[spec]++;
115+
116+
// If limit is not reached, try again
117+
if (errorCounter[spec] <= config.errorLimit) {
118+
global.log.debug('Rerun', spec);
119+
120+
childProcess.exec(phExecCommand + " " + spec, function (error, stdout, stderr) {
121+
handler(error, stdout, stderr, spec, writeCallback);
122+
});
123+
return;
124+
}
125+
126+
global.log.error('Exec error on spec ' + spec + ': '+ error);
127+
global.log.debug('Error info: ', JSON.stringify({
128+
spec: spec,
129+
error: error,
130+
stdount: stdout,
131+
stderr: stderr
132+
}));
133+
} else {
134+
var parsedStdout = [];
135+
136+
try {
137+
parsedStdout = JSON.parse(stdout);
138+
} catch(e) {
139+
global.log.debug('HTML Parser stdout parse error: ', e, stdout);
140+
global.log.debug('Error from Phantom parser: ', stdout);
141+
parsedStdout = {
142+
message: "Stdout parse error"
143+
};
144+
}
145+
146+
global.log.debug('Spec done: ', JSON.stringify({
147+
spec: spec,
148+
error: error,
149+
stderr: stderr
150+
}));
151+
152+
// Writing contents to common obj
153+
outputHTML[spec+'/specFile/contents'] = parsedStdout.contents;
154+
outputHTML[spec+'/specFile/headResources'] = parsedStdout.headResources;
155+
outputHTML[spec+'/specFile/bodyResources'] = parsedStdout.bodyResources;
156+
}
157+
158+
global.log.debug((doneCounter/specLength*100).toFixed(2),'%...Done', spec);
159+
160+
// Logging specs queue
161+
specsLeft.splice(specsLeft.indexOf(spec), 1);
162+
if (specsLeft.length < 5 && specsLeft.length !== 0) {
163+
global.log.trace('Specs queen', specsLeft);
164+
}
165+
166+
doneCounter++;
167+
168+
// We handled all requested specs
169+
if (doneCounter === specLength) {
170+
var outputData = unflatten(outputHTML, { delimiter: '/', overwrite: 'root' });
171+
172+
// Callback is passed to writeDataFile
173+
var writeCallback = function() {
174+
global.log.info('HTML API successfully updated');
175+
processFlagNotExec = true;
176+
177+
callback(null, outputData);
178+
};
179+
180+
htmlTree.writeDataFile(outputData, true, false, writeCallback);
181+
}
182+
};
183+
}
184+
};
185+
186+
if (config.enabled) {
187+
// Running processSpecs by cron
188+
if (config.cron || (global.MODE === 'production' && config.cronProd)) {
189+
setInterval(function () {
190+
processSpecs();
191+
}, config.cronRepeatTime);
192+
}
193+
194+
if (config.onStart) {
195+
setTimeout(processSpecs, 100);
196+
}
197+
}

0 commit comments

Comments
 (0)