1
+ 'use strict' ;
2
+
3
+ var path = require ( 'path' ) ;
4
+ var async = require ( 'async' ) ;
5
+ var deepExtend = require ( 'deep-extend' ) ;
6
+ var ParseData = require ( path . join ( global . pathToApp , 'core/lib/parseData' ) ) ;
7
+ var phantom = require ( 'phantomjs' ) ;
8
+ var unflatten = require ( path . join ( global . pathToApp , 'core/unflat' ) ) ;
9
+ var childProcess = require ( 'child_process' ) ;
10
+ var htmlTree = require ( path . join ( global . pathToApp , 'core/html-tree' ) ) ;
11
+
12
// Run guard: `true` while no parsing run is in progress.
// processSpecs flips it to `false` for the duration of a run.
var processFlagNotExec = true;

// Default plugin settings; may be overridden from global.opts (see below)
var config = {
    enabled: true,

    // Run the HTML parser once, shortly after app start
    onStart: false,

    // Periodic re-run: `cron` enables it in any mode, `cronProd` only in production mode
    cron: false,
    cronProd: true,

    // Delay between periodic runs (passed to setInterval, i.e. milliseconds)
    cronRepeatTime: 600000,

    // PhantomJS retry limit (per spec)
    errorLimit: 2,

    // Concurrency limit passed to async.mapLimit
    asyncPhantomCallLimit: 5,

    // Filter passed to ParseData#getFilteredData when collecting specs
    specsFilter: {
        filterOut: {
            cats: ['docs'],
            tags: ['parse-problems']
        }
    },

    // Path to HTML data output
    pathToSpecs: path.join(global.pathToApp, global.opts.core.api.specsData)
};

// Overwriting base options. Sources are applied in order, so plugin-level
// settings win over the legacy `core.parseHTML` section.
[
    global.opts.core.parseHTML, // Legacy support
    global.opts.plugins && global.opts.plugins.htmlParser
].forEach(function (overrides) {
    if (overrides) {
        deepExtend(config, overrides);
    }
});
41
+
42
/**
 * Collect the specs that should be parsed with PhantomJS.
 *
 * Loads the specs data through ParseData, applies `config.specsFilter`
 * and strips the first character from every spec URL.
 *
 * @returns {Array} Spec URLs without their first character
 */
var getSpecsList = function () {
    var specsParser = new ParseData({
        scope: 'specs',
        path: require.resolve(config.pathToSpecs)
    });

    return specsParser
        .getFilteredData(config.specsFilter, true)
        .map(function (specEntry) {
            // Drop the first character of the URL (presumably the leading slash)
            return specEntry.url.substring(1);
        });
};
62
+
63
/**
 * PhantomJS async runner, calls htmlTree.writeDataFile on finish.
 *
 * Every spec is rendered by a separate PhantomJS process; at most
 * `config.asyncPhantomCallLimit` specs are processed at once. A failed
 * spec is retried until it has failed more than `config.errorLimit`
 * times; after that it is logged and left out of the output.
 * Only one run may be active at a time.
 *
 * @param {Array} [specs] - array with URL list, that will be passed to PhantomJS;
 *                          defaults to the result of getSpecsList()
 *
 * @param {Function} [callback] - callback function
 * @param {String|null} callback.err - error message when the parser is disabled
 *                                     or a run is already in progress, otherwise null
 * @param {Object} callback.outputData - Passes output data to callback
 */
var processSpecs = module.exports.processSpecs = function (specs, callback) {
    callback = typeof callback === 'function' ? callback : function () {};

    if (!config.enabled) {
        callback('HTML parser disabled.');

        return;
    }

    // A run is already in progress: report it instead of silently dropping the callback
    if (!processFlagNotExec) {
        callback('HTML parser is already running.');

        return;
    }

    global.log.info('HTML API update started');

    var _specs = specs || getSpecsList();
    var specLength = _specs.length;

    // Nothing to parse: finish right away. Without this guard no completion
    // handler would ever fire and the run flag would stay locked forever.
    if (specLength === 0) {
        global.log.info('HTML API update skipped: no specs to process');
        callback(null, {});

        return;
    }

    var specsLeft = _specs.slice(0);
    var outputHTML = {};
    var errorCounter = {};
    var doneCounter = 0;
    var runnerPath = path.join(global.pathToApp, 'core/html-tree/html-parser/phantomRunner.js');

    // Starts one PhantomJS process for a spec. Arguments are passed as an array
    // (no shell), so spaces or shell metacharacters in paths and spec names are safe.
    var runPhantom = function (spec, onExit) {
        var args = [runnerPath, String(spec), String(global.opts.core.common.port)];

        childProcess.execFile(phantom.path, args, onExit);
    };

    // Parses PhantomJS stdout and stores the spec contents in the common flat object
    var storeResult = function (spec, stdout, stderr) {
        var parsedStdout;

        try {
            parsedStdout = JSON.parse(stdout);
        } catch (e) {
            global.log.debug('HTML Parser stdout parse error: ', e, stdout);
            global.log.debug('Error from Phantom parser: ', stdout);
            parsedStdout = {
                message: "Stdout parse error"
            };
        }

        global.log.debug('Spec done: ', JSON.stringify({
            spec: spec,
            error: null,
            stderr: stderr
        }));

        // Writing contents to common obj
        outputHTML[spec + '/specFile/contents'] = parsedStdout.contents;
        outputHTML[spec + '/specFile/headResources'] = parsedStdout.headResources;
        outputHTML[spec + '/specFile/bodyResources'] = parsedStdout.bodyResources;
    };

    // Bookkeeping and progress logging for a spec that will not be run again
    var markDone = function (spec) {
        // Count first, so the reported percentage includes the spec that just finished
        doneCounter++;

        global.log.debug((doneCounter / specLength * 100).toFixed(2), '%...Done', spec);

        // Logging specs queue
        specsLeft.splice(specsLeft.indexOf(spec), 1);
        if (specsLeft.length < 5 && specsLeft.length !== 0) {
            global.log.trace('Specs queen', specsLeft);
        }
    };

    // Runs a spec, retrying on failure; `done` is called exactly once, after the
    // last attempt, so retries stay inside the concurrency limit.
    var processSpec = function (spec, done) {
        runPhantom(spec, function (error, stdout, stderr) {
            if (error) {
                if (typeof errorCounter[spec] !== 'number') {
                    errorCounter[spec] = 0;
                }

                errorCounter[spec]++;

                // If limit is not reached, try again (with the same arguments)
                if (errorCounter[spec] <= config.errorLimit) {
                    global.log.debug('Rerun', spec);
                    processSpec(spec, done);

                    return;
                }

                global.log.error('Exec error on spec ' + spec + ': ' + error);
                global.log.debug('Error info: ', JSON.stringify({
                    spec: spec,
                    error: error,
                    stdount: stdout,
                    stderr: stderr
                }));
            } else {
                storeResult(spec, stdout, stderr);
            }

            markDone(spec);

            // No error is passed on: a failed spec must not abort the remaining ones
            done();
        });
    };

    processFlagNotExec = false;

    global.log.trace('Processing ' + specLength + ' specs.');

    async.mapLimit(_specs, config.asyncPhantomCallLimit, function (spec, next) {
        var n = _specs.indexOf(spec) + 1;

        global.log.trace('Starts...' + n, spec);

        processSpec(spec, next);
    }, function () {
        // We handled all requested specs
        var outputData = unflatten(outputHTML, { delimiter: '/', overwrite: 'root' });

        htmlTree.writeDataFile(outputData, true, false, function () {
            global.log.info('HTML API successfully updated');
            processFlagNotExec = true;

            callback(null, outputData);
        });
    });
};
185
+
186
// Scheduling. Nothing is scheduled while the parser is disabled.

// Periodic run: in any mode when `cron` is set, or in production mode when `cronProd` is set
if (config.enabled && (config.cron || (global.MODE === 'production' && config.cronProd))) {
    setInterval(function () {
        processSpecs();
    }, config.cronRepeatTime);
}

// One-off run shortly after app start
if (config.enabled && config.onStart) {
    setTimeout(function () {
        processSpecs();
    }, 100);
}
0 commit comments