Skip to content

Commit 8a30894

Browse files
committed
publisher: Skip script, pre and textarea content when looking for HTML elements
Updates #7567
1 parent 7b4ade5 commit 8a30894

File tree

2 files changed

+60
-33
lines changed

2 files changed

+60
-33
lines changed

publisher/htmlElementsCollector.go

Lines changed: 55 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ type cssClassCollectorWriter struct {
6464
buff bytes.Buffer
6565

6666
isCollecting bool
67-
dropValue bool
67+
inPreTag string
6868

6969
inQuote bool
7070
quoteValue byte
@@ -90,56 +90,70 @@ func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) {
9090
b := p[i]
9191
w.toggleIfQuote(b)
9292
if !w.inQuote && b == '>' {
93-
w.endCollecting(false)
93+
w.endCollecting()
9494
break
9595
}
9696
w.buff.WriteByte(b)
9797
}
9898

9999
if !w.isCollecting {
100-
if w.dropValue {
101-
w.buff.Reset()
102-
} else {
103-
// First check if we have processed this element before.
104-
w.collector.mu.RLock()
105-
106-
// See https://github.com/dominikh/go-tools/issues/723
107-
//lint:ignore S1030 This construct avoids memory allocation for the string.
108-
seen := w.collector.elementSet[string(w.buff.Bytes())]
109-
w.collector.mu.RUnlock()
110-
if seen {
111-
w.buff.Reset()
112-
continue
100+
if w.inPreTag != "" {
101+
s := w.buff.String()
102+
if tagName, isEnd := w.parseEndTag(s); isEnd && w.inPreTag == tagName {
103+
w.inPreTag = ""
113104
}
105+
w.buff.Reset()
106+
continue
107+
}
114108

115-
s := w.buff.String()
109+
// First check if we have processed this element before.
110+
w.collector.mu.RLock()
116111

112+
// See https://github.com/dominikh/go-tools/issues/723
113+
//lint:ignore S1030 This construct avoids memory allocation for the string.
114+
seen := w.collector.elementSet[string(w.buff.Bytes())]
115+
w.collector.mu.RUnlock()
116+
if seen {
117117
w.buff.Reset()
118+
continue
119+
}
118120

119-
if strings.HasPrefix(s, "</") {
120-
continue
121-
}
121+
s := w.buff.String()
122122

123-
key := s
123+
w.buff.Reset()
124124

125-
s, tagName := w.insertStandinHTMLElement(s)
126-
el := parseHTMLElement(s)
127-
el.Tag = tagName
125+
if strings.HasPrefix(s, "</") {
126+
continue
127+
}
128128

129-
w.collector.mu.Lock()
130-
w.collector.elementSet[key] = true
131-
if el.Tag != "" {
132-
w.collector.elements = append(w.collector.elements, el)
133-
}
134-
w.collector.mu.Unlock()
129+
key := s
130+
131+
s, tagName := w.insertStandinHTMLElement(s)
132+
el := parseHTMLElement(s)
133+
el.Tag = tagName
134+
if w.isPreFormatted(tagName) {
135+
w.inPreTag = tagName
135136
}
137+
138+
w.collector.mu.Lock()
139+
w.collector.elementSet[key] = true
140+
if el.Tag != "" {
141+
w.collector.elements = append(w.collector.elements, el)
142+
}
143+
w.collector.mu.Unlock()
144+
136145
}
137146
}
138147
}
139148

140149
return
141150
}
142151

152+
// No need to look inside these for HTML elements.
153+
func (c *cssClassCollectorWriter) isPreFormatted(s string) bool {
154+
return s == "pre" || s == "textarea" || s == "script"
155+
}
156+
143157
// The net/html parser does not handle single table elements as input, e.g. tbody.
144158
// We only care about the element/class/ids, so just store away the original tag name
145159
// and pretend it's a <div>.
@@ -154,15 +168,24 @@ func (c *cssClassCollectorWriter) insertStandinHTMLElement(el string) (string, s
154168
return newv, strings.ToLower(tag)
155169
}
156170

157-
func (c *cssClassCollectorWriter) endCollecting(drop bool) {
171+
func (c *cssClassCollectorWriter) parseEndTag(s string) (string, bool) {
172+
if !strings.HasPrefix(s, "</") {
173+
return "", false
174+
}
175+
s = strings.TrimPrefix(s, "</")
176+
s = strings.TrimSuffix(s, ">")
177+
return strings.ToLower(strings.TrimSpace(s)), true
178+
}
179+
180+
// endCollecting stops collecting and clears the in-quote flag.
func (c *cssClassCollectorWriter) endCollecting() {
158181
c.isCollecting = false
159182
c.inQuote = false
160-
c.dropValue = drop
183+
161184
}
162185

163186
// startCollecting switches the writer into collecting mode.
func (c *cssClassCollectorWriter) startCollecting() {
164187
c.isCollecting = true
165-
c.dropValue = false
188+
166189
}
167190

168191
func (c *cssClassCollectorWriter) toggleIfQuote(b byte) {

publisher/htmlElementsCollector_test.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,12 @@ func TestClassCollector(t *testing.T) {
8989

9090
{"Alpine transition 1", `<div x-transition:enter-start="opacity-0 transform mobile:-translate-x-8 sm:-translate-y-8">`, f("div", "mobile:-translate-x-8 opacity-0 sm:-translate-y-8 transform", "")},
9191
{"Vue bind", `<div v-bind:class="{ active: isActive }"></div>`, f("div", "active", "")},
92-
// https://github.com/gohugoio/hugo/issues/7746
92+
// Issue #7746
9393
{"Apostrophe inside attribute value", `<a class="missingclass" title="Plus d'information">my text</a><div></div>`, f("a div", "missingclass", "")},
94+
// Issue #7567
95+
{"Script tags content should be skipped", `<script><span>foo</span><span>bar</span></script><div class="foo"></div>`, f("div script", "foo", "")},
96+
{"Pre tags content should be skipped", `<pre class="preclass"><span>foo</span><span>bar</span></pre><div class="foo"></div>`, f("div pre", "foo preclass", "")},
97+
{"Textare tags content should be skipped", `<textarea class="textareaclass"><span>foo</span><span>bar</span></textarea><div class="foo"></div>`, f("div textarea", "foo textareaclass", "")},
9498
} {
9599
c.Run(test.name, func(c *qt.C) {
96100
w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())

0 commit comments

Comments
 (0)