Skip to content

Commit f10dc03

Browse files
authored
refactor(feeds): Move getFeed to domutils (#931)
We still have an export to keep backwards compatibility. This allows us to drop feed support & `domutils` in a future version of htmlparser2 (and point users to `domutils`).
1 parent e852205 commit f10dc03

File tree

3 files changed

+11
-243
lines changed

3 files changed

+11
-243
lines changed

package-lock.json

Lines changed: 7 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
"dependencies": {
5151
"domelementtype": "^2.0.1",
5252
"domhandler": "^4.0.0",
53-
"domutils": "^2.5.2",
53+
"domutils": "^2.8.0",
5454
"entities": "^3.0.1"
5555
},
5656
"devDependencies": {

src/FeedHandler.ts

Lines changed: 3 additions & 235 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,8 @@
1-
import DomHandler, { DomHandlerOptions, Node, Element } from "domhandler";
2-
import * as DomUtils from "domutils";
1+
import DomHandler, { DomHandlerOptions } from "domhandler";
2+
import { getFeed, Feed } from "domutils";
33
import { Parser, ParserOptions } from "./Parser";
44

5-
enum FeedItemMediaMedium {
6-
image,
7-
audio,
8-
video,
9-
document,
10-
executable,
11-
}
12-
13-
enum FeedItemMediaExpression {
14-
sample,
15-
full,
16-
nonstop,
17-
}
18-
19-
interface FeedItemMedia {
20-
url?: string;
21-
fileSize?: number;
22-
type?: string;
23-
medium: FeedItemMediaMedium | undefined;
24-
isDefault: boolean;
25-
expression?: FeedItemMediaExpression;
26-
bitrate?: number;
27-
framerate?: number;
28-
samplingrate?: number;
29-
channels?: number;
30-
duration?: number;
31-
height?: number;
32-
width?: number;
33-
lang?: string;
34-
}
35-
36-
interface FeedItem {
37-
id?: string;
38-
title?: string;
39-
link?: string;
40-
description?: string;
41-
pubDate?: Date;
42-
media?: FeedItemMedia[];
43-
}
44-
45-
interface Feed {
46-
type?: string;
47-
id?: string;
48-
title?: string;
49-
link?: string;
50-
description?: string;
51-
updated?: Date;
52-
author?: string;
53-
items?: FeedItem[];
54-
}
5+
export { getFeed };
556

567
/** @deprecated Handler is no longer necessary; use `getFeed` or `parseFeed` instead. */
578
export class FeedHandler extends DomHandler {
@@ -85,189 +36,6 @@ export class FeedHandler extends DomHandler {
8536
}
8637
}
8738

88-
/**
89-
* Get the feed object from the root of a DOM tree.
90-
*
91-
* @param dom - The DOM to extract the feed from.
92-
* @returns The feed.
93-
*/
94-
export function getFeed(dom: Node[]): Feed | null {
95-
const feedRoot = getOneElement(isValidFeed, dom);
96-
97-
if (!feedRoot) return null;
98-
99-
const feed: Feed = {};
100-
101-
if (feedRoot.name === "feed") {
102-
const childs = feedRoot.children;
103-
feed.type = "atom";
104-
addConditionally(feed, "id", "id", childs);
105-
addConditionally(feed, "title", "title", childs);
106-
const href = getAttribute("href", getOneElement("link", childs));
107-
if (href) {
108-
feed.link = href;
109-
}
110-
addConditionally(feed, "description", "subtitle", childs);
111-
112-
const updated = fetch("updated", childs);
113-
if (updated) {
114-
feed.updated = new Date(updated);
115-
}
116-
117-
addConditionally(feed, "author", "email", childs, true);
118-
feed.items = getElements("entry", childs).map((item) => {
119-
const entry: FeedItem = {};
120-
const { children } = item;
121-
122-
addConditionally(entry, "id", "id", children);
123-
addConditionally(entry, "title", "title", children);
124-
125-
const href = getAttribute("href", getOneElement("link", children));
126-
if (href) {
127-
entry.link = href;
128-
}
129-
130-
const description =
131-
fetch("summary", children) || fetch("content", children);
132-
if (description) {
133-
entry.description = description;
134-
}
135-
136-
const pubDate = fetch("updated", children);
137-
if (pubDate) {
138-
entry.pubDate = new Date(pubDate);
139-
}
140-
141-
entry.media = getMediaElements(children);
142-
143-
return entry;
144-
});
145-
} else {
146-
const childs =
147-
getOneElement("channel", feedRoot.children)?.children ?? [];
148-
feed.type = feedRoot.name.substr(0, 3);
149-
feed.id = "";
150-
151-
addConditionally(feed, "title", "title", childs);
152-
addConditionally(feed, "link", "link", childs);
153-
addConditionally(feed, "description", "description", childs);
154-
155-
const updated = fetch("lastBuildDate", childs);
156-
if (updated) {
157-
feed.updated = new Date(updated);
158-
}
159-
160-
addConditionally(feed, "author", "managingEditor", childs, true);
161-
162-
feed.items = getElements("item", feedRoot.children).map(
163-
(item: Element) => {
164-
const entry: FeedItem = {};
165-
const { children } = item;
166-
addConditionally(entry, "id", "guid", children);
167-
addConditionally(entry, "title", "title", children);
168-
addConditionally(entry, "link", "link", children);
169-
addConditionally(entry, "description", "description", children);
170-
const pubDate = fetch("pubDate", children);
171-
if (pubDate) entry.pubDate = new Date(pubDate);
172-
entry.media = getMediaElements(children);
173-
return entry;
174-
}
175-
);
176-
}
177-
178-
return feed;
179-
}
180-
181-
function getMediaElements(where: Node | Node[]): FeedItemMedia[] {
182-
return getElements("media:content", where).map((elem) => {
183-
const media: FeedItemMedia = {
184-
medium: elem.attribs.medium as unknown as
185-
| FeedItemMediaMedium
186-
| undefined,
187-
isDefault: !!elem.attribs.isDefault,
188-
};
189-
190-
if (elem.attribs.url) {
191-
media.url = elem.attribs.url;
192-
}
193-
if (elem.attribs.fileSize) {
194-
media.fileSize = parseInt(elem.attribs.fileSize, 10);
195-
}
196-
if (elem.attribs.type) {
197-
media.type = elem.attribs.type;
198-
}
199-
if (elem.attribs.expression) {
200-
media.expression = elem.attribs
201-
.expression as unknown as FeedItemMediaExpression;
202-
}
203-
if (elem.attribs.bitrate) {
204-
media.bitrate = parseInt(elem.attribs.bitrate, 10);
205-
}
206-
if (elem.attribs.framerate) {
207-
media.framerate = parseInt(elem.attribs.framerate, 10);
208-
}
209-
if (elem.attribs.samplingrate) {
210-
media.samplingrate = parseInt(elem.attribs.samplingrate, 10);
211-
}
212-
if (elem.attribs.channels) {
213-
media.channels = parseInt(elem.attribs.channels, 10);
214-
}
215-
if (elem.attribs.duration) {
216-
media.duration = parseInt(elem.attribs.duration, 10);
217-
}
218-
if (elem.attribs.height) {
219-
media.height = parseInt(elem.attribs.height, 10);
220-
}
221-
if (elem.attribs.width) {
222-
media.width = parseInt(elem.attribs.width, 10);
223-
}
224-
if (elem.attribs.lang) {
225-
media.lang = elem.attribs.lang;
226-
}
227-
228-
return media;
229-
});
230-
}
231-
232-
function getElements(tagName: string, where: Node | Node[]) {
233-
return DomUtils.getElementsByTagName(tagName, where, true);
234-
}
235-
function getOneElement(
236-
tagName: string | ((name: string) => boolean),
237-
node: Node | Node[]
238-
): Element | null {
239-
return DomUtils.getElementsByTagName(tagName, node, true, 1)[0];
240-
}
241-
function fetch(tagName: string, where: Node | Node[], recurse = false): string {
242-
return DomUtils.textContent(
243-
DomUtils.getElementsByTagName(tagName, where, recurse, 1)
244-
).trim();
245-
}
246-
247-
function getAttribute(name: string, elem: Element | null): string | null {
248-
if (!elem) {
249-
return null;
250-
}
251-
252-
const { attribs } = elem;
253-
return attribs[name];
254-
}
255-
256-
function addConditionally<T>(
257-
obj: T,
258-
prop: keyof T,
259-
what: string,
260-
where: Node | Node[],
261-
recurse = false
262-
) {
263-
const tmp = fetch(what, where, recurse);
264-
if (tmp) obj[prop] = tmp as unknown as T[keyof T];
265-
}
266-
267-
function isValidFeed(value: string) {
268-
return value === "rss" || value === "feed" || value === "rdf:RDF";
269-
}
270-
27139
/**
27240
* Parse a feed.
27341
*

0 commit comments

Comments
 (0)