-
-
Notifications
You must be signed in to change notification settings - Fork 1.7k
/
Copy pathnode.ts
87 lines (71 loc) · 2.58 KB
/
node.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
export * from './index.js';
import type { CheerioAPI, CheerioOptions } from './index.js';
import { load } from './index.js';
import { flatten as flattenOptions } from './options.js';
import { adapter as htmlparser2Adapter } from 'parse5-htmlparser2-tree-adapter';
// eslint-disable-next-line node/file-extension-in-import
import { WritableStream as Htmlparser2Stream } from 'htmlparser2/lib/WritableStream';
import DomHandler from 'domhandler';
import { ParserStream as Parse5Stream } from 'parse5-parser-stream';
import { DecodeStream } from 'encoding-sniffer';
import * as undici from 'undici';
import { type Writable, finished } from 'node:stream';
/**
 * Creates a writable stream that parses the strings written to it into a
 * document and hands the loaded result to `cb`.
 *
 * With `xmlMode` or `_useHtmlParser2` set in the flattened options, parsing is
 * done by the htmlparser2 stream backed by a `DomHandler`; in every other case
 * the parse5 stream is used with the htmlparser2 tree adapter.
 *
 * @param options - Cheerio options; flattened before being handed to a parser.
 * @param cb - Receives the error reported by the parser (if any) together with
 *   the document loaded from whatever was parsed.
 * @returns The writable stream to feed markup into.
 */
export function stringStream(
  options: CheerioOptions,
  cb: (err: Error | null | undefined, $: CheerioAPI) => void
): Writable {
  const flatOptions = flattenOptions(options);

  // Default path: neither XML mode nor htmlparser2 was requested, so use parse5.
  if (!flatOptions || (!flatOptions.xmlMode && !flatOptions._useHtmlParser2)) {
    const parse5Stream = new Parse5Stream({
      ...flatOptions,
      treeAdapter: htmlparser2Adapter,
    });

    // Report back once the stream has finished or failed.
    finished(parse5Stream, (err) => {
      cb(err, load(parse5Stream.document));
    });

    return parse5Stream;
  }

  // The handler's completion callback reads the root node from the handler itself.
  const domHandler: DomHandler = new DomHandler((err) => {
    cb(err, load(domHandler.root));
  }, flatOptions);

  return new Htmlparser2Stream(domHandler, flatOptions);
}
/**
 * Creates a writable stream for raw input: data written to it passes through
 * a `DecodeStream` (from `encoding-sniffer`) and is piped into the stream
 * returned by `stringStream`.
 *
 * @param options - Cheerio options, forwarded to `stringStream`.
 * @param cb - Callback forwarded to `stringStream`; receives the error (if
 *   any) and the loaded document.
 * @returns The `DecodeStream` instance that callers should write to.
 */
export function decodeStream(
  options: CheerioOptions,
  cb: (err: Error | null | undefined, $: CheerioAPI) => void
): Writable {
  // TODO: Set the encoding to UTF8 for XML mode
  const decoder = new DecodeStream();

  // Everything the decoder emits is fed straight into the parsing stream.
  decoder.pipe(stringStream(options, cb));

  return decoder;
}
/**
 * Gets a document from a URL.
 *
 * Issues a `GET` request via `undici.stream` and pipes the response body into
 * the stream returned by `decodeStream`, resolving with the loaded document.
 *
 * @param url - The URL to fetch.
 * @param options - Cheerio options, forwarded to `decodeStream`.
 * @returns A promise that resolves with the loaded document.
 * @throws {Error} If the response status code is not in the 2xx range, if the
 *   request fails, or if parsing reports an error. Redirect (3xx) responses
 *   are rejected as well unless the dispatcher follows them.
 */
export async function request(
  // eslint-disable-next-line node/no-unsupported-features/node-builtins
  url: string | URL,
  options: CheerioOptions
): Promise<CheerioAPI> {
  let undiciStream: Promise<undici.Dispatcher.StreamData> | undefined;

  const promise = new Promise<CheerioAPI>((resolve, reject) => {
    undiciStream = undici.stream(
      url,
      {
        method: 'GET',
        headers: {
          'user-agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
        },
      },
      (data) => {
        /*
         * Only successful (2xx) responses are parsed. The negated range check
         * also rejects a missing or zero status code.
         */
        if (!(data.statusCode >= 200 && data.statusCode < 300)) {
          throw new Error(`Received ${data.statusCode}`);
        }

        // TODO: Forward the charset from the header to the decodeStream.
        return decodeStream(options, (err, $) =>
          err ? reject(err) : resolve($)
        );
      }
    );
  });

  /*
   * Wait for both the request and the parsed document. Awaiting them together
   * means a rejection from either side always has a handler attached, instead
   * of the parser's rejection sitting unhandled while the request is pending.
   */
  const [, $] = await Promise.all([undiciStream, promise]);

  return $;
}