From 514c9c4e311e08ea43302f36568c2837da88cd04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Mon, 10 Oct 2022 19:08:41 +0100 Subject: [PATCH 01/18] feat: Add batteries included methods for Node --- package-lock.json | 54 ++++++++++++++++++++++++++++++++------- package.json | 6 +++-- src/node.ts | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 11 deletions(-) create mode 100644 src/node.ts diff --git a/package-lock.json b/package-lock.json index 476edae4be..3728ee3ad6 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,9 +13,11 @@ "dom-serializer": "^2.0.0", "domhandler": "^5.0.3", "domutils": "^3.0.1", + "encoding-sniffer": "^0.0.0", "htmlparser2": "^8.0.1", "parse5": "^7.1.2", - "parse5-htmlparser2-tree-adapter": "^7.0.0" + "parse5-htmlparser2-tree-adapter": "^7.0.0", + "parse5-parser-stream": "^7.1.2" }, "devDependencies": { "@imgix/js-core": "^3.7.0", @@ -2671,6 +2673,18 @@ "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", "dev": true }, + "node_modules/encoding-sniffer": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.0.0.tgz", + "integrity": "sha512-5JV4vyWTXIPhI9mnBrzBdwQ0R2ConTevpHznpSYzmEFG44rkMygNjwtixTDQEBbL43u/UwbolEZgv/N7RdYueg==", + "dependencies": { + "iconv-lite": "^0.6.3", + "whatwg-encoding": "^2.0.0" + }, + "funding": { + "url": "https://github.com/fb55/encoding-sniffer?sponsor=1" + } + }, "node_modules/entities": { "version": "4.4.0", "resolved": "https://registry.npmjs.org/entities/-/entities-4.4.0.tgz", @@ -3620,7 +3634,6 @@ "version": "0.6.3", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", - "dev": true, "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" }, @@ -5889,6 +5902,17 @@ "url": "https://github.com/inikulin/parse5?sponsor=1" } }, + "node_modules/parse5-parser-stream": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz", + "integrity": "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==", + "dependencies": { + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -6289,8 +6313,7 @@ "node_modules/safer-buffer": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", - "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", - "dev": true + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" }, "node_modules/saxes": { "version": "6.0.0", @@ -7095,7 +7118,6 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-2.0.0.tgz", "integrity": "sha512-p41ogyeMUrw3jWclHWTQg1k05DSVXPLcVxRTYsXUk+ZooOCZLcoYgPZ/HL/D/N+uQPOtcp1me1WhBEaX02mhWg==", - "dev": true, "dependencies": { "iconv-lite": "0.6.3" }, @@ -9368,6 +9390,15 @@ "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", "dev": true }, + "encoding-sniffer": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.0.0.tgz", + "integrity": "sha512-5JV4vyWTXIPhI9mnBrzBdwQ0R2ConTevpHznpSYzmEFG44rkMygNjwtixTDQEBbL43u/UwbolEZgv/N7RdYueg==", + "requires": { + "iconv-lite": "^0.6.3", + "whatwg-encoding": "^2.0.0" + } + }, "entities": { "version": "4.4.0", "resolved": "https://registry.npmjs.org/entities/-/entities-4.4.0.tgz", @@ -10049,7 +10080,6 @@ "version": "0.6.3", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", - "dev": true, "requires": { "safer-buffer": ">= 2.1.2 < 3.0.0" } @@ -11652,6 +11682,14 @@ "parse5": "^7.0.0" } }, + "parse5-parser-stream": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz", + "integrity": "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==", + "requires": { + "parse5": "^7.0.0" + } + }, "path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -11922,8 +11960,7 @@ "safer-buffer": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", - "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", - "dev": true + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" }, "saxes": { "version": "6.0.0", @@ -12504,7 +12541,6 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-2.0.0.tgz", "integrity": "sha512-p41ogyeMUrw3jWclHWTQg1k05DSVXPLcVxRTYsXUk+ZooOCZLcoYgPZ/HL/D/N+uQPOtcp1me1WhBEaX02mhWg==", - "dev": true, "requires": { "iconv-lite": "0.6.3" } diff --git a/package.json b/package.json index c9768fb3ae..bb6a92b23b 100644 --- a/package.json +++ b/package.json @@ -52,9 +52,11 @@ "dom-serializer": "^2.0.0", "domhandler": "^5.0.3", "domutils": "^3.0.1", + "encoding-sniffer": "^0.0.0", "htmlparser2": "^8.0.1", "parse5": "^7.1.2", - "parse5-htmlparser2-tree-adapter": "^7.0.0" + "parse5-htmlparser2-tree-adapter": "^7.0.0", + "parse5-parser-stream": "^7.1.2" }, "devDependencies": { "@imgix/js-core": "^3.7.0", @@ -112,7 +114,7 @@ "lint-staged": { "*.js": [ "prettier --write", - "npm run test:lint -- --fix" + "npm run lint:es -- --fix" ], "*.{json,md,ts,yml}": [ "prettier --write" diff --git a/src/node.ts b/src/node.ts new file mode 100644 index 0000000000..1c5e44977a --- /dev/null +++ b/src/node.ts @@ -0,0 +1,65 @@ +export * from './index.js'; + +import type { CheerioAPI, CheerioOptions } from './index.js'; +import { load } from './index.js'; +import { flatten as flattenOptions } from './options.js'; +import { adapter as htmlparser2Adapter } from 'parse5-htmlparser2-tree-adapter'; + +// eslint-disable-next-line node/file-extension-in-import +import { WritableStream as Htmlparser2Stream } from 'htmlparser2/lib/WritableStream'; +import DomHandler from 'domhandler'; +import { ParserStream as Parse5Stream } from 'parse5-parser-stream'; +import { DecodeStream } from 'encoding-sniffer'; +import { type Writable, finished } from 'node:stream'; + +export function stream( + options: CheerioOptions, + cb: (err: Error | null | undefined, $: CheerioAPI) => void +): Writable { + const opts = flattenOptions(options); + + if (opts && (opts.xmlMode || opts._useHtmlParser2)) { + const handler: DomHandler = new DomHandler( + (err) => cb(err, load(handler.root)), + opts + ); + + return new Htmlparser2Stream(handler, opts); + } + + const stream = new Parse5Stream({ ...opts, treeAdapter: htmlparser2Adapter }); + + finished(stream, (err) => cb(err, load(stream.document))); + + return stream; +} + +// Get a document from a URL +export async function request( + url: string | URL, + options: CheerioOptions +): Promise { + const opts = flattenOptions(options); + const str = stream(opts, (err, $) => { + if (err) { + throw err; + } + }); + + const res = await fetch(url, { + method: 'GET', + headers: { + 'user-agent': + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36', + }, + }); + + if (!res.ok) { + throw new Error(`${res.status} ${res.statusText}`); + } + + str.write(res.body); + str.end(); + + return stream; +} From ff4561bef9d7efd2fc867c33540121c1fa4ed778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Fri, 14 Oct 2022 10:21:33 +0100 Subject: [PATCH 02/18] Update node.ts --- src/node.ts | 62 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 20 deletions(-) diff --git a/src/node.ts b/src/node.ts index 1c5e44977a..9f6346a324 100644 --- a/src/node.ts +++ b/src/node.ts @@ -10,9 +10,10 @@ import { WritableStream as Htmlparser2Stream } from 'htmlparser2/lib/WritableStr import DomHandler from 'domhandler'; import { ParserStream as Parse5Stream } from 'parse5-parser-stream'; import { DecodeStream } from 'encoding-sniffer'; +import * as undici from 'undici'; import { type Writable, finished } from 'node:stream'; -export function stream( +export function stringStream( options: CheerioOptions, cb: (err: Error | null | undefined, $: CheerioAPI) => void ): Writable { @@ -34,32 +35,53 @@ export function stream( return stream; } +export function decodeStream( + options: CheerioOptions, + cb: (err: Error | null | undefined, $: CheerioAPI) => void +): Writable { + // TODO: Set the encoding to UTF8 for XML mode + const decodeStream = new DecodeStream(); + const loadStream = stringStream(options, cb); + + decodeStream.pipe(loadStream); + + return decodeStream; +} + // Get a document from a URL export async function request( + // eslint-disable-next-line node/no-unsupported-features/node-builtins url: string | URL, options: CheerioOptions ): Promise { - const opts = flattenOptions(options); - const str = stream(opts, (err, $) => { - if (err) { - throw err; - } - }); + let undiciStream: Promise | undefined; - const res = await fetch(url, { - method: 'GET', - headers: { - 'user-agent': - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36', - }, - }); + const promise = new Promise((resolve, reject) => { + undiciStream = undici.stream( + url, + { + method: 'GET', + headers: { + 'user-agent': + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36', + }, + }, + (data) => { + // TODO Add support for handling status codes. + if (!data.statusCode) { + throw new Error(`Received ${data.statusCode}`); + } - if (!res.ok) { - throw new Error(`${res.status} ${res.statusText}`); - } + // TODO: Forward the charset from the header to the decodeStream. + return decodeStream(options, (err, $) => + err ? reject(err) : resolve($) + ); + } + ); + }); - str.write(res.body); - str.end(); + // Let's make sure the request is completed before returning the promise. + await undiciStream; - return stream; + return promise; } From 2779ed3d6689eabc50428d5df5aaf723eb556f0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Wed, 2 Nov 2022 14:03:27 +0100 Subject: [PATCH 03/18] Start writing tests --- src/node.spec.ts | 87 ++++++++++++++++++++++++++++++++++++++++++++++++ src/node.ts | 38 +++++++++++++-------- 2 files changed, 112 insertions(+), 13 deletions(-) create mode 100644 src/node.spec.ts diff --git a/src/node.spec.ts b/src/node.spec.ts new file mode 100644 index 0000000000..65d64b84cd --- /dev/null +++ b/src/node.spec.ts @@ -0,0 +1,87 @@ +/* eslint-disable jest/no-done-callback */ +import * as cheerio from './node.js'; +import { Writable } from 'node:stream'; + +function noop() { + // Ignore +} + +const TEST_HTML = '

Hello World

'; +const TEST_HTML_UTF16 = Buffer.from(TEST_HTML, 'utf16le'); +const TEST_HTML_UTF16_BOM = Buffer.from([ + // UTF16-LE BOM + 0xff, + 0xfe, + ...Array.from(TEST_HTML_UTF16), +]); + +describe('Node API', () => { + describe('stringStream', () => { + it('should use parse5 by default', (cb) => { + const stream = cheerio.stringStream({}, (err, $) => { + expect(err).toBeUndefined(); + + expect($.html()).toBe( + `${TEST_HTML}` + ); + + cb(); + }); + expect(stream).toBeInstanceOf(Writable); + + stream.end(TEST_HTML); + }); + + it('should error from parse5 on buffer', () => { + const stream = cheerio.stringStream({}, noop); + expect(stream).toBeInstanceOf(Writable); + + expect(() => stream.write(Buffer.from(TEST_HTML))).toThrow( + 'Parser can work only with string streams.' + ); + }); + + it('should use htmlparser2 for XML', (cb) => { + const stream = cheerio.stringStream({ xmlMode: true }, (err, $) => { + expect(err).toBeNull(); + + expect($.html()).toBe(TEST_HTML); + + cb(); + }); + expect(stream).toBeInstanceOf(Writable); + + stream.end(TEST_HTML); + }); + }); + + describe('decodeStream', () => { + it('should use parse5 by default', (cb) => { + const stream = cheerio.decodeStream({}, (err, $) => { + expect(err).toBeUndefined(); + + expect($.html()).toBe( + `${TEST_HTML}` + ); + + cb(); + }); + expect(stream).toBeInstanceOf(Writable); + + stream.end(TEST_HTML_UTF16_BOM); + }); + + it('should use htmlparser2 for XML', (cb) => { + const stream = cheerio.decodeStream({ xmlMode: true }, (err, $) => { + expect(err).toBeNull(); + + expect($.html()).toBe(TEST_HTML); + + cb(); + }); + expect(stream).toBeInstanceOf(Writable); + + stream.end(TEST_HTML_UTF16_BOM); + }); + }); +}); diff --git a/src/node.ts b/src/node.ts index 9f6346a324..3f27ef987f 100644 --- a/src/node.ts +++ b/src/node.ts @@ -2,46 +2,58 @@ export * from './index.js'; import type { CheerioAPI, CheerioOptions } from './index.js'; import { load } from './index.js'; -import { flatten as flattenOptions } from './options.js'; +import { flatten as flattenOptions, InternalOptions } from './options.js'; import { adapter as htmlparser2Adapter } from 'parse5-htmlparser2-tree-adapter'; // eslint-disable-next-line node/file-extension-in-import import { WritableStream as Htmlparser2Stream } from 'htmlparser2/lib/WritableStream'; import DomHandler from 'domhandler'; import { ParserStream as Parse5Stream } from 'parse5-parser-stream'; -import { DecodeStream } from 'encoding-sniffer'; +import { DecodeStream, type SnifferOptions } from 'encoding-sniffer'; import * as undici from 'undici'; import { type Writable, finished } from 'node:stream'; -export function stringStream( - options: CheerioOptions, +function _stringStream( + options: InternalOptions | undefined, cb: (err: Error | null | undefined, $: CheerioAPI) => void ): Writable { - const opts = flattenOptions(options); - - if (opts && (opts.xmlMode || opts._useHtmlParser2)) { + if (options && (options.xmlMode || options._useHtmlParser2)) { const handler: DomHandler = new DomHandler( (err) => cb(err, load(handler.root)), - opts + options ); - return new Htmlparser2Stream(handler, opts); + return new Htmlparser2Stream(handler, options); } - const stream = new Parse5Stream({ ...opts, treeAdapter: htmlparser2Adapter }); + const stream = new Parse5Stream({ + ...options, + treeAdapter: htmlparser2Adapter, + }); finished(stream, (err) => cb(err, load(stream.document))); return stream; } +export function stringStream( + options: CheerioOptions, + cb: (err: Error | null | undefined, $: CheerioAPI) => void +): Writable { + return _stringStream(flattenOptions(options), cb); +} + export function decodeStream( options: CheerioOptions, cb: (err: Error | null | undefined, $: CheerioAPI) => void ): Writable { - // TODO: Set the encoding to UTF8 for XML mode - const decodeStream = new DecodeStream(); - const loadStream = stringStream(options, cb); + const opts = flattenOptions(options); + const snifferOpts: SnifferOptions = { + // Set the encoding to UTF8 for XML mode + defaultEncoding: opts?.xmlMode ? 'utf8' : 'windows-1252', + }; + const decodeStream = new DecodeStream(snifferOpts); + const loadStream = _stringStream(opts, cb); decodeStream.pipe(loadStream); From 5c4bc880646dd4cbf6a2a554d262b03a7264fe9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Fri, 4 Nov 2022 15:36:49 +0000 Subject: [PATCH 04/18] Bump encoding-sniffer --- package-lock.json | 14 +++++++------- package.json | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/package-lock.json b/package-lock.json index 3728ee3ad6..da6d2f1063 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,7 +13,7 @@ "dom-serializer": "^2.0.0", "domhandler": "^5.0.3", "domutils": "^3.0.1", - "encoding-sniffer": "^0.0.0", + "encoding-sniffer": "^0.0.1", "htmlparser2": "^8.0.1", "parse5": "^7.1.2", "parse5-htmlparser2-tree-adapter": "^7.0.0", @@ -2674,9 +2674,9 @@ "dev": true }, "node_modules/encoding-sniffer": { - "version": "0.0.0", - "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.0.0.tgz", - "integrity": "sha512-5JV4vyWTXIPhI9mnBrzBdwQ0R2ConTevpHznpSYzmEFG44rkMygNjwtixTDQEBbL43u/UwbolEZgv/N7RdYueg==", + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.0.1.tgz", + "integrity": "sha512-8BhuvYsCaMxDgyQVZgnoXyoSD/jS8Ipj2Fxti+lhA0fAT14aU7Y7ofwc8npIz0x49ikH188Mdi6K7MlUbZOqmg==", "dependencies": { "iconv-lite": "^0.6.3", "whatwg-encoding": "^2.0.0" @@ -9391,9 +9391,9 @@ "dev": true }, "encoding-sniffer": { - "version": "0.0.0", - "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.0.0.tgz", - "integrity": "sha512-5JV4vyWTXIPhI9mnBrzBdwQ0R2ConTevpHznpSYzmEFG44rkMygNjwtixTDQEBbL43u/UwbolEZgv/N7RdYueg==", + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.0.1.tgz", + "integrity": "sha512-8BhuvYsCaMxDgyQVZgnoXyoSD/jS8Ipj2Fxti+lhA0fAT14aU7Y7ofwc8npIz0x49ikH188Mdi6K7MlUbZOqmg==", "requires": { "iconv-lite": "^0.6.3", "whatwg-encoding": "^2.0.0" diff --git a/package.json b/package.json index bb6a92b23b..dcd303cfd6 100644 --- a/package.json +++ b/package.json @@ -52,7 +52,7 @@ "dom-serializer": "^2.0.0", "domhandler": "^5.0.3", "domutils": "^3.0.1", - "encoding-sniffer": "^0.0.0", + "encoding-sniffer": "^0.0.1", "htmlparser2": "^8.0.1", "parse5": "^7.1.2", "parse5-htmlparser2-tree-adapter": "^7.0.0", From 00499b42e21c7f59dd93a90b16cc4073f92024c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Mon, 7 Nov 2022 16:45:29 +0000 Subject: [PATCH 05/18] Handle mime types, work on getting streams going --- package-lock.json | 20 +++++++++--- package.json | 4 ++- src/node.ts | 80 +++++++++++++++++++++++++++++++---------------- tsconfig.json | 2 +- 4 files changed, 73 insertions(+), 33 deletions(-) diff --git a/package-lock.json b/package-lock.json index da6d2f1063..1401d10a5f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -17,7 +17,8 @@ "htmlparser2": "^8.0.1", "parse5": "^7.1.2", "parse5-htmlparser2-tree-adapter": "^7.0.0", - "parse5-parser-stream": "^7.1.2" + "parse5-parser-stream": "^7.1.2", + "whatwg-mimetype": "^3.0.0" }, "devDependencies": { "@imgix/js-core": "^3.7.0", @@ -25,6 +26,7 @@ "@types/benchmark": "^2.1.2", "@types/jest": "^29.2.3", "@types/node": "^18.11.9", + "@types/whatwg-mimetype": "^3.0.0", "@typescript-eslint/eslint-plugin": "^5.44.0", "@typescript-eslint/parser": "^5.44.0", "benchmark": "^2.1.4", @@ -1479,6 +1481,12 @@ "integrity": "sha512-PBjIUxZHOuj0R15/xuwJYjFi+KZdNFrehocChv4g5hu6aFroHue8m0lBP0POdK2nKzbw0cgV1mws8+V/JAcEkQ==", "dev": true }, + "node_modules/@types/whatwg-mimetype": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@types/whatwg-mimetype/-/whatwg-mimetype-3.0.0.tgz", + "integrity": "sha512-xHFOhd41VpUR6Y0k8ZinlyFv5cyhC/r2zghJgWWN8oNxqNo45Nf0qCBInJsFeifLeoHcIF4voEfap4A2GYHWkw==", + "dev": true + }, "node_modules/@types/yargs": { "version": "17.0.13", "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.13.tgz", @@ -7129,7 +7137,6 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-3.0.0.tgz", "integrity": "sha512-nt+N2dzIutVRxARx1nghPKGv1xHikU7HKdfafKkLNLindmPU/ch3U31NOCGGA/dmPcmb1VlofO0vnKAcsm0o/Q==", - "dev": true, "engines": { "node": ">=12" } @@ -8535,6 +8542,12 @@ "integrity": "sha512-PBjIUxZHOuj0R15/xuwJYjFi+KZdNFrehocChv4g5hu6aFroHue8m0lBP0POdK2nKzbw0cgV1mws8+V/JAcEkQ==", "dev": true }, + "@types/whatwg-mimetype": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@types/whatwg-mimetype/-/whatwg-mimetype-3.0.0.tgz", + "integrity": "sha512-xHFOhd41VpUR6Y0k8ZinlyFv5cyhC/r2zghJgWWN8oNxqNo45Nf0qCBInJsFeifLeoHcIF4voEfap4A2GYHWkw==", + "dev": true + }, "@types/yargs": { "version": "17.0.13", "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.13.tgz", @@ -12548,8 +12561,7 @@ "whatwg-mimetype": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-3.0.0.tgz", - "integrity": "sha512-nt+N2dzIutVRxARx1nghPKGv1xHikU7HKdfafKkLNLindmPU/ch3U31NOCGGA/dmPcmb1VlofO0vnKAcsm0o/Q==", - "dev": true + "integrity": "sha512-nt+N2dzIutVRxARx1nghPKGv1xHikU7HKdfafKkLNLindmPU/ch3U31NOCGGA/dmPcmb1VlofO0vnKAcsm0o/Q==" }, "whatwg-url": { "version": "11.0.0", diff --git a/package.json b/package.json index dcd303cfd6..797ebfc993 100644 --- a/package.json +++ b/package.json @@ -56,7 +56,8 @@ "htmlparser2": "^8.0.1", "parse5": "^7.1.2", "parse5-htmlparser2-tree-adapter": "^7.0.0", - "parse5-parser-stream": "^7.1.2" + "parse5-parser-stream": "^7.1.2", + "whatwg-mimetype": "^3.0.0" }, "devDependencies": { "@imgix/js-core": "^3.7.0", @@ -64,6 +65,7 @@ "@types/benchmark": "^2.1.2", "@types/jest": "^29.2.3", "@types/node": "^18.11.9", + "@types/whatwg-mimetype": "^3.0.0", "@typescript-eslint/eslint-plugin": "^5.44.0", "@typescript-eslint/parser": "^5.44.0", "benchmark": "^2.1.4", diff --git a/src/node.ts b/src/node.ts index 3f27ef987f..9af7392a97 100644 --- a/src/node.ts +++ b/src/node.ts @@ -11,6 +11,7 @@ import DomHandler from 'domhandler'; import { ParserStream as Parse5Stream } from 'parse5-parser-stream'; import { DecodeStream, type SnifferOptions } from 'encoding-sniffer'; import * as undici from 'undici'; +import MIMEType from 'whatwg-mimetype'; import { type Writable, finished } from 'node:stream'; function _stringStream( @@ -43,16 +44,21 @@ export function stringStream( return _stringStream(flattenOptions(options), cb); } +export interface DecodeStreamOptions extends CheerioOptions { + encoding?: SnifferOptions; +} + export function decodeStream( - options: CheerioOptions, + options: DecodeStreamOptions, cb: (err: Error | null | undefined, $: CheerioAPI) => void ): Writable { - const opts = flattenOptions(options); - const snifferOpts: SnifferOptions = { - // Set the encoding to UTF8 for XML mode - defaultEncoding: opts?.xmlMode ? 'utf8' : 'windows-1252', - }; - const decodeStream = new DecodeStream(snifferOpts); + const { encoding = {}, ...cheerioOptions } = options; + const opts = flattenOptions(cheerioOptions); + + // Set the encoding to UTF8 for XML mode + encoding.defaultEncoding ??= opts?.xmlMode ? 'utf8' : 'windows-1252'; + + const decodeStream = new DecodeStream(encoding); const loadStream = _stringStream(opts, cb); decodeStream.pipe(loadStream); @@ -60,36 +66,56 @@ export function decodeStream( return decodeStream; } +interface CheerioRequestOptions extends DecodeStreamOptions { + requestOptions?: Parameters[1]; +} + // Get a document from a URL export async function request( // eslint-disable-next-line node/no-unsupported-features/node-builtins url: string | URL, - options: CheerioOptions + options: CheerioRequestOptions = {} ): Promise { + const { + requestOptions = { method: 'GET' }, + encoding = {}, + ...cheerioOptions + } = options; let undiciStream: Promise | undefined; + requestOptions.method ??= 'GET'; + const promise = new Promise((resolve, reject) => { - undiciStream = undici.stream( - url, - { - method: 'GET', - headers: { - 'user-agent': - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36', - }, - }, - (data) => { - // TODO Add support for handling status codes. - if (!data.statusCode) { - throw new Error(`Received ${data.statusCode}`); - } - - // TODO: Forward the charset from the header to the decodeStream. - return decodeStream(options, (err, $) => - err ? reject(err) : resolve($) + undiciStream = undici.stream(url, requestOptions, (res) => { + // TODO Add support for handling status codes, such as redirects. + if (!res.statusCode) { + throw new Error(`Received ${res.statusCode}`); + } + + const contentType = res.headers['content-type']; + const mimeType = new MIMEType(contentType ?? 'text/html'); + + if (!mimeType.isHTML() && !mimeType.isXML()) { + throw new RangeError( + `The content-type "${contentType}" is neither HTML nor XML.` ); } - ); + + // Forward the charset from the header to the decodeStream. + encoding.transportLayerEncodingLabel ??= + mimeType.parameters.get('charset'); + + const opts = { + ...flattenOptions(cheerioOptions), + encoding, + // Set XML mode based on the MIME type. + xmlMode: mimeType.isXML(), + // TODO: Set the baseURL based on the final URL. + baseURL: (res.context as any)?.url, + }; + + return decodeStream(opts, (err, $) => (err ? reject(err) : resolve($))); + }); }); // Let's make sure the request is completed before returning the promise. diff --git a/tsconfig.json b/tsconfig.json index 42a229c78b..3c601600f0 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -25,7 +25,7 @@ /* Module Resolution Options */ "esModuleInterop": true, - "moduleResolution": "node", + "moduleResolution": "node16", "resolveJsonModule": true }, "include": ["src"], From 944b201ad519be7f59a5eb976193d43e80467248 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Mon, 7 Nov 2022 23:16:58 +0000 Subject: [PATCH 06/18] Set the `baseURL` --- src/node.ts | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/node.ts b/src/node.ts index 9af7392a97..5cf1141279 100644 --- a/src/node.ts +++ b/src/node.ts @@ -55,7 +55,7 @@ export function decodeStream( const { encoding = {}, ...cheerioOptions } = options; const opts = flattenOptions(cheerioOptions); - // Set the encoding to UTF8 for XML mode + // Set the default encoding to UTF-8 for XML mode encoding.defaultEncoding ??= opts?.xmlMode ? 'utf8' : 'windows-1252'; const decodeStream = new DecodeStream(encoding); @@ -66,10 +66,19 @@ export function decodeStream( return decodeStream; } -interface CheerioRequestOptions extends DecodeStreamOptions { - requestOptions?: Parameters[1]; +type UndiciStreamOptions = Parameters[1]; + +export interface CheerioRequestOptions extends DecodeStreamOptions { + /** The options passed to `undici`'s `stream` method. */ + requestOptions?: UndiciStreamOptions; } +const defaultRequestOptions: UndiciStreamOptions = { + method: 'GET', + maxRedirections: 5, + throwOnError: true, +}; + // Get a document from a URL export async function request( // eslint-disable-next-line node/no-unsupported-features/node-builtins @@ -77,7 +86,7 @@ export async function request( options: CheerioRequestOptions = {} ): Promise { const { - requestOptions = { method: 'GET' }, + requestOptions = defaultRequestOptions, encoding = {}, ...cheerioOptions } = options; @@ -87,11 +96,6 @@ export async function request( const promise = new Promise((resolve, reject) => { undiciStream = undici.stream(url, requestOptions, (res) => { - // TODO Add support for handling status codes, such as redirects. - if (!res.statusCode) { - throw new Error(`Received ${res.statusCode}`); - } - const contentType = res.headers['content-type']; const mimeType = new MIMEType(contentType ?? 'text/html'); @@ -102,16 +106,21 @@ export async function request( } // Forward the charset from the header to the decodeStream. - encoding.transportLayerEncodingLabel ??= - mimeType.parameters.get('charset'); + encoding.transportLayerEncodingLabel = mimeType.parameters.get('charset'); + + /* + * If we allow redirects, we will have entries in the history. + * The last entry will be the final URL. + */ + const history = (res.context as any)?.history; const opts = { ...flattenOptions(cheerioOptions), encoding, // Set XML mode based on the MIME type. xmlMode: mimeType.isXML(), - // TODO: Set the baseURL based on the final URL. - baseURL: (res.context as any)?.url, + // Set the `baseURL` to the final URL. + baseURL: history ? history[history.length - 1] : url, }; return decodeStream(opts, (err, $) => (err ? reject(err) : resolve($))); From 8b4a3eca058d52972a80fb0c6d128d1e5b092c05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Mon, 7 Nov 2022 23:23:47 +0000 Subject: [PATCH 07/18] Rename `request` to `fromURL` --- src/node.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/node.ts b/src/node.ts index 5cf1141279..507e9cbc1a 100644 --- a/src/node.ts +++ b/src/node.ts @@ -80,7 +80,7 @@ const defaultRequestOptions: UndiciStreamOptions = { }; // Get a document from a URL -export async function request( +export async function fromURL( // eslint-disable-next-line node/no-unsupported-features/node-builtins url: string | URL, options: CheerioRequestOptions = {} From 61ba1a4a561cf02c7c2c4c41e7ecef39309a405e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Tue, 8 Nov 2022 10:42:30 +0000 Subject: [PATCH 08/18] Add `fromURL` tests --- src/node.spec.ts | 143 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 97 insertions(+), 46 deletions(-) diff --git a/src/node.spec.ts b/src/node.spec.ts index 65d64b84cd..b59fb35673 100644 --- a/src/node.spec.ts +++ b/src/node.spec.ts @@ -1,6 +1,7 @@ /* eslint-disable jest/no-done-callback */ import * as cheerio from './node.js'; import { Writable } from 'node:stream'; +import { createServer } from 'node:http'; function noop() { // Ignore @@ -15,73 +16,123 @@ const TEST_HTML_UTF16_BOM = Buffer.from([ ...Array.from(TEST_HTML_UTF16), ]); -describe('Node API', () => { - describe('stringStream', () => { - it('should use parse5 by default', (cb) => { - const stream = cheerio.stringStream({}, (err, $) => { - expect(err).toBeUndefined(); +describe('stringStream', () => { + it('should use parse5 by default', (cb) => { + const stream = cheerio.stringStream({}, (err, $) => { + expect(err).toBeUndefined(); - expect($.html()).toBe( - `${TEST_HTML}` - ); + expect($.html()).toBe( + `${TEST_HTML}` + ); + + cb(); + }); + expect(stream).toBeInstanceOf(Writable); - cb(); - }); - expect(stream).toBeInstanceOf(Writable); + stream.end(TEST_HTML); + }); + + it('should error from parse5 on buffer', () => { + const stream = cheerio.stringStream({}, noop); + expect(stream).toBeInstanceOf(Writable); + + expect(() => stream.write(Buffer.from(TEST_HTML))).toThrow( + 'Parser can work only with string streams.' + ); + }); - stream.end(TEST_HTML); + it('should use htmlparser2 for XML', (cb) => { + const stream = cheerio.stringStream({ xmlMode: true }, (err, $) => { + expect(err).toBeNull(); + + expect($.html()).toBe(TEST_HTML); + + cb(); }); + expect(stream).toBeInstanceOf(Writable); + + stream.end(TEST_HTML); + }); +}); - it('should error from parse5 on buffer', () => { - const stream = cheerio.stringStream({}, noop); - expect(stream).toBeInstanceOf(Writable); +describe('decodeStream', () => { + it('should use parse5 by default', (cb) => { + const stream = cheerio.decodeStream({}, (err, $) => { + expect(err).toBeUndefined(); - expect(() => stream.write(Buffer.from(TEST_HTML))).toThrow( - 'Parser can work only with string streams.' + expect($.html()).toBe( + `${TEST_HTML}` ); + + cb(); }); + expect(stream).toBeInstanceOf(Writable); - it('should use htmlparser2 for XML', (cb) => { - const stream = cheerio.stringStream({ xmlMode: true }, (err, $) => { - expect(err).toBeNull(); + stream.end(TEST_HTML_UTF16_BOM); + }); - expect($.html()).toBe(TEST_HTML); + it('should use htmlparser2 for XML', (cb) => { + const stream = cheerio.decodeStream({ xmlMode: true }, (err, $) => { + expect(err).toBeNull(); - cb(); - }); - expect(stream).toBeInstanceOf(Writable); + expect($.html()).toBe(TEST_HTML); - stream.end(TEST_HTML); + cb(); }); - }); + expect(stream).toBeInstanceOf(Writable); - describe('decodeStream', () => { - it('should use parse5 by default', (cb) => { - const stream = cheerio.decodeStream({}, (err, $) => { - expect(err).toBeUndefined(); + stream.end(TEST_HTML_UTF16_BOM); + }); +}); - expect($.html()).toBe( - `${TEST_HTML}` - ); +function createTestServer( + contentType: string, + body: string | Buffer +): Promise { + return new Promise((resolve, reject) => { + const server = createServer((_req, res) => { + res.writeHead(200, { 'Content-Type': contentType }); + res.end(body); + }); - cb(); - }); - expect(stream).toBeInstanceOf(Writable); + server.listen(0, () => { + const address = server.address(); - stream.end(TEST_HTML_UTF16_BOM); + if (typeof address === 'string' || address === null) { + reject(new Error('Failed to get port')); + } else { + resolve(address.port); + } }); + }); +} - it('should use htmlparser2 for XML', (cb) => { - const stream = cheerio.decodeStream({ xmlMode: true }, (err, $) => { - expect(err).toBeNull(); +describe('fromURL', () => { + it('should fetch UTF-8 HTML', async () => { + const port = await createTestServer('text/html', TEST_HTML); + const $ = await cheerio.fromURL(`http://localhost:${port}`); - expect($.html()).toBe(TEST_HTML); + expect($.html()).toBe( + `${TEST_HTML}` + ); + }); - cb(); - }); - expect(stream).toBeInstanceOf(Writable); + it('should fetch UTF-16 HTML', async () => { + const port = await createTestServer( + 'text/html; charset=utf-16le', + TEST_HTML_UTF16 + ); + const $ = await cheerio.fromURL(`http://localhost:${port}`); - stream.end(TEST_HTML_UTF16_BOM); - }); + expect($.html()).toBe( + `${TEST_HTML}` + ); + }); + + it('should parse XML based on Content-Type', async () => { + const port = await createTestServer('text/xml', TEST_HTML); + const $ = await cheerio.fromURL(`http://localhost:${port}`); + + expect($.html()).toBe(TEST_HTML); }); }); From 357cf706f7bcb78e7095a3a1d2514a93a13a5f4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Wed, 16 Nov 2022 14:10:29 +0000 Subject: [PATCH 09/18] Add `accept` header --- src/node.ts | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/node.ts b/src/node.ts index 507e9cbc1a..1765e8d3c8 100644 --- a/src/node.ts +++ b/src/node.ts @@ -75,8 +75,14 @@ export interface CheerioRequestOptions extends DecodeStreamOptions { const defaultRequestOptions: UndiciStreamOptions = { method: 'GET', + // Allow redirects by default maxRedirections: 5, + // NOTE: `throwOnError` currently doesn't work https://github.com/nodejs/undici/issues/1753 throwOnError: true, + // Set an Accept header + headers: { + accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + }, }; // Get a document from a URL @@ -92,7 +98,8 @@ export async function fromURL( } = options; let undiciStream: Promise | undefined; - requestOptions.method ??= 'GET'; + // Add headers if none were supplied. + requestOptions.headers ??= defaultRequestOptions.headers; const promise = new Promise((resolve, reject) => { undiciStream = undici.stream(url, requestOptions, (res) => { @@ -115,7 +122,7 @@ export async function fromURL( const history = (res.context as any)?.history; const opts = { - ...flattenOptions(cheerioOptions), + ...cheerioOptions, encoding, // Set XML mode based on the MIME type. xmlMode: mimeType.isXML(), From e136833617f6dcc69bb4416c01654906923d6359 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Mon, 21 Nov 2022 18:17:58 +0000 Subject: [PATCH 10/18] Add `loadBuffer` --- src/node.spec.ts | 18 ++++++++++++++++++ src/node.ts | 27 ++++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/node.spec.ts b/src/node.spec.ts index b59fb35673..8e65553a01 100644 --- a/src/node.spec.ts +++ b/src/node.spec.ts @@ -16,6 +16,24 @@ const TEST_HTML_UTF16_BOM = Buffer.from([ ...Array.from(TEST_HTML_UTF16), ]); +describe('loadBuffer', () => { + it('should parse UTF-8 HTML', () => { + const $ = cheerio.loadBuffer(Buffer.from(TEST_HTML)); + + expect($.html()).toBe( + `${TEST_HTML}` + ); + }); + + it('should parse UTF-16 HTML', () => { + const $ = cheerio.loadBuffer(TEST_HTML_UTF16_BOM); + + expect($.html()).toBe( + `${TEST_HTML}` + ); + }); +}); + describe('stringStream', () => { it('should use parse5 by default', (cb) => { const stream = cheerio.stringStream({}, (err, $) => { diff --git a/src/node.ts b/src/node.ts index 1765e8d3c8..0d8b941857 100644 --- a/src/node.ts +++ b/src/node.ts @@ -9,11 +9,36 @@ import { adapter as htmlparser2Adapter } from 'parse5-htmlparser2-tree-adapter'; import { WritableStream as Htmlparser2Stream } from 'htmlparser2/lib/WritableStream'; import DomHandler from 'domhandler'; import { ParserStream as Parse5Stream } from 'parse5-parser-stream'; -import { DecodeStream, type SnifferOptions } from 'encoding-sniffer'; +import { + decodeBuffer, + DecodeStream, + type SnifferOptions, +} from 'encoding-sniffer'; import * as undici from 'undici'; import MIMEType from 'whatwg-mimetype'; import { type Writable, finished } from 'node:stream'; +/** + * Sniffs the encoding of a buffer, then creates a querying function bound to a + * document created from the buffer. + * + * @param buffer - The buffer to sniff the encoding of. + * @param options - The options to pass to Cheerio. + * @returns The loaded document. + */ +export function loadBuffer( + buffer: Buffer, + options: DecodeStreamOptions = {} +): CheerioAPI { + const opts = flattenOptions(options); + const str = decodeBuffer(buffer, { + defaultEncoding: opts?.xmlMode ? 'utf8' : 'windows-1252', + ...options.encoding, + }); + + return load(str, opts); +} + function _stringStream( options: InternalOptions | undefined, cb: (err: Error | null | undefined, $: CheerioAPI) => void From 81ed6aee2eb3d874c28c3562b4fdf117376385d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Mon, 21 Nov 2022 19:29:46 +0000 Subject: [PATCH 11/18] Add Node exports by default for Node ESM --- package.json | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/package.json b/package.json index 797ebfc993..a7db4d83c3 100644 --- a/package.json +++ b/package.json @@ -30,7 +30,10 @@ "exports": { ".": { "require": "./lib/index.js", - "import": "./lib/esm/index.js" + "import": { + "node": "./lib/esm/node.js", + "default": "./lib/esm/index.js" + } }, "./lib/slim": { "require": "./lib/slim.js", @@ -39,6 +42,10 @@ "./lib/utils": { "require": "./lib/utils.js", "import": "./lib/esm/utils.js" + }, + "./lib/node": { + "require": "./lib/node.js", + "import": "./lib/esm/node.js" } }, "files": [ From 58cd3a1bc99a9e3c85be017f9a6db09a77c1bf1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Mon, 21 Nov 2022 19:34:58 +0000 Subject: [PATCH 12/18] Close servers after tests --- src/node.spec.ts | 47 +++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/src/node.spec.ts b/src/node.spec.ts index 8e65553a01..f56c6599c2 100644 --- a/src/node.spec.ts +++ b/src/node.spec.ts @@ -1,7 +1,7 @@ /* eslint-disable jest/no-done-callback */ import * as cheerio from './node.js'; import { Writable } from 'node:stream'; -import { createServer } from 'node:http'; +import { createServer, type Server } from 'node:http'; function noop() { // Ignore @@ -103,29 +103,36 @@ describe('decodeStream', () => { }); }); -function createTestServer( - contentType: string, - body: string | Buffer -): Promise { - return new Promise((resolve, reject) => { - const server = createServer((_req, res) => { - res.writeHead(200, { 'Content-Type': contentType }); - res.end(body); +describe('fromURL', () => { + let server: Server | undefined; + + function createTestServer( + contentType: string, + body: string | Buffer + ): Promise { + return new Promise((resolve, reject) => { + server = createServer((_req, res) => { + res.writeHead(200, { 'Content-Type': contentType }); + res.end(body); + }); + + server.listen(0, () => { + const address = server?.address(); + + if (typeof address === 'string' || address == null) { + reject(new Error('Failed to get port')); + } else { + resolve(address.port); + } + }); }); + } - server.listen(0, () => { - const address = server.address(); - - if (typeof address === 'string' || address === null) { - reject(new Error('Failed to get port')); - } else { - resolve(address.port); - } - }); + afterEach((cb) => { + server?.close(cb); + server = undefined; }); -} -describe('fromURL', () => { it('should fetch UTF-8 HTML', async () => { const port = await createTestServer('text/html', TEST_HTML); const $ = await cheerio.fromURL(`http://localhost:${port}`); From 1b81b163316e151d8d6988496b56e895dd3a509f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Tue, 22 Nov 2022 11:30:23 +0000 Subject: [PATCH 13/18] Allow inferred request options to be overwritten --- src/node.ts | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/node.ts b/src/node.ts index 0d8b941857..b34bc9620c 100644 --- a/src/node.ts +++ b/src/node.ts @@ -1,11 +1,12 @@ export * from './index.js'; +/* eslint-disable node/no-unsupported-features/node-builtins, node/file-extension-in-import */ + import type { CheerioAPI, CheerioOptions } from './index.js'; import { load } from './index.js'; import { flatten as flattenOptions, InternalOptions } from './options.js'; import { adapter as htmlparser2Adapter } from 'parse5-htmlparser2-tree-adapter'; -// eslint-disable-next-line node/file-extension-in-import import { WritableStream as Htmlparser2Stream } from 'htmlparser2/lib/WritableStream'; import DomHandler from 'domhandler'; import { ParserStream as Parse5Stream } from 'parse5-parser-stream'; @@ -112,7 +113,6 @@ const defaultRequestOptions: UndiciStreamOptions = { // Get a document from a URL export async function fromURL( - // eslint-disable-next-line node/no-unsupported-features/node-builtins url: string | URL, options: CheerioRequestOptions = {} ): Promise { @@ -144,15 +144,21 @@ export async function fromURL( * If we allow redirects, we will have entries in the history. * The last entry will be the final URL. */ - const history = (res.context as any)?.history; + const history = ( + res.context as + | { + history?: URL[]; + } + | undefined + )?.history; const opts = { - ...cheerioOptions, encoding, // Set XML mode based on the MIME type. xmlMode: mimeType.isXML(), // Set the `baseURL` to the final URL. baseURL: history ? history[history.length - 1] : url, + ...cheerioOptions, }; return decodeStream(opts, (err, $) => (err ? reject(err) : resolve($))); From 5cb5cfdb1f1835e538539dc0c1620f61629c88eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Tue, 22 Nov 2022 16:19:51 +0000 Subject: [PATCH 14/18] Add documentation comments --- src/node.ts | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/node.ts b/src/node.ts index b34bc9620c..ad458a0d96 100644 --- a/src/node.ts +++ b/src/node.ts @@ -63,6 +63,16 @@ function _stringStream( return stream; } +/** + * Creates a stream that parses a sequence of strings into a document. + * + * The stream is a `Writable` stream that accepts strings. When the stream is + * finished, the callback is called with the loaded document. + * + * @param options - The options to pass to Cheerio. + * @param cb - The callback to call when the stream is finished. + * @returns The writable stream. + */ export function stringStream( options: CheerioOptions, cb: (err: Error | null | undefined, $: CheerioAPI) => void @@ -74,6 +84,16 @@ export interface DecodeStreamOptions extends CheerioOptions { encoding?: SnifferOptions; } +/** + * Parses a stream of buffers into a document. + * + * The stream is a `Writable` stream that accepts buffers. When the stream is + * finished, the callback is called with the loaded document. + * + * @param options - The options to pass to Cheerio. + * @param cb - The callback to call when the stream is finished. + * @returns The writable stream. + */ export function decodeStream( options: DecodeStreamOptions, cb: (err: Error | null | undefined, $: CheerioAPI) => void @@ -111,7 +131,15 @@ const defaultRequestOptions: UndiciStreamOptions = { }, }; -// Get a document from a URL +/** + * `fromURL` loads a document from a URL. + * + * By default, redirects are allowed and non-2xx responses are rejected. + * + * @param url - The URL to load the document from. + * @param options - The options to pass to Cheerio. + * @returns The loaded document. + */ export async function fromURL( url: string | URL, options: CheerioRequestOptions = {} From 73c174f18c6c59b4c738e1d3d0f3a4a8c67f84e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Tue, 22 Nov 2022 16:23:00 +0000 Subject: [PATCH 15/18] Add jsdom types for benchmarks --- benchmark/suite.ts | 1 - package-lock.json | 334 +++++++++++++++++++++++++++++++++++++++++++++ package.json | 1 + 3 files changed, 335 insertions(+), 1 deletion(-) diff --git a/benchmark/suite.ts b/benchmark/suite.ts index 8a8db68caf..31d3c0421b 100644 --- a/benchmark/suite.ts +++ b/benchmark/suite.ts @@ -2,7 +2,6 @@ import fs from 'fs'; import path from 'path'; import { Suite, Event } from 'benchmark'; -// @ts-expect-error `jsdom` types currently collide with `parse5` types. import { JSDOM } from 'jsdom'; import { Script } from 'vm'; import cheerio from '../lib/index.js'; diff --git a/package-lock.json b/package-lock.json index 1401d10a5f..27f6beee25 100644 --- a/package-lock.json +++ b/package-lock.json @@ -25,6 +25,7 @@ "@octokit/graphql": "^5.0.4", "@types/benchmark": "^2.1.2", "@types/jest": "^29.2.3", + "@types/jsdom": "^20.0.1", "@types/node": "^18.11.9", "@types/whatwg-mimetype": "^3.0.0", "@typescript-eslint/eslint-plugin": "^5.44.0", @@ -1430,6 +1431,185 @@ "pretty-format": "^29.0.0" } }, + "node_modules/@types/jest/node_modules/@jest/expect-utils": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/@jest/expect-utils/-/expect-utils-29.3.1.tgz", + "integrity": "sha512-wlrznINZI5sMjwvUoLVk617ll/UYfGIZNxmbU+Pa7wmkL4vYzhV9R2pwVqUh4NWWuLQWkI8+8mOkxs//prKQ3g==", + "dev": true, + "dependencies": { + "jest-get-type": "^29.2.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@types/jest/node_modules/@jest/schemas": { + "version": "29.0.0", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.0.0.tgz", + "integrity": "sha512-3Ab5HgYIIAnS0HjqJHQYZS+zXc4tUmTmBH3z83ajI6afXp8X3ZtdLX+nXx+I7LNkJD7uN9LAVhgnjDgZa2z0kA==", + "dev": true, + "dependencies": { + "@sinclair/typebox": "^0.24.1" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@types/jest/node_modules/@jest/types": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.3.1.tgz", + "integrity": "sha512-d0S0jmmTpjnhCmNpApgX3jrUZgZ22ivKJRvL2lli5hpCRoNnp1f85r2/wpKfXuYu8E7Jjh1hGfhPyup1NM5AmA==", + "dev": true, + "dependencies": { + "@jest/schemas": "^29.0.0", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@types/jest/node_modules/ansi-styles": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", + "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", + "dev": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/@types/jest/node_modules/diff-sequences": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/diff-sequences/-/diff-sequences-29.3.1.tgz", + "integrity": "sha512-hlM3QR272NXCi4pq+N4Kok4kOp6EsgOM3ZSpJI7Da3UAs+Ttsi8MRmB6trM/lhyzUxGfOgnpkHtgqm5Q/CTcfQ==", + "dev": true, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@types/jest/node_modules/expect": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/expect/-/expect-29.3.1.tgz", + "integrity": "sha512-gGb1yTgU30Q0O/tQq+z30KBWv24ApkMgFUpvKBkyLUBL68Wv8dHdJxTBZFl/iT8K/bqDHvUYRH6IIN3rToopPA==", + "dev": true, + "dependencies": { + "@jest/expect-utils": "^29.3.1", + "jest-get-type": "^29.2.0", + "jest-matcher-utils": "^29.3.1", + "jest-message-util": "^29.3.1", + "jest-util": "^29.3.1" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@types/jest/node_modules/jest-diff": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/jest-diff/-/jest-diff-29.3.1.tgz", + "integrity": "sha512-vU8vyiO7568tmin2lA3r2DP8oRvzhvRcD4DjpXc6uGveQodyk7CKLhQlCSiwgx3g0pFaE88/KLZ0yaTWMc4Uiw==", + "dev": true, + "dependencies": { + "chalk": "^4.0.0", + "diff-sequences": "^29.3.1", + "jest-get-type": "^29.2.0", + "pretty-format": "^29.3.1" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@types/jest/node_modules/jest-get-type": { + "version": "29.2.0", + "resolved": "https://registry.npmjs.org/jest-get-type/-/jest-get-type-29.2.0.tgz", + "integrity": "sha512-uXNJlg8hKFEnDgFsrCjznB+sTxdkuqiCL6zMgA75qEbAJjJYTs9XPrvDctrEig2GDow22T/LvHgO57iJhXB/UA==", + "dev": true, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@types/jest/node_modules/jest-matcher-utils": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/jest-matcher-utils/-/jest-matcher-utils-29.3.1.tgz", + "integrity": "sha512-fkRMZUAScup3txIKfMe3AIZZmPEjWEdsPJFK3AIy5qRohWqQFg1qrmKfYXR9qEkNc7OdAu2N4KPHibEmy4HPeQ==", + "dev": true, + "dependencies": { + "chalk": "^4.0.0", + "jest-diff": "^29.3.1", + "jest-get-type": "^29.2.0", + "pretty-format": "^29.3.1" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@types/jest/node_modules/jest-message-util": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/jest-message-util/-/jest-message-util-29.3.1.tgz", + "integrity": "sha512-lMJTbgNcDm5z+6KDxWtqOFWlGQxD6XaYwBqHR8kmpkP+WWWG90I35kdtQHY67Ay5CSuydkTBbJG+tH9JShFCyA==", + "dev": true, + "dependencies": { + "@babel/code-frame": "^7.12.13", + "@jest/types": "^29.3.1", + "@types/stack-utils": "^2.0.0", + "chalk": "^4.0.0", + "graceful-fs": "^4.2.9", + "micromatch": "^4.0.4", + "pretty-format": "^29.3.1", + "slash": "^3.0.0", + "stack-utils": "^2.0.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@types/jest/node_modules/jest-util": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/jest-util/-/jest-util-29.3.1.tgz", + "integrity": "sha512-7YOVZaiX7RJLv76ZfHt4nbNEzzTRiMW/IiOG7ZOKmTXmoGBxUDefgMAxQubu6WPVqP5zSzAdZG0FfLcC7HOIFQ==", + "dev": true, + "dependencies": { + "@jest/types": "^29.3.1", + "@types/node": "*", + "chalk": "^4.0.0", + "ci-info": "^3.2.0", + "graceful-fs": "^4.2.9", + "picomatch": "^2.2.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@types/jest/node_modules/pretty-format": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-29.3.1.tgz", + "integrity": "sha512-FyLnmb1cYJV8biEIiRyzRFvs2lry7PPIvOqKVe1GCUEYg4YGmlx1qG9EJNMxArYm7piII4qb8UV1Pncq5dxmcg==", + "dev": true, + "dependencies": { + "@jest/schemas": "^29.0.0", + "ansi-styles": "^5.0.0", + "react-is": "^18.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@types/jsdom": { + "version": "20.0.1", + "resolved": "https://registry.npmjs.org/@types/jsdom/-/jsdom-20.0.1.tgz", + "integrity": "sha512-d0r18sZPmMQr1eG35u12FZfhIXNrnsPU/g5wvRKCUf/tOGilKKwYMYGqh33BNR6ba+2gkHw1EUiHoN3mn7E5IQ==", + "dev": true, + "dependencies": { + "@types/node": "*", + "@types/tough-cookie": "*", + "parse5": "^7.0.0" + } + }, "node_modules/@types/json-schema": { "version": "7.0.11", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.11.tgz", @@ -1475,6 +1655,12 @@ "integrity": "sha512-Hl219/BT5fLAaz6NDkSuhzasy49dwQS/DSdu4MdggFB8zcXv7vflBI3xp7FEmkmdDkBUI2bPUNeMttp2knYdxw==", "dev": true }, + "node_modules/@types/tough-cookie": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.2.tgz", + "integrity": "sha512-Q5vtl1W5ue16D+nIaW8JWebSSraJVlK+EthKn7e7UcD4KWsaSJ8BqGPXNaPghgtcn/fhvrN17Tv8ksUsQpiplw==", + "dev": true + }, "node_modules/@types/unist": { "version": "2.0.6", "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.6.tgz", @@ -8489,6 +8675,148 @@ "requires": { "expect": "^29.0.0", "pretty-format": "^29.0.0" + }, + "dependencies": { + "@jest/expect-utils": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/@jest/expect-utils/-/expect-utils-29.3.1.tgz", + "integrity": "sha512-wlrznINZI5sMjwvUoLVk617ll/UYfGIZNxmbU+Pa7wmkL4vYzhV9R2pwVqUh4NWWuLQWkI8+8mOkxs//prKQ3g==", + "dev": true, + "requires": { + "jest-get-type": "^29.2.0" + } + }, + "@jest/schemas": { + "version": "29.0.0", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.0.0.tgz", + "integrity": "sha512-3Ab5HgYIIAnS0HjqJHQYZS+zXc4tUmTmBH3z83ajI6afXp8X3ZtdLX+nXx+I7LNkJD7uN9LAVhgnjDgZa2z0kA==", + "dev": true, + "requires": { + "@sinclair/typebox": "^0.24.1" + } + }, + "@jest/types": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.3.1.tgz", + "integrity": "sha512-d0S0jmmTpjnhCmNpApgX3jrUZgZ22ivKJRvL2lli5hpCRoNnp1f85r2/wpKfXuYu8E7Jjh1hGfhPyup1NM5AmA==", + "dev": true, + "requires": { + "@jest/schemas": "^29.0.0", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + } + }, + "ansi-styles": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", + "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", + "dev": true + }, + "diff-sequences": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/diff-sequences/-/diff-sequences-29.3.1.tgz", + "integrity": "sha512-hlM3QR272NXCi4pq+N4Kok4kOp6EsgOM3ZSpJI7Da3UAs+Ttsi8MRmB6trM/lhyzUxGfOgnpkHtgqm5Q/CTcfQ==", + "dev": true + }, + "expect": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/expect/-/expect-29.3.1.tgz", + "integrity": "sha512-gGb1yTgU30Q0O/tQq+z30KBWv24ApkMgFUpvKBkyLUBL68Wv8dHdJxTBZFl/iT8K/bqDHvUYRH6IIN3rToopPA==", + "dev": true, + "requires": { + "@jest/expect-utils": "^29.3.1", + "jest-get-type": "^29.2.0", + "jest-matcher-utils": "^29.3.1", + "jest-message-util": "^29.3.1", + "jest-util": "^29.3.1" + } + }, + "jest-diff": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/jest-diff/-/jest-diff-29.3.1.tgz", + "integrity": "sha512-vU8vyiO7568tmin2lA3r2DP8oRvzhvRcD4DjpXc6uGveQodyk7CKLhQlCSiwgx3g0pFaE88/KLZ0yaTWMc4Uiw==", + "dev": true, + "requires": { + "chalk": "^4.0.0", + "diff-sequences": "^29.3.1", + "jest-get-type": "^29.2.0", + "pretty-format": "^29.3.1" + } + }, + "jest-get-type": { + "version": "29.2.0", + "resolved": "https://registry.npmjs.org/jest-get-type/-/jest-get-type-29.2.0.tgz", + "integrity": "sha512-uXNJlg8hKFEnDgFsrCjznB+sTxdkuqiCL6zMgA75qEbAJjJYTs9XPrvDctrEig2GDow22T/LvHgO57iJhXB/UA==", + "dev": true + }, + "jest-matcher-utils": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/jest-matcher-utils/-/jest-matcher-utils-29.3.1.tgz", + "integrity": "sha512-fkRMZUAScup3txIKfMe3AIZZmPEjWEdsPJFK3AIy5qRohWqQFg1qrmKfYXR9qEkNc7OdAu2N4KPHibEmy4HPeQ==", + "dev": true, + "requires": { + "chalk": "^4.0.0", + "jest-diff": "^29.3.1", + "jest-get-type": "^29.2.0", + "pretty-format": "^29.3.1" + } + }, + "jest-message-util": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/jest-message-util/-/jest-message-util-29.3.1.tgz", + "integrity": "sha512-lMJTbgNcDm5z+6KDxWtqOFWlGQxD6XaYwBqHR8kmpkP+WWWG90I35kdtQHY67Ay5CSuydkTBbJG+tH9JShFCyA==", + "dev": true, + "requires": { + "@babel/code-frame": "^7.12.13", + "@jest/types": "^29.3.1", + "@types/stack-utils": "^2.0.0", + "chalk": "^4.0.0", + "graceful-fs": "^4.2.9", + "micromatch": "^4.0.4", + "pretty-format": "^29.3.1", + "slash": "^3.0.0", + "stack-utils": "^2.0.3" + } + }, + "jest-util": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/jest-util/-/jest-util-29.3.1.tgz", + "integrity": "sha512-7YOVZaiX7RJLv76ZfHt4nbNEzzTRiMW/IiOG7ZOKmTXmoGBxUDefgMAxQubu6WPVqP5zSzAdZG0FfLcC7HOIFQ==", + "dev": true, + "requires": { + "@jest/types": "^29.3.1", + "@types/node": "*", + "chalk": "^4.0.0", + "ci-info": "^3.2.0", + "graceful-fs": "^4.2.9", + "picomatch": "^2.2.3" + } + }, + "pretty-format": { + "version": "29.3.1", + "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-29.3.1.tgz", + "integrity": "sha512-FyLnmb1cYJV8biEIiRyzRFvs2lry7PPIvOqKVe1GCUEYg4YGmlx1qG9EJNMxArYm7piII4qb8UV1Pncq5dxmcg==", + "dev": true, + "requires": { + "@jest/schemas": "^29.0.0", + "ansi-styles": "^5.0.0", + "react-is": "^18.0.0" + } + } + } + }, + "@types/jsdom": { + "version": "20.0.1", + "resolved": "https://registry.npmjs.org/@types/jsdom/-/jsdom-20.0.1.tgz", + "integrity": "sha512-d0r18sZPmMQr1eG35u12FZfhIXNrnsPU/g5wvRKCUf/tOGilKKwYMYGqh33BNR6ba+2gkHw1EUiHoN3mn7E5IQ==", + "dev": true, + "requires": { + "@types/node": "*", + "@types/tough-cookie": "*", + "parse5": "^7.0.0" } }, "@types/json-schema": { @@ -8536,6 +8864,12 @@ "integrity": "sha512-Hl219/BT5fLAaz6NDkSuhzasy49dwQS/DSdu4MdggFB8zcXv7vflBI3xp7FEmkmdDkBUI2bPUNeMttp2knYdxw==", "dev": true }, + "@types/tough-cookie": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.2.tgz", + "integrity": "sha512-Q5vtl1W5ue16D+nIaW8JWebSSraJVlK+EthKn7e7UcD4KWsaSJ8BqGPXNaPghgtcn/fhvrN17Tv8ksUsQpiplw==", + "dev": true + }, "@types/unist": { "version": "2.0.6", "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.6.tgz", diff --git a/package.json b/package.json index a7db4d83c3..899e0712ad 100644 --- a/package.json +++ b/package.json @@ -71,6 +71,7 @@ "@octokit/graphql": "^5.0.4", "@types/benchmark": "^2.1.2", "@types/jest": "^29.2.3", + "@types/jsdom": "^20.0.1", "@types/node": "^18.11.9", "@types/whatwg-mimetype": "^3.0.0", "@typescript-eslint/eslint-plugin": "^5.44.0", From 6e7dbf4197adfaf276e2425315c7ca98bd982dc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Tue, 22 Nov 2022 16:26:18 +0000 Subject: [PATCH 16/18] Update suite.ts --- benchmark/suite.ts | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/benchmark/suite.ts b/benchmark/suite.ts index 31d3c0421b..e66e6298a6 100644 --- a/benchmark/suite.ts +++ b/benchmark/suite.ts @@ -1,9 +1,9 @@ -import fs from 'fs'; -import path from 'path'; +import fs from 'node:fs'; +import path from 'node:path'; +import { Script } from 'node:vm'; import { Suite, Event } from 'benchmark'; import { JSDOM } from 'jsdom'; -import { Script } from 'vm'; import cheerio from '../lib/index.js'; const documentDir = path.join(__dirname, 'documents'); @@ -73,11 +73,9 @@ export default class Suites { jQueryScript.runInContext(dom.getInternalVMContext()); - const setupData: T = options.setup(dom.window.$); + const setupData: T = options.setup(dom.window['$']); - suite.add('jsdom', () => { - testFn(dom.window.$, setupData); - }); + suite.add('jsdom', () => testFn(dom.window['$'], setupData)); suite.run(); } From 526f1a527ec40fbac223ac32af37e655c732550a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Tue, 22 Nov 2022 16:56:31 +0000 Subject: [PATCH 17/18] Update eslint-plugin-node to -n --- src/node.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/node.ts b/src/node.ts index ad458a0d96..c6154da815 100644 --- a/src/node.ts +++ b/src/node.ts @@ -1,6 +1,6 @@ export * from './index.js'; -/* eslint-disable node/no-unsupported-features/node-builtins, node/file-extension-in-import */ +/* eslint-disable n/no-unsupported-features/node-builtins, n/file-extension-in-import */ import type { CheerioAPI, CheerioOptions } from './index.js'; import { load } from './index.js'; From ce9d283fd5ffa421e6f542b4d1093588289348c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Tue, 22 Nov 2022 17:40:03 +0000 Subject: [PATCH 18/18] Split comment --- src/node.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/node.ts b/src/node.ts index c6154da815..b7f0c98f74 100644 --- a/src/node.ts +++ b/src/node.ts @@ -1,12 +1,13 @@ export * from './index.js'; -/* eslint-disable n/no-unsupported-features/node-builtins, n/file-extension-in-import */ +/* eslint-disable n/no-unsupported-features/node-builtins */ import type { CheerioAPI, CheerioOptions } from './index.js'; import { load } from './index.js'; import { flatten as flattenOptions, InternalOptions } from './options.js'; import { adapter as htmlparser2Adapter } from 'parse5-htmlparser2-tree-adapter'; +// eslint-disable-next-line n/file-extension-in-import import { WritableStream as Htmlparser2Stream } from 'htmlparser2/lib/WritableStream'; import DomHandler from 'domhandler'; import { ParserStream as Parse5Stream } from 'parse5-parser-stream';