Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(options): Revamp option handling #2916

Merged
merged 4 commits into from
Dec 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/api/manipulation.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1974,7 +1974,7 @@ describe('$(...)', () => {
});

it('() : should preserve parsing options', () => {
const $ = load('<div>π</div>', { decodeEntities: false });
const $ = load('<div>π</div>', { xml: { decodeEntities: false } });
const $div = $('div');

expect($div.text()).toBe($div.clone().text());
Expand Down
2 changes: 1 addition & 1 deletion src/batteries.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ describe('stringStream', () => {
});

it('should use htmlparser2 for XML', (cb) => {
const stream = cheerio.stringStream({ xmlMode: true }, (err, $) => {
const stream = cheerio.stringStream({ xml: true }, (err, $) => {
expect(err).toBeNull();

expect($.html()).toBe(TEST_HTML);
Expand Down
18 changes: 12 additions & 6 deletions src/batteries.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ export * from './index.js';

import type { CheerioAPI, CheerioOptions } from './index.js';
import { load } from './index.js';
import { flatten as flattenOptions, type InternalOptions } from './options.js';
import { flattenOptions, type InternalOptions } from './options.js';
import { adapter as htmlparser2Adapter } from 'parse5-htmlparser2-tree-adapter';

// eslint-disable-next-line n/file-extension-in-import
Expand Down Expand Up @@ -58,7 +58,7 @@ function _stringStream(
options: InternalOptions | undefined,
cb: (err: Error | null | undefined, $: CheerioAPI) => void
): Writable {
if (options && (options.xmlMode || options._useHtmlParser2)) {
if (options?._useHtmlParser2) {
const handler: DomHandler = new DomHandler(
(err) => cb(err, load(handler.root)),
options
Expand All @@ -67,10 +67,14 @@ function _stringStream(
return new Htmlparser2Stream(handler, options);
}

const stream = new Parse5Stream({
...options,
treeAdapter: htmlparser2Adapter,
});
options ??= {};
options.treeAdapter ??= htmlparser2Adapter;

if (options.scriptingEnabled !== false) {
options.scriptingEnabled = true;
}

const stream = new Parse5Stream(options);

finished(stream, (err) => cb(err, load(stream.document)));

Expand Down Expand Up @@ -102,6 +106,7 @@ function _stringStream(
* writeStream
* );
* ```
*
* @param options - The options to pass to Cheerio.
* @param cb - The callback to call when the stream is finished.
* @returns The writable stream.
Expand Down Expand Up @@ -176,6 +181,7 @@ const defaultRequestOptions: UndiciStreamOptions = {
*
* const $ = await cheerio.fromURL('https://example.com');
* ```
*
* @param url - The URL to load the document from.
* @param options - The options to pass to Cheerio.
* @returns The loaded document.
Expand Down
29 changes: 12 additions & 17 deletions src/cheerio.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -452,9 +452,8 @@ describe('cheerio', () => {
describe('parse5 options', () => {
// Should parse noscript tags only with false option value
test('{scriptingEnabled: ???}', () => {
const opt = 'scriptingEnabled';
const options: CheerioOptions = {};
let result;
let result: Cheerio<Element>;

// [default] scriptingEnabled: true - tag contains one text element
result = cheerio.load(noscript)('noscript');
Expand All @@ -463,7 +462,7 @@ describe('cheerio', () => {
expect(result[0].children[0].type).toBe('text');

// ScriptingEnabled: false - content of noscript will be parsed
options[opt] = false;
options.scriptingEnabled = false;
result = cheerio.load(noscript, options)('noscript');
expect(result).toHaveLength(1);
expect(result[0].children).toHaveLength(2);
Expand All @@ -474,7 +473,7 @@ describe('cheerio', () => {
// ScriptingEnabled: ??? - should act as true
const values = [undefined, null, 0, ''];
for (const val of values) {
options[opt] = val as any;
options.scriptingEnabled = val as any;
result = cheerio.load(noscript, options)('noscript');
expect(result).toHaveLength(1);
expect(result[0].children).toHaveLength(1);
Expand All @@ -484,29 +483,25 @@ describe('cheerio', () => {

// Should contain location data only with a truthy option value
test('{sourceCodeLocationInfo: ???}', () => {
const prop = 'sourceCodeLocation';
const opt = 'sourceCodeLocationInfo';
const options: CheerioOptions = {};
let result;
let i;

// Location data should not be present
let values = [undefined, null, 0, false, ''];
for (i = 0; i < values.length; i++) {
options[opt] = values[i] as any;
result = cheerio.load(noscript, options)('noscript');
for (let i = 0; i < values.length; i++) {
options.sourceCodeLocationInfo = values[i] as any;
const result = cheerio.load(noscript, options)('noscript');
expect(result).toHaveLength(1);
expect(result[0]).not.toHaveProperty(prop);
expect(result[0]).not.toHaveProperty('sourceCodeLocation');
}

// Location data should be present
values = [true, 1, 'test'];
for (i = 0; i < values.length; i++) {
options[opt] = values[i] as any;
result = cheerio.load(noscript, options)('noscript');
for (let i = 0; i < values.length; i++) {
options.sourceCodeLocationInfo = values[i] as any;
const result = cheerio.load(noscript, options)('noscript');
expect(result).toHaveLength(1);
expect(result[0]).toHaveProperty(prop);
expect(typeof (result[0] as any)[prop]).toBe('object');
expect(result[0]).toHaveProperty('sourceCodeLocation');
expect(typeof (result[0] as any)['sourceCodeLocation']).toBe('object');
}
});
});
Expand Down
4 changes: 2 additions & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import renderWithHtmlparser2 from 'dom-serializer';
import { parseDocument as parseWithHtmlparser2 } from 'htmlparser2';

const parse = getParse((content, options, isDocument, context) =>
options.xmlMode || options._useHtmlParser2
options._useHtmlParser2
? parseWithHtmlparser2(content, options)
: parseWithParse5(content, options, isDocument, context)
);
Expand All @@ -52,7 +52,7 @@ const parse = getParse((content, options, isDocument, context) =>
* @see {@link https://cheerio.js.org#loading} for additional usage information.
*/
export const load = getLoad(parse, (dom, options) =>
options.xmlMode || options._useHtmlParser2
options._useHtmlParser2
? renderWithHtmlparser2(dom, options)
: renderWithParse5(dom)
);
Expand Down
10 changes: 3 additions & 7 deletions src/load.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import {
type CheerioOptions,
type InternalOptions,
default as defaultOptions,
flatten as flattenOptions,
flattenOptions,
} from './options.js';
import * as staticMethods from './static.js';
import { Cheerio } from './cheerio.js';
Expand Down Expand Up @@ -114,7 +113,7 @@ export function getLoad(
throw new Error('cheerio.load() expects a string');
}

const internalOpts = { ...defaultOptions, ...flattenOptions(options) };
const internalOpts = flattenOptions(options);
const initialRoot = parse(content, internalOpts, isDocument, null);

/**
Expand Down Expand Up @@ -157,10 +156,7 @@ export function getLoad(
// $($)
if (selector && isCheerio<Result>(selector)) return selector;

const options = {
...internalOpts,
...flattenOptions(opts),
};
const options = flattenOptions(opts, internalOpts);
const r =
typeof root === 'string'
? [parse(root, options, false, null)]
Expand Down
72 changes: 49 additions & 23 deletions src/options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,25 @@ export type Parse5Options = Parse5ParserOptions<Htmlparser2TreeAdapterMap>;
* Please note that parser-specific options are _only recognized_ if the
* relevant parser is used.
*/
export interface CheerioOptions extends HTMLParser2Options, Parse5Options {
/** Recommended way of configuring htmlparser2 when wanting to parse XML. */
export interface CheerioOptions extends Parse5Options {
/**
* Recommended way of configuring htmlparser2 when wanting to parse XML.
*
* This will switch Cheerio to use htmlparser2.
*
* @default false
*/
xml?: HTMLParser2Options | boolean;

/** The base URI for the document. Used for the `href` and `src` props. */
/**
* Enable xml mode, which will switch Cheerio to use htmlparser2.
*
* @deprecated Please use the `xml` option instead.
* @default false
*/
xmlMode?: boolean;

/** The base URI for the document. Used to resolve the `href` and `src` props. */
baseURI?: string | URL; // eslint-disable-line n/no-unsupported-features/node-builtins

/**
Expand Down Expand Up @@ -70,7 +84,9 @@ export interface CheerioOptions extends HTMLParser2Options, Parse5Options {
}

/** Internal options for Cheerio. */
export interface InternalOptions extends Omit<CheerioOptions, 'xml'> {
export interface InternalOptions
extends HTMLParser2Options,
Omit<CheerioOptions, 'xml'> {
/**
* Whether to use htmlparser2.
*
Expand All @@ -79,17 +95,8 @@ export interface InternalOptions extends Omit<CheerioOptions, 'xml'> {
_useHtmlParser2?: boolean;
}

const defaultOpts: CheerioOptions = {
xml: false,
decodeEntities: true,
};

/** Cheerio default options. */
export default defaultOpts;

const xmlModeDefault: InternalOptions = {
_useHtmlParser2: true,
xmlMode: true,
const defaultOpts: InternalOptions = {
_useHtmlParser2: false,
};

/**
Expand All @@ -98,14 +105,33 @@ const xmlModeDefault: InternalOptions = {
* This will set `_useHtmlParser2` to true if `xml` is set to true.
*
* @param options - The options to flatten.
* @param baseOptions - The base options to use.
* @returns The flattened options.
*/
export function flatten(
  options?: CheerioOptions | null
): InternalOptions | undefined {
  // A missing options object (null or undefined) flattens to `undefined`.
  if (options == null) {
    return undefined;
  }

  const { xml } = options;

  // Without a truthy `xml` setting, the caller's object is passed through as-is.
  if (!xml) {
    return options;
  }

  // `xml: true` yields the shared XML defaults themselves; an `xml` object is
  // layered on top of a copy of those defaults.
  if (typeof xml === 'boolean') {
    return xmlModeDefault;
  }

  return { ...xmlModeDefault, ...xml };
}
/**
 * Merge user-supplied options with a set of base options into the internal
 * option shape.
 *
 * When `options` is missing, the base options (or, failing that, the module's
 * `defaultOpts`) are returned as-is, without copying. Otherwise a new object
 * is built in which later sources win: a `_useHtmlParser2` flag derived from
 * `options.xmlMode`, then `baseOptions`, then `options` itself.
 *
 * A truthy `xml` option forces `_useHtmlParser2` and `xmlMode` to `true`; if
 * `xml` is an object, its properties are then copied on top and may override
 * those values. A truthy `xmlMode` without `xml` forces `_useHtmlParser2` to
 * `true`.
 *
 * @param options - The options to flatten.
 * @param baseOptions - The base options to use.
 * @returns The flattened options.
 */
export function flattenOptions(
  options?: CheerioOptions | null,
  baseOptions?: InternalOptions
): InternalOptions {
  // Nothing to merge: return the base (or the defaults) unchanged.
  if (!options) {
    return baseOptions ?? defaultOpts;
  }

  const { xml, xmlMode } = options;

  const merged: InternalOptions = {
    _useHtmlParser2: Boolean(xmlMode),
    ...baseOptions,
    ...options,
  };

  if (!xml) {
    // The base options may have reset the flag; `xmlMode` re-enables it.
    if (xmlMode) {
      merged._useHtmlParser2 = true;
    }
    return merged;
  }

  merged._useHtmlParser2 = true;
  merged.xmlMode = true;

  // An `xml` object carries parser settings that take precedence over
  // everything merged so far, including the `xmlMode` set just above.
  if (xml !== true) {
    Object.assign(merged, xml);
  }

  return merged;
}
5 changes: 3 additions & 2 deletions src/parse.spec.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import type { Document, Element } from 'domhandler';
import { getParse } from './parse.js';
import defaultOpts from './options.js';

import { parseDocument as parseWithHtmlparser2 } from 'htmlparser2';
import { parseWithParse5 } from './parsers/parse5-adapter.js';

const defaultOpts = { _useHtmlParser2: false };

const parse = getParse((content, options, isDocument, context) =>
options.xmlMode || options._useHtmlParser2
options._useHtmlParser2
? parseWithHtmlparser2(content, options)
: parseWithParse5(content, options, isDocument, context)
);
Expand Down
17 changes: 7 additions & 10 deletions src/parsers/parse5-adapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,15 @@ export function parseWithParse5(
isDocument: boolean,
context: ParentNode | null
): Document {
const opts = {
scriptingEnabled:
typeof options.scriptingEnabled === 'boolean'
? options.scriptingEnabled
: true,
treeAdapter: htmlparser2Adapter,
sourceCodeLocationInfo: options.sourceCodeLocationInfo,
};
options.treeAdapter ??= htmlparser2Adapter;

if (options.scriptingEnabled !== false) {
options.scriptingEnabled = true;
}

return isDocument
? parseDocument(content, opts)
: parseFragment(context, content, opts);
? parseDocument(content, options)
: parseFragment(context, content, options);
}

const renderOpts = { treeAdapter: htmlparser2Adapter };
Expand Down
8 changes: 3 additions & 5 deletions src/static.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ import { textContent } from 'domutils';
import {
type InternalOptions,
type CheerioOptions,
default as defaultOptions,
flatten as flattenOptions,
flattenOptions as flattenOptions,
} from './options.js';
import type { ExtractedMap, ExtractMap } from './api/extract.js';

Expand Down Expand Up @@ -85,9 +84,8 @@ export function html(
* so fallback non-existing options to the default ones.
*/
const opts = {
...defaultOptions,
...this?._options,
...flattenOptions(options ?? {}),
...flattenOptions(options),
};

return render(this, toRender, opts);
Expand Down Expand Up @@ -166,7 +164,7 @@ export function parseHTML(
keepScripts = context;
}

const parsed = this.load(data, defaultOptions, false);
const parsed = this.load(data, this._options, false);
if (!keepScripts) {
parsed('script').remove();
}
Expand Down