diff --git a/.eslintrc.json b/.eslintrc.json index e86d7b0dfe..14a389eda1 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -1,16 +1,12 @@ { - "env": { - "node": true - }, "plugins": ["jsdoc"], "extends": [ "eslint:recommended", "plugin:jsdoc/recommended", - "plugin:jest/recommended", "plugin:node/recommended", "prettier" ], - "globals": { "Set": true, "Symbol": true }, + "env": { "node": true }, "rules": { "array-callback-return": [ 2, @@ -18,34 +14,38 @@ "allowImplicit": true } ], - "block-scoped-var": 2, - "eqeqeq": [ - 2, - "always", - { - "null": "ignore" - } - ], "no-lonely-if": 2, "no-proto": 2, - "curly": [2, "multi-line"], + "eqeqeq": [2, "smart"], + "no-caller": 2, + "dot-notation": 2, + "no-var": 2, + "prefer-const": 2, + "prefer-arrow-callback": [2, { "allowNamedFunctions": true }], + "arrow-body-style": [2, "as-needed"], + "object-shorthand": 2, + "prefer-template": 2, "one-var": [2, "never"], + "prefer-destructuring": [2, { "object": true }], + "capitalized-comments": 2, + "multiline-comment-style": [2, "starred-block"], + "spaced-comment": 2, + "yoda": [2, "never"], + "curly": [2, "multi-line"], + "no-else-return": [ 2, { "allowElseIf": false } ], - "no-shadow": 2, "no-unused-expressions": 2, "no-useless-call": 2, "no-use-before-define": [2, "nofunc"], "no-void": 2, - "yoda": 2, - "strict": 2, "jsdoc/require-jsdoc": 0, - "jsdoc/check-param-names": 2, + "jsdoc/check-param-names": 0, "jsdoc/check-tag-names": 2, "jsdoc/check-types": 2, "jsdoc/newline-after-description": 2, @@ -53,24 +53,61 @@ "jsdoc/require-hyphen-before-param-description": 2, "jsdoc/require-param-description": 2, "jsdoc/require-param-name": 2, - "jsdoc/require-param-type": 2, - "jsdoc/require-param": 2, + "jsdoc/require-param-type": 0, + "jsdoc/require-returns-type": 0, + "jsdoc/require-param": 0, + "jsdoc/no-types": 2, "jsdoc/valid-types": 2, - "node/no-unsupported-features/es-builtins": 0, // TODO - "node/shebang": 0 + "node/no-unsupported-features/es-syntax": 0, + 
"node/no-missing-import": [2, { "tryExtensions": [".js", ".json", ".ts"] }] }, "settings": { "jsdoc": { - "additionalTagNames": { - "customTags": ["hideconstructor"] + "mode": "typescript", + "tagNamePreference": { + "category": "category" + } + } + }, + "overrides": [ + { + "files": "*.ts", + "extends": [ + "plugin:@typescript-eslint/eslint-recommended", + "plugin:@typescript-eslint/recommended", + "prettier" + ], + "parserOptions": { + "sourceType": "module", + "project": "./tsconfig.eslint.json" }, - "preferredTypes": { - "node": "Node", - "nodewithchildren": "NodeWithChildren", - "element": "Element", - "cheerio": "Cheerio" + "rules": { + "@typescript-eslint/prefer-for-of": 0, + "@typescript-eslint/member-ordering": 0, + "@typescript-eslint/explicit-function-return-type": 0, + "@typescript-eslint/no-unused-vars": 0, + "@typescript-eslint/no-use-before-define": [2, { "functions": false }], + "@typescript-eslint/consistent-type-definitions": [2, "interface"], + "@typescript-eslint/prefer-function-type": 2, + "@typescript-eslint/no-unnecessary-type-arguments": 2, + "@typescript-eslint/prefer-string-starts-ends-with": 2, + "@typescript-eslint/prefer-readonly": 2, + "@typescript-eslint/prefer-includes": 2, + "@typescript-eslint/no-unnecessary-condition": 0, // TODO + "@typescript-eslint/switch-exhaustiveness-check": 2, + "@typescript-eslint/prefer-nullish-coalescing": 2, + + "@typescript-eslint/no-explicit-any": 1 // TODO + } + }, + { + "files": "*.spec.ts", + "extends": "plugin:jest/recommended", + "rules": { + "@typescript-eslint/no-explicit-any": 0, + "@typescript-eslint/ban-ts-comment": 0 } } - } + ] } diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 298bd6b7bf..112575952a 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -37,6 +37,3 @@ jobs: - name: Run lint run: npm run lint - - - name: Test types - run: npm run test:types diff --git a/.gitignore b/.gitignore index 6e515e48c5..1df9a8fd75 100644 --- 
a/.gitignore +++ b/.gitignore @@ -2,5 +2,6 @@ node_modules npm-debug.log .DS_Store /.netlify/ -/coverage/ -/docs/ +/coverage +/docs +/lib diff --git a/Readme.md b/Readme.md index 9c53527e6c..ebe7d697ee 100644 --- a/Readme.md +++ b/Readme.md @@ -9,9 +9,6 @@ Coverage - - Join the chat at https://gitter.im/cheeriojs/cheerio - OpenCollective backers @@ -79,7 +76,12 @@ First you need to load in the HTML. This step in jQuery is implicit, since jQuer This is the _preferred_ method: ```js +// ES6 or TypeScript: +import * as cheerio from 'cheerio'; + +// In other environments: const cheerio = require('cheerio'); + const $ = cheerio.load(''); $.html(); @@ -131,7 +133,7 @@ The options in the `xml` object are taken directly from [htmlparser2](https://gi } ``` -For a full list of options and their effects, see [this](https://github.com/fb55/DomHandler) and +For a full list of options and their effects, see [domhandler](https://github.com/fb55/DomHandler) and [htmlparser2's options](https://github.com/fb55/htmlparser2/wiki/Parser-options). Some users may wish to parse markup with the `htmlparser2` library, and @@ -149,9 +151,9 @@ structure as its first argument. 
Users may install `htmlparser2`, use it to parse input, and pass the result to `load`: ```js -// Usage as of htmlparser2 version 3: +// Usage as of htmlparser2 version 6: const htmlparser2 = require('htmlparser2'); -const dom = htmlparser2.parseDOM(document, options); +const dom = htmlparser2.parseDocument(document, options); const $ = cheerio.load(dom); ``` diff --git a/benchmark/benchmark.js b/benchmark/benchmark.js deleted file mode 100755 index 2d614b03bf..0000000000 --- a/benchmark/benchmark.js +++ /dev/null @@ -1,337 +0,0 @@ -#!/usr/bin/env node -'use strict'; - -var Suites = require('./suite'); -var suites = new Suites(); - -var regexIdx = process.argv.indexOf('--regex') + 1; -if (regexIdx > 0) { - if (regexIdx === process.argv.length) { - throw new Error('Error: the "--regex" option requires a value'); - } - suites.filter(process.argv[regexIdx]); -} -if (process.argv.indexOf('--cheerio-only') >= 0) { - suites.cheerioOnly(); -} - -suites.add('Select all', 'jquery.html', { - test: function ($) { - return $('*').length; - }, -}); -suites.add('Select some', 'jquery.html', { - test: function ($) { - return $('li').length; - }, -}); - -/* - * Manipulation Tests - */ -suites.add('manipulation - append', 'jquery.html', { - setup: function ($) { - return $('body'); - }, - test: function ($, $body) { - $body.append(new Array(50).join('
')); - }, -}); - -// These tests run out of memory in jsdom -suites.add('manipulation - prepend - highmem', 'jquery.html', { - setup: function ($) { - return $('body'); - }, - test: function ($, $body) { - $body.prepend(new Array(50).join('
')); - }, -}); -suites.add('manipulation - after - highmem', 'jquery.html', { - setup: function ($) { - return $('body'); - }, - test: function ($, $body) { - $body.after(new Array(50).join('
')); - }, -}); -suites.add('manipulation - before - highmem', 'jquery.html', { - setup: function ($) { - return $('body'); - }, - test: function ($, $body) { - $body.before(new Array(50).join('
')); - }, -}); - -suites.add('manipulation - remove', 'jquery.html', { - setup: function ($) { - return $('body'); - }, - test: function ($, $lis) { - var child = $('
'); - $lis.append(child); - child.remove(); - }, -}); - -suites.add('manipulation - replaceWith', 'jquery.html', { - setup: function ($) { - $('body').append('
'); - }, - test: function ($) { - $('#foo').replaceWith('
'); - }, -}); - -suites.add('manipulation - empty', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - $lis.empty(); - }, -}); -suites.add('manipulation - html', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - $lis.html(); - $lis.html('foo'); - }, -}); -suites.add('manipulation - html render', 'jquery.html', { - setup: function ($) { - return $('body'); - }, - test: function ($, $lis) { - $lis.html(); - }, -}); -suites.add('manipulation - html independent', 'jquery.html', { - setup: function () { - return ( - '
bat
baz
' + - '
bat
baz
' + - '
bat
baz
' + - '
bat
baz
' + - '
bat
baz
' + - '
bat
baz
' - ); - }, - test: function ($, content) { - $(content).html(); - }, -}); -suites.add('manipulation - text', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - $lis.text(); - $lis.text('foo'); - }, -}); - -/* - * Traversing Tests - */ -suites.add('traversing - Find', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - return $lis.find('li').length; - }, -}); -suites.add('traversing - Parent', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - return $lis.parent('div').length; - }, -}); -suites.add('traversing - Parents', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - return $lis.parents('div').length; - }, -}); -suites.add('traversing - Closest', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - return $lis.closest('div').length; - }, -}); -suites.add('traversing - next', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - return $lis.next().length; - }, -}); -suites.add('traversing - nextAll', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - return $lis.nextAll('li').length; - }, -}); -suites.add('traversing - nextUntil', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - return $lis.nextUntil('li').length; - }, -}); -suites.add('traversing - prev', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - return $lis.prev().length; - }, -}); -suites.add('traversing - prevAll', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - return $lis.prevAll('li').length; - }, -}); -suites.add('traversing - prevUntil', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - return 
$lis.prevUntil('li').length; - }, -}); -suites.add('traversing - siblings', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - return $lis.siblings('li').length; - }, -}); -suites.add('traversing - Children', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - return $lis.children('a').length; - }, -}); -suites.add('traversing - Filter', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - return $lis.filter('li').length; - }, -}); -suites.add('traversing - First', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - return $lis.first().first().length; - }, -}); -suites.add('traversing - Last', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - return $lis.last().last().length; - }, -}); -suites.add('traversing - Eq', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - return $lis.eq(0).eq(0).length; - }, -}); - -/* - * Attributes Tests - */ -suites.add('attributes - Attributes', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - $lis.attr('foo', 'bar'); - $lis.attr('foo'); - $lis.removeAttr('foo'); - }, -}); -suites.add('attributes - Single Attribute', 'jquery.html', { - setup: function ($) { - return $('body'); - }, - test: function ($, $lis) { - $lis.attr('foo', 'bar'); - $lis.attr('foo'); - $lis.removeAttr('foo'); - }, -}); -suites.add('attributes - Data', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - $lis.data('foo', 'bar'); - $lis.data('foo'); - }, -}); -suites.add('attributes - Val', 'jquery.html', { - setup: function ($) { - return $('select,input,textarea,option'); - }, - test: function ($, $lis) { - $lis.each(function () { - $(this).val(); - $(this).val('foo'); - }); - }, -}); - -suites.add('attributes - Has 
class', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - $lis.hasClass('foo'); - }, -}); -suites.add('attributes - Toggle class', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - $lis.toggleClass('foo'); - }, -}); -suites.add('attributes - Add Remove class', 'jquery.html', { - setup: function ($) { - return $('li'); - }, - test: function ($, $lis) { - $lis.addClass('foo'); - $lis.removeClass('foo'); - }, -}); diff --git a/benchmark/benchmark.ts b/benchmark/benchmark.ts new file mode 100755 index 0000000000..45dcbc77df --- /dev/null +++ b/benchmark/benchmark.ts @@ -0,0 +1,347 @@ +import Suites from './suite'; +import type { Cheerio } from '../src/cheerio'; +import type { Element } from 'domhandler'; + +const suites = new Suites(); + +const regexIdx = process.argv.indexOf('--regex') + 1; +if (regexIdx > 0) { + if (regexIdx === process.argv.length) { + throw new Error('Error: the "--regex" option requires a value'); + } + suites.filter(process.argv[regexIdx]); +} +if (process.argv.includes('--cheerio-only')) { + suites.cheerioOnly(); +} + +suites.add('Select all', 'jquery.html', { + setup() { + return; + }, + test($) { + return $('*').length; + }, +}); +suites.add('Select some', 'jquery.html', { + setup() { + return; + }, + test($) { + return $('li').length; + }, +}); + +/* + * Manipulation Tests + */ +suites.add>('manipulation - append', 'jquery.html', { + setup($) { + return $('body'); + }, + test(_, $body) { + $body.append(new Array(50).join('
')); + }, +}); + +// These tests run out of memory in jsdom +suites.add>( + 'manipulation - prepend - highmem', + 'jquery.html', + { + setup($) { + return $('body'); + }, + test(_, $body) { + $body.prepend(new Array(50).join('
')); + }, + } +); +suites.add>('manipulation - after - highmem', 'jquery.html', { + setup($) { + return $('body'); + }, + test(_, $body) { + $body.after(new Array(50).join('
')); + }, +}); +suites.add>('manipulation - before - highmem', 'jquery.html', { + setup($) { + return $('body'); + }, + test(_, $body) { + $body.before(new Array(50).join('
')); + }, +}); + +suites.add>('manipulation - remove', 'jquery.html', { + setup($) { + return $('body'); + }, + test($, $lis) { + const child = $('
'); + $lis.append(child); + child.remove(); + }, +}); + +suites.add('manipulation - replaceWith', 'jquery.html', { + setup($) { + $('body').append('
'); + }, + test($) { + $('#foo').replaceWith('
'); + }, +}); + +suites.add>('manipulation - empty', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + $lis.empty(); + }, +}); +suites.add>('manipulation - html', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + $lis.html(); + $lis.html('foo'); + }, +}); +suites.add>('manipulation - html render', 'jquery.html', { + setup($) { + return $('body'); + }, + test(_, $lis) { + $lis.html(); + }, +}); +suites.add('manipulation - html independent', 'jquery.html', { + setup() { + return ( + '
bat
baz
' + + '
bat
baz
' + + '
bat
baz
' + + '
bat
baz
' + + '
bat
baz
' + + '
bat
baz
' + ); + }, + test($, content) { + $(content).html(); + }, +}); +suites.add>('manipulation - text', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + $lis.text(); + $lis.text('foo'); + }, +}); + +/* + * Traversing Tests + */ +suites.add>('traversing - Find', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + return $lis.find('li').length; + }, +}); +suites.add>('traversing - Parent', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + return $lis.parent('div').length; + }, +}); +suites.add>('traversing - Parents', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + return $lis.parents('div').length; + }, +}); +suites.add>('traversing - Closest', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + return $lis.closest('div').length; + }, +}); +suites.add>('traversing - next', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + return $lis.next().length; + }, +}); +suites.add>('traversing - nextAll', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + return $lis.nextAll('li').length; + }, +}); +suites.add>('traversing - nextUntil', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + return $lis.nextUntil('li').length; + }, +}); +suites.add>('traversing - prev', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + return $lis.prev().length; + }, +}); +suites.add>('traversing - prevAll', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + return $lis.prevAll('li').length; + }, +}); +suites.add>('traversing - prevUntil', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + return $lis.prevUntil('li').length; + }, +}); +suites.add>('traversing - siblings', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + return $lis.siblings('li').length; + }, +}); +suites.add>('traversing - Children', 'jquery.html', { + setup($) { + 
return $('li'); + }, + test(_, $lis) { + return $lis.children('a').length; + }, +}); +suites.add>('traversing - Filter', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + return $lis.filter('li').length; + }, +}); +suites.add>('traversing - First', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + return $lis.first().first().length; + }, +}); +suites.add>('traversing - Last', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + return $lis.last().last().length; + }, +}); +suites.add>('traversing - Eq', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + return $lis.eq(0).eq(0).length; + }, +}); + +/* + * Attributes Tests + */ +suites.add>('attributes - Attributes', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + $lis.attr('foo', 'bar'); + $lis.attr('foo'); + $lis.removeAttr('foo'); + }, +}); +suites.add>('attributes - Single Attribute', 'jquery.html', { + setup($) { + return $('body'); + }, + test(_, $lis) { + $lis.attr('foo', 'bar'); + $lis.attr('foo'); + $lis.removeAttr('foo'); + }, +}); +suites.add>('attributes - Data', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + $lis.data('foo', 'bar'); + $lis.data('foo'); + }, +}); +suites.add>('attributes - Val', 'jquery.html', { + setup($) { + return $('select,input,textarea,option'); + }, + test($, $lis) { + $lis.each(function () { + $(this).val(); + $(this).val('foo'); + }); + }, +}); + +suites.add>('attributes - Has class', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + $lis.hasClass('foo'); + }, +}); +suites.add>('attributes - Toggle class', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + $lis.toggleClass('foo'); + }, +}); +suites.add>('attributes - Add Remove class', 'jquery.html', { + setup($) { + return $('li'); + }, + test(_, $lis) { + $lis.addClass('foo'); + $lis.removeClass('foo'); + }, +}); diff --git a/benchmark/suite.js 
b/benchmark/suite.js deleted file mode 100644 index 6a1aa4004d..0000000000 --- a/benchmark/suite.js +++ /dev/null @@ -1,88 +0,0 @@ -'use strict'; -var fs = require('fs'); -var path = require('path'); - -var Benchmark = require('benchmark'); -var JSDOM = require('jsdom').JSDOM; -var Script = require('vm').Script; -var cheerio = require('..'); - -var documentDir = path.join(__dirname, 'documents'); -var jQuerySrc = fs.readFileSync( - path.join(__dirname, '../node_modules/jquery/dist/jquery.slim.js') -); -var jQueryScript = new Script(jQuerySrc); -var filterRe = /./; -var cheerioOnly = false; - -var Suites = (module.exports = function () {}); - -Suites.prototype.filter = function (str) { - filterRe = new RegExp(str, 'i'); -}; - -Suites.prototype.cheerioOnly = function () { - cheerioOnly = true; -}; - -Suites.prototype.add = function (name, fileName, options) { - if (!filterRe.test(name)) { - return; - } - var markup = fs.readFileSync(path.join(documentDir, fileName), 'utf8'); - var suite = new Benchmark.Suite(name); - - suite.on('start', function () { - console.log('Test: ' + name + ' (file: ' + fileName + ')'); - }); - suite.on('cycle', function (event) { - if (event.target.error) { - return; - } - console.log('\t' + String(event.target)); - }); - suite.on('error', function (event) { - console.log('*** Error in ' + event.target.name + ': ***'); - console.log('\t' + event.target.error); - console.log('*** Test invalidated. 
***'); - }); - suite.on('complete', function (event) { - if (event.target.error) { - console.log(); - return; - } - console.log('\tFastest: ' + this.filter('fastest')[0].name + '\n'); - }); - - this._benchCheerio(suite, markup, options); - if (!cheerioOnly) { - this._benchJsDom(suite, markup, options); - } else { - suite.run(); - } -}; - -Suites.prototype._benchJsDom = function (suite, markup, options) { - var testFn = options.test; - - var dom = new JSDOM(markup, { runScripts: 'outside-only' }); - - jQueryScript.runInContext(dom.getInternalVMContext()); - - var setupData = options.setup && options.setup.call(null, dom.window.$); - - suite.add('jsdom', function () { - testFn(dom.window.$, setupData); - }); - suite.run(); -}; - -Suites.prototype._benchCheerio = function (suite, markup, options) { - var $ = cheerio.load(markup); - var testFn = options.test; - var setupData = options.setup && options.setup.call(null, $); - - suite.add('cheerio', function () { - testFn($, setupData); - }); -}; diff --git a/benchmark/suite.ts b/benchmark/suite.ts new file mode 100644 index 0000000000..9d119e85ca --- /dev/null +++ b/benchmark/suite.ts @@ -0,0 +1,97 @@ +import fs from 'fs'; +import path from 'path'; + +import { Suite, Event } from 'benchmark'; +import { JSDOM } from 'jsdom'; +import { Script } from 'vm'; +import cheerio from '../src'; + +const documentDir = path.join(__dirname, 'documents'); +const jQuerySrc = fs.readFileSync( + path.join(__dirname, '../node_modules/jquery/dist/jquery.slim.js'), + 'utf-8' +); +const jQueryScript = new Script(jQuerySrc); +let filterRe = /./; +let cheerioOnly = false; + +interface SuiteOptions { + test($: typeof cheerio, data: T): void; + setup($: typeof cheerio): T; +} + +export default class Suites { + filter(str: string): void { + filterRe = new RegExp(str, 'i'); + } + + cheerioOnly(): void { + cheerioOnly = true; + } + + add(name: string, fileName: string, options: SuiteOptions): void { + if (!filterRe.test(name)) { + return; + } + 
const markup = fs.readFileSync(path.join(documentDir, fileName), 'utf8'); + const suite = new Suite(name); + + suite.on('start', () => { + console.log(`Test: ${name} (file: ${fileName})`); + }); + suite.on('cycle', (event: Event) => { + if ((event.target as any).error) { + return; + } + console.log(`\t${String(event.target)}`); + }); + suite.on('error', (event: Event) => { + console.log(`*** Error in ${event.target.name}: ***`); + console.log(`\t${(event.target as any).error}`); + console.log('*** Test invalidated. ***'); + }); + suite.on('complete', function (this: Suite, event: Event) { + if ((event.target as any).error) { + console.log(); + return; + } + console.log(`\tFastest: ${(this.filter('fastest') as any)[0].name}\n`); + }); + + this._benchCheerio(suite, markup, options); + if (!cheerioOnly) { + this._benchJsDom(suite, markup, options); + } else { + suite.run(); + } + } + + _benchJsDom(suite: Suite, markup: string, options: SuiteOptions): void { + const testFn = options.test; + + const dom = new JSDOM(markup, { runScripts: 'outside-only' }); + + jQueryScript.runInContext(dom.getInternalVMContext()); + + const setupData: T = options.setup(dom.window.$); + + suite.add('jsdom', () => { + testFn(dom.window.$, setupData); + }); + suite.run(); + } + + _benchCheerio( + suite: Suite, + markup: string, + options: SuiteOptions + ): void { + const $ = cheerio.load(markup); + const testFn = options.test; + const setupData: T = options.setup($); + + suite.add('cheerio', () => { + testFn($, setupData); + }); + } +} diff --git a/index.js b/index.js deleted file mode 100644 index 33882be68a..0000000000 --- a/index.js +++ /dev/null @@ -1,79 +0,0 @@ -'use strict'; -/** - * @module cheerio - * @borrows load.load as load - * @borrows static.html as html - * @borrows static.text as text - * @borrows static.xml as xml - */ -exports = module.exports = require('./lib/cheerio'); - -var staticMethods = require('./lib/static'); -var loadMethod = require('./lib/load'); - -/** - * An 
identifier describing the version of Cheerio which has been executed. - * - * @type {string} - */ -exports.version = require('./package.json').version; - -exports.load = loadMethod.load; -exports.html = staticMethods.html; -exports.text = staticMethods.text; -exports.xml = staticMethods.xml; - -/** - * In order to promote consistency with the jQuery library, users are encouraged - * to instead use the static method of the same name. - * - * @deprecated - * @example - * var $ = cheerio.load('

'); - * $.contains($('div').get(0), $('p').get(0)); // true - * $.contains($('p').get(0), $('div').get(0)); // false - * - * @function - * @returns {boolean} - */ -exports.contains = staticMethods.contains; - -/** - * In order to promote consistency with the jQuery library, users are encouraged - * to instead use the static method of the same name. - * - * @deprecated - * @example - * var $ = cheerio.load(''); - * $.merge([1, 2], [3, 4]); // [1, 2, 3, 4] - * - * @function - */ -exports.merge = staticMethods.merge; - -/** - * In order to promote consistency with the jQuery library, users are encouraged - * to instead use the static method of the same name as it is defined on the - * "loaded" Cheerio factory function. - * - * @deprecated See {@link static/parseHTML}. - * @example - * var $ = cheerio.load(''); - * $.parseHTML('markup'); - * - * @function - */ -exports.parseHTML = staticMethods.parseHTML; - -/** - * Users seeking to access the top-level element of a parsed document should - * instead use the `root` static method of a "loaded" Cheerio function. - * - * @deprecated - * @example - * var $ = cheerio.load(''); - * $.root(); - * - * @function - */ -exports.root = staticMethods.root; diff --git a/lib/api/attributes.js b/lib/api/attributes.js deleted file mode 100644 index 46c30741da..0000000000 --- a/lib/api/attributes.js +++ /dev/null @@ -1,691 +0,0 @@ -'use strict'; -/** - * Methods for getting and modifying attributes. 
- * - * @module cheerio/attributes - */ - -var text = require('../static').text; -var utils = require('../utils'); -var isTag = utils.isTag; -var domEach = utils.domEach; -var hasOwn = Object.prototype.hasOwnProperty; -var camelCase = utils.camelCase; -var cssCase = utils.cssCase; -var rspace = /\s+/; -var dataAttrPrefix = 'data-'; -// Lookup table for coercing string data-* attributes to their corresponding -// JavaScript primitives -var primitives = { - null: null, - true: true, - false: false, -}; -// Attributes that are booleans -var rboolean = /^(?:autofocus|autoplay|async|checked|controls|defer|disabled|hidden|loop|multiple|open|readonly|required|scoped|selected)$/i; -// Matches strings that look like JSON objects or arrays -var rbrace = /^(?:{[\w\W]*}|\[[\w\W]*])$/; - -/** - * Gets a node's attribute. For boolean attributes, it will return the value's - * name should it be set. - * - * Also supports getting the `value` of several form elements. - * - * @private - * @param {Element} elem - Elenent to get the attribute of. - * @param {string} name - Name of the attribute. - * @returns {object | string | undefined} The attribute's value. - */ -function getAttr(elem, name) { - if (!elem || !isTag(elem)) return; - - if (!elem.attribs) { - elem.attribs = {}; - } - - // Return the entire attribs object if no attribute specified - if (!name) { - return elem.attribs; - } - - if (hasOwn.call(elem.attribs, name)) { - // Get the (decoded) attribute - return rboolean.test(name) ? name : elem.attribs[name]; - } - - // Mimic the DOM and return text content as value for `option's` - if (elem.name === 'option' && name === 'value') { - return text(elem.children); - } - - // Mimic DOM with default value for radios/checkboxes - if ( - elem.name === 'input' && - (elem.attribs.type === 'radio' || elem.attribs.type === 'checkbox') && - name === 'value' - ) { - return 'on'; - } -} - -/** - * Sets the value of an attribute. The attribute will be deleted if the value is `null`. 
- * - * @private - * @param {Element} el - The element to set the attribute on. - * @param {string} name - The attribute's name. - * @param {string | null} value - The attribute's value. - */ -function setAttr(el, name, value) { - if (value === null) { - removeAttribute(el, name); - } else { - el.attribs[name] = value + ''; - } -} - -/** - * Method for getting and setting attributes. Gets the attribute value for only - * the first element in the matched set. If you set an attribute's value to - * `null`, you remove that attribute. You may also pass a `map` and `function` - * like jQuery. - * - * @example - * $('ul').attr('id'); - * //=> fruits - * - * $('.apple').attr('id', 'favorite').html(); - * //=>
  • Apple
  • - * - * @param {string} name - Name of the attribute. - * @param {string | Function} [value] - If specified sets the value of the attribute. - * @returns {string | Cheerio} If `value` is specified the instance itself, - * otherwise the attribute's value. - * @see {@link https://api.jquery.com/attr/} - */ -exports.attr = function (name, value) { - // Set the value (with attr map support) - if (typeof name === 'object' || value !== undefined) { - if (typeof value === 'function') { - return domEach(this, function (i, el) { - setAttr(el, name, value.call(el, i, el.attribs[name])); - }); - } - return domEach(this, function (i, el) { - if (!isTag(el)) return; - - if (typeof name === 'object') { - Object.keys(name).forEach(function (objName) { - var objValue = name[objName]; - setAttr(el, objName, objValue); - }); - } else { - setAttr(el, name, value); - } - }); - } - - return arguments.length > 1 ? this : getAttr(this[0], name); -}; - -/** - * Gets a node's prop. - * - * @private - * @param {Node} el - Elenent to get the prop of. - * @param {string} name - Name of the prop. - * @returns {string | undefined} The prop's value. - */ -function getProp(el, name) { - if (!el || !isTag(el)) return; - - return name in el - ? el[name] - : rboolean.test(name) - ? getAttr(el, name) !== undefined - : getAttr(el, name); -} - -/** - * Sets the value of a prop. - * - * @private - * @param {Element} el - The element to set the prop on. - * @param {string} name - The prop's name. - * @param {string | null} value - The prop's value. - */ -function setProp(el, name, value) { - if (name in el) { - el[name] = value; - } else { - setAttr(el, name, rboolean.test(name) ? (value ? '' : null) : value); - } -} - -/** - * Method for getting and setting properties. Gets the property value for only - * the first element in the matched set. 
- * - * @example - * $('input[type="checkbox"]').prop('checked'); - * //=> false - * - * $('input[type="checkbox"]').prop('checked', true).val(); - * //=> ok - * - * @param {string} name - Name of the property. - * @param {any} [value] - If specified set the property to this. - * @returns {string | Cheerio} If `value` is specified the instance itself, - * otherwise the prop's value. - * @see {@link https://api.jquery.com/prop/} - */ -exports.prop = function (name, value) { - if (typeof name === 'string' && value === undefined) { - switch (name) { - case 'style': { - var property = this.css(); - var keys = Object.keys(property); - keys.forEach(function (p, i) { - property[i] = p; - }); - - property.length = keys.length; - - return property; - } - case 'tagName': - case 'nodeName': - return this[0].name.toUpperCase(); - - case 'outerHTML': - return this.clone().wrap('').parent().html(); - - case 'innerHTML': - return this.html(); - - default: - return getProp(this[0], name); - } - } - - if (typeof name === 'object' || value !== undefined) { - if (typeof value === 'function') { - return domEach(this, function (j, el) { - setProp(el, name, value.call(el, j, getProp(el, name))); - }); - } - - return domEach(this, function (__, el) { - if (!isTag(el)) return; - - if (typeof name === 'object') { - Object.keys(name).forEach(function (key) { - var val = name[key]; - setProp(el, key, val); - }); - } else { - setProp(el, name, value); - } - }); - } -}; - -/** - * Sets the value of a data attribute. - * - * @private - * @param {Element} el - The element to set the data attribute on. - * @param {string | object} name - The data attribute's name. - * @param {string | null} value - The data attribute's value. 
- */ -function setData(el, name, value) { - if (!el.data) { - el.data = {}; - } - - if (typeof name === 'object') Object.assign(el.data, name); - else if (typeof name === 'string' && value !== undefined) { - el.data[name] = value; - } -} - -/** - * Read the specified attribute from the equivalent HTML5 `data-*` attribute, - * and (if present) cache the value in the node's internal data store. If no - * attribute name is specified, read *all* HTML5 `data-*` attributes in this manner. - * - * @private - * @param {Element} el - Elenent to get the data attribute of. - * @param {string} [name] - Name of the data attribute. - * @returns {any} The data attribute's value, or a map with all of the data attribute. - */ -function readData(el, name) { - var readAll = arguments.length === 1; - var domNames; - var jsNames; - var value; - - if (readAll) { - domNames = Object.keys(el.attribs).filter(function (attrName) { - return attrName.slice(0, dataAttrPrefix.length) === dataAttrPrefix; - }); - jsNames = domNames.map(function (_domName) { - return camelCase(_domName.slice(dataAttrPrefix.length)); - }); - } else { - domNames = [dataAttrPrefix + cssCase(name)]; - jsNames = [name]; - } - - for (var idx = 0; idx < domNames.length; ++idx) { - var domName = domNames[idx]; - var jsName = jsNames[idx]; - if (hasOwn.call(el.attribs, domName) && !hasOwn.call(el.data, jsName)) { - value = el.attribs[domName]; - - if (hasOwn.call(primitives, value)) { - value = primitives[value]; - } else if (value === String(Number(value))) { - value = Number(value); - } else if (rbrace.test(value)) { - try { - value = JSON.parse(value); - } catch (e) { - /* ignore */ - } - } - - el.data[jsName] = value; - } - } - - return readAll ? el.data : value; -} - -/** - * Method for getting and setting data attributes. Gets or sets the data - * attribute value for only the first element in the matched set. - * - * @example - * $('
    ').data(); - * //=> { appleColor: 'red' } - * - * $('
    ').data('apple-color'); - * //=> 'red' - * - * const apple = $('.apple').data('kind', 'mac'); - * apple.data('kind'); - * //=> 'mac' - * - * @param {string} name - Name of the attribute. - * @param {any} [value] - If specified new value. - * @returns {string | Cheerio | undefined} If `value` is specified the instance - * itself, otherwise the data attribute's value. - * @see {@link https://api.jquery.com/data/} - */ -exports.data = function (name, value) { - var elem = this[0]; - - if (!elem || !isTag(elem)) return; - - if (!elem.data) { - elem.data = {}; - } - - // Return the entire data object if no data specified - if (!name) { - return readData(elem); - } - - // Set the value (with attr map support) - if (typeof name === 'object' || value !== undefined) { - domEach(this, function (i, el) { - setData(el, name, value); - }); - return this; - } - if (hasOwn.call(elem.data, name)) { - return elem.data[name]; - } - - return readData(elem, name); -}; - -/** - * Method for getting and setting the value of input, select, and textarea. - * Note: Support for `map`, and `function` has not been added yet. - * - * @example - * $('input[type="text"]').val(); - * //=> input_text - * - * $('input[type="text"]').val('test').html(); - * //=> - * - * @param {string | string[]} [value] - If specified new value. - * @returns {string | Cheerio | undefined} If a new `value` is specified the - * instance itself, otherwise the value. 
- * @see {@link https://api.jquery.com/val/} - */ -exports.val = function (value) { - var querying = arguments.length === 0; - var element = this[0]; - - if (!element) return; - - switch (element.name) { - case 'textarea': - return this.text(value); - case 'select': { - var option = this.find('option:selected'); - if (!option) return; - if (!querying) { - if (this.attr('multiple') == null && typeof value === 'object') { - return this; - } - if (typeof value !== 'object') { - value = [value]; - } - this.find('option').removeAttr('selected'); - for (var i = 0; i < value.length; i++) { - this.find('option[value="' + value[i] + '"]').attr('selected', ''); - } - return this; - } - - return this.attr('multiple') - ? option.toArray().map(function (el) { - return getAttr(el, 'value'); - }) - : option.attr('value'); - } - case 'input': - case 'option': - return querying ? this.attr('value') : this.attr('value', value); - } -}; - -/** - * Remove an attribute. - * - * @private - * @param {Element} elem - Node to remove attribute from. - * @param {string} name - Name of the attribute to remove. - */ -function removeAttribute(elem, name) { - if (!elem.attribs || !hasOwn.call(elem.attribs, name)) return; - - delete elem.attribs[name]; -} - -/** - * Splits a space-separated list of names to individual names. - * - * @param {string} names - Names to split. - * @returns {string[]} - Split names. - */ -function splitNames(names) { - return names ? names.trim().split(rspace) : []; -} - -/** - * Method for removing attributes by `name`. - * - * @example - * $('.pear').removeAttr('class').html(); - * //=>
  • Pear
  • - * - * $('.apple').attr('id', 'favorite'); - * $('.apple').removeAttr('id class').html(); - * //=>
  • Apple
  • - * - * @param {string} name - Name of the attribute. - * @returns {Cheerio} The instance itself. - * @see {@link https://api.jquery.com/removeAttr/} - */ -exports.removeAttr = function (name) { - var attrNames = splitNames(name); - - for (var i = 0; i < attrNames.length; i++) { - domEach(this, function (_, elem) { - removeAttribute(elem, attrNames[i]); - }); - } - - return this; -}; - -/** - * Check to see if *any* of the matched elements have the given `className`. - * - * @example - * $('.pear').hasClass('pear'); - * //=> true - * - * $('apple').hasClass('fruit'); - * //=> false - * - * $('li').hasClass('pear'); - * //=> true - * - * @param {string} className - Name of the class. - * @returns {boolean} Indicates if an element has the given `className`. - * @see {@link https://api.jquery.com/hasClass/} - */ -exports.hasClass = function (className) { - return this.toArray().some(function (elem) { - var clazz = elem.attribs && elem.attribs['class']; - var idx = -1; - - if (clazz && className.length) { - while ((idx = clazz.indexOf(className, idx + 1)) > -1) { - var end = idx + className.length; - - if ( - (idx === 0 || rspace.test(clazz[idx - 1])) && - (end === clazz.length || rspace.test(clazz[end])) - ) { - return true; - } - } - } - - return false; - }); -}; - -/** - * Adds class(es) to all of the matched elements. Also accepts a `function` like jQuery. - * - * @example - * $('.pear').addClass('fruit').html(); - * //=>
  • Pear
  • - * - * $('.apple').addClass('fruit red').html(); - * //=>
  • Apple
  • - * - * @param {string | Function} value - Name of new class. - * @returns {Cheerio} The instance itself. - * @see {@link https://api.jquery.com/addClass/} - */ -exports.addClass = function (value) { - // Support functions - if (typeof value === 'function') { - return domEach(this, function (i, el) { - var className = el.attribs['class'] || ''; - exports.addClass.call([el], value.call(el, i, className)); - }); - } - - // Return if no value or not a string or function - if (!value || typeof value !== 'string') return this; - - var classNames = value.split(rspace); - var numElements = this.length; - - for (var i = 0; i < numElements; i++) { - // If selected element isn't a tag, move on - if (!isTag(this[i])) continue; - - // If we don't already have classes - var className = getAttr(this[i], 'class'); - - if (!className) { - setAttr(this[i], 'class', classNames.join(' ').trim()); - } else { - var setClass = ' ' + className + ' '; - - // Check if class already exists - for (var j = 0; j < classNames.length; j++) { - var appendClass = classNames[j] + ' '; - if (setClass.indexOf(' ' + appendClass) < 0) setClass += appendClass; - } - - setAttr(this[i], 'class', setClass.trim()); - } - } - - return this; -}; - -/** - * Removes one or more space-separated classes from the selected elements. If no - * `className` is defined, all classes will be removed. Also accepts a - * `function` like jQuery. - * - * @example - * $('.pear').removeClass('pear').html(); - * //=>
  • Pear
  • - * - * $('.apple').addClass('red').removeClass().html(); - * //=>
  • Apple
  • - * - * @param {string | Function} value - Name of the class. - * @returns {Cheerio} The instance itself. - * @see {@link https://api.jquery.com/removeClass/} - */ -exports.removeClass = function (value) { - // Handle if value is a function - if (typeof value === 'function') { - return domEach(this, function (i, el) { - exports.removeClass.call( - [el], - value.call(el, i, el.attribs['class'] || '') - ); - }); - } - - var classes = splitNames(value); - var numClasses = classes.length; - var removeAll = arguments.length === 0; - - return domEach(this, function (_, el) { - if (!isTag(el)) return; - - if (removeAll) { - // Short circuit the remove all case as this is the nice one - el.attribs.class = ''; - } else { - var elClasses = splitNames(el.attribs.class); - var changed = false; - - for (var j = 0; j < numClasses; j++) { - var index = elClasses.indexOf(classes[j]); - - if (index >= 0) { - elClasses.splice(index, 1); - changed = true; - - // We have to do another pass to ensure that there are not duplicate - // classes listed - j--; - } - } - if (changed) { - el.attribs.class = elClasses.join(' '); - } - } - }); -}; - -/** - * Add or remove class(es) from the matched elements, depending on either the - * class's presence or the value of the switch argument. Also accepts a - * `function` like jQuery. - * - * @example - * $('.apple.green').toggleClass('fruit green red').html(); - * //=>
  • Apple
  • - * - * $('.apple.green').toggleClass('fruit green red', true).html(); - * //=>
  • Apple
  • - * - * @param {string | Function} value - Name of the class. Can also be a function. - * @param {boolean} [stateVal] - If specified the state of the class. - * @returns {Cheerio} The instance itself. - * @see {@link https://api.jquery.com/toggleClass/} - */ -exports.toggleClass = function (value, stateVal) { - // Support functions - if (typeof value === 'function') { - return domEach(this, function (i, el) { - exports.toggleClass.call( - [el], - value.call(el, i, el.attribs['class'] || '', stateVal), - stateVal - ); - }); - } - - // Return if no value or not a string or function - if (!value || typeof value !== 'string') return this; - - var classNames = value.split(rspace); - var numClasses = classNames.length; - var state = typeof stateVal === 'boolean' ? (stateVal ? 1 : -1) : 0; - var numElements = this.length; - - for (var i = 0; i < numElements; i++) { - // If selected element isn't a tag, move on - if (!isTag(this[i])) continue; - - var elementClasses = splitNames(this[i].attribs.class); - - // Check if class already exists - for (var j = 0; j < numClasses; j++) { - // Check if the class name is currently defined - var index = elementClasses.indexOf(classNames[j]); - - // Add if stateValue === true or we are toggling and there is no value - if (state >= 0 && index < 0) { - elementClasses.push(classNames[j]); - } else if (state <= 0 && index >= 0) { - // Otherwise remove but only if the item exists - elementClasses.splice(index, 1); - } - } - - this[i].attribs.class = elementClasses.join(' '); - } - - return this; -}; - -/** - * Checks the current list of elements and returns `true` if _any_ of the - * elements match the selector. If using an element or Cheerio selection, - * returns `true` if _any_ of the elements match. If using a predicate function, - * the function is executed in the context of the selected element, so `this` - * refers to the current element. - * - * @param {string | Function | Cheerio | Node} selector - Selector for the selection. 
- * @returns {boolean} Whether or not the selector matches an element of the instance. - * @see {@link https://api.jquery.com/is/} - */ -exports.is = function (selector) { - if (selector) { - return this.filter(selector).length > 0; - } - return false; -}; diff --git a/lib/api/css.js b/lib/api/css.js deleted file mode 100644 index 970b52f821..0000000000 --- a/lib/api/css.js +++ /dev/null @@ -1,119 +0,0 @@ -'use strict'; -/** @module cheerio/css */ - -var domEach = require('../utils').domEach; - -var toString = Object.prototype.toString; - -/** - * Get the value of a style property for the first element in the set of matched - * elements or set one or more CSS properties for every matched element. - * - * @param {string | object} prop - The name of the property. - * @param {string} [val] - If specified the new value. - * @returns {Cheerio} The instance itself. - * @see {@link https://api.jquery.com/css/} - */ -exports.css = function (prop, val) { - if ( - arguments.length === 2 || - // When `prop` is a "plain" object - toString.call(prop) === '[object Object]' - ) { - return domEach(this, function (idx, el) { - setCss(el, prop, val, idx); - }); - } - return getCss(this[0], prop); -}; - -/** - * Set styles of all elements. - * - * @private - * @param {Element} el - Element to set style of. - * @param {string | object} prop - Name of property. - * @param {string | Function} val - Value to set property to. - * @param {number} [idx] - Optional index within the selection. - */ -function setCss(el, prop, val, idx) { - if (typeof prop === 'string') { - var styles = getCss(el); - if (typeof val === 'function') { - val = val.call(el, idx, styles[prop]); - } - - if (val === '') { - delete styles[prop]; - } else if (val != null) { - styles[prop] = val; - } - - el.attribs.style = stringify(styles); - } else if (typeof prop === 'object') { - Object.keys(prop).forEach(function (k) { - setCss(el, k, prop[k]); - }); - } -} - -/** - * Get parsed styles of the first element. 
- * - * @private - * @param {Element} el - Element to get styles from. - * @param {string | string[]} [prop] - Name of the prop. - * @returns {object | undefined} The parsed styles. - */ -function getCss(el, prop) { - if (!el || !el.attribs) return; - - var styles = parse(el.attribs.style); - if (typeof prop === 'string') { - return styles[prop]; - } - if (Array.isArray(prop)) { - var newStyles = {}; - prop.forEach(function (item) { - if (styles[item] != null) { - newStyles[item] = styles[item]; - } - }); - return newStyles; - } - return styles; -} - -/** - * Stringify `obj` to styles. - * - * @private - * @param {object} obj - Object to stringify. - * @returns {string} The serialized styles. - */ -function stringify(obj) { - return Object.keys(obj || {}).reduce(function (str, prop) { - return (str += '' + (str ? ' ' : '') + prop + ': ' + obj[prop] + ';'); - }, ''); -} - -/** - * Parse `styles`. - * - * @private - * @param {string} styles - Styles to be parsed. - * @returns {object} The parsed styles. - */ -function parse(styles) { - styles = (styles || '').trim(); - - if (!styles) return {}; - - return styles.split(';').reduce(function (obj, str) { - var n = str.indexOf(':'); - // skip if there is no :, or if it is the first/last character - if (n < 1 || n === str.length - 1) return obj; - obj[str.slice(0, n).trim()] = str.slice(n + 1).trim(); - return obj; - }, {}); -} diff --git a/lib/api/forms.js b/lib/api/forms.js deleted file mode 100644 index f0bedc068e..0000000000 --- a/lib/api/forms.js +++ /dev/null @@ -1,83 +0,0 @@ -'use strict'; -/** @module cheerio/forms */ - -// https://github.com/jquery/jquery/blob/2.1.3/src/manipulation/var/rcheckableType.js -// https://github.com/jquery/jquery/blob/2.1.3/src/serialize.js -var submittableSelector = 'input,select,textarea,keygen'; -var r20 = /%20/g; -var rCRLF = /\r?\n/g; - -/** - * Encode a set of form elements as a string for submission. - * - * @returns {string} The serialized form. 
- * @see {@link https://api.jquery.com/serialize/} - */ -exports.serialize = function () { - // Convert form elements into name/value objects - var arr = this.serializeArray(); - - // Serialize each element into a key/value string - var retArr = arr.map(function (data) { - return encodeURIComponent(data.name) + '=' + encodeURIComponent(data.value); - }); - - // Return the resulting serialization - return retArr.join('&').replace(r20, '+'); -}; - -/** - * Encode a set of form elements as an array of names and values. - * - * @example - * $('
    ').serializeArray(); - * //=> [ { name: 'foo', value: 'bar' } ] - * - * @returns {object[]} The serialized form. - * @this {Cheerio} - * @see {@link https://api.jquery.com/serializeArray/} - */ -exports.serializeArray = function () { - // Resolve all form elements from either forms or collections of form elements - var Cheerio = this.constructor; - return this.map(function (_, elem) { - var $elem = Cheerio(elem); - if (elem.name === 'form') { - return $elem.find(submittableSelector).toArray(); - } - return $elem.filter(submittableSelector).toArray(); - }) - .filter( - // Verify elements have a name (`attr.name`) and are not disabled (`:enabled`) - '[name!=""]:enabled' + - // and cannot be clicked (`[type=submit]`) or are used in `x-www-form-urlencoded` (`[type=file]`) - ':not(:submit, :button, :image, :reset, :file)' + - // and are either checked/don't have a checkable state - ':matches([checked], :not(:checkbox, :radio))' - // Convert each of the elements to its value(s) - ) - .map(function (_, elem) { - var $elem = Cheerio(elem); - var name = $elem.attr('name'); - var value = $elem.val(); - - // If there is no value set (e.g. `undefined`, `null`), then default value to empty - if (value == null) { - value = ''; - } - - // If we have an array of values (e.g. 
`'; + +// Comments +const comment = ''; +const conditional = + ''; + +// Text +const text = 'lorem ipsum'; + +// Script +const script = ''; +const scriptEmpty = ''; + +// Style +const style = ''; +const styleEmpty = ''; + +// Directives +const directive = ''; + +describe('parse', () => { + describe('evaluate', () => { + it(`should parse basic empty tags: ${basic}`, () => { + const [tag] = parse(basic, defaultOpts, true).children as Element[]; + expect(tag.type).toBe('tag'); + expect(tag.tagName).toBe('html'); + expect(tag.childNodes).toHaveLength(2); + }); + + it(`should handle sibling tags: ${siblings}`, () => { + const dom = parse(siblings, defaultOpts, false).children as Element[]; + const [h2, p] = dom; + + expect(dom).toHaveLength(2); + expect(h2.tagName).toBe('h2'); + expect(p.tagName).toBe('p'); + }); + + it(`should handle single tags: ${single}`, () => { + const [tag] = parse(single, defaultOpts, false).children as Element[]; + expect(tag.type).toBe('tag'); + expect(tag.tagName).toBe('br'); + expect(tag.childNodes).toHaveLength(0); + }); + + it(`should handle malformatted single tags: ${singleWrong}`, () => { + const [tag] = parse(singleWrong, defaultOpts, false) + .children as Element[]; + expect(tag.type).toBe('tag'); + expect(tag.tagName).toBe('br'); + expect(tag.childNodes).toHaveLength(0); + }); + + it(`should handle tags with children: ${children}`, () => { + const [tag] = parse(children, defaultOpts, true).children as Element[]; + expect(tag.type).toBe('tag'); + expect(tag.tagName).toBe('html'); + expect(tag.childNodes).toBeTruthy(); + expect(tag.childNodes[1]).toHaveProperty('tagName', 'body'); + expect((tag.childNodes[1] as Element).childNodes).toHaveLength(1); + }); + + it(`should handle tags with children: ${li}`, () => { + const [tag] = parse(li, defaultOpts, false).children as Element[]; + expect(tag.childNodes).toHaveLength(1); + expect(tag.childNodes[0]).toHaveProperty('data', 'Durian'); + }); + + it(`should handle tags with attributes: 
${attributes}`, () => { + const attrs = parse(attributes, defaultOpts, false) + .children[0] as Element; + expect(attrs.attribs).toBeTruthy(); + expect(attrs.attribs.src).toBe('hello.png'); + expect(attrs.attribs.alt).toBe('man waving'); + }); + + it(`should handle value-less attributes: ${noValueAttribute}`, () => { + const attrs = parse(noValueAttribute, defaultOpts, false) + .children[0] as Element; + expect(attrs.attribs).toBeTruthy(); + expect(attrs.attribs.disabled).toBe(''); + }); + + it(`should handle comments: ${comment}`, () => { + const elem = parse(comment, defaultOpts, false).children[0]; + expect(elem.type).toBe('comment'); + expect(elem).toHaveProperty('data', ' sexy '); + }); + + it(`should handle conditional comments: ${conditional}`, () => { + const elem = parse(conditional, defaultOpts, false).children[0]; + expect(elem.type).toBe('comment'); + expect(elem).toHaveProperty( + 'data', + conditional.replace('', '') + ); + }); + + it(`should handle text: ${text}`, () => { + const text_ = parse(text, defaultOpts, false).children[0]; + expect(text_.type).toBe('text'); + expect(text_).toHaveProperty('data', 'lorem ipsum'); + }); + + it(`should handle script tags: ${script}`, () => { + const script_ = parse(script, defaultOpts, false).children[0] as Element; + expect(script_.type).toBe('script'); + expect(script_.tagName).toBe('script'); + expect(script_.attribs.type).toBe('text/javascript'); + expect(script_.childNodes).toHaveLength(1); + expect(script_.childNodes[0].type).toBe('text'); + expect(script_.childNodes[0]).toHaveProperty( + 'data', + 'alert("hi world!");' + ); + }); + + it(`should handle style tags: ${style}`, () => { + const style_ = parse(style, defaultOpts, false).children[0] as Element; + expect(style_.type).toBe('style'); + expect(style_.tagName).toBe('style'); + expect(style_.attribs.type).toBe('text/css'); + expect(style_.childNodes).toHaveLength(1); + expect(style_.childNodes[0].type).toBe('text'); + 
expect(style_.childNodes[0]).toHaveProperty( + 'data', + ' h2 { color:blue; } ' + ); + }); + + it(`should handle directives: ${directive}`, () => { + const elem = parse(directive, defaultOpts, true).children[0]; + expect(elem.type).toBe('directive'); + expect(elem).toHaveProperty('data', '!DOCTYPE html ""'); + expect(elem).toHaveProperty('tagName', '!doctype'); + }); + }); + + describe('.parse', () => { + // Root test utility + function rootTest(root: Document) { + expect(root).toHaveProperty('tagName', 'root'); + + expect(root.nextSibling).toBe(null); + expect(root.previousSibling).toBe(null); + expect(root.parentNode).toBe(null); + + const child = root.childNodes[0]; + expect(child.parentNode).toBe(root); + } + + it(`should add root to: ${basic}`, () => { + const root = parse(basic, defaultOpts, true); + rootTest(root); + expect(root.childNodes).toHaveLength(1); + expect(root.childNodes[0]).toHaveProperty('tagName', 'html'); + }); + + it(`should add root to: ${siblings}`, () => { + const root = parse(siblings, defaultOpts, false); + rootTest(root); + expect(root.childNodes).toHaveLength(2); + expect(root.childNodes[0]).toHaveProperty('tagName', 'h2'); + expect(root.childNodes[1]).toHaveProperty('tagName', 'p'); + expect(root.childNodes[1].parent).toBe(root); + }); + + it(`should add root to: ${comment}`, () => { + const root = parse(comment, defaultOpts, false); + rootTest(root); + expect(root.childNodes).toHaveLength(1); + expect(root.childNodes[0].type).toBe('comment'); + }); + + it(`should add root to: ${text}`, () => { + const root = parse(text, defaultOpts, false); + rootTest(root); + expect(root.childNodes).toHaveLength(1); + expect(root.childNodes[0].type).toBe('text'); + }); + + it(`should add root to: ${scriptEmpty}`, () => { + const root = parse(scriptEmpty, defaultOpts, false); + rootTest(root); + expect(root.childNodes).toHaveLength(1); + expect(root.childNodes[0].type).toBe('script'); + }); + + it(`should add root to: ${styleEmpty}`, () => { + const 
root = parse(styleEmpty, defaultOpts, false); + rootTest(root); + expect(root.childNodes).toHaveLength(1); + expect(root.childNodes[0].type).toBe('style'); + }); + + it(`should add root to: ${directive}`, () => { + const root = parse(directive, defaultOpts, true); + rootTest(root); + expect(root.childNodes).toHaveLength(2); + expect(root.childNodes[0].type).toBe('directive'); + }); + + it('should simply return root', () => { + const oldroot = parse(basic, defaultOpts, true); + const root = parse(oldroot, defaultOpts, true); + expect(root).toBe(oldroot); + rootTest(root); + expect(root.childNodes).toHaveLength(1); + expect(root.childNodes[0]).toHaveProperty('tagName', 'html'); + }); + + it('should expose the DOM level 1 API', () => { + const root = parse( + '

    ', + defaultOpts, + false + ).childNodes[0] as Element; + const childNodes = root.childNodes as Element[]; + + expect(childNodes).toHaveLength(3); + + expect(root.tagName).toBe('div'); + expect(root.firstChild).toBe(childNodes[0]); + expect(root.lastChild).toBe(childNodes[2]); + + expect(childNodes[0].tagName).toBe('a'); + expect(childNodes[0].previousSibling).toBe(null); + expect(childNodes[0].nextSibling).toBe(childNodes[1]); + expect(childNodes[0].parentNode).toBe(root); + expect((childNodes[0] as Element).childNodes).toHaveLength(0); + expect(childNodes[0].firstChild).toBe(null); + expect(childNodes[0].lastChild).toBe(null); + + expect(childNodes[1].tagName).toBe('span'); + expect(childNodes[1].previousSibling).toBe(childNodes[0]); + expect(childNodes[1].nextSibling).toBe(childNodes[2]); + expect(childNodes[1].parentNode).toBe(root); + expect(childNodes[1].childNodes).toHaveLength(0); + expect(childNodes[1].firstChild).toBe(null); + expect(childNodes[1].lastChild).toBe(null); + + expect(childNodes[2].tagName).toBe('p'); + expect(childNodes[2].previousSibling).toBe(childNodes[1]); + expect(childNodes[2].nextSibling).toBe(null); + expect(childNodes[2].parentNode).toBe(root); + expect(childNodes[2].childNodes).toHaveLength(0); + expect(childNodes[2].firstChild).toBe(null); + expect(childNodes[2].lastChild).toBe(null); + }); + + it('Should parse less than or equal sign sign', () => { + const root = parse('A<=B', defaultOpts, false); + const { childNodes } = root; + + expect(childNodes[0]).toHaveProperty('tagName', 'i'); + expect((childNodes[0] as Element).childNodes[0]).toHaveProperty( + 'data', + 'A' + ); + expect(childNodes[1]).toHaveProperty('data', '<='); + expect(childNodes[2]).toHaveProperty('tagName', 'i'); + expect((childNodes[2] as Element).childNodes[0]).toHaveProperty( + 'data', + 'B' + ); + }); + + it('Should ignore unclosed CDATA', () => { + const root = parse( + '', + defaultOpts, + false + ); + const childNodes = root.childNodes as Element[]; + + 
expect(childNodes[0].tagName).toBe('a'); + expect(childNodes[1].tagName).toBe('script'); + expect(childNodes[1].childNodes[0]).toHaveProperty( + 'data', + 'foo // to documents', () => { + const root = parse('', defaultOpts, true); + const childNodes = root.childNodes as Element[]; + + expect(childNodes[0].tagName).toBe('html'); + expect(childNodes[0].childNodes[0]).toHaveProperty('tagName', 'head'); + }); + + it('Should implicitly create around ', () => { + const root = parse( + '
    bar
    ', + defaultOpts, + false + ); + const childNodes = root.childNodes as Element[]; + + expect(childNodes[0].tagName).toBe('table'); + expect(childNodes[0].childNodes.length).toBe(1); + expect(childNodes[0].childNodes[0]).toHaveProperty('tagName', 'tbody'); + expect((childNodes[0] as any).childNodes[0].childNodes[0]).toHaveProperty( + 'tagName', + 'tr' + ); + expect( + (childNodes[0] as any).childNodes[0].childNodes[0].childNodes[0].tagName + ).toBe('td'); + expect( + (childNodes[0] as any).childNodes[0].childNodes[0].childNodes[0] + .childNodes[0].data + ).toBe('bar'); + }); + + it('Should parse custom tag ', () => { + const root = parse('test', defaultOpts, false); + const childNodes = root.childNodes as Element[]; + + expect(childNodes.length).toBe(1); + expect(childNodes[0].tagName).toBe('line'); + expect(childNodes[0].childNodes[0]).toHaveProperty('data', 'test'); + }); + + it('Should properly parse misnested table tags', () => { + const root = parse( + 'i1i2i3', + defaultOpts, + false + ); + const childNodes = root.childNodes as Element[]; + + expect(childNodes.length).toBe(3); + + childNodes.forEach((child, i) => { + expect(child.tagName).toBe('tr'); + expect(child.childNodes[0]).toHaveProperty('tagName', 'td'); + expect((child.childNodes[0] as Element).childNodes[0]).toHaveProperty( + 'data', + `i${i + 1}` + ); + }); + }); + + it('Should correctly parse data url attributes', () => { + const html = + '
    '; + const expectedAttr = + 'font-family:"butcherman-caps"; src:url(data:font/opentype;base64,AAEA...);'; + const root = parse(html, defaultOpts, false); + const childNodes = root.childNodes as Element[]; + + expect(childNodes[0].attribs.style).toBe(expectedAttr); + }); + + it('Should treat tag content as text', () => { + const root = parse('<xmp><h2>', defaultOpts, false); + const childNodes = root.childNodes as Element[]; + + expect(childNodes[0].childNodes[0]).toHaveProperty('data', '

    '); + }); + + it('Should correctly parse malformed numbered entities', () => { + const root = parse('

    z&#

    ', defaultOpts, false); + const childNodes = root.childNodes as Element[]; + + expect(childNodes[0].childNodes[0]).toHaveProperty('data', 'z&#'); + }); + + it('Should correctly parse mismatched headings', () => { + const root = parse('

    Test

    ', defaultOpts, false); + const { childNodes } = root; + + expect(childNodes.length).toBe(2); + expect(childNodes[0]).toHaveProperty('tagName', 'h2'); + expect(childNodes[1]).toHaveProperty('tagName', 'div'); + }); + + it('Should correctly parse tricky
     content', () => {
    +      const root = parse(
    +        '
    \nA <- factor(A, levels = c("c","a","b"))\n
    ', + defaultOpts, + false + ); + const childNodes = root.childNodes as Element[]; + + expect(childNodes.length).toBe(1); + expect(childNodes[0].tagName).toBe('pre'); + expect(childNodes[0].childNodes[0]).toHaveProperty( + 'data', + 'A <- factor(A, levels = c("c","a","b"))\n' + ); + }); + + it('should pass the options for including the location info to parse5', () => { + const root = parse( + '

    Hello

    ', + { ...defaultOpts, sourceCodeLocationInfo: true }, + false + ); + // TODO Add `sourceCodeLocation` to domhandler + const location = (root.children[0] as any).sourceCodeLocation; + + expect(typeof location).toBe('object'); + expect(location.endOffset).toBe(12); + }); + }); +}); diff --git a/src/parse.ts b/src/parse.ts new file mode 100644 index 0000000000..0b2f741b9c --- /dev/null +++ b/src/parse.ts @@ -0,0 +1,87 @@ +import { DomUtils } from 'htmlparser2'; +import { parse as parseWithHtmlparser2 } from './parsers/htmlparser2'; +import { parse as parseWithParse5 } from './parsers/parse5'; +import { + Node, + Document, + NodeWithChildren, + isDocument as checkIsDocument, +} from 'domhandler'; +import type { InternalOptions } from './options'; + +/* + * Parser + */ +export default function parse( + content: string | Document | Node | Node[] | Buffer, + options: InternalOptions, + isDocument: boolean +): Document { + if (typeof Buffer !== 'undefined' && Buffer.isBuffer(content)) { + content = content.toString(); + } + + if (typeof content === 'string') { + return options.xmlMode || options._useHtmlParser2 + ? parseWithHtmlparser2(content, options) + : parseWithParse5(content, options, isDocument); + } + + const doc = content as Node | Node[] | Document; + + if (!Array.isArray(doc) && checkIsDocument(doc)) { + // If `doc` is already a root, just return it + return doc; + } + + // Add content to new root element + const root = new Document([]); + + // Update the DOM using the root + update(doc, root); + + return root; +} + +/** + * Update the dom structure, for one changed layer. + * + * @param newChilds - The new children. + * @param parent - The new parent. + * @returns The parent node. + */ +export function update( + newChilds: Node[] | Node, + parent: NodeWithChildren | null +): Node | null { + // Normalize + const arr = Array.isArray(newChilds) ?
newChilds : [newChilds]; + + // Update parent + if (parent) { + parent.children = arr; + } else { + parent = null; + } + + // Update neighbors + for (let i = 0; i < arr.length; i++) { + const node = arr[i]; + + // Cleanly remove existing nodes from their previous structures. + if (node.parent && node.parent.children !== arr) { + DomUtils.removeElement(node); + } + + if (parent) { + node.prev = arr[i - 1] || null; + node.next = arr[i + 1] || null; + } else { + node.prev = node.next = null; + } + + node.parent = parent; + } + + return parent; +} diff --git a/src/parsers/htmlparser2.ts b/src/parsers/htmlparser2.ts new file mode 100644 index 0000000000..af3624041c --- /dev/null +++ b/src/parsers/htmlparser2.ts @@ -0,0 +1,2 @@ +export { parseDocument as parse } from 'htmlparser2'; +export { default as render } from 'dom-serializer'; diff --git a/src/parsers/parse5.ts b/src/parsers/parse5.ts new file mode 100644 index 0000000000..fbc9c27c1c --- /dev/null +++ b/src/parsers/parse5.ts @@ -0,0 +1,49 @@ +import { Node, Document, isDocument } from 'domhandler'; +import { parse as parseDocument, parseFragment, serialize } from 'parse5'; +import htmlparser2Adapter from 'parse5-htmlparser2-tree-adapter'; +import type { InternalOptions } from '../options'; + +interface Parse5Options extends InternalOptions { + context?: Node; +} + +export function parse( + content: string, + options: Parse5Options, + isDocument?: boolean +): Document { + const opts = { + scriptingEnabled: + typeof options.scriptingEnabled === 'boolean' + ? options.scriptingEnabled + : true, + treeAdapter: htmlparser2Adapter, + sourceCodeLocationInfo: options.sourceCodeLocationInfo, + }; + + const { context } = options; + + // @ts-expect-error The tree adapter unfortunately doesn't return the exact types. + return isDocument + ? parseDocument(content, opts) + : // @ts-expect-error Same issue again. 
+ parseFragment(context, content, opts); +} + +export function render(dom: Node | ArrayLike): string { + /* + * `dom-serializer` passes over the special "root" node and renders the + * node's children in its place. To mimic this behavior with `parse5`, an + * equivalent operation must be applied to the input array. + */ + const nodes = 'length' in dom ? dom : [dom]; + for (let index = 0; index < nodes.length; index += 1) { + const node = nodes[index]; + if (isDocument(node)) { + Array.prototype.splice.call(nodes, index, 1, ...node.children); + } + } + + // @ts-expect-error Types don't align here either. + return serialize({ children: nodes }, { treeAdapter: htmlparser2Adapter }); +} diff --git a/src/static.spec.ts b/src/static.spec.ts new file mode 100644 index 0000000000..7851f4c1a5 --- /dev/null +++ b/src/static.spec.ts @@ -0,0 +1,329 @@ +import * as fixtures from './__fixtures__/fixtures'; +import cheerio from '.'; +import { CheerioAPI } from './cheerio'; + +describe('cheerio', () => { + describe('.html', () => { + it('() : should return innerHTML; $.html(obj) should return outerHTML', () => { + const $div = cheerio( + 'div', + '
    foobar
    ' + ); + const span = $div.children()[1]; + expect(cheerio(span).html()).toBe('bar'); + expect(cheerio.html(span)).toBe('bar'); + }); + + it('() : should accept an object, an array, or a cheerio object', () => { + const $span = cheerio('foo'); + expect(cheerio.html($span[0])).toBe('foo'); + expect(cheerio.html($span)).toBe('foo'); + }); + + it('() : should be able to set to an empty string', () => { + const $elem = cheerio('foo').html(''); + expect(cheerio.html($elem)).toBe(''); + }); + + it('() : does not render the root element', () => { + const $ = cheerio.load(''); + expect(cheerio.html($.root())).toBe( + '' + ); + }); + + it('(, , ) : does not render the root element', () => { + const $ = cheerio.load('
    a div
    a span'); + const $collection = $('div').add($.root()).add('span'); + const expected = + '
    a div
    a span
    a div
    a span'; + expect(cheerio.html($collection)).toBe(expected); + }); + + it('() : does not crash with `null` as `this` value', () => { + const { html } = cheerio; + expect(html.call(null as any)).toBe(''); + expect(html.call(null as any, '#nothing')).toBe(''); + }); + }); + + describe('.text', () => { + it('(cheerio object) : should return the text contents of the specified elements', () => { + const $ = cheerio.load('This is content.'); + expect(cheerio.text($('a'))).toBe('This is content.'); + }); + + it('(cheerio object) : should omit comment nodes', () => { + const $ = cheerio.load( + 'This is not a comment.' + ); + expect(cheerio.text($('a'))).toBe('This is not a comment.'); + }); + + it('(cheerio object) : should include text contents of children recursively', () => { + const $ = cheerio.load( + 'This is
    a child with another child and not a comment followed by one last child and some final
    text.
    ' + ); + expect(cheerio.text($('a'))).toBe( + 'This is a child with another child and not a comment followed by one last child and some final text.' + ); + }); + + it('() : should return the rendered text content of the root', () => { + const $ = cheerio.load( + 'This is
    a child with another child and not a comment followed by one last child and some final
    text.
    ' + ); + expect(cheerio.text($.root())).toBe( + 'This is a child with another child and not a comment followed by one last child and some final text.' + ); + }); + + it('(cheerio object) : should omit script tags', () => { + const $ = cheerio.load(''); + expect(cheerio.text($.root())).toBe(''); + }); + + it('(cheerio object) : should omit style tags', () => { + const $ = cheerio.load( + '' + ); + expect($.text()).toBe(''); + }); + + it('(cheerio object) : should include text contents of children omitting style and script tags', () => { + const $ = cheerio.load( + 'Welcome
    Hello, testing text function,
    End of messege' + ); + expect(cheerio.text($.root())).toBe( + 'Welcome Hello, testing text function,End of messege' + ); + }); + + it('() : does not crash with `null` as `this` value', () => { + const { text } = cheerio; + expect(text.call(null as any)).toBe(''); + }); + }); + + describe('.parseHTML', () => { + const $ = cheerio.load(''); + + it('() : returns null', () => { + expect($.parseHTML()).toBe(null); + }); + + it('(null) : returns null', () => { + expect($.parseHTML(null)).toBe(null); + }); + + it('("") : returns null', () => { + expect($.parseHTML('')).toBe(null); + }); + + it('(largeHtmlString) : parses large HTML strings', () => { + const html = new Array(10).join('
    '); + const nodes = $.parseHTML(html); + + expect(nodes.length).toBeGreaterThan(4); + expect(nodes).toBeInstanceOf(Array); + }); + + it('("'; + expect($.parseHTML(html)).toHaveLength(0); + }); + + it('("'; + expect($.parseHTML(html, true)[0]).toHaveProperty('tagName', 'script'); + }); + + it('("scriptAndNonScript) : preserves non-script nodes', () => { + const html = '
    '; + expect($.parseHTML(html)[0]).toHaveProperty('tagName', 'div'); + }); + + it('(scriptAndNonScript, true) : Preserves script position', () => { + const html = '
    '; + expect($.parseHTML(html, true)[0]).toHaveProperty('tagName', 'script'); + }); + + it('(text) : returns a text node', () => { + expect($.parseHTML('text')[0].type).toBe('text'); + }); + + it('(\\ttext) : preserves leading whitespace', () => { + expect($.parseHTML('\t
    ')[0]).toHaveProperty('data', '\t'); + }); + + it('( text) : Leading spaces are treated as text nodes', () => { + expect($.parseHTML('
    ')[0].type).toBe('text'); + }); + + it('(html) : should preserve content', () => { + const html = '
    test div
    '; + expect(cheerio($.parseHTML(html)[0]).html()).toBe('test div'); + }); + + it('(malformedHtml) : should not break', () => { + expect($.parseHTML('')).toHaveLength(1); + }); + + it('(garbageInput) : should not cause an error', () => { + expect( + $.parseHTML('<#if>

    This is a test.

    <#/if>') + ).toBeTruthy(); + }); + + it('(text) : should return an array that is not effected by DOM manipulation methods', () => { + const $div = cheerio.load('
    '); + const elems = $div.parseHTML(''); + + $div('div').append(elems); + + expect(elems).toHaveLength(2); + }); + + it('(html, context) : should ignore context argument', () => { + const $div = cheerio.load('
    '); + const elems = $div.parseHTML('', { foo: 123 }); + + $div('div').append(elems); + + expect(elems).toHaveLength(1); + }); + + it('(html, context, keepScripts) : should ignore context argument', () => { + const $div = cheerio.load('
    '); + const elems = $div.parseHTML( + '', + { foo: 123 }, + true + ); + + $div('div').append(elems); + + expect(elems).toHaveLength(2); + }); + }); + + describe('.merge', () => { + const $ = cheerio.load(''); + let arr1: ArrayLike; + let arr2: ArrayLike; + + beforeEach(() => { + arr1 = [1, 2, 3]; + arr2 = [4, 5, 6]; + }); + + it('should be a function', () => { + expect(typeof $.merge).toBe('function'); + }); + + it('(arraylike, arraylike) : should return an array', () => { + const ret = $.merge(arr1, arr2); + expect(typeof ret).toBe('object'); + expect(Array.isArray(ret)).toBe(true); + }); + + it('(arraylike, arraylike) : should modify the first array', () => { + $.merge(arr1, arr2); + expect(arr1).toHaveLength(6); + }); + + it('(arraylike, arraylike) : should not modify the second array', () => { + $.merge(arr1, arr2); + expect(arr2).toHaveLength(3); + }); + + it('(arraylike, arraylike) : should handle objects that arent arrays, but are arraylike', () => { + const arr1: ArrayLike = { + length: 3, + [0]: 'a', + [1]: 'b', + [2]: 'c', + }; + const arr2 = { + length: 3, + [0]: 'd', + [1]: 'e', + [2]: 'f', + }; + + $.merge(arr1, arr2); + expect(arr1).toHaveLength(6); + expect(arr1[3]).toBe('d'); + expect(arr1[4]).toBe('e'); + expect(arr1[5]).toBe('f'); + expect(arr2).toHaveLength(3); + }); + + it('(?, ?) : should gracefully reject invalid inputs', () => { + expect($.merge([4], 3 as any)).toBeFalsy(); + expect($.merge({} as any, {} as any)).toBeFalsy(); + expect($.merge([], {} as any)).toBeFalsy(); + expect($.merge({} as any, [])).toBeFalsy(); + const fakeArray1 = { length: 3, [0]: 'a', [1]: 'b', [3]: 'd' }; + expect($.merge(fakeArray1, [])).toBeFalsy(); + expect($.merge([], fakeArray1)).toBeFalsy(); + const fakeArray2 = { length: '7' }; + expect($.merge(fakeArray2 as any, [])).toBeFalsy(); + const fakeArray3 = { length: -1 }; + expect($.merge(fakeArray3, [])).toBeFalsy(); + }); + + it('(?, ?) 
: should no-op on invalid inputs', () => { + const fakeArray1 = { length: 3, [0]: 'a', [1]: 'b', [3]: 'd' }; + $.merge(fakeArray1, []); + expect(fakeArray1).toHaveLength(3); + expect(fakeArray1[0]).toBe('a'); + expect(fakeArray1[1]).toBe('b'); + expect(fakeArray1[3]).toBe('d'); + $.merge([], fakeArray1); + expect(fakeArray1).toHaveLength(3); + expect(fakeArray1[0]).toBe('a'); + expect(fakeArray1[1]).toBe('b'); + expect(fakeArray1[3]).toBe('d'); + }); + }); + + describe('.contains', () => { + let $: CheerioAPI; + + beforeEach(() => { + $ = cheerio.load(fixtures.food); + }); + + it('(container, contained) : should correctly detect the provided element', () => { + const $food = $('#food'); + const $fruits = $('#fruits'); + const $apple = $('.apple'); + + expect($.contains($food[0], $fruits[0])).toBe(true); + expect($.contains($food[0], $apple[0])).toBe(true); + }); + + it('(container, other) : should not detect elements that are not contained', () => { + const $fruits = $('#fruits'); + const $vegetables = $('#vegetables'); + const $apple = $('.apple'); + + expect($.contains($vegetables[0], $apple[0])).toBe(false); + expect($.contains($fruits[0], $vegetables[0])).toBe(false); + expect($.contains($vegetables[0], $fruits[0])).toBe(false); + expect($.contains($fruits[0], $fruits[0])).toBe(false); + expect($.contains($vegetables[0], $vegetables[0])).toBe(false); + }); + }); + + describe('.root', () => { + it('() : should return a cheerio-wrapped root object', () => { + const $ = cheerio.load('foo'); + $.root().append('
    '); + expect($.html()).toBe( + 'foo
    ' + ); + }); + }); +}); diff --git a/src/static.ts b/src/static.ts new file mode 100644 index 0000000000..d43497c2c7 --- /dev/null +++ b/src/static.ts @@ -0,0 +1,313 @@ +import type { CheerioAPI, Cheerio } from './cheerio'; +import { Node, Document } from 'domhandler'; +import { + InternalOptions, + CheerioOptions, + default as defaultOptions, + flatten as flattenOptions, +} from './options'; +import { select } from 'cheerio-select'; +import { ElementType, DomUtils } from 'htmlparser2'; +import { render as renderWithParse5 } from './parsers/parse5'; +import { render as renderWithHtmlparser2 } from './parsers/htmlparser2'; + +/** + * Helper function to render a DOM. + * + * @param that - Cheerio instance to render. + * @param dom - The DOM to render. Defaults to `that`'s root. + * @param options - Options for rendering. + * @returns The rendered document. + */ +function render( + that: typeof Cheerio | undefined, + dom: ArrayLike | Node | string | undefined, + options: InternalOptions +): string { + if (!dom) { + if (that?._root?.children) { + dom = that._root.children; + } else { + return ''; + } + } else if (typeof dom === 'string') { + dom = select(dom, that?._root ?? [], options); + } + + return options.xmlMode || options._useHtmlParser2 + ? // FIXME: Pull in new version of dom-serializer to fix this. + renderWithHtmlparser2(dom as Node[], options) + : renderWithParse5(dom); +} + +/** + * Checks if a passed object is an options object. + * + * @param dom - Object to check if it is an options object. + * @returns Whether the object is an options object. + */ +function isOptions( + dom?: string | ArrayLike | Node | InternalOptions | null +): dom is InternalOptions { + return ( + typeof dom === 'object' && + dom != null && + !('length' in dom) && + !('type' in dom) + ); +} + +/** + * Renders the document. + * + * @param options - Options for the renderer. + * @returns The rendered document. 
+ */ +export function html( + this: typeof Cheerio | void, + options?: CheerioOptions +): string; +/** + * Renders the document. + * + * @param dom - Element to render. + * @param options - Options for the renderer. + * @returns The rendered document. + */ +export function html( + this: typeof Cheerio | void, + dom?: string | ArrayLike | Node, + options?: CheerioOptions +): string; +export function html( + this: typeof Cheerio | void, + dom?: string | ArrayLike | Node | CheerioOptions, + options?: CheerioOptions +): string { + /* + * Be flexible about parameters, sometimes we call html(), + * with options as only parameter + * check dom argument for dom element specific properties + * assume there is no 'length' or 'type' properties in the options object + */ + if (!options && isOptions(dom)) { + options = dom; + dom = undefined; + } + + /* + * Sometimes `$.html()` is used without preloading html, + * so fallback non-existing options to the default ones. + */ + options = { + ...defaultOptions, + ...(this ? this._options : {}), + ...flattenOptions(options ?? {}), + }; + + return render( + this || undefined, + dom as string | Cheerio | Node | undefined, + options + ); +} + +/** + * Render the document as XML. + * + * @param dom - Element to render. + * @returns The rendered document. + */ +export function xml( + this: typeof Cheerio, + dom?: string | ArrayLike | Node +): string { + const options = { ...this._options, xmlMode: true }; + + return render(this, dom, options); +} + +/** + * Render the document as text. + * + * @param elements - Elements to render. + * @returns The rendered document. + */ +export function text( + this: typeof Cheerio | void, + elements?: ArrayLike +): string { + const elems = elements ? elements : this ?
this.root() : []; + + let ret = ''; + + for (let i = 0; i < elems.length; i++) { + const elem = elems[i]; + if (DomUtils.isText(elem)) ret += elem.data; + else if ( + DomUtils.hasChildren(elem) && + elem.type !== ElementType.Comment && + elem.type !== ElementType.Script && + elem.type !== ElementType.Style + ) { + ret += text(elem.children); + } + } + + return ret; +} + +/** + * Parses a string into an array of DOM nodes. The `context` argument has no + * meaning for Cheerio, but it is maintained for API compatibility with jQuery. + * + * @param data - Markup that will be parsed. + * @param context - Will be ignored. If it is a boolean it will be used as the + * value of `keepScripts`. + * @param keepScripts - If false all scripts will be removed. + * @returns The parsed DOM. + * @alias Cheerio.parseHTML + * @see {@link https://api.jquery.com/jQuery.parseHTML/} + */ +export function parseHTML( + this: typeof Cheerio, + data: string, + context?: unknown | boolean, + keepScripts?: boolean +): Node[]; +export function parseHTML(this: typeof Cheerio, data?: '' | null): null; +export function parseHTML( + this: typeof Cheerio, + data?: string | null, + context?: unknown | boolean, + keepScripts = typeof context === 'boolean' ? context : false +): Node[] | null { + if (!data || typeof data !== 'string') { + return null; + } + + if (typeof context === 'boolean') { + keepScripts = context; + } + + const parsed = this.load(data, defaultOptions, false); + if (!keepScripts) { + parsed('script').remove(); + } + + /* + * The `children` array is used by Cheerio internally to group elements that + * share the same parents. When nodes created through `parseHTML` are + * inserted into previously-existing DOM structures, they will be removed + * from the `children` array. The results of `parseHTML` should remain + * constant across these operations, so a shallow copy should be returned. 
+ */ + return parsed.root()[0].children.slice(); +} + +/** + * Sometimes you need to work with the top-level root element. To query it, you + * can use `$.root()`. + * + * @example + * + * ```js + * $.root().append('
      ').html(); + * //=>
        ...
        + * ``` + * + * @returns Cheerio instance wrapping the root node. + * @alias Cheerio.root + */ +export function root(this: typeof Cheerio): Cheerio { + const fn = (this as unknown) as CheerioAPI; + return fn(this._root); +} + +/** + * Checks to see if the `contained` DOM element is a descendant of the + * `container` DOM element. + * + * @param container - Potential parent node. + * @param contained - Potential child node. + * @returns Indicates if the nodes contain one another. + * @alias Cheerio.contains + * @see {@link https://api.jquery.com/jQuery.contains/} + */ +export function contains(container: Node, contained: Node): boolean { + // According to the jQuery API, an element does not "contain" itself + if (contained === container) { + return false; + } + + /* + * Step up the descendants, stopping when the root element is reached + * (signaled by `.parent` returning a reference to the same object) + */ + let next: Node | null = contained; + while (next && next !== next.parent) { + next = next.parent; + if (next === container) { + return true; + } + } + + return false; +} + +interface WritableArrayLike extends ArrayLike { + length: number; + [n: number]: T; +} + +/** + * $.merge(). + * + * @param arr1 - First array. + * @param arr2 - Second array. + * @returns `arr1`, with elements of `arr2` inserted. + * @alias Cheerio.merge + * @see {@link https://api.jquery.com/jQuery.merge/} + */ +export function merge( + arr1: WritableArrayLike, + arr2: ArrayLike +): ArrayLike | undefined { + if (!isArrayLike(arr1) || !isArrayLike(arr2)) { + return; + } + let newLength = arr1.length; + const len = +arr2.length; + + for (let i = 0; i < len; i++) { + arr1[newLength++] = arr2[i]; + } + arr1.length = newLength; + return arr1; +} + +/** + * @param item - Item to check. + * @returns Indicates if the item is array-like. 
+ */ +function isArrayLike(item: any): item is ArrayLike { + if (Array.isArray(item)) { + return true; + } + + if ( + typeof item !== 'object' || + !Object.prototype.hasOwnProperty.call(item, 'length') || + typeof item.length !== 'number' || + item.length < 0 + ) { + return false; + } + + for (let i = 0; i < item.length; i++) { + if (!(i in item)) { + return false; + } + } + return true; +} diff --git a/src/types.ts b/src/types.ts new file mode 100644 index 0000000000..520679cee7 --- /dev/null +++ b/src/types.ts @@ -0,0 +1,60 @@ +type LowercaseLetters = + | 'a' + | 'b' + | 'c' + | 'd' + | 'e' + | 'f' + | 'g' + | 'h' + | 'i' + | 'j' + | 'k' + | 'l' + | 'm' + | 'n' + | 'o' + | 'p' + | 'q' + | 'r' + | 's' + | 't' + | 'u' + | 'v' + | 'w' + | 'x' + | 'y' + | 'z'; + +type AlphaNumeric = + | LowercaseLetters + | Uppercase + | `${number}`; + +type SelectorSpecial = '.' | '#' | ':' | '|' | '>' | '+' | '~' | '['; +/** + * Type for identifying selectors. Allows us to "upgrade" queries using + * selectors to return `Element`s. + */ +export type SelectorType = + | `${SelectorSpecial}${AlphaNumeric}${string}` + | `${AlphaNumeric}${string}`; + +import type { Cheerio } from './cheerio'; +import type { Node, Element } from 'domhandler'; + +/** Elements that can be passed to manipulation methods. */ +export type BasicAcceptedElems = Cheerio | T[] | T | string; +/** Elements that can be passed to manipulation methods, including functions. */ +export type AcceptedElems = + | BasicAcceptedElems + | ((this: T, i: number, el: T) => BasicAcceptedElems); + +/** Function signature, for traversal methods. */ +export type FilterFunction = (this: T, i: number, el: T) => boolean; +/** Supported filter types, for traversal methods. 
*/ +export type AcceptedFilters = + | string + | FilterFunction + | Node + | Cheerio; diff --git a/src/utils.ts b/src/utils.ts new file mode 100644 index 0000000000..197c87c859 --- /dev/null +++ b/src/utils.ts @@ -0,0 +1,116 @@ +import { DomUtils } from 'htmlparser2'; +import { Node, cloneNode, Document } from 'domhandler'; +import type { Cheerio } from './cheerio'; + +/** + * Check if the DOM element is a tag. + * + * `isTag(type)` includes `'); - expect(cheerio.text($.root())).toBe(''); - }); - - it('(cheerio object) : should omit style tags', function () { - var $ = cheerio.load( - '' - ); - expect($.text()).toBe(''); - }); - - it('(cheerio object) : should include text contents of children omitting style and script tags', function () { - var $ = cheerio.load( - 'Welcome
        Hello, testing text function,
        End of messege' - ); - expect(cheerio.text($.root())).toBe( - 'Welcome Hello, testing text function,End of messege' - ); - }); - }); - - describe('.load', function () { - it('(html) : should retain original root after creating a new node', function () { - var $ = cheerio.load('
          '); - expect($('body')).toHaveLength(1); - $('', { - xml: true, - }); - expect($('script')[0].children[0].type).toBe('tag'); - }); - - it('(buffer) : should accept a buffer', function () { - var html = 'foo'; - // eslint-disable-next-line node/no-unsupported-features/node-builtins - var $html = cheerio.load(Buffer.from(html)); - expect($html.html()).toBe(html); - }); - }); - - describe('.clone', function () { - it('() : should return a copy', function () { - var $src = cheerio( - '
          foobarbaz
          ' - ).children(); - var $elem = $src.clone(); - expect($elem.length).toBe(3); - expect($elem.parent()).toHaveLength(0); - expect($elem.text()).toBe($src.text()); - $src.text('rofl'); - expect($elem.text()).not.toBe($src.text()); - }); - - it('() : should return a copy of document', function () { - var $src = cheerio - .load('
          foo
          bar') - .root() - .children(); - var $elem = $src.clone(); - expect($elem.length).toBe(1); - expect($elem.parent()).toHaveLength(0); - expect($elem.text()).toBe($src.text()); - $src.text('rofl'); - expect($elem.text()).not.toBe($src.text()); - }); - - it('() : should preserve parsing options', function () { - var $ = cheerio.load('
          π
          ', { decodeEntities: false }); - var $div = $('div'); - - expect($div.text()).toBe($div.clone().text()); - }); - }); - - describe('.parseHTML', function () { - var $ = cheerio.load(''); - - it('() : returns null', function () { - expect($.parseHTML()).toBe(null); - }); - - it('(null) : returns null', function () { - expect($.parseHTML(null)).toBe(null); - }); - - it('("") : returns null', function () { - expect($.parseHTML('')).toBe(null); - }); - - it('(largeHtmlString) : parses large HTML strings', function () { - var html = new Array(10).join('
          '); - var nodes = $.parseHTML(html); - - expect(nodes.length).toBeGreaterThan(4); - expect(nodes).toBeInstanceOf(Array); - }); - - it('("'; - expect($.parseHTML(html)).toHaveLength(0); - }); - - it('("'; - expect($.parseHTML(html, true)[0].tagName).toMatch(/script/i); - }); - - it('("scriptAndNonScript) : preserves non-script nodes', function () { - var html = '
          '; - expect($.parseHTML(html)[0].tagName).toMatch(/div/i); - }); - - it('(scriptAndNonScript, true) : Preserves script position', function () { - var html = '
          '; - expect($.parseHTML(html, true)[0].tagName).toMatch(/script/i); - }); - - it('(text) : returns a text node', function () { - expect($.parseHTML('text')[0].type).toBe('text'); - }); - - it('(\\ttext) : preserves leading whitespace', function () { - expect($.parseHTML('\t
          ')[0].data).toBe('\t'); - }); - - it('( text) : Leading spaces are treated as text nodes', function () { - expect($.parseHTML('
          ')[0].type).toBe('text'); - }); - - it('(html) : should preserve content', function () { - var html = '
          test div
          '; - expect(cheerio($.parseHTML(html)[0]).html()).toBe('test div'); - }); - - it('(malformedHtml) : should not break', function () { - expect($.parseHTML('')).toHaveLength(1); - }); - - it('(garbageInput) : should not cause an error', function () { - expect( - $.parseHTML('<#if>

          This is a test.

          <#/if>') || true - ).toBeTruthy(); - }); - - it('(text) : should return an array that is not effected by DOM manipulation methods', function () { - var $div = cheerio.load('
          '); - var elems = $div.parseHTML(''); - - $div('div').append(elems); - - expect(elems).toHaveLength(2); - }); - }); - - describe('.merge', function () { - var $ = cheerio.load(''); - var arr1; - var arr2; - - beforeEach(function () { - arr1 = [1, 2, 3]; - arr2 = [4, 5, 6]; - }); - - it('should be a function', function () { - expect(typeof $.merge).toBe('function'); - }); - - it('(arraylike, arraylike) : should return an array', function () { - var ret = $.merge(arr1, arr2); - expect(typeof ret).toBe('object'); - expect(Array.isArray(ret)).toBe(true); - }); - - it('(arraylike, arraylike) : should modify the first array', function () { - $.merge(arr1, arr2); - expect(arr1).toHaveLength(6); - }); - - it('(arraylike, arraylike) : should not modify the second array', function () { - $.merge(arr1, arr2); - expect(arr2).toHaveLength(3); - }); - - it('(arraylike, arraylike) : should handle objects that arent arrays, but are arraylike', function () { - arr1 = {}; - arr2 = {}; - arr1.length = 3; - arr1[0] = 'a'; - arr1[1] = 'b'; - arr1[2] = 'c'; - arr2.length = 3; - arr2[0] = 'd'; - arr2[1] = 'e'; - arr2[2] = 'f'; - $.merge(arr1, arr2); - expect(arr1).toHaveLength(6); - expect(arr1[3]).toBe('d'); - expect(arr1[4]).toBe('e'); - expect(arr1[5]).toBe('f'); - expect(arr2).toHaveLength(3); - }); - - it('(?, ?) 
: should gracefully reject invalid inputs', function () { - var ret = $.merge([4], 3); - expect(ret).toBeFalsy(); - ret = $.merge({}, {}); - expect(ret).toBeFalsy(); - ret = $.merge([], {}); - expect(ret).toBeFalsy(); - ret = $.merge({}, []); - expect(ret).toBeFalsy(); - var fakeArray1 = { length: 3 }; - fakeArray1[0] = 'a'; - fakeArray1[1] = 'b'; - fakeArray1[3] = 'd'; - ret = $.merge(fakeArray1, []); - expect(ret).toBeFalsy(); - ret = $.merge([], fakeArray1); - expect(ret).toBeFalsy(); - fakeArray1 = {}; - fakeArray1.length = '7'; - ret = $.merge(fakeArray1, []); - expect(ret).toBeFalsy(); - fakeArray1.length = -1; - ret = $.merge(fakeArray1, []); - expect(ret).toBeFalsy(); - }); - - it('(?, ?) : should no-op on invalid inputs', function () { - var fakeArray1 = { length: 3 }; - fakeArray1[0] = 'a'; - fakeArray1[1] = 'b'; - fakeArray1[3] = 'd'; - $.merge(fakeArray1, []); - expect(fakeArray1).toHaveLength(3); - expect(fakeArray1[0]).toBe('a'); - expect(fakeArray1[1]).toBe('b'); - expect(fakeArray1[3]).toBe('d'); - $.merge([], fakeArray1); - expect(fakeArray1).toHaveLength(3); - expect(fakeArray1[0]).toBe('a'); - expect(fakeArray1[1]).toBe('b'); - expect(fakeArray1[3]).toBe('d'); - }); - }); - - describe('.contains', function () { - var $; - - beforeEach(function () { - $ = cheerio.load(fixtures.food); - }); - - it('(container, contained) : should correctly detect the provided element', function () { - var $food = $('#food'); - var $fruits = $('#fruits'); - var $apple = $('.apple'); - - expect($.contains($food[0], $fruits[0])).toBe(true); - expect($.contains($food[0], $apple[0])).toBe(true); - }); - - it('(container, other) : should not detect elements that are not contained', function () { - var $fruits = $('#fruits'); - var $vegetables = $('#vegetables'); - var $apple = $('.apple'); - - expect($.contains($vegetables[0], $apple[0])).toBe(false); - expect($.contains($fruits[0], $vegetables[0])).toBe(false); - expect($.contains($vegetables[0], 
$fruits[0])).toBe(false); - expect($.contains($fruits[0], $fruits[0])).toBe(false); - expect($.contains($vegetables[0], $vegetables[0])).toBe(false); - }); - }); - - describe('.root', function () { - it('() : should return a cheerio-wrapped root object', function () { - var $ = cheerio.load('foo'); - $.root().append('
          '); - expect($.html()).toBe( - 'foo
          ' - ); - }); - }); -}); diff --git a/test/cheerio.js b/test/cheerio.js deleted file mode 100644 index 337b326523..0000000000 --- a/test/cheerio.js +++ /dev/null @@ -1,491 +0,0 @@ -'use strict'; -var htmlparser2 = require('htmlparser2'); -var cheerio = require('..'); -var utils = require('../lib/utils'); -var fixtures = require('./__fixtures__/fixtures'); -var fruits = fixtures.fruits; -var food = fixtures.food; - -// HTML -var script = ''; -var multiclass = '

          Save

          '; - -describe('cheerio', function () { - it('should get the version', function () { - expect(cheerio.version).toMatch(/\d+\.\d+\.\d+/); - }); - - it('cheerio(null) should be empty', function () { - expect(cheerio(null)).toHaveLength(0); - }); - - it('cheerio(undefined) should be empty', function () { - expect(cheerio(undefined)).toHaveLength(0); - }); - - it("cheerio('') should be empty", function () { - expect(cheerio('')).toHaveLength(0); - }); - - it('cheerio(selector) with no context or root should be empty', function () { - expect(cheerio('.h2')).toHaveLength(0); - expect(cheerio('#fruits')).toHaveLength(0); - }); - - it('cheerio(node) : should override previously-loaded nodes', function () { - var $ = cheerio.load('
          '); - var spanNode = $('span')[0]; - var $span = $(spanNode); - expect($span[0]).toBe(spanNode); - }); - - it('should be able to create html without a root or context', function () { - var $h2 = cheerio('

          '); - expect($h2).not.toHaveLength(0); - expect($h2).toHaveLength(1); - expect($h2[0].tagName).toBe('h2'); - }); - - it('should be able to create complicated html', function () { - var $script = cheerio(script); - expect($script).not.toHaveLength(0); - expect($script).toHaveLength(1); - expect($script[0].attribs.src).toBe('script.js'); - expect($script[0].attribs.type).toBe('text/javascript'); - expect($script[0].childNodes).toHaveLength(0); - }); - - function testAppleSelect($apple) { - expect($apple).toHaveLength(1); - $apple = $apple[0]; - expect($apple.parentNode.tagName).toBe('ul'); - expect($apple.prev).toBe(null); - expect($apple.next.attribs['class']).toBe('orange'); - expect($apple.childNodes).toHaveLength(1); - expect($apple.childNodes[0].data).toBe('Apple'); - } - - // eslint-disable-next-line jest/expect-expect - it('should be able to select .apple with only a context', function () { - var $apple = cheerio('.apple', fruits); - testAppleSelect($apple); - }); - - // eslint-disable-next-line jest/expect-expect - it('should be able to select .apple with a node as context', function () { - var $apple = cheerio('.apple', cheerio(fruits)[0]); - testAppleSelect($apple); - }); - - // eslint-disable-next-line jest/expect-expect - it('should be able to select .apple with only a root', function () { - var $apple = cheerio('.apple', null, fruits); - testAppleSelect($apple); - }); - - it('should be able to select an id', function () { - var $fruits = cheerio('#fruits', null, fruits); - expect($fruits).toHaveLength(1); - expect($fruits[0].attribs.id).toBe('fruits'); - }); - - it('should be able to select a tag', function () { - var $ul = cheerio('ul', fruits); - expect($ul).toHaveLength(1); - expect($ul[0].tagName).toBe('ul'); - }); - - it('should accept a node reference as a context', function () { - var $elems = cheerio('
          '); - expect(cheerio('span', $elems[0])).toHaveLength(1); - }); - - it('should accept an array of node references as a context', function () { - var $elems = cheerio('
          '); - expect(cheerio('span', $elems.toArray())).toHaveLength(1); - }); - - it('should select only elements inside given context (Issue #193)', function () { - var $ = cheerio.load(food); - var $fruits = $('#fruits'); - var fruitElements = $('li', $fruits); - - expect(fruitElements).toHaveLength(3); - }); - - it('should be able to select multiple tags', function () { - var $fruits = cheerio('li', null, fruits); - expect($fruits).toHaveLength(3); - var classes = ['apple', 'orange', 'pear']; - $fruits.each(function (idx, $fruit) { - expect($fruit.attribs['class']).toBe(classes[idx]); - }); - }); - - // eslint-disable-next-line jest/expect-expect - it('should be able to do: cheerio("#fruits .apple")', function () { - var $apple = cheerio('#fruits .apple', fruits); - testAppleSelect($apple); - }); - - // eslint-disable-next-line jest/expect-expect - it('should be able to do: cheerio("li.apple")', function () { - var $apple = cheerio('li.apple', fruits); - testAppleSelect($apple); - }); - - // eslint-disable-next-line jest/expect-expect - it('should be able to select by attributes', function () { - var $apple = cheerio('li[class=apple]', fruits); - testAppleSelect($apple); - }); - - it('should be able to select multiple classes: cheerio(".btn.primary")', function () { - var $a = cheerio('.btn.primary', multiclass); - expect($a).toHaveLength(1); - expect($a[0].childNodes[0].data).toBe('Save'); - }); - - it('should not create a top-level node', function () { - var $elem = cheerio('* div', '
          '); - expect($elem).toHaveLength(0); - }); - - it('should be able to select multiple elements: cheerio(".apple, #fruits")', function () { - var $elems = cheerio('.apple, #fruits', fruits); - expect($elems).toHaveLength(2); - - var $apple = $elems.toArray().filter(function (elem) { - return elem.attribs['class'] === 'apple'; - }); - var $fruits = $elems.toArray().filter(function (elem) { - return elem.attribs.id === 'fruits'; - }); - testAppleSelect($apple); - expect($fruits[0].attribs.id).toBe('fruits'); - }); - - it('should select first element cheerio(:first)', function () { - var $elem = cheerio('li:first', fruits); - expect($elem.attr('class')).toBe('apple'); - - var $filtered = cheerio('li', fruits).filter(':even'); - expect($filtered).toHaveLength(2); - }); - - it('should be able to select immediate children: cheerio("#fruits > .pear")', function () { - var $food = cheerio(food); - cheerio('.pear', $food).append('
        • Another Pear!
        • '); - expect(cheerio('#fruits .pear', $food)).toHaveLength(2); - var $elem = cheerio('#fruits > .pear', $food); - expect($elem).toHaveLength(1); - expect($elem.attr('class')).toBe('pear'); - }); - - it('should be able to select immediate children: cheerio(".apple + .pear")', function () { - var $elem = cheerio('.apple + li', fruits); - expect($elem).toHaveLength(1); - $elem = cheerio('.apple + .pear', fruits); - expect($elem).toHaveLength(0); - $elem = cheerio('.apple + .orange', fruits); - expect($elem).toHaveLength(1); - expect($elem.attr('class')).toBe('orange'); - }); - - it('should be able to select immediate children: cheerio(".apple ~ .pear")', function () { - var $elem = cheerio('.apple ~ li', fruits); - expect($elem).toHaveLength(2); - $elem = cheerio('.apple ~ .pear', fruits); - expect($elem.attr('class')).toBe('pear'); - }); - - it('should handle wildcards on attributes: cheerio("li[class*=r]")', function () { - var $elem = cheerio('li[class*=r]', fruits); - expect($elem).toHaveLength(2); - expect($elem.eq(0).attr('class')).toBe('orange'); - expect($elem.eq(1).attr('class')).toBe('pear'); - }); - - it('should handle beginning of attr selectors: cheerio("li[class^=o]")', function () { - var $elem = cheerio('li[class^=o]', fruits); - expect($elem).toHaveLength(1); - expect($elem.eq(0).attr('class')).toBe('orange'); - }); - - it('should handle beginning of attr selectors: cheerio("li[class$=e]")', function () { - var $elem = cheerio('li[class$=e]', fruits); - expect($elem).toHaveLength(2); - expect($elem.eq(0).attr('class')).toBe('apple'); - expect($elem.eq(1).attr('class')).toBe('orange'); - }); - - it('should gracefully degrade on complex, unmatched queries', function () { - var $elem = cheerio('Eastern States Cup #8-fin <1br>Downhill '); - expect($elem).toHaveLength(0); - }); - - it('(extended Array) should not interfere with prototype methods (issue #119)', function () { - var extended = []; - extended.find = extended.children = extended.each 
= function () {}; - var $empty = cheerio(extended); - - expect($empty.find).toBe(cheerio.prototype.find); - expect($empty.children).toBe(cheerio.prototype.children); - expect($empty.each).toBe(cheerio.prototype.each); - }); - - it('cheerio.html(null) should return a "" string', function () { - expect(cheerio.html(null)).toBe(''); - }); - - it('should set html(number) as a string', function () { - var $elem = cheerio('
          '); - $elem.html(123); - expect(typeof $elem.text()).toBe('string'); - }); - - it('should set text(number) as a string', function () { - var $elem = cheerio('
          '); - $elem.text(123); - expect(typeof $elem.text()).toBe('string'); - }); - - describe('.load', function () { - it('should generate selections as proper instances', function () { - var $ = cheerio.load(fruits); - - expect($('.apple')).toBeInstanceOf($); - }); - - // issue #1092 - it('should handle a character `)` in `:contains` selector', function () { - var result = cheerio.load('

          )aaa

          ')(":contains('\\)aaa')"); - expect(result).toHaveLength(3); - expect(result.first().prop('tagName')).toBe('HTML'); - expect(result.eq(1).prop('tagName')).toBe('BODY'); - expect(result.last().prop('tagName')).toBe('P'); - }); - - it('should be able to filter down using the context', function () { - var $ = cheerio.load(fruits); - var apple = $('.apple', 'ul'); - var lis = $('li', 'ul'); - - expect(apple).toHaveLength(1); - expect(lis).toHaveLength(3); - }); - - it('should allow loading a pre-parsed DOM', function () { - var dom = htmlparser2.parseDOM(food); - var $ = cheerio.load(dom); - - expect($('ul')).toHaveLength(3); - }); - - it('should allow loading a single element', function () { - var el = htmlparser2.parseDOM(food)[0]; - var $ = cheerio.load(el); - - expect($('ul')).toHaveLength(3); - }); - - it('should render xml in html() when options.xml = true', function () { - var str = ''; - var expected = ''; - var $ = cheerio.load(str, { xml: true }); - - expect($('MixedCaseTag').get(0).tagName).toBe('MixedCaseTag'); - expect($.html()).toBe(expected); - }); - - it('should render xml in html() when options.xml = true passed to html()', function () { - var str = ''; - // since parsing done without xml flag, all tags converted to lowercase - var expectedXml = - ''; - var expectedNoXml = - ''; - var $ = cheerio.load(str); - - expect($('MixedCaseTag').get(0).tagName).toBe('mixedcasetag'); - expect($.html()).toBe(expectedNoXml); - expect($.html({ xml: true })).toBe(expectedXml); - }); - - it('should respect options on the element level', function () { - var str = - 'Some test

          Copyright © 2003-2014

          '; - var expectedHtml = '

          Copyright © 2003-2014

          '; - var expectedXml = '

          Copyright © 2003-2014

          '; - var domNotEncoded = cheerio.load(str, { - xml: { decodeEntities: false }, - }); - var domEncoded = cheerio.load(str); - - expect(domNotEncoded('footer').html()).toBe(expectedHtml); - expect(domEncoded('footer').html()).toBe(expectedXml); - }); - - it('should use htmlparser2 if xml option is used', function () { - var str = '
          '; - var dom = cheerio.load(str, null, false); - expect(dom.html()).toBe(str); - }); - - it('should return a fully-qualified Function', function () { - var $ = cheerio.load('
          '); - - expect($).toBeInstanceOf(Function); - }); - - describe('prototype extensions', function () { - it('should honor extensions defined on `prototype` property', function () { - var $ = cheerio.load('
          '); - $.prototype.myPlugin = function () { - return { - context: this, - args: arguments, - }; - }; - - var $div = $('div'); - - expect(typeof $div.myPlugin).toBe('function'); - expect($div.myPlugin().context).toBe($div); - expect( - Array.prototype.slice.call($div.myPlugin(1, 2, 3).args) - ).toStrictEqual([1, 2, 3]); - }); - - it('should honor extensions defined on `fn` property', function () { - var $ = cheerio.load('
          '); - $.fn.myPlugin = function () { - return { - context: this, - args: arguments, - }; - }; - - var $div = $('div'); - - expect(typeof $div.myPlugin).toBe('function'); - expect($div.myPlugin().context).toBe($div); - expect( - Array.prototype.slice.call($div.myPlugin(1, 2, 3).args) - ).toStrictEqual([1, 2, 3]); - }); - - it('should isolate extensions between loaded functions', function () { - var $a = cheerio.load('
          '); - var $b = cheerio.load('
          '); - - $a.prototype.foo = function () {}; - - expect($b('div').foo).toBeUndefined(); - }); - }); - }); - describe('util functions', function () { - it('camelCase function test', function () { - expect(utils.camelCase('cheerio.js')).toBe('cheerioJs'); - expect(utils.camelCase('camel-case-')).toBe('camelCase'); - expect(utils.camelCase('__directory__')).toBe('_directory_'); - expect(utils.camelCase('_one-two.three')).toBe('OneTwoThree'); - }); - - it('cssCase function test', function () { - expect(utils.cssCase('camelCase')).toBe('camel-case'); - expect(utils.cssCase('jQuery')).toBe('j-query'); - expect(utils.cssCase('neverSayNever')).toBe('never-say-never'); - expect(utils.cssCase('CSSCase')).toBe('-c-s-s-case'); - }); - - it('cloneDom : should be able clone single Elements', function () { - var main = cheerio('

          Cheerio

          '); - var result = []; - utils.domEach(main, function (i, el) { - result = result.concat(utils.cloneDom(el)); - }); - expect(result).toHaveLength(1); - expect(result[0]).not.toBe(main[0]); - expect(main[0].children.length).toBe(result[0].children.length); - expect(cheerio(result).text()).toBe(main.text()); - }); - - it('isHtml function test', function () { - expect(utils.isHtml('')).toBe(true); - expect(utils.isHtml('\n\n')).toBe(true); - expect(utils.isHtml('#main')).toBe(false); - expect(utils.isHtml('\n

          foo

          bar\n')).toBe(true); - expect(utils.isHtml('dog

          fox

          cat')).toBe(true); - expect(utils.isHtml('

          fox

          cat')).toBe(true); - expect(utils.isHtml('\n

          fox

          cat\n')).toBe(true); - expect(utils.isHtml('#

          fox

          cat#')).toBe(true); - expect(utils.isHtml('<123>')).toBe(false); - }); - }); - - describe('parse5 options', function () { - var noscript = fixtures.noscript; - - // should parse noscript tags only with false option value - test('{scriptingEnabled: ???}', function () { - var opt = 'scriptingEnabled'; - var options = {}; - var result; - - // [default] scriptingEnabled: true - tag contains one text element - result = cheerio.load(noscript)('noscript'); - expect(result).toHaveLength(1); - expect(result[0].children).toHaveLength(1); - expect(result[0].children[0].type).toBe('text'); - - // scriptingEnabled: false - content of noscript will parsed - options[opt] = false; - result = cheerio.load(fixtures.noscript, options)('noscript'); - expect(result).toHaveLength(1); - expect(result[0].children).toHaveLength(2); - expect(result[0].children[0].type).toBe('comment'); - expect(result[0].children[1].type).toBe('tag'); - expect(result[0].children[1].name).toBe('a'); - - // scriptingEnabled: ??? 
- should acts as true - var values = [undefined, null, 0, '']; - for (var val of values) { - options[opt] = val; - result = cheerio.load(noscript, options)('noscript'); - expect(result).toHaveLength(1); - expect(result[0].children).toHaveLength(1); - expect(result[0].children[0].type).toBe('text'); - } - }); - - // should contain location data only with truthful option value - test('{sourceCodeLocationInfo: ???}', function () { - var prop = 'sourceCodeLocation'; - var opt = 'sourceCodeLocationInfo'; - var options = {}; - var result; - var i; - - // Location data should not be present - var values = [undefined, null, 0, false, '']; - for (i = 0; i < values.length; i++) { - options[opt] = values[i]; - result = cheerio.load(noscript, options)('noscript'); - expect(result).toHaveLength(1); - expect(result[0]).not.toHaveProperty(prop); - } - - // Location data should be present - values = [true, 1, 'test']; - for (i = 0; i < values.length; i++) { - options[opt] = values[i]; - result = cheerio.load(noscript, options)('noscript'); - expect(result).toHaveLength(1); - expect(result[0]).toHaveProperty(prop); - expect(typeof result[0][prop]).toBe('object'); - } - }); - }); -}); diff --git a/test/parse.js b/test/parse.js deleted file mode 100644 index d022eb8259..0000000000 --- a/test/parse.js +++ /dev/null @@ -1,399 +0,0 @@ -'use strict'; -var parse = require('../lib/parse'); -var defaultOpts = require('../lib/options').default; - -// Tags -var basic = ''; -var siblings = '

          '; - -// Single Tags -var single = '
          '; -var singleWrong = '
          '; - -// Children -var children = '
          '; -var li = '
        • Durian
        • '; - -// Attributes -var attributes = 'man waving'; -var noValueAttribute = ''; - -// Comments -var comment = ''; -var conditional = - ''; - -// Text -var text = 'lorem ipsum'; - -// Script -var script = ''; -var scriptEmpty = ''; - -// Style -var style = ''; -var styleEmpty = ''; - -// Directives -var directive = ''; - -describe('parse', function () { - describe('evaluate', function () { - it('should parse basic empty tags: ' + basic, function () { - var tag = parse(basic, defaultOpts, true).children[0]; - expect(tag.type).toBe('tag'); - expect(tag.tagName).toBe('html'); - expect(tag.childNodes).toHaveLength(2); - }); - - it('should handle sibling tags: ' + siblings, function () { - var dom = parse(siblings, defaultOpts, false).children; - var h2 = dom[0]; - var p = dom[1]; - - expect(dom).toHaveLength(2); - expect(h2.tagName).toBe('h2'); - expect(p.tagName).toBe('p'); - }); - - it('should handle single tags: ' + single, function () { - var tag = parse(single, defaultOpts, false).children[0]; - expect(tag.type).toBe('tag'); - expect(tag.tagName).toBe('br'); - expect(tag.childNodes).toHaveLength(0); - }); - - it('should handle malformatted single tags: ' + singleWrong, function () { - var tag = parse(singleWrong, defaultOpts, false).children[0]; - expect(tag.type).toBe('tag'); - expect(tag.tagName).toBe('br'); - expect(tag.childNodes).toHaveLength(0); - }); - - it('should handle tags with children: ' + children, function () { - var tag = parse(children, defaultOpts, true).children[0]; - expect(tag.type).toBe('tag'); - expect(tag.tagName).toBe('html'); - expect(tag.childNodes).toBeTruthy(); - expect(tag.childNodes[1].tagName).toBe('body'); - expect(tag.childNodes[1].childNodes).toHaveLength(1); - }); - - it('should handle tags with children: ' + li, function () { - var tag = parse(li, defaultOpts, false).children[0]; - expect(tag.childNodes).toHaveLength(1); - expect(tag.childNodes[0].data).toBe('Durian'); - }); - - it('should handle tags with attributes: 
' + attributes, function () { - var attrs = parse(attributes, defaultOpts, false).children[0].attribs; - expect(attrs).toBeTruthy(); - expect(attrs.src).toBe('hello.png'); - expect(attrs.alt).toBe('man waving'); - }); - - it('should handle value-less attributes: ' + noValueAttribute, function () { - var attrs = parse(noValueAttribute, defaultOpts, false).children[0] - .attribs; - expect(attrs).toBeTruthy(); - expect(attrs.disabled).toBe(''); - }); - - it('should handle comments: ' + comment, function () { - var elem = parse(comment, defaultOpts, false).children[0]; - expect(elem.type).toBe('comment'); - expect(elem.data).toBe(' sexy '); - }); - - it('should handle conditional comments: ' + conditional, function () { - var elem = parse(conditional, defaultOpts, false).children[0]; - expect(elem.type).toBe('comment'); - expect(elem.data).toBe( - conditional.replace('', '') - ); - }); - - it('should handle text: ' + text, function () { - var text_ = parse(text, defaultOpts, false).children[0]; - expect(text_.type).toBe('text'); - expect(text_.data).toBe('lorem ipsum'); - }); - - it('should handle script tags: ' + script, function () { - var script_ = parse(script, defaultOpts, false).children[0]; - expect(script_.type).toBe('script'); - expect(script_.tagName).toBe('script'); - expect(script_.attribs.type).toBe('text/javascript'); - expect(script_.childNodes).toHaveLength(1); - expect(script_.childNodes[0].type).toBe('text'); - expect(script_.childNodes[0].data).toBe('alert("hi world!");'); - }); - - it('should handle style tags: ' + style, function () { - var style_ = parse(style, defaultOpts, false).children[0]; - expect(style_.type).toBe('style'); - expect(style_.tagName).toBe('style'); - expect(style_.attribs.type).toBe('text/css'); - expect(style_.childNodes).toHaveLength(1); - expect(style_.childNodes[0].type).toBe('text'); - expect(style_.childNodes[0].data).toBe(' h2 { color:blue; } '); - }); - - it('should handle directives: ' + directive, function () { - var 
elem = parse(directive, defaultOpts, true).children[0]; - expect(elem.type).toBe('directive'); - expect(elem.data).toBe('!DOCTYPE html ""'); - expect(elem.tagName).toBe('!doctype'); - }); - }); - - describe('.parse', function () { - // root test utility - function rootTest(root) { - expect(root.tagName).toBe('root'); - - expect(root.nextSibling).toBe(null); - expect(root.previousSibling).toBe(null); - expect(root.parentNode).toBe(null); - - var child = root.childNodes[0]; - expect(child.parentNode).toBe(root); - } - - it('should add root to: ' + basic, function () { - var root = parse(basic, defaultOpts, true); - rootTest(root); - expect(root.childNodes).toHaveLength(1); - expect(root.childNodes[0].tagName).toBe('html'); - }); - - it('should add root to: ' + siblings, function () { - var root = parse(siblings, defaultOpts, false); - rootTest(root); - expect(root.childNodes).toHaveLength(2); - expect(root.childNodes[0].tagName).toBe('h2'); - expect(root.childNodes[1].tagName).toBe('p'); - expect(root.childNodes[1].parent).toBe(root); - }); - - it('should add root to: ' + comment, function () { - var root = parse(comment, defaultOpts, false); - rootTest(root); - expect(root.childNodes).toHaveLength(1); - expect(root.childNodes[0].type).toBe('comment'); - }); - - it('should add root to: ' + text, function () { - var root = parse(text, defaultOpts, false); - rootTest(root); - expect(root.childNodes).toHaveLength(1); - expect(root.childNodes[0].type).toBe('text'); - }); - - it('should add root to: ' + scriptEmpty, function () { - var root = parse(scriptEmpty, defaultOpts, false); - rootTest(root); - expect(root.childNodes).toHaveLength(1); - expect(root.childNodes[0].type).toBe('script'); - }); - - it('should add root to: ' + styleEmpty, function () { - var root = parse(styleEmpty, defaultOpts, false); - rootTest(root); - expect(root.childNodes).toHaveLength(1); - expect(root.childNodes[0].type).toBe('style'); - }); - - it('should add root to: ' + directive, function () 
{ - var root = parse(directive, defaultOpts, true); - rootTest(root); - expect(root.childNodes).toHaveLength(2); - expect(root.childNodes[0].type).toBe('directive'); - }); - - it('should simply return root', function () { - var oldroot = parse(basic, defaultOpts, true); - var root = parse(oldroot, defaultOpts, true); - expect(root).toBe(oldroot); - rootTest(root); - expect(root.childNodes).toHaveLength(1); - expect(root.childNodes[0].tagName).toBe('html'); - }); - - it('should expose the DOM level 1 API', function () { - var root = parse( - '

          ', - defaultOpts, - false - ).childNodes[0]; - var childNodes = root.childNodes; - - expect(childNodes).toHaveLength(3); - - expect(root.tagName).toBe('div'); - expect(root.firstChild).toBe(childNodes[0]); - expect(root.lastChild).toBe(childNodes[2]); - - expect(childNodes[0].tagName).toBe('a'); - expect(childNodes[0].previousSibling).toBe(null); - expect(childNodes[0].nextSibling).toBe(childNodes[1]); - expect(childNodes[0].parentNode).toBe(root); - expect(childNodes[0].childNodes).toHaveLength(0); - expect(childNodes[0].firstChild).toBe(null); - expect(childNodes[0].lastChild).toBe(null); - - expect(childNodes[1].tagName).toBe('span'); - expect(childNodes[1].previousSibling).toBe(childNodes[0]); - expect(childNodes[1].nextSibling).toBe(childNodes[2]); - expect(childNodes[1].parentNode).toBe(root); - expect(childNodes[1].childNodes).toHaveLength(0); - expect(childNodes[1].firstChild).toBe(null); - expect(childNodes[1].lastChild).toBe(null); - - expect(childNodes[2].tagName).toBe('p'); - expect(childNodes[2].previousSibling).toBe(childNodes[1]); - expect(childNodes[2].nextSibling).toBe(null); - expect(childNodes[2].parentNode).toBe(root); - expect(childNodes[2].childNodes).toHaveLength(0); - expect(childNodes[2].firstChild).toBe(null); - expect(childNodes[2].lastChild).toBe(null); - }); - - it('Should parse less than or equal sign sign', function () { - var root = parse('A<=B', defaultOpts, false); - var childNodes = root.childNodes; - - expect(childNodes[0].tagName).toBe('i'); - expect(childNodes[0].childNodes[0].data).toBe('A'); - expect(childNodes[1].data).toBe('<='); - expect(childNodes[2].tagName).toBe('i'); - expect(childNodes[2].childNodes[0].data).toBe('B'); - }); - - it('Should ignore unclosed CDATA', function () { - var root = parse( - '', - defaultOpts, - false - ); - var childNodes = root.childNodes; - - expect(childNodes[0].tagName).toBe('a'); - expect(childNodes[1].tagName).toBe('script'); - expect(childNodes[1].childNodes[0].data).toBe('foo 
// to documents', function () { - var root = parse('', defaultOpts, true); - var childNodes = root.childNodes; - - expect(childNodes[0].tagName).toBe('html'); - expect(childNodes[0].childNodes[0].tagName).toBe('head'); - }); - - it('Should implicitly create around ', function () { - var root = parse('
          bar
          ', defaultOpts, false); - var childNodes = root.childNodes; - - expect(childNodes[0].tagName).toBe('table'); - expect(childNodes[0].childNodes.length).toBe(1); - expect(childNodes[0].childNodes[0].tagName).toBe('tbody'); - expect(childNodes[0].childNodes[0].childNodes[0].tagName).toBe('tr'); - expect( - childNodes[0].childNodes[0].childNodes[0].childNodes[0].tagName - ).toBe('td'); - expect( - childNodes[0].childNodes[0].childNodes[0].childNodes[0].childNodes[0] - .data - ).toBe('bar'); - }); - - it('Should parse custom tag ', function () { - var root = parse('test', defaultOpts, false); - var childNodes = root.childNodes; - - expect(childNodes.length).toBe(1); - expect(childNodes[0].tagName).toBe('line'); - expect(childNodes[0].childNodes[0].data).toBe('test'); - }); - - it('Should properly parse misnested table tags', function () { - var root = parse( - 'i1i2i3', - defaultOpts, - false - ); - var childNodes = root.childNodes; - - expect(childNodes.length).toBe(3); - - childNodes.forEach(function (child, i) { - expect(child.tagName).toBe('tr'); - expect(child.childNodes[0].tagName).toBe('td'); - expect(child.childNodes[0].childNodes[0].data).toBe('i' + (i + 1)); - }); - }); - - it('Should correctly parse data url attributes', function () { - var html = - '
          '; - var expectedAttr = - 'font-family:"butcherman-caps"; src:url(data:font/opentype;base64,AAEA...);'; - var root = parse(html, defaultOpts, false); - var childNodes = root.childNodes; - - expect(childNodes[0].attribs.style).toBe(expectedAttr); - }); - - it('Should treat tag content as text', function () { - var root = parse('<xmp><h2>', defaultOpts, false); - var childNodes = root.childNodes; - - expect(childNodes[0].childNodes[0].data).toBe('

          '); - }); - - it('Should correctly parse malformed numbered entities', function () { - var root = parse('

          z&#

          ', defaultOpts, false); - var childNodes = root.childNodes; - - expect(childNodes[0].childNodes[0].data).toBe('z&#'); - }); - - it('Should correctly parse mismatched headings', function () { - var root = parse('

          Test

          ', defaultOpts, false); - var childNodes = root.childNodes; - - expect(childNodes.length).toBe(2); - expect(childNodes[0].tagName).toBe('h2'); - expect(childNodes[1].tagName).toBe('div'); - }); - - it('Should correctly parse tricky
           content', function () {
          -      var root = parse(
          -        '
          \nA <- factor(A, levels = c("c","a","b"))\n
          ', - defaultOpts, - false - ); - var childNodes = root.childNodes; - - expect(childNodes.length).toBe(1); - expect(childNodes[0].tagName).toBe('pre'); - expect(childNodes[0].childNodes[0].data).toBe( - 'A <- factor(A, levels = c("c","a","b"))\n' - ); - }); - - it('should pass the options for including the location info to parse5', function () { - var root = parse( - '

          Hello

          ', - Object.assign({}, defaultOpts, { sourceCodeLocationInfo: true }), - false - ); - var location = root.children[0].sourceCodeLocation; - - expect(typeof location).toBe('object'); - expect(location.endOffset).toBe(12); - }); - }); -}); diff --git a/test/xml.js b/test/xml.js deleted file mode 100644 index 6fe0ce82f0..0000000000 --- a/test/xml.js +++ /dev/null @@ -1,65 +0,0 @@ -'use strict'; -var cheerio = require('..'); - -function xml(str, options) { - options = Object.assign({ xml: true }, options); - var $ = cheerio.load(str, options); - return $.xml(); -} - -function dom(str, options) { - var $ = cheerio.load('', options); - return $(str).html(); -} - -describe('render', function () { - describe('(xml)', function () { - it('should render tags correctly', function () { - var str = - ''; - expect(xml(str)).toBe( - '' - ); - }); - - it('should render tags (RSS) correctly', function () { - var str = 'http://www.github.com/'; - expect(xml(str)).toBe('http://www.github.com/'); - }); - - it('should escape entities', function () { - var str = ''; - expect(xml(str)).toBe(str); - }); - - it('should render HTML as XML', function () { - var $ = cheerio.load('', null, false); - expect($.xml()).toBe(''); - }); - }); - - describe('(dom)', function () { - it('should not keep camelCase for new nodes', function () { - var str = 'hello'; - expect(dom(str, { xml: false })).toBe( - 'hello' - ); - }); - - it('should keep camelCase for new nodes', function () { - var str = 'hello'; - expect(dom(str, { xml: true })).toBe( - 'hello' - ); - }); - - it('should maintain the parsing options of distinct contexts independently', function () { - var str = 'hello'; - var $ = cheerio.load('', { xml: false }); - - expect($(str).html()).toBe( - 'hello' - ); - }); - }); -}); diff --git a/tsconfig.eslint.json b/tsconfig.eslint.json new file mode 100644 index 0000000000..949c10e270 --- /dev/null +++ b/tsconfig.eslint.json @@ -0,0 +1,5 @@ +{ + "extends": "./tsconfig.json", + "include": 
["src", "benchmark"], + "exclude": [] +} diff --git a/tsconfig.json b/tsconfig.json index 2f98042715..a4165129ff 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -1,5 +1,37 @@ { "compilerOptions": { - "esModuleInterop": true - } + /* Basic Options */ + "target": "es5" /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019' or 'ESNEXT'. */, + "module": "commonjs" /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', or 'ESNext'. */, + // "lib": [], /* Specify library files to be included in the compilation. */ + "declaration": true /* Generates corresponding '.d.ts' file. */, + "declarationMap": true /* Generates a sourcemap for each corresponding '.d.ts' file. */, + // "sourceMap": true, /* Generates corresponding '.map' file. */ + "outDir": "lib" /* Redirect output structure to the directory. */, + // "importHelpers": true, /* Import emit helpers from 'tslib'. */ + + /* Strict Type-Checking Options */ + "strict": true /* Enable all strict type-checking options. */, + + /* Use tslib */ + "importHelpers": true, + + /* Additional Checks */ + "noUnusedLocals": true /* Report errors on unused locals. */, + "noUnusedParameters": true /* Report errors on unused parameters. */, + "noImplicitReturns": true /* Report error when not all code paths in function return a value. */, + "noFallthroughCasesInSwitch": true /* Report errors for fallthrough cases in switch statement. */, + + /* Module Resolution Options */ + // "baseUrl": "./", /* Base directory to resolve non-absolute module names. */ + "esModuleInterop": true /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. 
*/, + "resolveJsonModule": true + }, + "include": ["src"], + "exclude": [ + "**/*.spec.ts", + "**/__fixtures__/*", + "**/__tests__/*", + "**/__snapshots__/*" + ] } diff --git a/types/index.d.ts b/types/index.d.ts deleted file mode 100644 index b299643f73..0000000000 --- a/types/index.d.ts +++ /dev/null @@ -1,287 +0,0 @@ -import { Document, Element, DomHandlerOptions } from 'domhandler'; -import { ParserOptions } from 'htmlparser2'; - -declare namespace cheerio { - type AttrFunction = (this: Element, i: number, currentValue: string) => any; - type WrapFunction = (this: Element) => any; - - interface Cheerio { - // Document References - // Cheerio https://github.com/cheeriojs/cheerio - // JQuery https://api.jquery.com - - [index: number]: Element; - cheerio: string; - length: number; - [Symbol.iterator](): IterableIterator; - - // Attributes - - attr(): { [attr: string]: string }; - attr(name: string): string | undefined; - attr(name: string, value: AttrFunction): Cheerio; - // `value` *can* be `any` here but: - // 1. That makes type-checking the function-type useless - // 2. It's converted to a string anyways - attr(name: string, value: string): Cheerio; - // The map's values *can* be `any` but they'll all be cast to strings - // regardless. 
- attr(map: { [key: string]: any }): Cheerio; - - data(): any; - data(name: string): any; - data(name: string, value: any): any; - - val(): string; - val(value: string): Cheerio; - - removeAttr(name: string): Cheerio; - - has(selector: string): Cheerio; - has(element: Element): Cheerio; - - hasClass(className: string): boolean; - addClass(classNames: string): Cheerio; - - removeClass(): Cheerio; - removeClass(className: string): Cheerio; - removeClass(func: (index: number, className: string) => string): Cheerio; - - toggleClass(className: string): Cheerio; - toggleClass(className: string, toggleSwitch: boolean): Cheerio; - toggleClass(toggleSwitch?: boolean): Cheerio; - toggleClass( - func: (index: number, className: string, toggleSwitch: boolean) => string, - toggleSwitch?: boolean - ): Cheerio; - - is(selector: string): boolean; - is(element: Element): boolean; - is(element: Element[]): boolean; - is(selection: Cheerio): boolean; - is(func: (index: number, element: Element) => boolean): boolean; - - // Form - serialize(): string; - serializeArray(): { name: string; value: string }[]; - - // Traversing - - find(selector: string): Cheerio; - find(element: Cheerio | Element): Cheerio; - - parent(selector?: string): Cheerio; - parents(selector?: string): Cheerio; - parentsUntil(selector?: string, filter?: string): Cheerio; - parentsUntil(element: Element, filter?: string): Cheerio; - parentsUntil(element: Cheerio, filter?: string): Cheerio; - - prop(name: string): any; - prop(name: string, value: any): Cheerio; - - closest(): Cheerio; - closest(selector: string): Cheerio; - - next(selector?: string): Cheerio; - nextAll(): Cheerio; - nextAll(selector: string): Cheerio; - - nextUntil(selector?: string, filter?: string): Cheerio; - nextUntil(element: Element, filter?: string): Cheerio; - nextUntil(element: Cheerio, filter?: string): Cheerio; - - prev(selector?: string): Cheerio; - prevAll(): Cheerio; - prevAll(selector: string): Cheerio; - - prevUntil(selector?: string, 
filter?: string): Cheerio; - prevUntil(element: Element, filter?: string): Cheerio; - prevUntil(element: Cheerio, filter?: string): Cheerio; - - slice(start: number, end?: number): Cheerio; - - siblings(selector?: string): Cheerio; - - children(selector?: string): Cheerio; - - contents(): Cheerio; - - each(func: (index: number, element: Element) => any): Cheerio; - map(func: (index: number, element: Element) => any): Cheerio; - - filter(selector: string): Cheerio; - filter(selection: Cheerio): Cheerio; - filter(element: Element): Cheerio; - filter(elements: Element[]): Cheerio; - filter(func: (index: number, element: Element) => boolean): Cheerio; - - not(selector: string): Cheerio; - not(selection: Cheerio): Cheerio; - not(element: Element): Cheerio; - not(func: (index: number, element: Element) => boolean): Cheerio; - - first(): Cheerio; - last(): Cheerio; - - eq(index: number): Cheerio; - - get(): Element[]; - get(index: number): Element | undefined; - - index(): number; - index(selector: string): number; - index(selection: Cheerio): number; - - end(): Cheerio; - - add(selectorOrHtml: string): Cheerio; - add(selector: string, context: Document): Cheerio; - add(element: Element): Cheerio; - add(elements: Element[]): Cheerio; - add(selection: Cheerio): Cheerio; - - addBack(): Cheerio; - addBack(filter: string): Cheerio; - - // Manipulation - appendTo(target: Cheerio): Cheerio; - prependTo(target: Cheerio): Cheerio; - - append(content: string, ...contents: any[]): Cheerio; - append(content: Document, ...contents: any[]): Cheerio; - append(content: Document[], ...contents: any[]): Cheerio; - append(content: Cheerio, ...contents: any[]): Cheerio; - - prepend(content: string, ...contents: any[]): Cheerio; - prepend(content: Document, ...contents: any[]): Cheerio; - prepend(content: Document[], ...contents: any[]): Cheerio; - prepend(content: Cheerio, ...contents: any[]): Cheerio; - - after(content: string, ...contents: any[]): Cheerio; - after(content: Document, 
...contents: any[]): Cheerio; - after(content: Document[], ...contents: any[]): Cheerio; - after(content: Cheerio, ...contents: any[]): Cheerio; - - insertAfter(content: string): Cheerio; - insertAfter(content: Document): Cheerio; - insertAfter(content: Cheerio): Cheerio; - - before(content: string, ...contents: any[]): Cheerio; - before(content: Document, ...contents: any[]): Cheerio; - before(content: Document[], ...contents: any[]): Cheerio; - before(content: Cheerio, ...contents: any[]): Cheerio; - - insertBefore(content: string): Cheerio; - insertBefore(content: Document): Cheerio; - insertBefore(content: Cheerio): Cheerio; - - remove(selector?: string): Cheerio; - - replaceWith(content: string): Cheerio; - replaceWith(content: Element): Cheerio; - replaceWith(content: Element[]): Cheerio; - replaceWith(content: Cheerio): Cheerio; - replaceWith(content: () => Cheerio): Cheerio; - - empty(): Cheerio; - - html(): string | null; - html(html: string): Cheerio; - - text(): string; - text(text: string): Cheerio; - - wrap(content: string): Cheerio; - wrap(content: Document): Cheerio; - wrap(content: Cheerio): Cheerio; - - wrapAll( - wrapper: Cheerio | string | Element | Element[] | WrapFunction - ): Cheerio; - - css(propertyName: string): string; - css(propertyNames: string[]): string[]; - css(propertyName: string, value: string): Cheerio; - css(propertyName: string, value: number): Cheerio; - css( - propertyName: string, - func: (index: number, value: string) => string - ): Cheerio; - css( - propertyName: string, - func: (index: number, value: string) => number - ): Cheerio; - css(properties: Object): Cheerio; - - // Rendering - - // Miscellaneous - - clone(): Cheerio; - - // Not Documented - - toArray(): Element[]; - } - - interface CheerioParserOptions extends ParserOptions, DomHandlerOptions { - // Document References - // Cheerio https://github.com/cheeriojs/cheerio - - xml?: (ParserOptions & DomHandlerOptions) | boolean; - _useHtmlParser2?: boolean; - - /** 
Enable location support for parse5 */ - sourceCodeLocationInfo?: boolean; - - /** Disable scripting in parse5, so noscript tags would be parsed */ - scriptingEnabled?: boolean; - } - - interface Selector { - (selector: string): Cheerio; - (selector: string, context: string): Cheerio; - (selector: string, context: Element): Cheerio; - (selector: string, context: Element[]): Cheerio; - (selector: string, context: Cheerio): Cheerio; - (selector: string, context: string, root: string): Cheerio; - (selector: string, context: Element, root: string): Cheerio; - (selector: string, context: Element[], root: string): Cheerio; - (selector: string, context: Cheerio, root: string): Cheerio; - (selector: any): Cheerio; - } - - interface Root extends Selector { - // Document References - // Cheerio https://github.com/cheeriojs/cheerio - // JQuery https://api.jquery.com - root(): Cheerio; - contains(container: Element, contained: Element): boolean; - parseHTML( - data: string, - context?: Document, - keepScripts?: boolean - ): Document[]; - - html(options?: CheerioParserOptions): string; - html( - dom: string | Cheerio | Element, - options?: CheerioParserOptions - ): string; - - xml(dom?: string | Cheerio | Element): string; - } - - interface CheerioAPI extends Root { - version: string; - load( - html: string | Buffer | { toString(): string }, - options?: CheerioParserOptions | null, - isDocument?: boolean - ): Root; - load(element: Element | Element[], options?: CheerioParserOptions): Root; - } -} - -declare const cheerio: cheerio.CheerioAPI; -export = cheerio; diff --git a/types/index.test-d.ts b/types/index.test-d.ts deleted file mode 100644 index c11366e37b..0000000000 --- a/types/index.test-d.ts +++ /dev/null @@ -1,361 +0,0 @@ -import { expectType } from 'tsd'; -import { Element } from 'domhandler'; -import cheerio from '..'; - -/* - * LOADING - */ -let html = `
            -
          • Apple
          • -
          • Orange
          • -
          • Pear
          • - -
          `; - -// Preferred Method -var $ = cheerio.load(html); - -// Directly load element -cheerio(html); -cheerio('ul', html); -cheerio('li', 'ul', html); - -const $fromElement = cheerio.load($('ul').get(0) as Element); - -if ($fromElement('ul > li').length !== 3) { - throw new Error( - 'Expecting 3 elements when passing `cheerio.Element` to `load()`' - ); -} - -$ = cheerio.load(Buffer.from(html)); - -$ = cheerio.load(html, { - normalizeWhitespace: true, - xmlMode: true, -}); - -$ = cheerio.load(html, { - scriptingEnabled: false, -}); - -$ = cheerio.load(html, { - sourceCodeLocationInfo: true, -}); - -$ = cheerio.load(html, { - normalizeWhitespace: true, - withStartIndices: true, - withEndIndices: true, - xmlMode: true, - decodeEntities: true, - lowerCaseTags: true, - lowerCaseAttributeNames: true, - recognizeCDATA: true, - recognizeSelfClosing: true, -}); - -$ = cheerio.load(html, { - xml: true, -}); - -$ = cheerio.load(html, { - xml: { xmlMode: false }, -}); - -$ = cheerio.load(html, { - xml: { - normalizeWhitespace: true, - withStartIndices: true, - withEndIndices: true, - xmlMode: false, - decodeEntities: true, - lowerCaseTags: true, - lowerCaseAttributeNames: true, - recognizeCDATA: true, - recognizeSelfClosing: true, - }, -}); - -/** Selectors */ -var $el = $('.class'); -var $multiEl = $('selector', 'selector', 'selector'); -var $emptyEl = $('.not-existing-class'); - -expectType($el.cheerio); - -/** Attributes */ - -// attr -$el.attr(); -$el.attr('id'); -$el.attr('id', 'favorite').html(); -$el - .attr('id', function (i, attr) { - return this.tagName + i * 2 + attr; - }) - .html(); -$el - .attr('id', function () { - return this.tagName; - }) - .html(); -$el.attr({ id: 'uniq', class: 'big' }).html(); - -$multiEl.prop('foo') === undefined; -$emptyEl.attr('id') === undefined; - -// props -$el.prop('style'); -$el.prop('style', 'none').html(); - -// data -$el.data(); -$el.data('apple-color'); -$el.data('kind', 'mac'); - -// val -$('input[type="text"]').val(); 
-$('input[type="text"]').val('test').html(); - -// removeAttr -$el.removeAttr('class').html(); - -// hasClass, addClass, removeClass, toggleClass -$el.addClass('class').addClass('test'); -$el.hasClass('test'); -$el.removeClass('class').removeClass('test'); -$el.addClass('red').removeClass().html(); -$el.toggleClass('fruit green red').html(); - -// is -$el.is('#id'); -$el.is($el); -$el.is(() => { - return true; -}); - -/** Forms */ -// serializeArray -$('
          ').serializeArray(); -$('
          ').serialize(); - -/** Traversing */ -// find -$el.find('li').length; -$el.find($('.apple')).length; - -// .parent([selector]) -$el.parent().attr('id'); -$el.parent('.class').attr('id'); - -// .parents([selector]) -$el.parents().length; -$el.parents('.class').length; - -// .parentsUntil([selector][,filter]) -$el.parentsUntil().length; -$el.parentsUntil('.class').length; - -// .closest(selector) -$el.closest(); -$el.closest('.class'); - -// .next([selector]) -$el.next().hasClass('class'); -$el.next('.class').hasClass('class'); - -// .nextAll([selector]) -$el.nextAll().length; -$el.nextAll('.class').length; - -// .nextUntil([selector], [filter]) -$el.nextUntil(); -$el.nextUntil('.class'); - -// .prev([selector]) -$el.prev().hasClass('class'); -$el.prev('.class').hasClass('class'); - -// .prevAll([selector]) -$el.prevAll().length; -$el.prevAll('.class').length; - -// .prevUntil([selector], [filter]) -$el.prevUntil(); -$el.prevUntil('.class'); - -// .slice( start, [end] ) -$el.slice(1).eq(0).text(); -$el.slice(1, 2).length; - -// .siblings([selector]) -$el.siblings().length; -$el.siblings('.class').length; - -// .children([selector]) -$el.children().length; -$el.children('.class').text(); - -// .contents() -$el.contents().length; - -// .each( function(index, element) ) -$el.each((_, el) => { - $(el).html(); -}); - -// .map( function(index, element) ) -$el - .map((_, el) => { - return $(el).text(); - }) - .get() - .join(' '); - -// .filter -$ = cheerio.load(html); -$el.filter('.class').attr('class'); -$el.filter($('.class')).attr('class'); -$el.filter($('.class')[0]).attr('class'); - -$el - .filter((_, el) => { - return $(el).attr('class') === 'class'; - }) - .attr('class'); - -// .not -$el.not('.class').length; -$el.not($('.class')).length; -$el.not($('.class')[0]).length; - -$el.not((_, el) => { - return $(el).attr('class') === 'class'; -}).length; - -// .has -$el.has('.class').attr('id'); -$el.has($el[0]).attr('id'); - -// .first() 
-$el.children().first().text(); - -// .last() -$el.children().last().text(); - -// .eq( i ) -$el.eq(0).text(); -$el.eq(-1).text(); - -// .get( [i] ) -$el.get(0)?.tagName; -$el.get().length; - -// .index() -// .index( selector ) -// .index( nodeOrSelection ) -$el.index(); -$el.index('li'); -$el.index($('#fruit, li')); - -// .end() -$el.eq(0).end().length; - -// .add -$el.add('.class').length; - -// .addBack( [filter] ) -$el.eq(0).addBack().length; -$el.eq(0).addBack('.class').length; - -/** Manipulation */ - -$('
        • Plum
        • ').appendTo($el); -$el.prependTo($('
        • Plum
        • ')); - -// .append( content, [content, ...] ) -$el.append('
        • Plum
        • ').html(); -$el.append('
        • Plum
        • ', '
        • Plum
        • ').html(); - -// .prepend( content, [content, ...] ) -$el.prepend('
        • Plum
        • ').html(); -$el.prepend('
        • Plum
        • ', '
        • Plum
        • ').html(); - -// .after( content, [content, ...] ) -$el.after('
        • Plum
        • ').html(); -$el.after('
        • Plum
        • ', '
        • Plum
        • ').html(); - -// .insertAfter( content ) -$('
        • Plum
        • ').insertAfter('.class').html(); - -// .before( content, [content, ...] ) -$el.before('
        • Plum
        • ').html(); -$el.before('
        • Plum
        • ', '
        • Plum
        • ').html(); - -// .insertBefore( content ) -$('
        • Plum
        • ').insertBefore('.class').html(); - -// .remove( [selector] ) -$el.remove().html(); -$el.remove('.class').html(); - -// .replaceWith( content ) -$el.replaceWith($('
        • Plum
        • ')).html(); - -// .empty() -$el.empty().html(); - -// .html( [htmlString] ) -$el.html(); -$el.html('
        • Mango
        • ').html(); - -// .text( [textString] ) -$el.text(); -$el.text('text'); - -// .wrap( content ) -$el.wrap($('
          ')).html(); - -// .wrapAll( wrapper ) -$el.wrapAll($('
          ')); -$el.wrapAll($('
          ').get()); -$el.wrapAll('
          '); -$el.wrapAll(function () { - return '
          '; -}); - -// .css -$el.css('width'); -$el.css(['width', 'height']); -$el.css('width', '50px'); - -/** Rendering */ -$.html(); -$.html('.class'); -$.xml(); -$.xml($el); - -/** Miscellaneous */ - -// .clone() #### -$el.clone().html(); - -/** Utilities */ - -// $.root -$.root().append('
            ').html(); - -// $.contains( container, contained ) -$.contains($el[0], $el[0]); - -// $.parseHTML( data [, context ] [, keepScripts ] ) -$.parseHTML(html); -$.parseHTML(html, undefined, true); - -/** Not in doc */ -$el.toArray(); - -cheerio.html($el); - -expectType(cheerio.version);