Skip to content

Commit 12128e1

Browse files
authored
feat(attributes): Add baseURI option (#2510)
1 parent 2e9fd63 commit 12128e1

File tree

3 files changed

+86
-2
lines changed

3 files changed

+86
-2
lines changed

src/api/attributes.spec.ts

+39
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ describe('$(...)', () => {
243243
expect(imgs.prop('namespace')).toBe(nsHtml);
244244
imgs.prop('attribs', null);
245245
expect(imgs.prop('src')).toBeUndefined();
246+
expect(imgs.prop('data-foo')).toBeUndefined();
246247
});
247248

248249
it('(map) : object map should set multiple props', () => {
@@ -284,6 +285,44 @@ describe('$(...)', () => {
284285
expect($(null as any).prop('prop')).toBeUndefined();
285286
});
286287

288+
// Checks that `prop('href')` on `<a>` elements is resolved against the
// `baseURI` option for each shape an href can take: absolute,
// protocol-relative, root-relative and path-relative.
// NOTE(review): the implementation also resolves `href` on `<link>`, which is
// not exercised by this test.
it('("href") : should resolve links with `baseURI`', () => {
289+
const $ = cheerio.load(
290+
`
291+
<a id="1" href="http://example.org">example1</a>
292+
<a id="2" href="//example.org">example2</a>
293+
<a id="3" href="/example.org">example3</a>
294+
<a id="4" href="example.org">example4</a>
295+
`,
296+
{ baseURI: 'http://example.com/page/1' }
297+
);
298+
299+
// Absolute URL: the base is ignored; the result gains a trailing slash.
expect($('#1').prop('href')).toBe('http://example.org/');
300+
// Protocol-relative URL: only the scheme is taken from the base.
expect($('#2').prop('href')).toBe('http://example.org/');
301+
// Root-relative URL: replaces the whole path of the base.
expect($('#3').prop('href')).toBe('http://example.com/example.org');
302+
// Path-relative URL: resolved against the base's directory (`/page/`).
expect($('#4').prop('href')).toBe('http://example.com/page/example.org');
303+
});
304+
305+
// Checks that `prop('src')` is resolved against the `baseURI` option, using a
// different element type (`img`, `iframe`, `audio`, `source`) for each URL
// shape: absolute, protocol-relative, root-relative and path-relative.
// NOTE(review): the implementation also accepts `<video>`, which is not
// exercised by this test.
it('("src") : should resolve links with `baseURI`', () => {
306+
const $ = cheerio.load(
307+
`
308+
<img id="1" src="http://example.org/image.png">
309+
<iframe id="2" src="//example.org/page.html"></iframe>
310+
<audio id="3" src="/example.org/song.mp3"></audio>
311+
<source id="4" src="example.org/image.png">
312+
`,
313+
{ baseURI: 'http://example.com/page/1' }
314+
);
315+
316+
// Absolute URL: returned unchanged.
expect($('#1').prop('src')).toBe('http://example.org/image.png');
317+
// Protocol-relative URL: only the scheme is taken from the base.
expect($('#2').prop('src')).toBe('http://example.org/page.html');
318+
// Root-relative URL: replaces the whole path of the base.
expect($('#3').prop('src')).toBe(
319+
'http://example.com/example.org/song.mp3'
320+
);
321+
// Path-relative URL: resolved against the base's directory (`/page/`).
expect($('#4').prop('src')).toBe(
322+
'http://example.com/page/example.org/image.png'
323+
);
324+
});
325+
287326
it('("outerHTML") : should render properly', () => {
288327
const outerHtml = '<div><a></a></div>';
289328
const $a = $(outerHtml);

src/api/attributes.ts

+39
Original file line numberDiff line numberDiff line change
@@ -305,10 +305,19 @@ export function prop<T extends AnyNode>(
305305
this: Cheerio<T>,
306306
name: 'innerHTML' | 'outerHTML' | 'innerText' | 'textContent'
307307
): string | null;
308+
/** Get a parsed CSS style object. */
308309
export function prop<T extends AnyNode>(
309310
this: Cheerio<T>,
310311
name: 'style'
311312
): StyleProp;
313+
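/**
314+
* Get the `href` or `src` of the first element, resolved against the `baseURI` option.
315+
* Resolution applies to `href` on `<a>` and `<link>`, and to `src` on `<img>`, `<iframe>`, `<audio>`, `<video>` and `<source>`, and only when `baseURI` is set and a global `URL` object is part of the environment; otherwise the attribute value is returned as written.
316+
*/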
317+
export function prop<T extends AnyNode>(
318+
this: Cheerio<T>,
319+
name: 'href' | 'src'
320+
): string | undefined;
312321
export function prop<T extends AnyNode, K extends keyof Element>(
313322
this: Cheerio<T>,
314323
name: K
@@ -364,6 +373,36 @@ export function prop<T extends AnyNode>(
364373
return isTag(el) ? el.name.toUpperCase() : undefined;
365374
}
366375

376+
case 'href':
case 'src': {
  /*
   * `href` / `src` behave like their DOM counterparts: for link-like
   * elements the attribute is resolved against `options.baseURI`.
   * Returns `undefined` for non-tag nodes and for missing attributes, and
   * the attribute as written whenever it cannot or should not be resolved.
   */
  const el = this[0];

  if (!isTag(el)) {
    return undefined;
  }

  const prop = el.attribs?.[name];
  const { baseURI } = this.options;

  /* eslint-disable node/no-unsupported-features/node-builtins */
  if (
    // `URL` is not a global in every supported environment.
    typeof URL !== 'undefined' &&
    // Only resolve on elements where the attribute actually holds a URL.
    ((name === 'href' && (el.tagName === 'a' || el.tagName === 'link')) ||
      (name === 'src' &&
        (el.tagName === 'img' ||
          el.tagName === 'iframe' ||
          el.tagName === 'audio' ||
          el.tagName === 'video' ||
          el.tagName === 'source'))) &&
    prop !== undefined &&
    baseURI
  ) {
    try {
      return new URL(prop, baseURI).href;
    } catch {
      // `new URL` throws a TypeError on unparseable input (for example
      // `href="http://"`). Reading a property must not crash on malformed
      // markup, so fall back to the attribute as written, which is what
      // browsers do for URL-reflecting attributes.
      return prop;
    }
  }
  /* eslint-enable node/no-unsupported-features/node-builtins */

  return prop;
}
405+
367406
case 'innerText':
368407
return innerText(this[0]);
369408

src/options.ts

+8-2
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,23 @@ export interface Parse5Options {
1414
/** Internal options for Cheerio. */
1515
export interface InternalOptions extends HTMLParser2Options, Parse5Options {
1616
/** NOTE(review): presumably selects htmlparser2 instead of parse5 for parsing — confirm against the parser selection code. */
_useHtmlParser2?: boolean;
17+
18+
/**
 * The base URI for the document. Used for the `href` and `src` props: when
 * set, reading those props on link-like elements resolves the attribute
 * against this value with `new URL(value, baseURI)`.
 */
19+
baseURI?: string | URL; // eslint-disable-line node/no-unsupported-features/node-builtins
1720
}
1821

1922
/**
2023
* Options accepted by Cheerio.
2124
*
22-
* Please note that parser-specific options are *only recognized* if the
25+
* Please note that parser-specific options are _only recognized_ if the
2326
* relevant parser is used.
2427
*/
2528
export interface CheerioOptions extends HTMLParser2Options, Parse5Options {
26-
/** Suggested way of configuring htmlparser2 when wanting to parse XML. */
29+
/** Recommended way of configuring htmlparser2 when wanting to parse XML. */
2730
xml?: HTMLParser2Options | boolean;
31+
32+
/**
 * The base URI for the document. Used for the `href` and `src` props: when
 * set, reading those props on link-like elements resolves the attribute
 * against this value with `new URL(value, baseURI)`.
 */
33+
baseURI?: string | URL; // eslint-disable-line node/no-unsupported-features/node-builtins
2834
}
2935

3036
const defaultOpts: CheerioOptions = {

0 commit comments

Comments
 (0)