diff --git a/README.md b/README.md index cf4c2f08..5c897a2d 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,9 @@ npm install diff --save * `newStr` : New string value * `oldHeader` : Additional information to include in the old file header * `newHeader` : Additional information to include in the new file header - * `options` : An object with options. Currently, only `context` is supported and describes how many lines of context should be included. + * `options` : An object with options. + * `context` : Describes how many lines of context should be included. + * `tokenizer` : Overrides the default regex used to split text into words. Supported by `diffWords` and `diffWordsWithSpace`. * `Diff.createPatch(fileName, oldStr, newStr, oldHeader, newHeader)` - creates a unified diff patch. diff --git a/src/diff/word.js b/src/diff/word.js index ab651ee9..5ffd5bea 100644 --- a/src/diff/word.js +++ b/src/diff/word.js @@ -32,7 +32,8 @@ wordDiff.equals = function(left, right) { return left === right || (this.options.ignoreWhitespace && !reWhitespace.test(left) && !reWhitespace.test(right)); }; wordDiff.tokenize = function(value) { - let tokens = value.split(/(\s+|[()[\]{}'"]|\b)/); + const tokenizer = this.options.tokenizer || /(\s+|[()[\]{}'"]|\b)/; // Use the tokenizer regex in the options or use the default regex + const tokens = value.split(tokenizer); // Join the boundary splits that we do not consider to be boundaries. This is primarily the extended Latin character set. // Join the boundary splits that we do not consider to be boundaries. This is primarily the extended Latin character set. 
for (let i = 0; i < tokens.length - 1; i++) { diff --git a/test/diff/word.js b/test/diff/word.js index 78338bc4..4e670fdb 100644 --- a/test/diff/word.js +++ b/test/diff/word.js @@ -171,6 +171,29 @@ describe('WordDiff', function() { done(); }); }); + + // With custom tokenizer + it('should utilize a custom tokenizer', function() { + + const diff = diffWords('foo_bar', 'something_bar', { + tokenizer: /(\s+|[()[\]{}_'"]|\b)/ + }); + + expect(diff).to.eql([{ + count: 1, + added: undefined, + removed: true, + value: 'foo' + }, { + count: 1, + added: true, + removed: undefined, + value: 'something' + }, { + count: 2, + value: '_bar' + }]); + }); }); describe('#diffWordsWithSpace', function() {