Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added multiple term search functionality (with default phrase search) #5579

Merged
merged 1 commit into from
May 27, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions web/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -1251,6 +1251,7 @@ var PDFViewerApplication = {
eventBus.on('rotateccw', webViewerRotateCcw);
eventBus.on('documentproperties', webViewerDocumentProperties);
eventBus.on('find', webViewerFind);
eventBus.on('findfromurlhash', webViewerFindFromUrlHash);
//#if GENERIC
eventBus.on('fileinputchange', webViewerFileInputChange);
//#endif
Expand Down Expand Up @@ -1906,12 +1907,23 @@ function webViewerDocumentProperties() {
function webViewerFind(e) {
PDFViewerApplication.findController.executeCommand('find' + e.type, {
query: e.query,
phraseSearch: e.phraseSearch,
caseSensitive: e.caseSensitive,
highlightAll: e.highlightAll,
findPrevious: e.findPrevious
});
}

function webViewerFindFromUrlHash(e) {
PDFViewerApplication.findController.executeCommand('find', {
query: e.query,
phraseSearch: e.phraseSearch,
caseSensitive: false,
highlightAll: true,
findPrevious: false
});
}

function webViewerScaleChanging(e) {
var appConfig = PDFViewerApplication.appConfig;
appConfig.toolbar.zoomOut.disabled = (e.scale === MIN_SCALE);
Expand Down Expand Up @@ -2055,6 +2067,7 @@ window.addEventListener('keydown', function keydown(evt) {
if (findState) {
PDFViewerApplication.findController.executeCommand('findagain', {
query: findState.query,
phraseSearch: findState.phraseSearch,
caseSensitive: findState.caseSensitive,
highlightAll: findState.highlightAll,
findPrevious: cmd === 5 || cmd === 12
Expand Down
1 change: 1 addition & 0 deletions web/dom_events.js
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
var event = document.createEvent('CustomEvent');
event.initCustomEvent('find' + e.type, true, true, {
query: e.query,
phraseSearch: e.phraseSearch,
caseSensitive: e.caseSensitive,
highlightAll: e.highlightAll,
findPrevious: e.findPrevious
Expand Down
1 change: 1 addition & 0 deletions web/firefoxcom.js
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ Preferences._readFromStorage = function (prefObj) {
source: window,
type: evt.type.substring('find'.length),
query: evt.detail.query,
phraseSearch: true,
caseSensitive: !!evt.detail.caseSensitive,
highlightAll: !!evt.detail.highlightAll,
findPrevious: !!evt.detail.findPrevious
Expand Down
1 change: 1 addition & 0 deletions web/pdf_find_bar.js
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ var PDFFindBar = (function PDFFindBarClosure() {
type: type,
query: this.findField.value,
caseSensitive: this.caseSensitive.checked,
phraseSearch: true,
highlightAll: this.highlightAll.checked,
findPrevious: findPrev
});
Expand Down
124 changes: 113 additions & 11 deletions web/pdf_find_controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ var PDFFindController = (function PDFFindControllerClosure() {
this.active = false; // If active, find results will be highlighted.
this.pageContents = []; // Stores the text for each page.
this.pageMatches = [];
this.pageMatchesLength = null;
this.matchCount = 0;
this.selected = { // Currently selected match.
pageIdx: -1,
Expand All @@ -104,10 +105,114 @@ var PDFFindController = (function PDFFindControllerClosure() {
});
},

// Helper for multiple search - fills matchesWithLength array
// and takes into account cases when one search term
// include another search term (for example, "tamed tame" or "this is").
// Looking for intersecting terms in the 'matches' and
// leave elements with a longer match-length.

_prepareMatches: function PDFFindController_prepareMatches(
matchesWithLength, matches, matchesLength) {

function isSubTerm(matchesWithLength, currentIndex) {
var currentElem, prevElem, nextElem;
currentElem = matchesWithLength[currentIndex];
nextElem = matchesWithLength[currentIndex + 1];
// checking for cases like "TAMEd TAME"
if (currentIndex < matchesWithLength.length - 1 &&
currentElem.match === nextElem.match) {
currentElem.skipped = true;
return true;
}
// checking for cases like "thIS IS"
for (var i = currentIndex - 1; i >= 0; i--) {
prevElem = matchesWithLength[i];
if (prevElem.skipped) {
continue;
}
if (prevElem.match + prevElem.matchLength < currentElem.match) {
break;
}
if (prevElem.match + prevElem.matchLength >=
currentElem.match + currentElem.matchLength) {
currentElem.skipped = true;
return true;
}
}
return false;
}

var i, len;
// Sorting array of objects { match: <match>, matchLength: <matchLength> }
// in increasing index first and then the lengths.
matchesWithLength.sort(function(a, b) {
return a.match === b.match ?
a.matchLength - b.matchLength : a.match - b.match;
});
for (i = 0, len = matchesWithLength.length; i < len; i++) {
if (isSubTerm(matchesWithLength, i)) {
continue;
}
matches.push(matchesWithLength[i].match);
matchesLength.push(matchesWithLength[i].matchLength);
}
},

calcFindPhraseMatch: function PDFFindController_calcFindPhraseMatch(
query, pageIndex, pageContent) {
var matches = [];
var queryLen = query.length;
var matchIdx = -queryLen;
while (true) {
matchIdx = pageContent.indexOf(query, matchIdx + queryLen);
if (matchIdx === -1) {
break;
}
matches.push(matchIdx);
}
this.pageMatches[pageIndex] = matches;
},

calcFindWordMatch: function PDFFindController_calcFindWordMatch(
query, pageIndex, pageContent) {
var matchesWithLength = [];
// Divide the query into pieces and search for text on each piece.
var queryArray = query.match(/\S+/g);
var subquery, subqueryLen, matchIdx;
for (var i = 0, len = queryArray.length; i < len; i++) {
subquery = queryArray[i];
subqueryLen = subquery.length;
matchIdx = -subqueryLen;
while (true) {
matchIdx = pageContent.indexOf(subquery, matchIdx + subqueryLen);
if (matchIdx === -1) {
break;
}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I see the need for the matchesLength array. When phraseSearch == true, wouldn't queryLen always be constant? And in that case, wouldn't the matchesLen array always be a big array of repeated queryLen?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at this further, I am not too concerned about the memory wasted by the repeated values. However, this does seem to be a bit more complicated than necessary. For example, we could probably do away with both matches and matchesLength and just have the array matchesWithLength. That will use more memory when phraseSearch==true, but it won't be too bad, and it will keep the code clean.

Alternatively, if people are concerned about memory, when phraseSearch==true, we can set matchesLength either to null or to queryLen, rather than an array of repeated queryLen

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@speedplane, you are right about matchesLength array when phraseSearch === true.
There is no need to use it in this case, because all matchesLength elements is identical. I refactored my PR.

// Other searches do not, so we store the length.
matchesWithLength.push({
match: matchIdx,
matchLength: subqueryLen,
skipped: false
});
}
}
// Prepare arrays for store the matches.
if (!this.pageMatchesLength) {
this.pageMatchesLength = [];
}
this.pageMatchesLength[pageIndex] = [];
this.pageMatches[pageIndex] = [];
// Sort matchesWithLength, clean up intersecting terms
// and put the result into the two arrays.
this._prepareMatches(matchesWithLength, this.pageMatches[pageIndex],
this.pageMatchesLength[pageIndex]);
},

calcFindMatch: function PDFFindController_calcFindMatch(pageIndex) {
var pageContent = this.normalize(this.pageContents[pageIndex]);
var query = this.normalize(this.state.query);
var caseSensitive = this.state.caseSensitive;
var phraseSearch = this.state.phraseSearch;
var queryLen = query.length;

if (queryLen === 0) {
Expand All @@ -120,25 +225,21 @@ var PDFFindController = (function PDFFindControllerClosure() {
query = query.toLowerCase();
}

var matches = [];
var matchIdx = -queryLen;
while (true) {
matchIdx = pageContent.indexOf(query, matchIdx + queryLen);
if (matchIdx === -1) {
break;
}
matches.push(matchIdx);
if (phraseSearch) {
this.calcFindPhraseMatch(query, pageIndex, pageContent);
} else {
this.calcFindWordMatch(query, pageIndex, pageContent);
}
this.pageMatches[pageIndex] = matches;

this.updatePage(pageIndex);
if (this.resumePageIdx === pageIndex) {
this.resumePageIdx = null;
this.nextPageMatch();
}

// Update the matches count
if (matches.length > 0) {
this.matchCount += matches.length;
if (this.pageMatches[pageIndex].length > 0) {
this.matchCount += this.pageMatches[pageIndex].length;
this.updateUIResultsCount();
}
},
Expand Down Expand Up @@ -233,6 +334,7 @@ var PDFFindController = (function PDFFindControllerClosure() {
this.resumePageIdx = null;
this.pageMatches = [];
this.matchCount = 0;
this.pageMatchesLength = null;
var self = this;

for (var i = 0; i < numPages; i++) {
Expand Down
7 changes: 7 additions & 0 deletions web/pdf_link_service.js
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,13 @@ var PDFLinkService = (function () {
setHash: function PDFLinkService_setHash(hash) {
if (hash.indexOf('=') >= 0) {
var params = parseQueryString(hash);
if ('search' in params) {
this.eventBus.dispatch('findfromurlhash', {
source: this,
query: params['search'].replace(/"/g, ''),
phraseSearch: (params['phrase'] === 'true')
});
}
// borrowing syntax from "Parameters for Opening PDF Files"
if ('nameddest' in params) {
if (this.pdfHistory) {
Expand Down
23 changes: 18 additions & 5 deletions web/text_layer_builder.js
Original file line number Diff line number Diff line change
Expand Up @@ -116,15 +116,18 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
this.divContentDone = true;
},

convertMatches: function TextLayerBuilder_convertMatches(matches) {
convertMatches: function TextLayerBuilder_convertMatches(matches,
matchesLength) {
var i = 0;
var iIndex = 0;
var bidiTexts = this.textContent.items;
var end = bidiTexts.length - 1;
var queryLen = (this.findController === null ?
0 : this.findController.state.query.length);
var ret = [];

if (!matches) {
return ret;
}
for (var m = 0, len = matches.length; m < len; m++) {
// Calculate the start position.
var matchIdx = matches[m];
Expand All @@ -147,7 +150,11 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
};

// Calculate the end position.
matchIdx += queryLen;
if (matchesLength) { // multiterm search
matchIdx += matchesLength[m];
} else { // phrase search
matchIdx += queryLen;
}

// Somewhat the same array as above, but use > instead of >= to get
// the end position right.
Expand Down Expand Up @@ -289,8 +296,14 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {

// Convert the matches on the page controller into the match format
// used for the textLayer.
this.matches = this.convertMatches(this.findController === null ?
[] : (this.findController.pageMatches[this.pageIdx] || []));
var pageMatches, pageMatchesLength;
if (this.findController !== null) {
pageMatches = this.findController.pageMatches[this.pageIdx] || null;
pageMatchesLength = (this.findController.pageMatchesLength) ?
this.findController.pageMatchesLength[this.pageIdx] || null : null;
}

this.matches = this.convertMatches(pageMatches, pageMatchesLength);
this.renderMatches(this.matches);
},

Expand Down