Skip to content

Commit

Permalink
S3C-3926 [WIP] GapSet
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-gramain committed Jan 23, 2024
1 parent 5734d11 commit 7d84c4c
Show file tree
Hide file tree
Showing 4 changed files with 437 additions and 0 deletions.
192 changes: 192 additions & 0 deletions lib/algos/cache/GapSet.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
import assert from 'assert';
import SortedSet from 'collections/sorted-set';

import errors from '../../errors';

/**
 * One cached gap: an inclusive range of keys together with a weight.
 */
export type GapSetEntry = {
    /** first key of the range; entries in a GapSet are ordered and identified by it */
    firstKey: string;
    /** last key of the range (inclusive) */
    lastKey: string;
    /** weight attached to the range */
    weight: number;
};

/**
 * Specialized data structure to support caching of listing "gaps",
 * i.e. ranges of keys that can be skipped over during listing
 * (because they only contain delete markers as latest versions)
 *
 * Entries are stored in a sorted set ordered by 'firstKey'; two entries
 * with the same 'firstKey' are considered equal by the set. Consecutive
 * gaps may be "chained", i.e. share a single boundary key
 * (g1.lastKey === g2.firstKey).
 */
export class GapSet {
    _gaps: SortedSet;
    _maxWeight: number;

    /**
     * @param maxWeight - weight threshold used by setGap(): when a gap
     * with a larger weight partially overlaps an existing gap, a new
     * chained gap is started instead of extending the existing one
     */
    constructor(maxWeight: number) {
        this._gaps = new SortedSet(
            [],
            // the comparators only read 'firstKey' so that lookups can
            // be done with a partial '{ firstKey }' probe object
            (left: { firstKey: string }, right: { firstKey: string }) =>
                left.firstKey === right.firstKey,
            (left: { firstKey: string }, right: { firstKey: string }) => (
                left.firstKey < right.firstKey ? -1 :
                left.firstKey > right.firstKey ? 1 : 0
            )
        );
        this._maxWeight = maxWeight;
    }

    /**
     * Build a GapSet pre-populated with the given entries
     *
     * The entry objects are added to the set as-is (not copied).
     *
     * @param gaps - entries to add
     * @param maxWeight - see the constructor
     * @returns the new GapSet
     */
    static createFromArray(gaps: GapSetEntry[], maxWeight: number): GapSet {
        const gapSet = new GapSet(maxWeight);
        gapSet._gaps.addEach(gaps);
        return gapSet;
    }

    /**
     * Helper function to initialize a new gap or retrieve an existing
     * one depending on keys and weight
     *
     * @param firstKey - first key of the gap being set
     * @param lastKey - last key of the gap being set
     * @param weight - weight of the gap being set
     * @returns 'gap': the entry that setGap() shall extend up to 'lastKey'
     * (it is already stored in the set), and 'weightToMerge': the weight
     * setGap() assigns to that entry when it extends it
     */
    _lookupOrCreateGap(firstKey: string, lastKey: string, weight: number): {
        gap: GapSetEntry,
        weightToMerge: number,
    } {
        // closest existing gap starting at or before 'firstKey'
        let gap: GapSetEntry | undefined =
            this._gaps.findGreatestLessThanOrEqual({ firstKey })?.value;
        if (gap) {
            if (gap.lastKey >= lastKey) {
                // return fully overlapping gap already cached
                return { gap, weightToMerge: weight };
            }
            if (gap.lastKey < firstKey) {
                // closest gap found is not overlapping, ignore it
                gap = undefined;
            } else if (weight > this._maxWeight) {
                // if the new weight exceeds the max, split the existing gap found in
                // two chained gaps, by using the same key as a link (this is important
                // to ensure that listings are aware that it is a single contiguous gap)
                const splitGap: GapSetEntry = {
                    firstKey: gap.lastKey,
                    // split gap is temporarily single-key, will be extended by setGap()
                    lastKey: gap.lastKey,
                    // split gap only contains the weight not already included in the previous gap
                    weight: Math.max(weight - gap.weight, 0),
                };
                // there may be an existing gap starting with 'lastKey': delete it first
                // (entries are matched on 'firstKey' only)
                this._gaps.delete(splitGap);
                // then add the new split gap to be extended thereafter
                this._gaps.add(splitGap);
                return { gap: splitGap, weightToMerge: splitGap.weight };
            }
        }
        if (!gap) {
            // create a new single-key gap that will be extended to 'lastKey' by setGap()
            gap = {
                firstKey,
                lastKey: firstKey,
                weight,
            };
            this._gaps.add(gap);
        }
        return { gap, weightToMerge: weight };
    }

    /**
     * Record the gap 'firstKey' -> 'lastKey' in the set, re-using,
     * extending or chaining to an existing overlapping gap, and merging
     * the existing gaps that start within the extended range
     *
     * === Note on weight updates ===
     *
     * - if 'lastKey' is already within the gap returned by the lookup,
     *   nothing is updated
     * - when the gap is extended up to 'lastKey' and there is no (more)
     *   gap to merge, its weight is set to 'weightToMerge' (as returned
     *   by _lookupOrCreateGap())
     * - each time an existing gap is merged, the weight is set to the
     *   max of 'weightToMerge' and the sum of the weights of the gaps
     *   merged so far (including the initial gap)
     *
     * Rationale for the max heuristic: the real weight of the merged
     * gap must be at least the weight being set, and at least the sum
     * of the weights of the merged gaps, because they do not overlap.
     * Using the max of the two values is therefore a reasonable lower
     * bound of the real weight. Because weights drive eviction
     * behavior, always updating with lower bounds guarantees that all
     * entries can eventually be evicted (otherwise, they may be
     * increasing forever and get stuck in the cache, potentially
     * causing premature eviction of other useful gaps).
     *
     * It also retains large weight values when merging with larger
     * gaps, which entices to keep those gaps in the cache.
     *
     * @param firstKey - first key of the gap
     * @param lastKey - last key of the gap, must be >= firstKey
     * @param weight - weight of the gap
     * @returns the stored entry whose 'lastKey' is at or after 'lastKey'
     * once the operation is done (when a chained gap was created, this
     * is the chained gap, which starts after 'firstKey')
     * @throws AssertionError if lastKey < firstKey
     */
    setGap(firstKey: string, lastKey: string, weight: number): GapSetEntry {
        assert(lastKey >= firstKey);

        const { gap: curGap, weightToMerge } =
            this._lookupOrCreateGap(firstKey, lastKey, weight);

        // single-key gaps are never chained: the first lookup must then find
        // the first gap strictly after 'curGap' (an 'OrEqual' lookup would
        // yield 'curGap' itself). Gaps with more than one key may be chained
        // (g1.lastKey == g2.firstKey) and need the 'OrEqual' lookup to find
        // those chained gaps.
        let strictlyAfter = curGap.firstKey === curGap.lastKey;

        // sum of the weights of 'curGap' and of the gaps merged into it
        let mergedWeightSum = curGap.weight;

        // loop over existing mergeable gaps straddled by 'curGap.lastKey' -> 'lastKey'
        while (lastKey > curGap.lastKey) {
            const probe = { firstKey: curGap.lastKey };
            const nextGap: GapSetEntry | undefined = (strictlyAfter
                ? this._gaps.findLeastGreaterThan(probe)
                : this._gaps.findLeastGreaterThanOrEqual(probe))?.value;
            // if no more gap or if the next gap starts beyond 'lastKey', stop merging
            if (!nextGap || nextGap.firstKey > lastKey) {
                // extend the existing gap
                curGap.lastKey = lastKey;
                // NOTE(review): this also replaces a weight computed as
                // max(weightToMerge, mergedWeightSum) by a previous iteration
                // - confirm this is intended
                curGap.weight = weightToMerge;
                return curGap;
            }
            // merge 'nextGap' into 'curGap': 'nextGap' is removed from the set
            // so that it does not overlap with the extended 'curGap'
            curGap.lastKey = nextGap.lastKey;
            this._gaps.delete(nextGap);

            // keep track of the sum of weights for the merged gaps
            mergedWeightSum += nextGap.weight;

            // See "Note on weight updates" in the JSDoc of this function
            curGap.weight = Math.max(weightToMerge, mergedWeightSum);

            // after the first iteration, always use the 'OrEqual' lookup to find
            // chained ranges
            strictlyAfter = false;
        }
        // the existing gap already covers 'lastKey': return it
        return curGap;
    }

    /**
     * Remove from the set the gaps that contain a given key
     *
     * Because consecutive gaps may be chained (overlap by a unique key),
     * up to two gaps may contain the key when it sits on a gap boundary:
     * the one starting with the key and the one ending with it. The
     * lookup always yields the gap with the greatest 'firstKey' at or
     * before the key, so it is repeated after each removal to also catch
     * the gap on the left side of the boundary.
     *
     * @param overlappingKey - key that removed gaps must contain
     * @returns the number of gaps removed
     */
    removeOverlappingGaps(overlappingKey: string): number {
        let nRemoved = 0;
        for (;;) {
            const closestGap: GapSetEntry | undefined =
                this._gaps.findGreatestLessThanOrEqual(
                    { firstKey: overlappingKey })?.value;
            if (!closestGap || closestGap.lastKey < overlappingKey) {
                // no gap left at or before the key, or the closest one
                // ends before the key: nothing more to remove
                return nRemoved;
            }
            this._gaps.delete(closestGap);
            nRemoved += 1;
        }
    }

    /**
     * @returns the array produced by the underlying sorted set's toArray()
     */
    toArray(): GapSetEntry[] {
        return this._gaps.toArray();
    }

    /**
     * @returns the value produced by the underlying sorted set's toJSON()
     */
    toJSON() {
        return this._gaps.toJSON();
    }
}
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"base-x": "3.0.8",
"base62": "2.0.1",
"bson": "4.0.0",
"collections": "^5.1.13",
"debug": "~2.6.9",
"diskusage": "^1.1.1",
"fcntl": "github:scality/node-fcntl#0.2.2",
Expand Down
Loading

0 comments on commit 7d84c4c

Please sign in to comment.