Skip to content

Commit

Permalink
S3C-3926 [WIP] GapSet
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-gramain committed Jan 20, 2024
1 parent 5734d11 commit f4860ad
Show file tree
Hide file tree
Showing 4 changed files with 356 additions and 0 deletions.
173 changes: 173 additions & 0 deletions lib/algos/cache/GapSet.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
import SortedSet from 'collections/sorted-set';

import errors from '../../errors';

/**
 * A cached listing gap: an inclusive range of keys together with a weight.
 */
export type GapSetEntry = {
    /** first key of the range (inclusive) */
    firstKey: string;
    /** last key of the range (inclusive) */
    lastKey: string;
    /** weight attached to the range (a freshly started single-key gap has weight 1) */
    weight: number;
};

/**
 * Specialized data structure to support caching of listing "gaps",
 * i.e. ranges of keys that can be skipped over during listing
 * (because they only contain delete markers as latest versions).
 *
 * Gaps are stored in a sorted set where they are ordered and identified
 * by their 'firstKey'. Two consecutive gaps may be "chained", i.e. share
 * a single boundary key (g1.lastKey === g2.firstKey).
 */
export class GapSet {
    _gaps: SortedSet;
    _maxWeight: number;

    /**
     * @param maxWeight - weight from which extendGap() chains a new gap
     *   to the current one instead of growing the current one further
     */
    constructor(maxWeight = 10000) {
        this._gaps = new SortedSet(
            [],
            // two entries are the same gap when they start on the same key
            (left, right) => left.firstKey === right.firstKey,
            // gaps are ordered by their first key
            (left, right) => (
                left.firstKey < right.firstKey ? -1 :
                    left.firstKey > right.firstKey ? 1 : 0
            )
        );
        this._maxWeight = maxWeight;
    }

    /**
     * Create a GapSet populated with the given entries.
     *
     * The entries are added as given, without any validation.
     *
     * @param gaps - entries to add to the new set
     * @param maxWeight - see the constructor
     * @returns the new GapSet
     */
    static createFromArray(gaps: GapSetEntry[], maxWeight = 10000) {
        const gapSet = new GapSet(maxWeight);
        gapSet._gaps.addEach(gaps);
        return gapSet;
    }

    /**
     * Get the gap to build upon when a gap starts at 'firstKey'.
     *
     * @param firstKey - first key of the gap
     * @returns the cached gap whose range contains 'firstKey' if there
     *   is one (when 'firstKey' is a chain boundary, this is the gap
     *   starting on it), otherwise a new single-key gap of weight 1
     *   that has been added to the set
     */
    startGap(firstKey: string): GapSetEntry {
        const closestGap = this._gaps.findGreatestLessThanOrEqual({ firstKey })?.value;
        if (closestGap && closestGap.lastKey >= firstKey) {
            // return overlapping gap already cached
            return closestGap;
        }
        // create a new gap with one key
        const newGap: GapSetEntry = {
            firstKey,
            lastKey: firstKey,
            weight: 1,
        };
        this._gaps.addEach([newGap]);
        return newGap;
    }

    /**
     * Extend the current gap up to a certain key
     *
     * === Note on weight updates ===
     *
     * The gap weight is maintained to its real value when either of those is true:
     * - 'toKey' extends the current gap without merging any other gap
     *   => add 'extraWeight' to 'entry.weight'
     * - or 'toKey' is still within the current gap
     *   => do not update the weight
     *
     * In other cases, a heuristic is used instead based on the following ideas:
     * - the new real weight must be greater than "entry.weight + extraWeight"
     *   because the gap extends past 'toKey'
     * - the new real weight must be greater than the sum of the weights of
     *   merged gaps, because they do not overlap
     *
     * Using the max of the two values is therefore a reasonable lower bound
     * of the real weight. Because weights drive eviction behavior, always
     * updating with lower bounds guarantees that all entries can eventually
     * be evicted (otherwise, they may be increasing forever and get stuck in
     * the cache, potentially causing premature eviction of other useful gaps).
     *
     * It also retains large weight values when merging with larger gaps, which
     * entices to keep those gaps in the cache.
     *
     * Note that in case all gaps merged were chained together, the heuristic
     * naturally yields the real weight.
     *
     * @param entry - gap to extend (it is modified in place)
     * @param toKey - key to extend the gap to, must not be before 'entry.firstKey'
     * @param extraWeight - weight added by the extension
     * @returns the gap covering 'toKey': either 'entry' itself, or a new
     *   gap chained to it when the extended weight reaches the max weight
     * @throws NotImplemented error if 'toKey' is before 'entry.firstKey'
     */
    extendGap(entry: GapSetEntry, toKey: string, extraWeight: number): GapSetEntry {
        // the code doesn't support extending backwards, so let's ensure this
        if (toKey < entry.firstKey) {
            throw errors.NotImplemented.customizeDescription(
                `cannot extend gap backwards "${entry.firstKey}" => "${toKey}"`);
        }
        // Single-key gaps are never chained: the first lookup must be strictly
        // after 'entry', as an inclusive lookup would yield 'entry' itself.
        // Gaps with more than one key may be chained (g1.lastKey == g2.firstKey)
        // and need the inclusive lookup to find the chained gap.
        let includeChained = entry.firstKey !== entry.lastKey;

        // See "Note on weight updates" in the JSDoc of this function for a
        // thorough explanation about how we update weights
        const updatedWeight = entry.weight + extraWeight;
        let mergedWeightSum = entry.weight;

        // loop over existing mergeable gaps straddled by 'lastKey' -> 'toKey'
        while (toKey > entry.lastKey) {
            const searchKey = { firstKey: entry.lastKey };
            const nextGap = (includeChained
                ? this._gaps.findLeastGreaterThanOrEqual(searchKey)
                : this._gaps.findLeastGreaterThan(searchKey))?.value;

            // if no more gap or if the next gap starts beyond 'toKey', stop merging
            if (!nextGap || nextGap.firstKey > toKey) {
                // keep the lower bound accumulated over merged gaps instead
                // of falling back to 'updatedWeight' alone
                const extendedWeight = Math.max(updatedWeight, mergedWeightSum);
                // A single-key gap cannot be the start of a chain: the chained gap
                // would begin on the same key, which identifies the same entry in
                // the sorted set, so it would never be inserted. Extend such a gap
                // in place whatever the weight.
                const isSingleKey = entry.firstKey === entry.lastKey;
                if (isSingleKey || extendedWeight < this._maxWeight) {
                    entry.lastKey = toKey;
                    entry.weight = extendedWeight;
                    return entry;
                }
                // chain a new gap to the previous one by using the same key as a link
                // (this is important to ensure that listings are aware that it is a
                // single contiguous gap)
                const newGap: GapSetEntry = {
                    firstKey: entry.lastKey,
                    lastKey: toKey,
                    weight: extraWeight,
                };
                this._gaps.addEach([newGap]);
                return newGap;
            }
            // merge 'nextGap' into 'entry': all the added weight from the operation
            // is included in 'extraWeight', here we are just getting rid of the
            // gaps that would overlap
            entry.lastKey = nextGap.lastKey;
            this._gaps.delete(nextGap);

            // keep track of the sum of weights for the merged gaps
            mergedWeightSum += nextGap.weight;

            // See "Note on weight updates" in the JSDoc of this function
            entry.weight = Math.max(updatedWeight, mergedWeightSum);

            // after the first iteration, always use the inclusive lookup to find
            // chained ranges
            includeChained = true;
        }
        // the existing gap already covers 'toKey': return it
        return entry;
    }

    /**
     * Remove the gaps whose range contains a key.
     *
     * @param overlappingKey - key that cached gaps must no longer contain
     * @returns the number of gaps removed: 0, 1, or 2 when the key is the
     *   boundary shared by two chained gaps
     */
    removeOverlappingGaps(overlappingKey: string): number {
        const closestGap = this._gaps.findGreatestLessThanOrEqual(
            { firstKey: overlappingKey })?.value;
        if (!closestGap) {
            return 0;
        }
        let nbRemoved = 0;
        if (closestGap.lastKey >= overlappingKey) {
            this._gaps.delete(closestGap);
            nbRemoved += 1;
        }
        if (closestGap.firstKey === overlappingKey) {
            // because consecutive gaps may overlap by a unique key, the gap
            // before 'closestGap' also contains the key when it ends on it
            const previousGap = this._gaps.findGreatestLessThan(
                { firstKey: overlappingKey })?.value;
            if (previousGap && previousGap.lastKey >= overlappingKey) {
                this._gaps.delete(previousGap);
                nbRemoved += 1;
            }
        }
        return nbRemoved;
    }

    /**
     * @returns the result of toArray() on the underlying sorted set of gaps
     */
    toArray() {
        return this._gaps.toArray();
    }

    /**
     * @returns the result of toJSON() on the underlying sorted set of gaps
     */
    toJSON() {
        return this._gaps.toJSON();
    }
}
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"base-x": "3.0.8",
"base62": "2.0.1",
"bson": "4.0.0",
"collections": "^5.1.13",
"debug": "~2.6.9",
"diskusage": "^1.1.1",
"fcntl": "github:scality/node-fcntl#0.2.2",
Expand Down
170 changes: 170 additions & 0 deletions tests/unit/algos/cache/GapSet.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
import { GapSet } from '../../../../lib/algos/cache/GapSet';

// Unit tests of GapSet: starting, extending and removing cached listing gaps
describe('GapSet', () => {
    // two gaps that do not touch each other
    const INITIAL_GAPSET = [
        { firstKey: 'bar', lastKey: 'baz', weight: 10 },
        { firstKey: 'qux', lastKey: 'quz', weight: 20 },
    ];
    // a single-key gap followed by a chain of gaps linked by their boundary keys
    const INITIAL_GAPSET_WITH_CHAIN = [
        // single-key gap
        { firstKey: 'ape', lastKey: 'ape', weight: 1 },
        // start of chain
        { firstKey: 'bar', lastKey: 'baz', weight: 10 },
        { firstKey: 'baz', lastKey: 'qux', weight: 15 },
        { firstKey: 'qux', lastKey: 'quz', weight: 20 },
        { firstKey: 'quz', lastKey: 'rat', weight: 25 },
        // end of chain
        { firstKey: 'rat', lastKey: 'yak', weight: 30 },
    ];

    let gapsArray;
    let gapSet;
    let gapsArrayWithChain;
    let gapSetWithChain;
    beforeEach(() => {
        // deep-copy the fixtures: the tests pass these entries to extendGap()
        // and the constants are used afterwards as the expected untouched state
        gapsArray = JSON.parse(
            JSON.stringify(INITIAL_GAPSET)
        );
        gapSet = GapSet.createFromArray(gapsArray);
        gapsArrayWithChain = JSON.parse(
            JSON.stringify(INITIAL_GAPSET_WITH_CHAIN)
        );
        gapSetWithChain = GapSet.createFromArray(gapsArrayWithChain);
    });

    describe('GapSet::startGap()', () => {
        it('should start a gap with a single key in empty gap set', () => {
            const emptyGapSet = new GapSet();
            emptyGapSet.startGap('foo');
            expect(emptyGapSet.toArray()).toEqual([
                { firstKey: 'foo', lastKey: 'foo', weight: 1 },
            ]);
        });

        it('should start a gap with a single key in non-empty gap set', () => {
            const gap = gapSet.startGap('foo');
            expect(gap).toEqual({ firstKey: 'foo', lastKey: 'foo', weight: 1 });
            expect(gapSet.toArray()).toEqual([
                { firstKey: 'bar', lastKey: 'baz', weight: 10 },
                { firstKey: 'foo', lastKey: 'foo', weight: 1 },
                { firstKey: 'qux', lastKey: 'quz', weight: 20 },
            ]);
        });

        it('should return an existing gap that includes the initial key', () => {
            const gap = gapSet.startGap('bat');
            expect(gap).toEqual({ firstKey: 'bar', lastKey: 'baz', weight: 10 });
            expect(gapSet.toArray()).toEqual(INITIAL_GAPSET);
        });

        it('should return an existing gap that starts with the initial key', () => {
            const gap = gapSet.startGap('qux');
            expect(gap).toEqual({ firstKey: 'qux', lastKey: 'quz', weight: 20 });
            expect(gapSet.toArray()).toEqual(INITIAL_GAPSET);
        });

        it('should return an existing gap that ends with the initial key', () => {
            const gap = gapSet.startGap('quz');
            expect(gap).toEqual({ firstKey: 'qux', lastKey: 'quz', weight: 20 });
            expect(gapSet.toArray()).toEqual(INITIAL_GAPSET);
        });

        it('should return the existing chained gap that starts with the initial key', () => {
            const gap = gapSetWithChain.startGap('baz');
            expect(gap).toEqual({ firstKey: 'baz', lastKey: 'qux', weight: 15 });
            expect(gapSetWithChain.toArray()).toEqual(INITIAL_GAPSET_WITH_CHAIN);
        });
    });

    describe('GapSet::extendGap()', () => {
        it('should throw with NotImplemented if trying to extend backwards', () => {
            const gap = gapsArray[0]; // 'bar' -> 'baz' (weight=10)
            expect(() => gapSet.extendGap(gap, 'bag', 5)).toThrowError('NotImplemented');
        });

        it('should not extend a gap if key strictly before last gap key', () => {
            const gap = gapsArray[0]; // 'bar' -> 'baz' (weight=10)
            const extendedGap = gapSet.extendGap(gap, 'bat', 5);
            expect(extendedGap).toEqual({ firstKey: 'bar', lastKey: 'baz', weight: 10 });
            expect(gapSet.toArray()).toEqual(INITIAL_GAPSET);
        });

        it('should not extend a gap if key equals last gap key', () => {
            const gap = gapsArray[0]; // 'bar' -> 'baz' (weight=10)
            const extendedGap = gapSet.extendGap(gap, 'baz', 5);
            expect(extendedGap).toEqual({ firstKey: 'bar', lastKey: 'baz', weight: 10 });
            expect(gapSet.toArray()).toEqual(INITIAL_GAPSET);
        });

        it('should extend a gap with no next gap', () => {
            const gap = gapsArray[1]; // 'qux' -> 'quz' (weight=20)
            const extendedGap = gapSet.extendGap(gap, 'rat', 5);
            expect(extendedGap).toEqual({ firstKey: 'qux', lastKey: 'rat', weight: 20 + 5 });
            expect(gapSet.toArray()).toEqual([
                { firstKey: 'bar', lastKey: 'baz', weight: 10 },
                { firstKey: 'qux', lastKey: 'rat', weight: 20 + 5 },
            ]);
        });

        it('should extend a gap without overlap with next gap', () => {
            const gap = gapsArray[0]; // 'bar' -> 'baz' (weight=10)
            const extendedGap = gapSet.extendGap(gap, 'dog', 5);
            expect(extendedGap).toEqual({ firstKey: 'bar', lastKey: 'dog', weight: 10 + 5 });
            expect(gapSet.toArray()).toEqual([
                { firstKey: 'bar', lastKey: 'dog', weight: 10 + 5 },
                { firstKey: 'qux', lastKey: 'quz', weight: 20 },
            ]);
        });

        it('should extend a gap with single-key overlap with next gap', () => {
            const gap = gapsArray[0]; // 'bar' -> 'baz' (weight=10)
            const extendedGap = gapSet.extendGap(gap, 'qux', 100);
            expect(extendedGap).toEqual({ firstKey: 'bar', lastKey: 'quz', weight: 10 + 100 });
            expect(gapSet.toArray()).toEqual([
                { firstKey: 'bar', lastKey: 'quz', weight: 10 + 100 },
            ]);
        });

        it('should extend a gap with overlap with next gap and large extra weight', () => {
            const gap = gapsArray[0]; // 'bar' -> 'baz' (weight=10)
            const extendedGap = gapSet.extendGap(gap, 'quxxx', 100);
            // updated weight is the sum of current range weight and extra weight
            expect(extendedGap).toEqual({ firstKey: 'bar', lastKey: 'quz', weight: 10 + 100 });
            expect(gapSet.toArray()).toEqual([
                { firstKey: 'bar', lastKey: 'quz', weight: 10 + 100 },
            ]);
        });

        it('should extend a gap with overlap with next gap and small extra weight', () => {
            const gap = gapsArray[0]; // 'bar' -> 'baz' (weight=10)
            const extendedGap = gapSet.extendGap(gap, 'quxxx', 3);
            // updated weight is the sum of merged ranges' weights
            expect(extendedGap).toEqual({ firstKey: 'bar', lastKey: 'quz', weight: 10 + 20 });
            expect(gapSet.toArray()).toEqual([
                { firstKey: 'bar', lastKey: 'quz', weight: 10 + 20 },
            ]);
        });

        it('should extend a single-key gap with no other gap', () => {
            const singleKeyGap = { firstKey: 'foo', lastKey: 'foo', weight: 10 };
            const singleKeyGapSet = GapSet.createFromArray([singleKeyGap]);
            const extendedGap = singleKeyGapSet.extendGap(singleKeyGap, 'qux', 20);
            expect(extendedGap).toEqual({ firstKey: 'foo', lastKey: 'qux', weight: 10 + 20 });
            expect(singleKeyGapSet.toArray()).toEqual([
                { firstKey: 'foo', lastKey: 'qux', weight: 10 + 20 },
            ]);
        });

        it('should extend a single-key gap with overlap on chained gaps', () => {
            const gap = gapsArrayWithChain[0]; // 'ape' -> 'ape' (weight=1)
            const extendedGap = gapSetWithChain.extendGap(gap, 'dog', 30);
            expect(extendedGap).toEqual({ firstKey: 'ape', lastKey: 'qux', weight: 1 + 30 });
            expect(gapSetWithChain.toArray()).toEqual([
                { firstKey: 'ape', lastKey: 'qux', weight: 1 + 30 },
                { firstKey: 'qux', lastKey: 'quz', weight: 20 },
                { firstKey: 'quz', lastKey: 'rat', weight: 25 },
                { firstKey: 'rat', lastKey: 'yak', weight: 30 },
            ]);
        });

        it('should chain a new gap if the extended weight reaches the max weight', () => {
            const maxWeight = 25;
            const limitedGapSet = GapSet.createFromArray(gapsArray, maxWeight);
            const gap = gapsArray[1]; // 'qux' -> 'quz' (weight=20)
            const chainedGap = limitedGapSet.extendGap(gap, 'rat', 5);
            // 20 + 5 is not below the max weight: the original gap is left
            // untouched and a new gap is linked to it through 'quz'
            expect(chainedGap).toEqual({ firstKey: 'quz', lastKey: 'rat', weight: 5 });
            expect(limitedGapSet.toArray()).toEqual([
                { firstKey: 'bar', lastKey: 'baz', weight: 10 },
                { firstKey: 'qux', lastKey: 'quz', weight: 20 },
                { firstKey: 'quz', lastKey: 'rat', weight: 5 },
            ]);
        });
    });

    describe('GapSet::removeOverlappingGaps()', () => {
        it('should not remove any gap from an empty gap set', () => {
            const emptyGapSet = new GapSet();
            expect(emptyGapSet.removeOverlappingGaps('foo')).toEqual(0);
            expect(emptyGapSet.toArray()).toEqual([]);
        });

        it('should not remove any gap if the key is before all gaps', () => {
            expect(gapSet.removeOverlappingGaps('ape')).toEqual(0);
            expect(gapSet.toArray()).toEqual(INITIAL_GAPSET);
        });

        it('should not remove any gap if the key is between two gaps', () => {
            expect(gapSet.removeOverlappingGaps('foo')).toEqual(0);
            expect(gapSet.toArray()).toEqual(INITIAL_GAPSET);
        });

        it('should remove the gap that strictly includes the key', () => {
            expect(gapSet.removeOverlappingGaps('bat')).toEqual(1);
            expect(gapSet.toArray()).toEqual([
                { firstKey: 'qux', lastKey: 'quz', weight: 20 },
            ]);
        });

        it('should remove the gap that starts with the key', () => {
            expect(gapSet.removeOverlappingGaps('qux')).toEqual(1);
            expect(gapSet.toArray()).toEqual([
                { firstKey: 'bar', lastKey: 'baz', weight: 10 },
            ]);
        });

        it('should remove the gap that ends with the key', () => {
            expect(gapSet.removeOverlappingGaps('baz')).toEqual(1);
            expect(gapSet.toArray()).toEqual([
                { firstKey: 'qux', lastKey: 'quz', weight: 20 },
            ]);
        });
    });
});
12 changes: 12 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2192,6 +2192,13 @@ collect-v8-coverage@^1.0.0:
resolved "https://registry.yarnpkg.com/collect-v8-coverage/-/collect-v8-coverage-1.0.1.tgz#cc2c8e94fc18bbdffe64d6534570c8a673b27f59"
integrity sha512-iBPtljfCNcTKNAto0KEtDfZ3qzjJvqE3aTGZsbhjSBlorqpXJlaWWtPO35D+ZImoC3KWejX64o+yPGxhWSTzfg==

collections@^5.1.13:
version "5.1.13"
resolved "https://registry.yarnpkg.com/collections/-/collections-5.1.13.tgz#eee204a93b67473c8e74e00e934a997cc2817585"
integrity sha512-SCb6Qd+d3Z02corWQ7/mqXiXeeTdHvkP6TeFSYfGYdCFp1WrjSNZ3j6y8Y3T/7osGEe0iOcU2g1d346l99m4Lg==
dependencies:
weak-map "~1.0.x"

color-convert@^1.9.0:
version "1.9.3"
resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-1.9.3.tgz#bb71850690e1f136567de629d2d5471deda4c1e8"
Expand Down Expand Up @@ -6469,6 +6476,11 @@ walker@^1.0.7:
dependencies:
makeerror "1.0.12"

weak-map@~1.0.x:
version "1.0.8"
resolved "https://registry.yarnpkg.com/weak-map/-/weak-map-1.0.8.tgz#394c18a9e8262e790544ed8b55c6a4ddad1cb1a3"
integrity sha512-lNR9aAefbGPpHO7AEnY0hCFjz1eTkWCXYvkTRrTHs9qv8zJp+SkVYpzfLIFXQQiG3tVvbNFQgVg2bQS8YGgxyw==

webidl-conversions@^5.0.0:
version "5.0.0"
resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-5.0.0.tgz#ae59c8a00b121543a2acc65c0434f57b0fc11aff"
Expand Down

0 comments on commit f4860ad

Please sign in to comment.