-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5734d11
commit f4860ad
Showing
4 changed files
with
356 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
import SortedSet from 'collections/sorted-set'; | ||
|
||
import errors from '../../errors'; | ||
|
||
/**
 * A cached listing gap: a range of keys, with both bounds included,
 * associated with a weight.
 */
export type GapSetEntry = {
    /** first key of the gap (gaps are ordered and looked up by this key) */
    firstKey: string,
    /** last key of the gap, equal to 'firstKey' for a single-key gap */
    lastKey: string,
    /** weight of the gap, compared against the max weight when extending it */
    weight: number,
};
|
||
/** | ||
* Specialized data structure to support caching of listing "gaps", | ||
* i.e. ranges of keys that can be skipped over during listing | ||
* (because they only contain delete markers as latest versions) | ||
*/ | ||
export class GapSet {
    /** Cached gaps, identified by and ordered on their 'firstKey' */
    _gaps: SortedSet;
    /** Weight that a gap must stay below when it gets extended */
    _maxWeight: number;

    /**
     * @param maxWeight - weight limit for extending a gap: an extension
     * that would reach this weight starts a new chained gap instead
     */
    constructor(maxWeight = 10000) {
        this._gaps = new SortedSet(
            [],
            // two entries designate the same gap when they start at the same key
            (left, right) => left.firstKey === right.firstKey,
            // gaps are sorted by their first key
            (left, right) => (
                left.firstKey < right.firstKey ? -1 :
                left.firstKey > right.firstKey ? 1 : 0
            )
        );
        this._maxWeight = maxWeight;
    }

    /**
     * Create a GapSet pre-filled with existing gaps.
     *
     * The entries are stored as-is (not copied), so later calls such as
     * extendGap() update the objects passed in 'gaps'.
     *
     * @param gaps - initial gaps
     * @param maxWeight - see constructor
     * @return the new GapSet
     */
    static createFromArray(gaps: GapSetEntry[], maxWeight = 10000): GapSet {
        const gapSet = new GapSet(maxWeight);
        gapSet._gaps.addEach(gaps);
        return gapSet;
    }

    /**
     * Get the cached gap containing 'firstKey', or start a new single-key
     * gap (weight=1) at 'firstKey' if there is none.
     *
     * @param firstKey - key to start the gap from
     * @return the existing or newly created gap
     */
    startGap(firstKey: string): GapSetEntry {
        // only the closest gap starting at or before 'firstKey' may contain it
        const closestGap = this._gaps.findGreatestLessThanOrEqual({ firstKey })?.value;
        if (closestGap && closestGap.lastKey >= firstKey) {
            // return overlapping gap already cached
            return closestGap;
        }
        // create a new gap with one key
        const newGap: GapSetEntry = {
            firstKey,
            lastKey: firstKey,
            weight: 1,
        };
        this._gaps.addEach([newGap]);
        return newGap;
    }

    /**
     * Extend the current gap up to a certain key
     *
     * Existing gaps starting between the end of 'entry' and 'toKey' are
     * merged into 'entry' and removed from the set.
     *
     * === Note on weight updates ===
     *
     * The gap weight is maintained to its real value when either of those is true:
     * - 'toKey' extends the current gap (potentially merged with others overlapping)
     *   => the weight becomes the initial 'entry.weight' plus 'extraWeight'
     * - or 'toKey' is still within the current gap
     *   => do not update the weight
     *
     * In other cases, a heuristic is used instead based on the following ideas:
     * - the new real weight must be greater than "entry.weight + extraWeight"
     *   because the gap extends past 'toKey'
     * - the new real weight must be greater than the sum of the weights of
     *   merged gaps, because they do not overlap
     *
     * Using the max of the two values is therefore a reasonable lower bound
     * of the real weight. Because weights drive eviction behavior, always
     * updating with lower bounds guarantees that all entries can eventually
     * be evicted (otherwise, they may be increasing forever and get stuck in
     * the cache, potentially causing premature eviction of other useful gaps).
     *
     * It also retains large weight values when merging with larger gaps, which
     * encourages keeping those gaps in the cache.
     *
     * Note that in case all gaps merged were chained together, the heuristic
     * naturally yields the real weight.
     *
     * === Note on the max weight ===
     *
     * When extending past the last merged gap would bring the weight to
     * 'maxWeight' or more, a new gap chained to 'entry' (starting at
     * 'entry.lastKey') is created and returned instead. A single-key gap is
     * the exception: the chained gap would start at the same key as 'entry',
     * and the set identifies gaps by their first key, so it could not be
     * stored next to 'entry'. Such a gap is extended in place instead.
     *
     * @param entry - gap to extend, normally obtained from startGap() or
     * from a previous call to extendGap()
     * @param toKey - key to extend the gap to, must not be lower than
     * 'entry.firstKey'
     * @param extraWeight - weight added by going from 'entry.lastKey' to 'toKey'
     * @return the gap that now ends the range: 'entry' itself, or the new
     * chained gap
     * @throws NotImplemented error if 'toKey' is before 'entry.firstKey'
     */
    extendGap(entry: GapSetEntry, toKey: string, extraWeight: number): GapSetEntry {
        // the code doesn't support extending backwards, so let's ensure this
        if (toKey < entry.firstKey) {
            throw errors.NotImplemented.customizeDescription(
                `cannot extend gap backwards "${entry.firstKey}" => "${toKey}"`);
        }
        // See "Note on weight updates" in the JSDoc of this function for a
        // thorough explanation about how we update weights
        const updatedWeight = entry.weight + extraWeight;
        let mergedWeightSum = entry.weight;

        // Gaps with more than one key may be chained (g1.lastKey == g2.firstKey)
        // and need an 'OrEqual' lookup to find the chained gap. Single-key gaps
        // are never chained, and an 'OrEqual' lookup would yield 'entry' itself,
        // so the first lookup must be strict in that case.
        let lookupChainedGap = entry.firstKey !== entry.lastKey;

        // loop over existing mergeable gaps straddled by 'lastKey' -> 'toKey'
        while (toKey > entry.lastKey) {
            const fromKey = { firstKey: entry.lastKey };
            const nextGap = (lookupChainedGap
                ? this._gaps.findLeastGreaterThanOrEqual(fromKey)
                : this._gaps.findLeastGreaterThan(fromKey))?.value;
            // if no more gap or if the next gap starts beyond 'toKey', stop merging
            if (!nextGap || nextGap.firstKey > toKey) {
                // 'entry' can only still be a single-key gap here if nothing
                // was merged into it
                const isSingleKeyGap = entry.firstKey === entry.lastKey;
                // Extend the existing gap if its new weight doesn't exceed the
                // max. The check uses 'updatedWeight' (the value assigned
                // below) because 'entry.weight' may already include
                // 'extraWeight' after a merge. Single-key gaps are always
                // extended, see "Note on the max weight".
                if (updatedWeight < this._maxWeight || isSingleKeyGap) {
                    entry.lastKey = toKey;
                    entry.weight = updatedWeight;
                    return entry;
                }
                // chain a new gap to the previous one by using the same key as a link
                // (this is important to ensure that listings are aware that it is a
                // single contiguous gap)
                const newGap: GapSetEntry = {
                    firstKey: entry.lastKey,
                    lastKey: toKey,
                    weight: extraWeight,
                };
                this._gaps.addEach([newGap]);
                return newGap;
            }
            // merge 'nextGap' into 'entry': all the added weight from the
            // operation is included in 'extraWeight', we are just getting rid
            // of the gaps that would overlap here
            entry.lastKey = nextGap.lastKey;
            this._gaps.delete(nextGap);

            // keep track of the sum of weights for the merged gaps
            mergedWeightSum += nextGap.weight;

            // See "Note on weight updates" in the JSDoc of this function
            entry.weight = Math.max(updatedWeight, mergedWeightSum);

            // after the first iteration, always use the 'OrEqual' lookup to
            // find chained ranges
            lookupChainedGap = true;
        }
        // the existing gap already covers 'toKey': return it
        return entry;
    }

    /**
     * Remove the cached gaps that contain a given key.
     *
     * @param overlappingKey - key that invalidates the gaps containing it
     * @return number of gaps removed: 0, 1, or 2 when the key is the link
     * between two chained gaps
     */
    removeOverlappingGaps(overlappingKey: string): number {
        const closestGap = this._gaps.findGreatestLessThanOrEqual(
            { firstKey: overlappingKey })?.value;
        if (!closestGap || closestGap.lastKey < overlappingKey) {
            return 0;
        }
        this._gaps.delete(closestGap);
        if (closestGap.firstKey !== overlappingKey) {
            // the key is strictly after the start of the gap, hence it
            // cannot belong to a previous gap
            return 1;
        }
        // Because consecutive gaps may overlap by a unique key, the key may
        // be on a gap boundary. The lookup above returned the gap *starting*
        // at the key, so the gap to check is the previous one, which may
        // *end* at the key.
        const previousGap = this._gaps.findGreatestLessThan(
            { firstKey: overlappingKey })?.value;
        if (previousGap && previousGap.lastKey >= overlappingKey) {
            this._gaps.delete(previousGap);
            return 2;
        }
        return 1;
    }

    /**
     * @return the cached gaps, as returned by the underlying sorted set
     */
    toArray() {
        return this._gaps.toArray();
    }

    /**
     * @return the JSON-serializable form of the underlying sorted set
     */
    toJSON() {
        return this._gaps.toJSON();
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
import { GapSet } from '../../../../lib/algos/cache/GapSet'; | ||
|
||
// Unit tests for GapSet::startGap() and GapSet::extendGap()
describe('GapSet', () => {
    // two independent gaps
    const INITIAL_GAPSET = [
        { firstKey: 'bar', lastKey: 'baz', weight: 10 },
        { firstKey: 'qux', lastKey: 'quz', weight: 20 },
    ];
    // a single-key gap followed by chained gaps, where each gap starts at
    // the last key of the previous one
    const INITIAL_GAPSET_WITH_CHAIN = [
        // single-key gap
        { firstKey: 'ape', lastKey: 'ape', weight: 1 },
        // start of chain
        { firstKey: 'bar', lastKey: 'baz', weight: 10 },
        { firstKey: 'baz', lastKey: 'qux', weight: 15 },
        { firstKey: 'qux', lastKey: 'quz', weight: 20 },
        { firstKey: 'quz', lastKey: 'rat', weight: 25 },
        // end of chain
        { firstKey: 'rat', lastKey: 'yak', weight: 30 },
    ];

    let gapsArray;
    let gapSet;
    let gapsArrayWithChain;
    let gapSetWithChain;
    beforeEach(() => {
        // work on deep copies of the fixtures, so that the tests can update
        // the entries while still comparing results against the INITIAL_*
        // constants
        gapsArray = JSON.parse(
            JSON.stringify(INITIAL_GAPSET)
        );
        gapSet = GapSet.createFromArray(gapsArray);
        gapsArrayWithChain = JSON.parse(
            JSON.stringify(INITIAL_GAPSET_WITH_CHAIN)
        );
        gapSetWithChain = GapSet.createFromArray(gapsArrayWithChain);
    });

    describe('GapSet::startGap()', () => {
        it('should start a gap with a single key in empty gap set', () => {
            const emptyGapSet = new GapSet();
            emptyGapSet.startGap('foo');
            expect(emptyGapSet.toArray()).toEqual([
                { firstKey: 'foo', lastKey: 'foo', weight: 1 },
            ]);
        });

        it('should start a gap with a single key in non-empty gap set', () => {
            const gap = gapSet.startGap('foo');
            expect(gap).toEqual({ firstKey: 'foo', lastKey: 'foo', weight: 1 });
            expect(gapSet.toArray()).toEqual([
                { firstKey: 'bar', lastKey: 'baz', weight: 10 },
                { firstKey: 'foo', lastKey: 'foo', weight: 1 },
                { firstKey: 'qux', lastKey: 'quz', weight: 20 },
            ]);
        });

        it('should return an existing gap that includes the initial key', () => {
            const gap = gapSet.startGap('bat');
            expect(gap).toEqual({ firstKey: 'bar', lastKey: 'baz', weight: 10 });
            expect(gapSet.toArray()).toEqual(INITIAL_GAPSET);
        });

        it('should return an existing gap that starts with the initial key', () => {
            const gap = gapSet.startGap('qux');
            expect(gap).toEqual({ firstKey: 'qux', lastKey: 'quz', weight: 20 });
            expect(gapSet.toArray()).toEqual(INITIAL_GAPSET);
        });

        it('should return an existing gap that ends with the initial key', () => {
            const gap = gapSet.startGap('quz');
            expect(gap).toEqual({ firstKey: 'qux', lastKey: 'quz', weight: 20 });
            expect(gapSet.toArray()).toEqual(INITIAL_GAPSET);
        });

        it('should return the existing chained gap that starts with the initial key', () => {
            // 'baz' is both the last key of 'bar' -> 'baz' and the first key
            // of 'baz' -> 'qux': the gap starting with the key is returned
            const gap = gapSetWithChain.startGap('baz');
            expect(gap).toEqual({ firstKey: 'baz', lastKey: 'qux', weight: 15 });
            expect(gapSetWithChain.toArray()).toEqual(INITIAL_GAPSET_WITH_CHAIN);
        });
    });

    describe('GapSet::extendGap()', () => {
        it('should throw with NotImplemented if trying to extend backwards', () => {
            const gap = gapsArray[0]; // 'bar' -> 'baz' (weight=10)
            expect(() => gapSet.extendGap(gap, 'bag', 5)).toThrowError('NotImplemented');
        });

        it('should not extend a gap if key strictly before last gap key', () => {
            const gap = gapsArray[0]; // 'bar' -> 'baz' (weight=10)
            const extendedGap = gapSet.extendGap(gap, 'bat', 5);
            expect(extendedGap).toEqual({ firstKey: 'bar', lastKey: 'baz', weight: 10 });
            expect(gapSet.toArray()).toEqual(INITIAL_GAPSET);
        });

        it('should not extend a gap if key equals last gap key', () => {
            const gap = gapsArray[0]; // 'bar' -> 'baz' (weight=10)
            const extendedGap = gapSet.extendGap(gap, 'baz', 5);
            expect(extendedGap).toEqual({ firstKey: 'bar', lastKey: 'baz', weight: 10 });
            expect(gapSet.toArray()).toEqual(INITIAL_GAPSET);
        });

        it('should extend a gap with no next gap', () => {
            const gap = gapsArray[1]; // 'qux' -> 'quz' (weight=20)
            const extendedGap = gapSet.extendGap(gap, 'rat', 5);
            expect(extendedGap).toEqual({ firstKey: 'qux', lastKey: 'rat', weight: 20 + 5 });
            expect(gapSet.toArray()).toEqual([
                { firstKey: 'bar', lastKey: 'baz', weight: 10 },
                { firstKey: 'qux', lastKey: 'rat', weight: 20 + 5 },
            ]);
        });

        it('should extend a gap without overlap with next gap', () => {
            const gap = gapsArray[0]; // 'bar' -> 'baz' (weight=10)
            const extendedGap = gapSet.extendGap(gap, 'dog', 5);
            expect(extendedGap).toEqual({ firstKey: 'bar', lastKey: 'dog', weight: 10 + 5 });
            expect(gapSet.toArray()).toEqual([
                { firstKey: 'bar', lastKey: 'dog', weight: 10 + 5 },
                { firstKey: 'qux', lastKey: 'quz', weight: 20 },
            ]);
        });

        it('should extend a gap with single-key overlap with next gap', () => {
            const gap = gapsArray[0]; // 'bar' -> 'baz' (weight=10)
            const extendedGap = gapSet.extendGap(gap, 'qux', 100);
            expect(extendedGap).toEqual({ firstKey: 'bar', lastKey: 'quz', weight: 10 + 100 });
            expect(gapSet.toArray()).toEqual([
                { firstKey: 'bar', lastKey: 'quz', weight: 10 + 100 },
            ]);
        });

        it('should extend a gap with overlap with next gap and large extra weight', () => {
            const gap = gapsArray[0]; // 'bar' -> 'baz' (weight=10)
            const extendedGap = gapSet.extendGap(gap, 'quxxx', 100);
            // updated weight is the sum of current range weight and extra weight
            expect(extendedGap).toEqual({ firstKey: 'bar', lastKey: 'quz', weight: 10 + 100 });
            expect(gapSet.toArray()).toEqual([
                { firstKey: 'bar', lastKey: 'quz', weight: 10 + 100 },
            ]);
        });

        it('should extend a gap with overlap with next gap and small extra weight', () => {
            const gap = gapsArray[0]; // 'bar' -> 'baz' (weight=10)
            const extendedGap = gapSet.extendGap(gap, 'quxxx', 3);
            // updated weight is the sum of merged ranges' weights
            expect(extendedGap).toEqual({ firstKey: 'bar', lastKey: 'quz', weight: 10 + 20 });
            expect(gapSet.toArray()).toEqual([
                { firstKey: 'bar', lastKey: 'quz', weight: 10 + 20 },
            ]);
        });

        it('should extend a single-key gap with no other gap', () => {
            const singleKeyGap = { firstKey: 'foo', lastKey: 'foo', weight: 10 };
            const singleKeyGapSet = GapSet.createFromArray([singleKeyGap]);
            const extendedGap = singleKeyGapSet.extendGap(singleKeyGap, 'qux', 20);
            expect(extendedGap).toEqual({ firstKey: 'foo', lastKey: 'qux', weight: 10 + 20 });
            expect(singleKeyGapSet.toArray()).toEqual([
                { firstKey: 'foo', lastKey: 'qux', weight: 10 + 20 },
            ]);
        });

        it('should extend a single-key gap with overlap on chained gaps', () => {
            const gap = gapsArrayWithChain[0]; // 'ape' -> 'ape' (weight=1)
            const extendedGap = gapSetWithChain.extendGap(gap, 'dog', 30);
            // 'bar' -> 'baz' and 'baz' -> 'qux' both start before 'dog', so
            // they are merged; the rest of the chain is left untouched
            expect(extendedGap).toEqual({ firstKey: 'ape', lastKey: 'qux', weight: 1 + 30 });
            expect(gapSetWithChain.toArray()).toEqual([
                { firstKey: 'ape', lastKey: 'qux', weight: 1 + 30 },
                { firstKey: 'qux', lastKey: 'quz', weight: 20 },
                { firstKey: 'quz', lastKey: 'rat', weight: 25 },
                { firstKey: 'rat', lastKey: 'yak', weight: 30 },
            ]);
        });
    });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters