From 326a17598729567b737c8e5fa1b6635ef8d6d3a2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 15 Jul 2022 15:26:02 +0100 Subject: [PATCH 01/41] Make DictionaryCache have better expiry properties --- synapse/util/caches/dictionary_cache.py | 137 +++++++++++++++++++----- synapse/util/caches/lrucache.py | 24 ++++- synapse/util/caches/treecache.py | 11 ++ 3 files changed, 145 insertions(+), 27 deletions(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index d267703df0f2..ef4483421ed3 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -14,11 +14,23 @@ import enum import logging import threading -from typing import Any, Dict, Generic, Iterable, Optional, Set, TypeVar +from typing import ( + Any, + Dict, + Generic, + Iterable, + Literal, + Optional, + Set, + Tuple, + TypeVar, + Union, +) import attr from synapse.util.caches.lrucache import LruCache +from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_items logger = logging.getLogger(__name__) @@ -53,20 +65,40 @@ def __len__(self) -> int: return len(self.value) +class _FullCacheKey(enum.Enum): + KEY = object() + + class _Sentinel(enum.Enum): # defining a sentinel in this way allows mypy to correctly handle the # type of a dictionary lookup. sentinel = object() +class _PerKeyValue(Generic[DV]): + __slots__ = ["value"] + + def __init__(self, value: Union[DV, Literal[_Sentinel.sentinel]]) -> None: + self.value = value + + def __len__(self) -> int: + return 1 + + class DictionaryCache(Generic[KT, DKT, DV]): """Caches key -> dictionary lookups, supporting caching partial dicts, i.e. fetching a subset of dictionary keys for a particular key. """ def __init__(self, name: str, max_entries: int = 1000): - self.cache: LruCache[KT, DictionaryEntry] = LruCache( - max_size=max_entries, cache_name=name, size_callback=len + self.cache: LruCache[ + Tuple[KT, Union[DKT, Literal[_FullCacheKey.KEY]]], + Union[_PerKeyValue, Dict[DKT, DV]], + ] = LruCache( + max_size=max_entries, + cache_name=name, + cache_type=TreeCache, + size_callback=len, ) self.name = name @@ -96,20 +128,69 @@ def get( Returns: DictionaryEntry """ - entry = self.cache.get(key, _Sentinel.sentinel) - if entry is not _Sentinel.sentinel: - if dict_keys is None: - return DictionaryEntry( - entry.full, entry.known_absent, dict(entry.value) - ) + + if dict_keys is None: + entry = self.cache.get((key, _FullCacheKey.KEY), _Sentinel.sentinel) + if entry is not _Sentinel.sentinel: + assert isinstance(entry, dict) + return DictionaryEntry(True, set(), entry) + + all_entries = self.cache.get_multi( + (key,), _Sentinel.sentinel, update_metrics=False + ) + if all_entries is _Sentinel.sentinel: + return DictionaryEntry(False, set(), {}) + + values = {} + known_absent = set() + for dict_key, dict_value in iterate_tree_cache_items((), all_entries): + dict_key = dict_key[0] + dict_value = dict_value.value + + assert isinstance(dict_value, _PerKeyValue) + if dict_value.value is _Sentinel.sentinel: + known_absent.add(dict_key) + else: + values[dict_key] = dict_value.value + + return DictionaryEntry(False, known_absent, values) + + values = {} + known_absent = set() + missing = set() + for dict_key in dict_keys: + entry = self.cache.get((key, dict_key), _Sentinel.sentinel) + if entry is _Sentinel.sentinel: + missing.add(dict_key) + continue + + assert isinstance(entry, _PerKeyValue) + + if entry.value is _Sentinel.sentinel: + known_absent.add(entry.value) else: - return DictionaryEntry( - 
entry.full, - entry.known_absent, - {k: entry.value[k] for k in dict_keys if k in entry.value}, - ) + values[dict_key] = entry.value - return DictionaryEntry(False, set(), {}) + if not missing: + return DictionaryEntry(False, known_absent, values) + + entry = self.cache.get( + (key, _FullCacheKey.KEY), _Sentinel.sentinel, update_metrics=False + ) + if entry is _Sentinel.sentinel: + return DictionaryEntry(False, known_absent, values) + + assert isinstance(entry, dict) + + values = {} + for dict_key in dict_keys: + value = entry.get(dict_key, _Sentinel.sentinel) # type: ignore[arg-type] + self.cache[(key, dict_key)] = _PerKeyValue(value) + + if value is not _Sentinel.sentinel: + values[dict_key] = value + + return DictionaryEntry(True, set(), values) def invalidate(self, key: KT) -> None: self.check_thread() @@ -117,7 +198,9 @@ def invalidate(self, key: KT) -> None: # Increment the sequence number so that any SELECT statements that # raced with the INSERT don't update the cache (SYN-369) self.sequence += 1 - self.cache.pop(key, None) + + # Del-multi accepts truncated tuples. + self.cache.del_multi((key,)) # type: ignore[arg-type] def invalidate_all(self) -> None: self.check_thread() @@ -149,20 +232,22 @@ def update( # Only update the cache if the caches sequence number matches the # number that the cache had before the SELECT was started (SYN-369) if fetched_keys is None: - self._insert(key, value, set()) + self._insert(key, value) else: self._update_or_insert(key, value, fetched_keys) def _update_or_insert( - self, key: KT, value: Dict[DKT, DV], known_absent: Iterable[DKT] + self, key: KT, value: Dict[DKT, DV], fetched_keys: Iterable[DKT] ) -> None: - # We pop and reinsert as we need to tell the cache the size may have - # changed - entry: DictionaryEntry = self.cache.pop(key, DictionaryEntry(False, set(), {})) - entry.value.update(value) - entry.known_absent.update(known_absent) - self.cache[key] = entry + for dict_key, dict_value in value.items(): + self.cache[(key, dict_key)] = _PerKeyValue(dict_value) + + for dict_key in fetched_keys: + if (key, dict_key) in self.cache: + continue + + self.cache[(key, dict_key)] = _PerKeyValue(_Sentinel.sentinel) - def _insert(self, key: KT, value: Dict[DKT, DV], known_absent: Set[DKT]) -> None: - self.cache[key] = DictionaryEntry(True, known_absent, value) + def _insert(self, key: KT, value: Dict[DKT, DV]) -> None: + self.cache[(key, _FullCacheKey.KEY)] = value diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 31f41fec8284..b2b999cd6022 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -44,7 +44,11 @@ from synapse.metrics.jemalloc import get_jemalloc_stats from synapse.util import Clock, caches from synapse.util.caches import CacheMetric, EvictionReason, register_cache -from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry +from synapse.util.caches.treecache import ( + TreeCache, + TreeCacheNode, + iterate_tree_cache_entry, +) from synapse.util.linked_list import ListNode if TYPE_CHECKING: @@ -568,6 +572,22 @@ def cache_get( metrics.inc_misses() return default + @synchronized + def cache_get_multi( + key: tuple, + default: Optional[T] = None, + update_metrics: bool = True, + ) -> Union[None, T, TreeCacheNode]: + node = cache.get(key, None) # type: ignore[arg-type] + if node is not None: + if update_metrics and metrics: + metrics.inc_hits() + return node # type: ignore[return-value] + else: + if update_metrics and metrics: + metrics.inc_misses() + return 
default + @synchronized def cache_set( key: KT, value: VT, callbacks: Collection[Callable[[], None]] = () @@ -674,6 +694,8 @@ def cache_contains(key: KT) -> bool: self.setdefault = cache_set_default self.pop = cache_pop self.del_multi = cache_del_multi + if cache_type is TreeCache: + self.get_multi = cache_get_multi # `invalidate` is exposed for consistency with DeferredCache, so that it can be # invalidated by the cache invalidation replication stream. self.invalidate = cache_del_multi diff --git a/synapse/util/caches/treecache.py b/synapse/util/caches/treecache.py index e78305f78746..c089cb89f416 100644 --- a/synapse/util/caches/treecache.py +++ b/synapse/util/caches/treecache.py @@ -139,3 +139,14 @@ def iterate_tree_cache_entry(d): yield from iterate_tree_cache_entry(value_d) else: yield d + + +def iterate_tree_cache_items(key, value): + """Helper function to iterate over the leaves of a tree, i.e. a dict of that + can contain dicts. + """ + if isinstance(value, TreeCacheNode): + for sub_key, sub_value in value.items(): + yield from iterate_tree_cache_items((*key, sub_key), sub_value) + else: + yield key, value From 40a8fba5f655d630700a3319834e1abc0fa7f3e4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 15 Jul 2022 15:27:03 +0100 Subject: [PATCH 02/41] Newsfile --- changelog.d/13292.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/13292.misc diff --git a/changelog.d/13292.misc b/changelog.d/13292.misc new file mode 100644 index 000000000000..67fec55330b0 --- /dev/null +++ b/changelog.d/13292.misc @@ -0,0 +1 @@ +Make `DictionaryCache` expire full entries if they haven't been queried in a while, even if specific keys have been queried recently. From a22716c5c58f1ee80b5e830fd433b6d8b0b19fa2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 15 Jul 2022 15:31:57 +0100 Subject: [PATCH 03/41] Fix literal --- synapse/util/caches/dictionary_cache.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index ef4483421ed3..810b8bb71645 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -14,20 +14,10 @@ import enum import logging import threading -from typing import ( - Any, - Dict, - Generic, - Iterable, - Literal, - Optional, - Set, - Tuple, - TypeVar, - Union, -) +from typing import Any, Dict, Generic, Iterable, Optional, Set, Tuple, TypeVar, Union import attr +from typing_extensions import Literal from synapse.util.caches.lrucache import LruCache from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_items From f046366d2ada68dcbf533b7feee57cef0cb6e682 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 15 Jul 2022 15:54:01 +0100 Subject: [PATCH 04/41] Fix test --- tests/storage/test_state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index 8043bdbde2c7..58a85415e12b 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -369,7 +369,7 @@ def test_get_state_for_event(self): state_dict_ids = cache_entry.value self.assertEqual(cache_entry.full, False) - self.assertEqual(cache_entry.known_absent, {(e1.type, e1.state_key)}) + self.assertEqual(cache_entry.known_absent, set()) self.assertDictEqual(state_dict_ids, {(e1.type, e1.state_key): e1.event_id}) ############################################ From 602a81f5a2098ca29e9ffb89cf13d12471884854 Mon Sep 17 00:00:00 2001 From: Erik 
Johnston Date: Fri, 15 Jul 2022 16:24:10 +0100 Subject: [PATCH 05/41] don't update access --- synapse/util/caches/dictionary_cache.py | 5 ++++- synapse/util/caches/lrucache.py | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 810b8bb71645..4e5a96b5b434 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -165,7 +165,10 @@ def get( return DictionaryEntry(False, known_absent, values) entry = self.cache.get( - (key, _FullCacheKey.KEY), _Sentinel.sentinel, update_metrics=False + (key, _FullCacheKey.KEY), + _Sentinel.sentinel, + update_metrics=False, + update_last_access=False, ) if entry is _Sentinel.sentinel: return DictionaryEntry(False, known_absent, values) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index b2b999cd6022..aab0d1c7e296 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -559,10 +559,12 @@ def cache_get( default: Optional[T] = None, callbacks: Collection[Callable[[], None]] = (), update_metrics: bool = True, + update_last_access: bool = True, ) -> Union[None, T, VT]: node = cache.get(key, None) if node is not None: - move_node_to_front(node) + if update_last_access: + move_node_to_front(node) node.add_callbacks(callbacks) if update_metrics and metrics: metrics.inc_hits() From 23c2f394a55f9f3965f3e13cf01c4912545738d1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 15 Jul 2022 16:30:56 +0100 Subject: [PATCH 06/41] Fix mypy --- synapse/util/caches/lrucache.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index aab0d1c7e296..def7d5e8cbf4 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -541,6 +541,7 @@ def cache_get( default: Literal[None] = None, callbacks: Collection[Callable[[], None]] = ..., update_metrics: bool = ..., + update_last_access: bool = ..., ) -> Optional[VT]: ... @@ -550,6 +551,7 @@ def cache_get( default: T, callbacks: Collection[Callable[[], None]] = ..., update_metrics: bool = ..., + update_last_access: bool = ..., ) -> Union[T, VT]: ... From cad555f07c20add1d754ab824d53540bd07b5652 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 15 Jul 2022 16:31:53 +0100 Subject: [PATCH 07/41] Better stuff --- synapse/storage/databases/state/store.py | 6 +++++- synapse/util/caches/dictionary_cache.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index afbc85ad0c3b..0e38e5b5e45e 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -202,7 +202,11 @@ def _get_state_for_group_using_cache( requests state from the cache, if False we need to query the DB for the missing state. 
""" - cache_entry = cache.get(group) + dict_keys = None + if not state_filter.has_wildcards(): + dict_keys = state_filter.concrete_types() + + cache_entry = cache.get(group, dict_keys=dict_keys) state_dict_ids = cache_entry.value if cache_entry.full or state_filter.is_full(): diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 4e5a96b5b434..869a267e94e6 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -126,7 +126,8 @@ def get( return DictionaryEntry(True, set(), entry) all_entries = self.cache.get_multi( - (key,), _Sentinel.sentinel, update_metrics=False + (key,), + _Sentinel.sentinel, ) if all_entries is _Sentinel.sentinel: return DictionaryEntry(False, set(), {}) @@ -167,7 +168,6 @@ def get( entry = self.cache.get( (key, _FullCacheKey.KEY), _Sentinel.sentinel, - update_metrics=False, update_last_access=False, ) if entry is _Sentinel.sentinel: From 7aceec3ed90b2a12a9f72b4ca72cb1c0682bcb67 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 15 Jul 2022 16:52:16 +0100 Subject: [PATCH 08/41] Fix up --- synapse/util/caches/dictionary_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 869a267e94e6..aa080c2d9e7b 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -158,7 +158,7 @@ def get( assert isinstance(entry, _PerKeyValue) if entry.value is _Sentinel.sentinel: - known_absent.add(entry.value) + known_absent.add(dict_key) else: values[dict_key] = entry.value From 057ae8b61c288df851a4666f8b174ba5575be403 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Sun, 17 Jul 2022 11:34:34 +0100 Subject: [PATCH 09/41] Comments --- synapse/storage/databases/state/store.py | 3 ++ synapse/util/caches/dictionary_cache.py | 69 +++++++++++++++++++++--- 2 files changed, 66 insertions(+), 6 deletions(-) diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index 0e38e5b5e45e..bb64543c1f2d 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -202,6 +202,9 @@ def _get_state_for_group_using_cache( requests state from the cache, if False we need to query the DB for the missing state. """ + # If we are asked explicitly for a subset of keys, we only ask for those + # from the cache. This ensures that the `DictionaryCache` can make + # better decisions about what to cache and what to expire. dict_keys = None if not state_filter.has_wildcards(): dict_keys = state_filter.concrete_types() diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index aa080c2d9e7b..dd1fc9a63045 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -56,6 +56,8 @@ def __len__(self) -> int: class _FullCacheKey(enum.Enum): + """The key we use to cache the full dict.""" + KEY = object() @@ -66,12 +68,18 @@ class _Sentinel(enum.Enum): class _PerKeyValue(Generic[DV]): + """The cached value of a dictionary key. If `value` is the sentinel, + indicates that the requested key is known to *not* be in the full dict. + """ + __slots__ = ["value"] def __init__(self, value: Union[DV, Literal[_Sentinel.sentinel]]) -> None: self.value = value def __len__(self) -> int: + # We add a `__len__` implementation as we use this class in a cache + # where the values are variable length. 
return 1 @@ -81,6 +89,25 @@ class DictionaryCache(Generic[KT, DKT, DV]): """ def __init__(self, name: str, max_entries: int = 1000): + # We use a single cache to cache two different types of entries: + # 1. Map from (key, dict_key) -> dict value (or sentinel, indicating + # the key doesn't exist in the dict); and + # 2. Map from (key, _FullCacheKey.KEY) -> full dict. + # + # The former is used when explicit keys of the dictionary are looked up, + # and the latter when the full dictionary is requested. + # + # If when explicit keys are requested and not in the cache, we then look + # to see if we have the full dict and use that if we do. If found in the + # full dict each key is added into the cache. + # + # This set up allows the `LruCache` to prune the full dict entries if + # they haven't been used in a while, even when there have been recent + # queries for subsets of the dict. + # + # Typing: + # * A key of `(KT, DKT)` has a value of `_PerKeyValue` + # * A key of `(KT, _FullCacheKey.KEY)` has a value of `Dict[DKT, DV]` self.cache: LruCache[ Tuple[KT, Union[DKT, Literal[_FullCacheKey.KEY]]], Union[_PerKeyValue, Dict[DKT, DV]], @@ -120,11 +147,13 @@ def get( """ if dict_keys is None: + # First we check if we have cached the full dict. entry = self.cache.get((key, _FullCacheKey.KEY), _Sentinel.sentinel) if entry is not _Sentinel.sentinel: assert isinstance(entry, dict) return DictionaryEntry(True, set(), entry) + # If not, check if we have cached any of dict keys. all_entries = self.cache.get_multi( (key,), _Sentinel.sentinel, @@ -132,13 +161,21 @@ def get( if all_entries is _Sentinel.sentinel: return DictionaryEntry(False, set(), {}) + # If there are entries we need to unwrap the returned cache nodes + # and `_PerKeyValue` into the `DictionaryEntry`. values = {} known_absent = set() for dict_key, dict_value in iterate_tree_cache_items((), all_entries): dict_key = dict_key[0] dict_value = dict_value.value + # We have explicitly looked for a full cache key, so we + # shouldn't see one. + assert dict_key != _FullCacheKey.KEY + + # ... therefore the values must be `_PerKeyValue` assert isinstance(dict_value, _PerKeyValue) + if dict_value.value is _Sentinel.sentinel: known_absent.add(dict_key) else: @@ -146,6 +183,10 @@ def get( return DictionaryEntry(False, known_absent, values) + # We are being asked for a subset of keys. + + # First got and check for each requested dict key in the cache, tracking + # which we couldn't find. values = {} known_absent = set() missing = set() @@ -162,21 +203,32 @@ def get( else: values[dict_key] = entry.value + # If we found everything we can return immediately. if not missing: return DictionaryEntry(False, known_absent, values) + # If we are missing any keys check if we happen to have the full dict in + # the cache. + # + # We don't update the last access time for this cache fetch, as we + # aren't explicitly interested in the full dict and so we don't want + # requests for explicit dict keys to keep the full dict in the cache. entry = self.cache.get( (key, _FullCacheKey.KEY), _Sentinel.sentinel, update_last_access=False, ) if entry is _Sentinel.sentinel: + # Not in the cache, return the subset of keys we found. return DictionaryEntry(False, known_absent, values) + # We have the full dict! assert isinstance(entry, dict) values = {} for dict_key in dict_keys: + # We explicitly add each dict key to the cache, so that cache hit + # rates for each key can be tracked separately. 
value = entry.get(dict_key, _Sentinel.sentinel) # type: ignore[arg-type] self.cache[(key, dict_key)] = _PerKeyValue(value) @@ -225,13 +277,21 @@ def update( # Only update the cache if the caches sequence number matches the # number that the cache had before the SELECT was started (SYN-369) if fetched_keys is None: - self._insert(key, value) + self.cache[(key, _FullCacheKey.KEY)] = value else: - self._update_or_insert(key, value, fetched_keys) + self._update_subset(key, value, fetched_keys) - def _update_or_insert( + def _update_subset( self, key: KT, value: Dict[DKT, DV], fetched_keys: Iterable[DKT] ) -> None: + """Add the given dictionary values as explicit keys in the cache. + + Args: + key + value: The dictionary with all the values that we should cache + fetched_keys: The full set of keys that were looked up, any keys + here not in `value` should be marked as "known absent". + """ for dict_key, dict_value in value.items(): self.cache[(key, dict_key)] = _PerKeyValue(dict_value) @@ -241,6 +301,3 @@ def _update_or_insert( continue self.cache[(key, dict_key)] = _PerKeyValue(_Sentinel.sentinel) - - def _insert(self, key: KT, value: Dict[DKT, DV]) -> None: - self.cache[(key, _FullCacheKey.KEY)] = value From 7f7b36d56dbb0c85baa002d0ac4072a96f91163a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 18 Jul 2022 10:40:36 +0100 Subject: [PATCH 10/41] Comment LruCache --- synapse/util/caches/lrucache.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index def7d5e8cbf4..e0aeee8df6d9 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -576,17 +576,37 @@ def cache_get( metrics.inc_misses() return default + @overload + def cache_get_multi( + key: tuple, + default: Literal[None] = None, + update_metrics: bool = True, + ) -> Union[None, TreeCacheNode]: + ... + + @overload + def cache_get_multi( + key: tuple, + default: T, + update_metrics: bool = True, + ) -> Union[T, TreeCacheNode]: + ... + @synchronized def cache_get_multi( key: tuple, default: Optional[T] = None, update_metrics: bool = True, ) -> Union[None, T, TreeCacheNode]: - node = cache.get(key, None) # type: ignore[arg-type] + """Used only for `TreeCache` to fetch a subtree.""" + + assert isinstance(cache, TreeCache) + + node = cache.get(key, None) if node is not None: if update_metrics and metrics: metrics.inc_hits() - return node # type: ignore[return-value] + return node else: if update_metrics and metrics: metrics.inc_misses() From 462db2a1714e87fdb04b4a31a4d168d19eebbce8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 18 Jul 2022 10:48:24 +0100 Subject: [PATCH 11/41] Comment LruCache --- synapse/util/caches/lrucache.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index e0aeee8df6d9..6f95c1354e7e 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -563,6 +563,21 @@ def cache_get( update_metrics: bool = True, update_last_access: bool = True, ) -> Union[None, T, VT]: + """Lookup a key in the cache + + Args: + key + default + callbacks: A collection of callbacks that will fire when the + node is removed from the cache (either due to invalidation + or expiry). + update_metrics: Whether to update the hit rate metrics + update_last_access: Whether to update the last access metrics + on a node if successfully fetched. 
These metrics are used + to determine when to remove the node from the cache. Set + to False if this fetch should *not* prevent a node from + being expired. + """ node = cache.get(key, None) if node is not None: if update_last_access: From 129691f19096c2dc0c652a6eeceebffd60978545 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 18 Jul 2022 10:49:12 +0100 Subject: [PATCH 12/41] Comment TreeCache --- synapse/util/caches/treecache.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/util/caches/treecache.py b/synapse/util/caches/treecache.py index c089cb89f416..c0136ea269d3 100644 --- a/synapse/util/caches/treecache.py +++ b/synapse/util/caches/treecache.py @@ -144,6 +144,9 @@ def iterate_tree_cache_entry(d): def iterate_tree_cache_items(key, value): """Helper function to iterate over the leaves of a tree, i.e. a dict of that can contain dicts. + + Returns: + A generator yielding key/value pairs. """ if isinstance(value, TreeCacheNode): for sub_key, sub_value in value.items(): From ca8e1afa10a1a396e7a7985b9e78260cc61f4296 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Jul 2022 13:09:21 +0100 Subject: [PATCH 13/41] Update synapse/util/caches/dictionary_cache.py Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/util/caches/dictionary_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index dd1fc9a63045..21b7b4282e3a 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -153,7 +153,7 @@ def get( assert isinstance(entry, dict) return DictionaryEntry(True, set(), entry) - # If not, check if we have cached any of dict keys. + # If not, check if we have cached any dict keys at all for this cache key. all_entries = self.cache.get_multi( (key,), _Sentinel.sentinel, From e4723df752d9865a280b71088c512002960db01a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Jul 2022 13:10:05 +0100 Subject: [PATCH 14/41] Update synapse/util/caches/lrucache.py Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/util/caches/lrucache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 6f95c1354e7e..c26dce94747c 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -563,7 +563,7 @@ def cache_get( update_metrics: bool = True, update_last_access: bool = True, ) -> Union[None, T, VT]: - """Lookup a key in the cache + """Look up a key in the cache Args: key From a74baa6b371e02e43758dfbae7891b183b921c46 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Jul 2022 13:09:09 +0100 Subject: [PATCH 15/41] Split out code to separate method --- synapse/util/caches/dictionary_cache.py | 79 ++++++++++++++----------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 21b7b4282e3a..c3131ad134b2 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -147,41 +147,7 @@ def get( """ if dict_keys is None: - # First we check if we have cached the full dict. 
- entry = self.cache.get((key, _FullCacheKey.KEY), _Sentinel.sentinel) - if entry is not _Sentinel.sentinel: - assert isinstance(entry, dict) - return DictionaryEntry(True, set(), entry) - - # If not, check if we have cached any dict keys at all for this cache key. - all_entries = self.cache.get_multi( - (key,), - _Sentinel.sentinel, - ) - if all_entries is _Sentinel.sentinel: - return DictionaryEntry(False, set(), {}) - - # If there are entries we need to unwrap the returned cache nodes - # and `_PerKeyValue` into the `DictionaryEntry`. - values = {} - known_absent = set() - for dict_key, dict_value in iterate_tree_cache_items((), all_entries): - dict_key = dict_key[0] - dict_value = dict_value.value - - # We have explicitly looked for a full cache key, so we - # shouldn't see one. - assert dict_key != _FullCacheKey.KEY - - # ... therefore the values must be `_PerKeyValue` - assert isinstance(dict_value, _PerKeyValue) - - if dict_value.value is _Sentinel.sentinel: - known_absent.add(dict_key) - else: - values[dict_key] = dict_value.value - - return DictionaryEntry(False, known_absent, values) + return self._get_full_dict(key) # We are being asked for a subset of keys. @@ -237,6 +203,49 @@ def get( return DictionaryEntry(True, set(), values) + def _get_full_dict( + self, + key: KT, + ) -> DictionaryEntry: + """Fetch the full dict for the given key.""" + + # First we check if we have cached the full dict. + entry = self.cache.get((key, _FullCacheKey.KEY), _Sentinel.sentinel) + if entry is not _Sentinel.sentinel: + assert isinstance(entry, dict) + return DictionaryEntry(True, set(), entry) + + # If not, check if we have cached any dict keys at all for this cache + # key. + all_entries = self.cache.get_multi( + (key,), + _Sentinel.sentinel, + ) + if all_entries is _Sentinel.sentinel: + return DictionaryEntry(False, set(), {}) + + # If there are entries we need to unwrap the returned cache nodes + # and `_PerKeyValue` into the `DictionaryEntry`. + values = {} + known_absent = set() + for dict_key, dict_value in iterate_tree_cache_items((), all_entries): + dict_key = dict_key[0] + dict_value = dict_value.value + + # We have explicitly looked for a full cache key, so we + # shouldn't see one. + assert dict_key != _FullCacheKey.KEY + + # ... therefore the values must be `_PerKeyValue` + assert isinstance(dict_value, _PerKeyValue) + + if dict_value.value is _Sentinel.sentinel: + known_absent.add(dict_key) + else: + values[dict_key] = dict_value.value + + return DictionaryEntry(False, known_absent, values) + def invalidate(self, key: KT) -> None: self.check_thread() From a9ebcd2a7c578e62800db5306e5b63f02825a8fc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Jul 2022 13:17:15 +0100 Subject: [PATCH 16/41] Mark DictionaryEntry as frozen --- synapse/util/caches/dictionary_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index c3131ad134b2..8266d177074b 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -35,7 +35,7 @@ # This class can't be generic because it uses slots with attrs. # See: https://github.com/python-attrs/attrs/issues/313 -@attr.s(slots=True, auto_attribs=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class DictionaryEntry: # should be: Generic[DKT, DV]. 
"""Returned when getting an entry from the cache From 6a76dbac7507dcc361b59d98a90e1b02b4e7a88f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Jul 2022 13:19:30 +0100 Subject: [PATCH 17/41] Don't reuse vars --- synapse/util/caches/dictionary_cache.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 8266d177074b..51d8d88eaa42 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -228,9 +228,9 @@ def _get_full_dict( # and `_PerKeyValue` into the `DictionaryEntry`. values = {} known_absent = set() - for dict_key, dict_value in iterate_tree_cache_items((), all_entries): - dict_key = dict_key[0] - dict_value = dict_value.value + for key_tuple, node in iterate_tree_cache_items((), all_entries): + dict_key = key_tuple[0] + dict_value = node.value # We have explicitly looked for a full cache key, so we # shouldn't see one. From d4133b24c7fb8a52d0b3cee08b2a550f3d924569 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Jul 2022 13:23:27 +0100 Subject: [PATCH 18/41] Add example --- synapse/util/caches/treecache.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/synapse/util/caches/treecache.py b/synapse/util/caches/treecache.py index c0136ea269d3..ccd4b3367f2f 100644 --- a/synapse/util/caches/treecache.py +++ b/synapse/util/caches/treecache.py @@ -145,6 +145,20 @@ def iterate_tree_cache_items(key, value): """Helper function to iterate over the leaves of a tree, i.e. a dict of that can contain dicts. + The provided key is a tuple that will get prepended to the returned keys. + + Example: + + cache = TreeCache() + cache[(1, 1)] = "a" + cache[(1, 2)] = "b" + cache[(2, 1)] = "c" + + tree_node = cache.get((1,)) + + items = iterate_tree_cache_items((1,), tree_node) + assert list(items) == [((1, 1), "a"), ((1, 2), "b")] + Returns: A generator yielding key/value pairs. """ From 88aa56c9ed0820ac1e0487d17a031893d559c611 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Jul 2022 13:57:12 +0100 Subject: [PATCH 19/41] Make `LruCacheget_multi` return something sane. --- synapse/util/caches/dictionary_cache.py | 8 ++++---- synapse/util/caches/lrucache.py | 27 +++++++++++++++++++------ 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 51d8d88eaa42..62bb6689505d 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -20,7 +20,7 @@ from typing_extensions import Literal from synapse.util.caches.lrucache import LruCache -from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_items +from synapse.util.caches.treecache import TreeCache logger = logging.getLogger(__name__) @@ -228,9 +228,9 @@ def _get_full_dict( # and `_PerKeyValue` into the `DictionaryEntry`. values = {} known_absent = set() - for key_tuple, node in iterate_tree_cache_items((), all_entries): - dict_key = key_tuple[0] - dict_value = node.value + for cache_key, dict_value in all_entries: + # The key used for the `TreeCache` is `(key, dict_key)` + dict_key = cache_key[1] # We have explicitly looked for a full cache key, so we # shouldn't see one. 
diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index c26dce94747c..a3fc0940e968 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -24,9 +24,11 @@ Callable, Collection, Dict, + Generator, Generic, List, Optional, + Tuple, Type, TypeVar, Union, @@ -46,8 +48,8 @@ from synapse.util.caches import CacheMetric, EvictionReason, register_cache from synapse.util.caches.treecache import ( TreeCache, - TreeCacheNode, iterate_tree_cache_entry, + iterate_tree_cache_items, ) from synapse.util.linked_list import ListNode @@ -596,7 +598,7 @@ def cache_get_multi( key: tuple, default: Literal[None] = None, update_metrics: bool = True, - ) -> Union[None, TreeCacheNode]: + ) -> Union[None, Generator[Tuple[KT, VT], None, None]]: ... @overload @@ -604,7 +606,7 @@ def cache_get_multi( key: tuple, default: T, update_metrics: bool = True, - ) -> Union[T, TreeCacheNode]: + ) -> Union[T, Generator[Tuple[KT, VT], None, None]]: ... @synchronized @@ -612,8 +614,15 @@ def cache_get_multi( key: tuple, default: Optional[T] = None, update_metrics: bool = True, - ) -> Union[None, T, TreeCacheNode]: - """Used only for `TreeCache` to fetch a subtree.""" + ) -> Union[None, T, Generator[Tuple[KT, VT], None, None]]: + """Returns a generator yielding all entries under the given key. + + Can only be used if backed by a tree cache. + + Returns: + Either default if the key doesn't exist, or a generator of the + key/value pairs. + """ assert isinstance(cache, TreeCache) @@ -621,7 +630,13 @@ def cache_get_multi( if node is not None: if update_metrics and metrics: metrics.inc_hits() - return node + + # Iterating over the node will return values of type `_Node`, + # which we need to unwrap. + return ( + (full_key, lru_node.value) + for full_key, lru_node in iterate_tree_cache_items(key, node) + ) else: if update_metrics and metrics: metrics.inc_misses() From f053edb746fa78cfa7f03a6f9fbd21360f8b1615 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Jul 2022 15:05:45 +0100 Subject: [PATCH 20/41] Woo comments Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/util/caches/dictionary_cache.py | 37 ++++++++++++++++++++----- synapse/util/caches/lrucache.py | 2 +- synapse/util/caches/treecache.py | 1 + 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 62bb6689505d..722efa520cdb 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -86,10 +86,33 @@ def __len__(self) -> int: class DictionaryCache(Generic[KT, DKT, DV]): """Caches key -> dictionary lookups, supporting caching partial dicts, i.e. fetching a subset of dictionary keys for a particular key. + + This cache has two levels of key. First there is the "cache key" (of type + `KT`), which maps to a dict. The keys to that dict are the "dict key" (of + type `DKT`). The overall structure is therefore `KT->DKT->DV`. For + example, it might look like: + + { + 1: { 1: "a", 2: "b" }, + 2: { 1: "c" }, + } + + It is possible to look up either individual dict keys, or the *complete* + dict for a given cache key. + + Each dict item, and the complete dict is treated as a separate LRU + entry for the purpose of cache expiry. For example, given: + dict_cache.get(1, None) -> DictionaryEntry({1: "a", 2: "b"}) + dict_cache.get(1, [1]) -> DictionaryEntry({1: "a"}) + dict_cache.get(1, [2]) -> DictionaryEntry({2: "b"}) + + ... 
then the cache entry for the complete dict will expire first, + followed by the cache entry for the '1' dict key, and finally that + for the '2' dict key. """ def __init__(self, name: str, max_entries: int = 1000): - # We use a single cache to cache two different types of entries: + # We use a single LruCache to store two different types of entries: # 1. Map from (key, dict_key) -> dict value (or sentinel, indicating # the key doesn't exist in the dict); and # 2. Map from (key, _FullCacheKey.KEY) -> full dict. @@ -145,13 +168,13 @@ def get( Returns: DictionaryEntry """ - if dict_keys is None: + # The caller wants the full set of dictionary keys for this cache key return self._get_full_dict(key) # We are being asked for a subset of keys. - # First got and check for each requested dict key in the cache, tracking + # First go and check for each requested dict key in the cache, tracking # which we couldn't find. values = {} known_absent = set() @@ -173,7 +196,7 @@ def get( if not missing: return DictionaryEntry(False, known_absent, values) - # If we are missing any keys check if we happen to have the full dict in + # We are missing some keys, so check if we happen to have the full dict in # the cache. # # We don't update the last access time for this cache fetch, as we @@ -194,7 +217,7 @@ def get( values = {} for dict_key in dict_keys: # We explicitly add each dict key to the cache, so that cache hit - # rates for each key can be tracked separately. + # rates and LRU times for each key can be tracked separately. value = entry.get(dict_key, _Sentinel.sentinel) # type: ignore[arg-type] self.cache[(key, dict_key)] = _PerKeyValue(value) @@ -229,7 +252,7 @@ def _get_full_dict( values = {} known_absent = set() for cache_key, dict_value in all_entries: - # The key used for the `TreeCache` is `(key, dict_key)` + # The keys in `self.cache` are `(cache_key, dict_key)` dict_key = cache_key[1] # We have explicitly looked for a full cache key, so we @@ -298,7 +321,7 @@ def _update_subset( Args: key value: The dictionary with all the values that we should cache - fetched_keys: The full set of keys that were looked up, any keys + fetched_keys: The full set of dict keys that were looked up. Any keys here not in `value` should be marked as "known absent". """ diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index a3fc0940e968..24ec088f08c1 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -631,7 +631,7 @@ def cache_get_multi( if update_metrics and metrics: metrics.inc_hits() - # Iterating over the node will return values of type `_Node`, + # We store entries in the `TreeCache` with values of type `_Node`, # which we need to unwrap. return ( (full_key, lru_node.value) diff --git a/synapse/util/caches/treecache.py b/synapse/util/caches/treecache.py index ccd4b3367f2f..858c3ecf49ad 100644 --- a/synapse/util/caches/treecache.py +++ b/synapse/util/caches/treecache.py @@ -166,4 +166,5 @@ def iterate_tree_cache_items(key, value): for sub_key, sub_value in value.items(): yield from iterate_tree_cache_items((*key, sub_key), sub_value) else: + # we've reached a leaf of the tree. 
yield key, value From 740fe2f2a760ed78e85eda7cf52a1c06de229d13 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Jul 2022 15:38:45 +0100 Subject: [PATCH 21/41] Remove use of `cache_key` Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/util/caches/dictionary_cache.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 722efa520cdb..169a06dc4e2a 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -251,10 +251,7 @@ def _get_full_dict( # and `_PerKeyValue` into the `DictionaryEntry`. values = {} known_absent = set() - for cache_key, dict_value in all_entries: - # The keys in `self.cache` are `(cache_key, dict_key)` - dict_key = cache_key[1] - + for (_, dict_key), dict_value in all_entries: # We have explicitly looked for a full cache key, so we # shouldn't see one. assert dict_key != _FullCacheKey.KEY From 378aec51361f80a73edb8c90f4eb184fef2af11b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Jul 2022 17:15:44 +0100 Subject: [PATCH 22/41] More comments Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/util/caches/dictionary_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 169a06dc4e2a..7d3222782e75 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -316,7 +316,7 @@ def _update_subset( """Add the given dictionary values as explicit keys in the cache. Args: - key + key: top-level cache key value: The dictionary with all the values that we should cache fetched_keys: The full set of dict keys that were looked up. Any keys here not in `value` should be marked as "known absent". From 570903736390ce30b5ff939c047ca20dcec98907 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Jul 2022 17:21:49 +0100 Subject: [PATCH 23/41] Support values being removed from dict Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/util/caches/dictionary_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 7d3222782e75..4b2c35b930d7 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -326,7 +326,7 @@ def _update_subset( self.cache[(key, dict_key)] = _PerKeyValue(dict_value) for dict_key in fetched_keys: - if (key, dict_key) in self.cache: + if dict_key in value: continue self.cache[(key, dict_key)] = _PerKeyValue(_Sentinel.sentinel) From b376618ba3671c8a7d0be5787d1b996c2e8ae52b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Jul 2022 15:37:44 +0100 Subject: [PATCH 24/41] Fixup lint --- synapse/util/caches/dictionary_cache.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 4b2c35b930d7..8ea8e570d497 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -86,26 +86,26 @@ def __len__(self) -> int: class DictionaryCache(Generic[KT, DKT, DV]): """Caches key -> dictionary lookups, supporting caching partial dicts, i.e. fetching a subset of dictionary keys for a particular key. - - This cache has two levels of key. 
First there is the "cache key" (of type + + This cache has two levels of key. First there is the "cache key" (of type `KT`), which maps to a dict. The keys to that dict are the "dict key" (of type `DKT`). The overall structure is therefore `KT->DKT->DV`. For example, it might look like: - + { 1: { 1: "a", 2: "b" }, 2: { 1: "c" }, } - + It is possible to look up either individual dict keys, or the *complete* dict for a given cache key. - + Each dict item, and the complete dict is treated as a separate LRU entry for the purpose of cache expiry. For example, given: dict_cache.get(1, None) -> DictionaryEntry({1: "a", 2: "b"}) dict_cache.get(1, [1]) -> DictionaryEntry({1: "a"}) dict_cache.get(1, [2]) -> DictionaryEntry({2: "b"}) - + ... then the cache entry for the complete dict will expire first, followed by the cache entry for the '1' dict key, and finally that for the '2' dict key. From 12e14f2232ca86f7fd76f5b6f33a68ef4c25b0bd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Jul 2022 15:38:01 +0100 Subject: [PATCH 25/41] Make `missing` a list --- synapse/util/caches/dictionary_cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 8ea8e570d497..06abca397917 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -178,11 +178,11 @@ def get( # which we couldn't find. values = {} known_absent = set() - missing = set() + missing = [] for dict_key in dict_keys: entry = self.cache.get((key, dict_key), _Sentinel.sentinel) if entry is _Sentinel.sentinel: - missing.add(dict_key) + missing.append(dict_key) continue assert isinstance(entry, _PerKeyValue) From fed7755bc99c10755c6db9714b9e2559b5803dd4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Jul 2022 15:38:17 +0100 Subject: [PATCH 26/41] Don't iterate twice over `dict_key` --- synapse/util/caches/dictionary_cache.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 06abca397917..32f73062b2b9 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -214,8 +214,7 @@ def get( # We have the full dict! assert isinstance(entry, dict) - values = {} - for dict_key in dict_keys: + for dict_key in missing: # We explicitly add each dict key to the cache, so that cache hit # rates and LRU times for each key can be tracked separately. value = entry.get(dict_key, _Sentinel.sentinel) # type: ignore[arg-type] From c9f13f3d05180e028a7a7e955afce812d6f7a50e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Jul 2022 17:17:49 +0100 Subject: [PATCH 27/41] Clarify comment --- synapse/util/caches/dictionary_cache.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 32f73062b2b9..8ce07106c6df 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -272,7 +272,11 @@ def invalidate(self, key: KT) -> None: # raced with the INSERT don't update the cache (SYN-369) self.sequence += 1 - # Del-multi accepts truncated tuples. + # We want to drop all information about the dict for the given key, so + # we use `del_multi` to delete it all in one go. + # + # We ignore the type error here `del_mutli` accepts a truncated key + # (when the key type is a tuple). 
self.cache.del_multi((key,)) # type: ignore[arg-type] def invalidate_all(self) -> None: From 67bb06d3e97621e330f34b5526b4421bf6c4931c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Jul 2022 17:31:38 +0100 Subject: [PATCH 28/41] Use iterable rather than generator --- synapse/util/caches/lrucache.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 24ec088f08c1..789b9fd1d9c1 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -24,8 +24,8 @@ Callable, Collection, Dict, - Generator, Generic, + Iterable, List, Optional, Tuple, @@ -598,7 +598,7 @@ def cache_get_multi( key: tuple, default: Literal[None] = None, update_metrics: bool = True, - ) -> Union[None, Generator[Tuple[KT, VT], None, None]]: + ) -> Union[None, Iterable[Tuple[KT, VT]]]: ... @overload @@ -606,7 +606,7 @@ def cache_get_multi( key: tuple, default: T, update_metrics: bool = True, - ) -> Union[T, Generator[Tuple[KT, VT], None, None]]: + ) -> Union[T, Iterable[Tuple[KT, VT]]]: ... @synchronized @@ -614,7 +614,7 @@ def cache_get_multi( key: tuple, default: Optional[T] = None, update_metrics: bool = True, - ) -> Union[None, T, Generator[Tuple[KT, VT], None, None]]: + ) -> Union[None, T, Iterable[Tuple[KT, VT]]]: """Returns a generator yielding all entries under the given key. Can only be used if backed by a tree cache. From 2ec4caba159ed85c7833ecb84a53854a0931bac3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Jul 2022 17:34:02 +0100 Subject: [PATCH 29/41] Add example --- synapse/util/caches/lrucache.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 789b9fd1d9c1..b3bdedb04cad 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -619,6 +619,16 @@ def cache_get_multi( Can only be used if backed by a tree cache. + Example: + + cache = LruCache(10, cache_type=TreeCache) + cache[(1, 1)] = "a" + cache[(1, 2)] = "b" + cache[(2, 1)] = "c" + + items = cache.get_multi((1,)) + assert list(items) == [((1, 1), "a"), ((1, 2), "b")] + Returns: Either default if the key doesn't exist, or a generator of the key/value pairs. From 50bb90108c7cbd69d42ea564b1567cc6d3ff6de7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Jul 2022 17:44:00 +0100 Subject: [PATCH 30/41] Add doc to TreeCache.get --- synapse/util/caches/treecache.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/synapse/util/caches/treecache.py b/synapse/util/caches/treecache.py index 858c3ecf49ad..64bc6724a7f8 100644 --- a/synapse/util/caches/treecache.py +++ b/synapse/util/caches/treecache.py @@ -64,6 +64,15 @@ def set(self, key, value) -> None: self.size += 1 def get(self, key, default=None): + """When `key` is a full key, fetches the value for the given key (if + any). + + If `key` is only a partial key (i.e. a truncated tuple) then returns a + `TreeCacheNode`, which can be passed to the `iterate_tree_cache_*` + functions to iterate over all values in the cache with keys that start + with the given partial key. 
+ """ + node = self.root for k in key[:-1]: node = node.get(k, None) From aa203c67e855130d93160bcbab195c732d1b1a2e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Jul 2022 17:45:23 +0100 Subject: [PATCH 31/41] Note that if full is True known_absent must be empty --- synapse/util/caches/dictionary_cache.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 8ce07106c6df..3b9f8cf5da31 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -39,6 +39,8 @@ class DictionaryEntry: # should be: Generic[DKT, DV]. """Returned when getting an entry from the cache + If `full` is true then `known_absent` will be the empty set. + Attributes: full: Whether the cache has the full or dict or just some keys. If not full then not all requested keys will necessarily be present From 2151474987f91a85119a5bdeffebef3933ff0d12 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Jul 2022 18:11:59 +0100 Subject: [PATCH 32/41] When fetching full dict don't return partial --- synapse/util/caches/dictionary_cache.py | 28 +------------------------ tests/util/test_dict_cache.py | 3 ++- 2 files changed, 3 insertions(+), 28 deletions(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 3b9f8cf5da31..83275ba74a8e 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -239,33 +239,7 @@ def _get_full_dict( assert isinstance(entry, dict) return DictionaryEntry(True, set(), entry) - # If not, check if we have cached any dict keys at all for this cache - # key. - all_entries = self.cache.get_multi( - (key,), - _Sentinel.sentinel, - ) - if all_entries is _Sentinel.sentinel: - return DictionaryEntry(False, set(), {}) - - # If there are entries we need to unwrap the returned cache nodes - # and `_PerKeyValue` into the `DictionaryEntry`. - values = {} - known_absent = set() - for (_, dict_key), dict_value in all_entries: - # We have explicitly looked for a full cache key, so we - # shouldn't see one. - assert dict_key != _FullCacheKey.KEY - - # ... 
therefore the values must be `_PerKeyValue` - assert isinstance(dict_value, _PerKeyValue) - - if dict_value.value is _Sentinel.sentinel: - known_absent.add(dict_key) - else: - values[dict_key] = dict_value.value - - return DictionaryEntry(False, known_absent, values) + return DictionaryEntry(False, set(), {}) def invalidate(self, key: KT) -> None: self.check_thread() diff --git a/tests/util/test_dict_cache.py b/tests/util/test_dict_cache.py index bee66dee4328..ba9ad70f17b5 100644 --- a/tests/util/test_dict_cache.py +++ b/tests/util/test_dict_cache.py @@ -82,7 +82,7 @@ def test_multi_insert(self): test_value_2 = {"test2": "test_simple_cache_hit_miss_partial2"} self.cache.update(seq, key, test_value_2, fetched_keys=set("test2")) - c = self.cache.get(key) + c = self.cache.get(key, dict_keys=["test", "test2"]) self.assertEqual( { "test": "test_simple_cache_hit_miss_partial", @@ -90,3 +90,4 @@ def test_multi_insert(self): }, c.value, ) + self.assertEqual(c.full, False) From 43e0030916f7d96ce7720f3734c232ad2f20ca41 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Jul 2022 18:16:13 +0100 Subject: [PATCH 33/41] Fix test, set takes an iterable --- tests/util/test_dict_cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/util/test_dict_cache.py b/tests/util/test_dict_cache.py index ba9ad70f17b5..ddca93e85221 100644 --- a/tests/util/test_dict_cache.py +++ b/tests/util/test_dict_cache.py @@ -76,11 +76,11 @@ def test_multi_insert(self): seq = self.cache.sequence test_value_1 = {"test": "test_simple_cache_hit_miss_partial"} - self.cache.update(seq, key, test_value_1, fetched_keys=set("test")) + self.cache.update(seq, key, test_value_1, fetched_keys={"test"}) seq = self.cache.sequence test_value_2 = {"test2": "test_simple_cache_hit_miss_partial2"} - self.cache.update(seq, key, test_value_2, fetched_keys=set("test2")) + self.cache.update(seq, key, test_value_2, fetched_keys={"test2"}) c = self.cache.get(key, dict_keys=["test", "test2"]) self.assertEqual( From 3c23161363f24a7bfccbbdd43217567a62b8a564 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Jul 2022 18:27:24 +0100 Subject: [PATCH 34/41] Add simple test for invalidation --- tests/util/test_dict_cache.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tests/util/test_dict_cache.py b/tests/util/test_dict_cache.py index ddca93e85221..966bfd7b8c63 100644 --- a/tests/util/test_dict_cache.py +++ b/tests/util/test_dict_cache.py @@ -20,7 +20,7 @@ class DictCacheTestCase(unittest.TestCase): def setUp(self): - self.cache = DictionaryCache("foobar") + self.cache = DictionaryCache("foobar", max_entries=10) def test_simple_cache_hit_full(self): key = "test_simple_cache_hit_full" @@ -91,3 +91,26 @@ def test_multi_insert(self): c.value, ) self.assertEqual(c.full, False) + + def test_invalidation(self): + """Test that the partial dict and full dicts get invalidated + separately. + """ + key = "some_key" + + seq = self.cache.sequence + self.cache.update(seq, key, {"a": "b", "c": "d"}) + + for i in range(20): + self.cache.get(key, ["a"]) + self.cache.update(seq, f"key{i}", {1: 2}) + + # We should have evicted the full dict... + r = self.cache.get(key) + self.assertFalse(r.full) + self.assertTrue("c" not in r.value) + + # ... but kept the "a" entry that we kept querying. 
+ r = self.cache.get(key, dict_keys=["a"]) + self.assertFalse(r.full) + self.assertEqual(r.value, {"a": "b"}) From e5ef14db5c524f8c6b228efc896719ee99a662e3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Jul 2022 09:32:52 +0100 Subject: [PATCH 35/41] Fix test now we don't return partial dicts --- tests/storage/test_state.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index 58a85415e12b..5564161750e6 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -370,7 +370,7 @@ def test_get_state_for_event(self): self.assertEqual(cache_entry.full, False) self.assertEqual(cache_entry.known_absent, set()) - self.assertDictEqual(state_dict_ids, {(e1.type, e1.state_key): e1.event_id}) + self.assertDictEqual(state_dict_ids, {}) ############################################ # test that things work with a partial cache @@ -387,7 +387,7 @@ def test_get_state_for_event(self): ) self.assertEqual(is_all, False) - self.assertDictEqual({(e1.type, e1.state_key): e1.event_id}, state_dict) + self.assertDictEqual({}, state_dict) room_id = self.room.to_string() (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( @@ -412,7 +412,7 @@ def test_get_state_for_event(self): ) self.assertEqual(is_all, False) - self.assertDictEqual({(e1.type, e1.state_key): e1.event_id}, state_dict) + self.assertDictEqual({}, state_dict) (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_members_cache, @@ -443,7 +443,7 @@ def test_get_state_for_event(self): ) self.assertEqual(is_all, False) - self.assertDictEqual({(e1.type, e1.state_key): e1.event_id}, state_dict) + self.assertDictEqual({}, state_dict) (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache( self.state_datastore._state_group_members_cache, From 10fb0d089e797e53a1e87003aef4b47898342625 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Jul 2022 13:51:49 +0100 Subject: [PATCH 36/41] Woo comments Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/util/caches/dictionary_cache.py | 2 +- synapse/util/caches/treecache.py | 2 +- tests/util/test_dict_cache.py | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 83275ba74a8e..aa17a14160e6 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -251,7 +251,7 @@ def invalidate(self, key: KT) -> None: # We want to drop all information about the dict for the given key, so # we use `del_multi` to delete it all in one go. # - # We ignore the type error here `del_mutli` accepts a truncated key + # We ignore the type error here: `del_multi` accepts a truncated key # (when the key type is a tuple). self.cache.del_multi((key,)) # type: ignore[arg-type] diff --git a/synapse/util/caches/treecache.py b/synapse/util/caches/treecache.py index 64bc6724a7f8..c1b8ec0c73eb 100644 --- a/synapse/util/caches/treecache.py +++ b/synapse/util/caches/treecache.py @@ -69,7 +69,7 @@ def get(self, key, default=None): If `key` is only a partial key (i.e. a truncated tuple) then returns a `TreeCacheNode`, which can be passed to the `iterate_tree_cache_*` - functions to iterate over all values in the cache with keys that start + functions to iterate over all entries in the cache with keys that start with the given partial key. 
""" diff --git a/tests/util/test_dict_cache.py b/tests/util/test_dict_cache.py index 966bfd7b8c63..e8b6246ab552 100644 --- a/tests/util/test_dict_cache.py +++ b/tests/util/test_dict_cache.py @@ -99,8 +99,11 @@ def test_invalidation(self): key = "some_key" seq = self.cache.sequence + # start by populating a "full dict" entry self.cache.update(seq, key, {"a": "b", "c": "d"}) + # add a bunch of individual entries, also keeping the individual + # entry for "a" warm. for i in range(20): self.cache.get(key, ["a"]) self.cache.update(seq, f"key{i}", {1: 2}) From d521be27307f2df0eb4121f7fe9bd044caed55de Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Jul 2022 13:53:15 +0100 Subject: [PATCH 37/41] Comment if dict_keys is None --- synapse/util/caches/dictionary_cache.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index aa17a14160e6..504925236c6b 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -165,7 +165,8 @@ def get( Args: key dict_keys: If given a set of keys then return only those keys - that exist in the cache. + that exist in the cache. If None then returns the full dict + if it is in the cache. Returns: DictionaryEntry From 3c84a98e5b31277073432fb656dbed78b48e3d7c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Jul 2022 13:54:17 +0100 Subject: [PATCH 38/41] Document update doesn't invalidate --- synapse/util/caches/dictionary_cache.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 504925236c6b..1a27a1619803 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -268,7 +268,10 @@ def update( value: Dict[DKT, DV], fetched_keys: Optional[Iterable[DKT]] = None, ) -> None: - """Updates the entry in the cache + """Updates the entry in the cache. + + Note: This does *not* invalidate any existing entries for the `key`. + When the underlying data is changed `.invalidate(key)` must be called. Args: sequence From 9c54881a4b6ddc006268b5865525c325a9d9081c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Jul 2022 15:05:20 +0100 Subject: [PATCH 39/41] Update synapse/util/caches/dictionary_cache.py Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/util/caches/dictionary_cache.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 1a27a1619803..e2fcd120f691 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -271,7 +271,13 @@ def update( """Updates the entry in the cache. Note: This does *not* invalidate any existing entries for the `key`. - When the underlying data is changed `.invalidate(key)` must be called. + In particular, if we add an entry for the cached "full dict" with + `fetched_keys=None`, existing entries for individual dict keys are + not invalidated. Likewise, adding entries for individual keys does + not invalidate any cached value for the full dict. + + In other words: if the underlying data is *changed*, the cache must + be explicitly invalidated via `.invalidate()`. 
 
         Args:
             sequence

From daa2741f7bc5775aca2a85d986362a8d9fd7a6e9 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Thu, 21 Jul 2022 15:05:43 +0100
Subject: [PATCH 40/41] Flesh out return value

---
 synapse/util/caches/dictionary_cache.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py
index e2fcd120f691..0ae926c60b0b 100644
--- a/synapse/util/caches/dictionary_cache.py
+++ b/synapse/util/caches/dictionary_cache.py
@@ -169,7 +169,10 @@ def get(
             if it is in the cache.
 
         Returns:
-            DictionaryEntry
+            DictionaryEntry: If `dict_keys` is not None then `DictionaryEntry`
+            will contain the keys that are in the cache. If None then
+            will either return the full dict if in the cache, or the empty
+            dict (with `full` set to False) if it isn't.
         """
 
         if dict_keys is None:
             # The caller wants the full set of dictionary keys for this cache key

From 055a5ddf1f9a351b0f77f0b58c208a5b2c60e23b Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Thu, 21 Jul 2022 15:15:53 +0100
Subject: [PATCH 41/41] Lint

---
 synapse/util/caches/dictionary_cache.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py
index 0ae926c60b0b..fa91479c97f6 100644
--- a/synapse/util/caches/dictionary_cache.py
+++ b/synapse/util/caches/dictionary_cache.py
@@ -274,11 +274,11 @@ def update(
         """Updates the entry in the cache.
 
         Note: This does *not* invalidate any existing entries for the `key`.
-        In particular, if we add an entry for the cached "full dict" with 
+        In particular, if we add an entry for the cached "full dict" with
         `fetched_keys=None`, existing entries for individual dict keys are
         not invalidated. Likewise, adding entries for individual keys does
         not invalidate any cached value for the full dict.
-        
+
         In other words: if the underlying data is *changed*, the cache must
         be explicitly invalidated via `.invalidate()`.
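
The semantics introduced by this series are easiest to see end-to-end, so the following is a minimal usage sketch of the reworked DictionaryCache (not part of the patch series; it assumes a Synapse checkout with these patches applied). It mirrors the behaviour exercised by the tests and documented in the docstrings: `update` populates either the cached "full dict" (`fetched_keys=None`) or individual per-key entries, it never invalidates existing entries, and callers must call `invalidate` explicitly when the underlying data changes.

from synapse.util.caches.dictionary_cache import DictionaryCache

cache = DictionaryCache("example", max_entries=10)

# Cache the "full dict" for a key: fetched_keys=None marks it as complete.
seq = cache.sequence
cache.update(seq, "room1", {"a": "b", "c": "d"})
entry = cache.get("room1")
assert entry.full and entry.value == {"a": "b", "c": "d"}

# Cache individual per-key entries: only the listed fetched_keys are stored.
seq = cache.sequence
cache.update(seq, "room2", {"x": "y"}, fetched_keys={"x"})
partial = cache.get("room2", dict_keys=["x"])
assert not partial.full and partial.value == {"x": "y"}

# update() never invalidates existing entries; when the underlying data
# changes, invalidate() must be called explicitly.
cache.invalidate("room1")
assert not cache.get("room1").full and cache.get("room1").value == {}

Keeping the per-key entries separate from the full-dict entry is what lets frequently queried keys survive eviction of the (large) full dict, as the `test_invalidation` test above demonstrates.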