From fb4fe77f578586f8b293dca3c09474e683244831 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 8 Sep 2021 16:19:04 +0100 Subject: [PATCH 1/7] Factor more stuff out of `_get_events_and_persist` It turns out that the event-sorting algorithm in `_get_events_and_persist` is also useful in other circumstances. Here we move the current `_auth_and_persist_fetched_events` to `_auth_and_persist_fetched_events_inner`, and then factor the sorting part out to `_auth_and_persist_fetched_events`. --- synapse/handlers/federation_event.py | 46 +++++++++++++++++++--------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 7d468bd2df12..e9e72bcbe1e0 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1107,7 +1107,7 @@ async def _get_events_and_persist( room_version = await self._store.get_room_version(room_id) - event_map: Dict[str, EventBase] = {} + events: List[EventBase] = [] async def get_event(event_id: str) -> None: with nested_logging_context(event_id): @@ -1125,8 +1125,7 @@ async def get_event(event_id: str) -> None: event_id, ) return - - event_map[event.event_id] = event + events.append(event) except Exception as e: logger.warning( @@ -1137,7 +1136,30 @@ async def get_event(event_id: str) -> None: ) await concurrently_execute(get_event, event_ids, 5) - logger.info("Fetched %i events of %i requested", len(event_map), len(event_ids)) + logger.info("Fetched %i events of %i requested", len(events), len(event_ids)) + await self._auth_and_persist_fetched_events(destination, room_id, events) + + async def _auth_and_persist_fetched_events( + self, origin: str, room_id: str, events: Iterable[EventBase] + ) -> None: + """Persist the events fetched by _get_events_and_persist. + + The events should not depend on one another, e.g. this should be used to persist + a bunch of outliers, but not a chunk of individual events that depend + on each other for state calculations. + + We first sort the events so that they do not depend on each other, then + persist them. + + Notifies about the events where appropriate. + + Params: + origin: where the events came from + room_id: the room that the events are meant to be in (though this has + not yet been checked) + events: the events that have been fetched + """ + event_map = {event.event_id: event for event in events} # we now need to auth the events in an order which ensures that each event's # auth_events are authed before the event itself. @@ -1168,22 +1190,18 @@ async def get_event(event_id: str) -> None: "Persisting %i of %i remaining events", len(roots), len(event_map) ) - await self._auth_and_persist_fetched_events(destination, room_id, roots) + await self._auth_and_persist_fetched_events_inner(origin, room_id, roots) for ev in roots: del event_map[ev.event_id] - async def _auth_and_persist_fetched_events( + async def _auth_and_persist_fetched_events_inner( self, origin: str, room_id: str, fetched_events: Collection[EventBase] ) -> None: - """Persist the events fetched by _get_events_and_persist. - - The events should not depend on one another, e.g. this should be used to persist - a bunch of outliers, but not a chunk of individual events that depend - on each other for state calculations. + """Helper for _auth_and_persist_fetched_events - We also assume that all of the auth events for all of the events have already - been persisted. + Persists a batch of events where we have (theoretically) already persisted all + of their auth events. Notifies about the events where appropriate. @@ -1191,7 +1209,7 @@ async def _auth_and_persist_fetched_events( origin: where the events came from room_id: the room that the events are meant to be in (though this has not yet been checked) - event_id: map from event_id -> event for the fetched events + fetched_events: the events to persist """ # get all the auth events for all the events in this batch. By now, they should # have been persisted. From ff6056d48ea7dd9282a20f3285864f2f0c3ac865 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 23 Sep 2021 18:15:28 +0100 Subject: [PATCH 2/7] `_get_remote_auth_chain_for_event`: remove redundant `outlier` assignment `get_event_auth` returns events with the outlier flag already set, so this is redundant (though we need to update a test where `get_event_auth` is mocked). --- synapse/handlers/federation_event.py | 1 - tests/handlers/test_federation.py | 7 ++++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index e9e72bcbe1e0..17795a791e72 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1650,7 +1650,6 @@ async def _get_remote_auth_chain_for_event( for e in remote_auth_chain if e.event_id in auth_ids or e.type == EventTypes.Create } - auth_event.internal_metadata.outlier = True logger.debug( "_check_event_auth %s missing_auth: %s", diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index 6c67a16de923..936ebf3dde36 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -308,7 +308,12 @@ def test_backfill_floating_outlier_membership_auth(self): async def get_event_auth( destination: str, room_id: str, event_id: str ) -> List[EventBase]: - return auth_events + return [ + event_from_pdu_json( + ae.get_pdu_json(), room_version=room_version, outlier=True + ) + for ae in auth_events + ] self.handler.federation_client.get_event_auth = get_event_auth From e229a047c171e43e3724f0999697bac9d0f86cc0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 23 Sep 2021 17:58:57 +0100 Subject: [PATCH 3/7] `_get_remote_auth_chain_for_event`: move existing-event tests earlier Move a couple of tests outside the loop. This is a bit inefficient for now, but a future commit will make it better. It should be functionally identical. --- synapse/handlers/federation_event.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 17795a791e72..28a3849f27d3 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1632,17 +1632,18 @@ async def _get_remote_auth_chain_for_event( logger.info("Failed to get event auth from remote: %s", e1) return + # `event` may be returned, but we should not yet process it. + remote_auth_chain = [x for x in remote_auth_chain if x.event_id != event_id] + + # nor should we reprocess any events we have already seen. seen_remotes = await self._store.have_seen_events( room_id, [e.event_id for e in remote_auth_chain] ) + remote_auth_chain = [ + x for x in remote_auth_chain if x.event_id not in seen_remotes + ] for auth_event in remote_auth_chain: - if auth_event.event_id in seen_remotes: - continue - - if auth_event.event_id == event_id: - continue - try: auth_ids = auth_event.auth_event_ids() auth = { From c7c3e91c79d6dc8d3c3744c52e2dcc9de558126c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 8 Sep 2021 16:34:11 +0100 Subject: [PATCH 4/7] `_get_remote_auth_chain_for_event`: use `_auth_and_persist_fetched_events` We can use the same codepath for persisting the events fetched as part of an auth chain as for those fetched individually by `_get_events_and_persist` for building the state at a backwards extremity. --- synapse/handlers/federation_event.py | 32 ++++------------------------ 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 28a3849f27d3..783077e7f28d 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1142,7 +1142,7 @@ async def get_event(event_id: str) -> None: async def _auth_and_persist_fetched_events( self, origin: str, room_id: str, events: Iterable[EventBase] ) -> None: - """Persist the events fetched by _get_events_and_persist. + """Persist the events fetched by _get_events_and_persist or _get_remote_auth_chain_for_event The events should not depend on one another, e.g. this should be used to persist a bunch of outliers, but not a chunk of individual events that depend @@ -1643,33 +1643,9 @@ async def _get_remote_auth_chain_for_event( x for x in remote_auth_chain if x.event_id not in seen_remotes ] - for auth_event in remote_auth_chain: - try: - auth_ids = auth_event.auth_event_ids() - auth = { - (e.type, e.state_key): e - for e in remote_auth_chain - if e.event_id in auth_ids or e.type == EventTypes.Create - } - - logger.debug( - "_check_event_auth %s missing_auth: %s", - event_id, - auth_event.event_id, - ) - missing_auth_event_context = EventContext.for_outlier() - missing_auth_event_context = await self._check_event_auth( - destination, - auth_event, - missing_auth_event_context, - claimed_auth_event_map=auth, - ) - await self.persist_events_and_notify( - room_id, - [(auth_event, missing_auth_event_context)], - ) - except AuthError: - pass + await self._auth_and_persist_fetched_events( + destination, room_id, remote_auth_chain + ) async def _update_context_for_auth_events( self, event: EventBase, context: EventContext, auth_events: StateMap[EventBase] From 867e76d6631b614ca49d582c581c28369429520b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 23 Sep 2021 18:07:53 +0100 Subject: [PATCH 5/7] `_get_remote_auth_chain_for_event`: use a dict for efficiency `_auth_and_persist_fetched_events` sorts the events itself, so we no longer need to care about maintaining the ordering from `get_event_auth` (and no longer need to sort by depth in `get_event_auth`). That means that we can use a map, making it easier to filter out events we already have, etc. --- synapse/federation/federation_client.py | 2 -- synapse/handlers/federation_event.py | 22 +++++++++++++--------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 1416abd0fba3..584836c04ad1 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -501,8 +501,6 @@ async def get_event_auth( destination, auth_chain, outlier=True, room_version=room_version ) - signed_auth.sort(key=lambda e: e.depth) - return signed_auth def _is_unknown_endpoint( diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 783077e7f28d..9cf5e728e4d2 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1623,28 +1623,32 @@ async def _get_remote_auth_chain_for_event( event_id: the event for which we are lacking auth events """ try: - remote_auth_chain = await self._federation_client.get_event_auth( - destination, room_id, event_id - ) + remote_event_map = { + e.event_id: e + for e in await self._federation_client.get_event_auth( + destination, room_id, event_id + ) + } except RequestSendFailed as e1: # The other side isn't around or doesn't implement the # endpoint, so lets just bail out. logger.info("Failed to get event auth from remote: %s", e1) return + logger.info("/event_auth returned %i events", len(remote_event_map)) + # `event` may be returned, but we should not yet process it. - remote_auth_chain = [x for x in remote_auth_chain if x.event_id != event_id] + remote_event_map.pop(event_id, None) # nor should we reprocess any events we have already seen. seen_remotes = await self._store.have_seen_events( - room_id, [e.event_id for e in remote_auth_chain] + room_id, remote_event_map.keys() ) - remote_auth_chain = [ - x for x in remote_auth_chain if x.event_id not in seen_remotes - ] + for s in seen_remotes: + remote_event_map.pop(s, None) await self._auth_and_persist_fetched_events( - destination, room_id, remote_auth_chain + destination, room_id, remote_event_map.values() ) async def _update_context_for_auth_events( From bc200262497847e0333feec27c5af1ac0de00ed0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 23 Sep 2021 17:37:25 +0100 Subject: [PATCH 6/7] changelog --- changelog.d/10896.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/10896.misc diff --git a/changelog.d/10896.misc b/changelog.d/10896.misc new file mode 100644 index 000000000000..41de99584239 --- /dev/null +++ b/changelog.d/10896.misc @@ -0,0 +1 @@ + Clean up some of the federation event authentication code for clarity. From 49d9802282f0b4e2ab1ae3b2c3e3204231ff8446 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 24 Sep 2021 11:16:09 +0100 Subject: [PATCH 7/7] `_auth_and_persist_fetched_events`: improve docstring --- synapse/handlers/federation_event.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 9cf5e728e4d2..864d2074a7bd 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1144,12 +1144,10 @@ async def _auth_and_persist_fetched_events( ) -> None: """Persist the events fetched by _get_events_and_persist or _get_remote_auth_chain_for_event - The events should not depend on one another, e.g. this should be used to persist - a bunch of outliers, but not a chunk of individual events that depend - on each other for state calculations. + The events to be persisted must be outliers. - We first sort the events so that they do not depend on each other, then - persist them. + We first sort the events to make sure that we process each event's auth_events + before the event itself, and then auth and persist them. Notifies about the events where appropriate. @@ -1161,9 +1159,6 @@ async def _auth_and_persist_fetched_events( """ event_map = {event.event_id: event for event in events} - # we now need to auth the events in an order which ensures that each event's - # auth_events are authed before the event itself. - # # XXX: it might be possible to kick this process off in parallel with fetching # the events. while event_map: