Skip to content

Commit

Permalink
Fixes Tribler#7287: Fix the health info comparison algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
kozlovsky committed Mar 8, 2023
1 parent 762998d commit d8ec081
Show file tree
Hide file tree
Showing 3 changed files with 177 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@
from tribler.core.utilities.unicode import hexlify


TOLERABLE_TIME_DRIFT = 60 # one minute
HOUR = 60 * 60
MINUTE = 60
HOUR = MINUTE * 60
TOLERABLE_TIME_DRIFT = MINUTE # When receiving health from another peer, how far the timestamp can be in the future?
TORRENT_CHECK_WINDOW = MINUTE # When asking multiple trackers in parallel, we ignore this time difference in responses
HEALTH_FRESHNESS_SECONDS = 4 * HOUR # Number of seconds before a torrent health is considered stale. Default: 4 hours


@dataclass
Expand Down Expand Up @@ -50,19 +53,53 @@ def is_valid(self) -> bool:
def seeders_leechers_last_check(self) -> Tuple[int, int, int]:
return self.seeders, self.leechers, self.last_check

def should_update(self, torrent_state, self_checked=False):
if self.last_check <= torrent_state.last_check:
# The torrent state in the DB is already fresher than this health
return False
def should_update(self, torrent_state, self_checked=False) -> bool:
# Self is a new health info, torrent_state is a previously saved health info for the same infohash
if self.infohash != torrent_state.infohash:
raise ValueError('An attempt to compare health for different infohashes')

if not self.is_valid():
return False # Health info with future last_check time is ignored

now = int(time.time())
hour_ago = now - HOUR
if not self_checked and torrent_state.self_checked and hour_ago <= torrent_state.last_check <= now:
# The torrent state in the DB was locally checked just recently,
# and we trust this recent local check more than the new health info received remotely
if self_checked:
if not torrent_state.self_checked:
return True # Always prefer self-checked info

if torrent_state.last_check < now - TORRENT_CHECK_WINDOW:
# The previous torrent's health info is too old, replace it with the new health info,
# even if the new health info has fewer seeders
return True

if self.seeders_leechers_last_check > torrent_state.seeders_leechers_last_check:
# The new health info was received almost immediately after the previous health info from another tracker
# and has a bigger number of seeders/leechers, or is at least a bit fresher
return True

# The previous health info is also self-checked, not too old, and has more seeders/leechers
return False

# The new health info is received from another peer and not self-checked

if torrent_state.self_checked and torrent_state.last_check >= now - HEALTH_FRESHNESS_SECONDS:
# The previous self-checked health is fresh enough, do not replace it with remote health info
return False

return True
if torrent_state.last_check + HEALTH_FRESHNESS_SECONDS < self.last_check:
# The new health info appears to be significantly more recent; let's use it disregarding
# the number of seeders (Note: it is possible that the newly received health info was actually
# checked earlier, but with incorrect OS time. To mitigate this, we can switch to a relative
# time when sending health info over the wire (like, "this remote health check was performed
# 1000 seconds ago"), then the correctness of the OS time will not matter anymore)
return True

if torrent_state.last_check - TOLERABLE_TIME_DRIFT <= self.last_check \
and self.seeders_leechers_last_check > torrent_state.seeders_leechers_last_check:
# The new remote health info is not (much) older than the previous one, and has more seeders/leechers
return True

# The new remote health info is older than the previous health info, or not much fresher and has fewer seeders
return False

@dataclass
class TrackerResponse:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import time
from unittest.mock import Mock

import pytest

from tribler.core.components.torrent_checker.torrent_checker.dataclasses import HEALTH_FRESHNESS_SECONDS, HealthInfo, \
TOLERABLE_TIME_DRIFT, \
TORRENT_CHECK_WINDOW

INFOHASH = b'infohash_1'


def now() -> int:
    """Return the current Unix timestamp truncated to whole seconds."""
    current_time = time.time()
    return int(current_time)


@pytest.fixture(name='torrent_state')
def torrent_state_fixture():
    """Provide a mock of a previously stored torrent state carrying the shared test infohash."""
    stored_state = Mock(infohash=INFOHASH)
    return stored_state


def test_different_infohashes(torrent_state: Mock):
    """Comparing health records that belong to different infohashes must raise ValueError."""
    expected_error = '^An attempt to compare health for different infohashes$'
    foreign_health = HealthInfo(infohash=b'infohash_2', last_check=now())
    with pytest.raises(ValueError, match=expected_error):
        foreign_health.should_update(torrent_state)


def test_invalid_health(torrent_state: Mock):
    """A health record stamped beyond the tolerable drift in the future is invalid and never applied."""
    future_timestamp = now() + TOLERABLE_TIME_DRIFT + 2
    health = HealthInfo(INFOHASH, last_check=future_timestamp)
    assert not health.is_valid()
    assert not health.should_update(torrent_state)


def test_self_checked_health_remote_torrent_state(torrent_state: Mock):
    """A self-checked health record replaces a stored state that was not self-checked."""
    torrent_state.self_checked = False
    local_health = HealthInfo(INFOHASH, last_check=now())
    should_replace = local_health.should_update(torrent_state, self_checked=True)
    assert should_replace


def test_self_checked_health_torrent_state_outside_window(torrent_state: Mock):
    """A self-checked health record replaces a self-checked state older than the check window."""
    torrent_state.self_checked = True
    # One second past the window, so the stored state counts as too old.
    stale_timestamp = now() - TORRENT_CHECK_WINDOW - 1
    torrent_state.last_check = stale_timestamp
    local_health = HealthInfo(INFOHASH, last_check=now())
    should_replace = local_health.should_update(torrent_state, self_checked=True)
    assert should_replace


def test_self_checked_health_inside_window_more_seeders(torrent_state: Mock):
    """Within the check window, a self-checked record with more seeders replaces the stored one."""
    current = now()
    # Two seconds inside the window, so the seeders/leechers comparison decides the outcome.
    previous_check = current - TORRENT_CHECK_WINDOW + 2
    torrent_state.self_checked = True
    torrent_state.last_check = previous_check
    torrent_state.seeders_leechers_last_check = (1, 2, previous_check)

    health = HealthInfo(INFOHASH, last_check=current, seeders=2, leechers=1)
    new_triple = health.seeders_leechers_last_check
    assert new_triple == (2, 1, current)
    assert health.seeders_leechers_last_check > torrent_state.seeders_leechers_last_check
    assert health.should_update(torrent_state, self_checked=True)


def test_self_checked_health_inside_window_fewer_seeders(torrent_state: Mock):
    """Within the check window, a self-checked record with fewer seeders does not replace the stored one."""
    current = now()
    # Two seconds inside the window, so the seeders/leechers comparison decides the outcome.
    previous_check = current - TORRENT_CHECK_WINDOW + 2
    torrent_state.self_checked = True
    torrent_state.last_check = previous_check
    torrent_state.seeders_leechers_last_check = (2, 1, previous_check)

    health = HealthInfo(INFOHASH, last_check=current, seeders=1, leechers=2)
    new_triple = health.seeders_leechers_last_check
    assert new_triple == (1, 2, current)
    assert health.seeders_leechers_last_check < torrent_state.seeders_leechers_last_check
    assert not health.should_update(torrent_state, self_checked=True)


def test_self_checked_torrent_state_fresh_enough(torrent_state: Mock):
    """Remote health must not replace a self-checked state that is still within the freshness period."""
    current = now()
    # Self-checked and two seconds inside the freshness period.
    still_fresh_timestamp = current - HEALTH_FRESHNESS_SECONDS + 2
    torrent_state.self_checked = True
    torrent_state.last_check = still_fresh_timestamp

    remote_health = HealthInfo(INFOHASH, last_check=current)
    should_replace = remote_health.should_update(torrent_state)
    assert not should_replace


def test_torrent_state_self_checked_long_ago(torrent_state: Mock):
    """Remote health replaces a self-checked state that is older than the freshness period."""
    current = now()
    torrent_state.self_checked = True
    torrent_state.last_check = current - HEALTH_FRESHNESS_SECONDS - 2
    remote_health = HealthInfo(INFOHASH, last_check=current)
    assert remote_health.should_update(torrent_state)

    # The outcome must be the same when both timestamps are shifted far into the past.
    shift_seconds = 1000000
    torrent_state.last_check = torrent_state.last_check - shift_seconds
    remote_health.last_check = remote_health.last_check - shift_seconds
    assert remote_health.should_update(torrent_state)


def test_more_recent_more_seeders(torrent_state: Mock):
    """Remote health within the tolerable drift and with more seeders replaces a remote stored state."""
    base_time = now() - 100
    torrent_state.self_checked = False
    torrent_state.last_check = base_time
    torrent_state.seeders_leechers_last_check = (1, 2, base_time)

    health = HealthInfo(INFOHASH, last_check=base_time - 1, seeders=2, leechers=1)
    # Check one second before and one second after the stored timestamp.
    for candidate_check in (base_time - 1, base_time + 1):
        health.last_check = candidate_check
        assert abs(torrent_state.last_check - health.last_check) <= TOLERABLE_TIME_DRIFT
        assert health.should_update(torrent_state)


def test_more_recent_fewer_seeders(torrent_state: Mock):
    """Remote health within the tolerable drift but with fewer seeders does not replace the stored state."""
    base_time = now() - 100
    torrent_state.self_checked = False
    torrent_state.last_check = base_time
    torrent_state.seeders_leechers_last_check = (2, 1, base_time)

    health = HealthInfo(INFOHASH, last_check=base_time - 1, seeders=1, leechers=2)
    # Check one second before and one second after the stored timestamp.
    for candidate_check in (base_time - 1, base_time + 1):
        health.last_check = candidate_check
        assert abs(torrent_state.last_check - health.last_check) <= TOLERABLE_TIME_DRIFT
        assert not health.should_update(torrent_state)


def test_less_recent_more_seeders(torrent_state: Mock):
    """Remote health older than the stored state by more than the tolerable drift is rejected."""
    base_time = now() - 100
    torrent_state.self_checked = False
    torrent_state.last_check = base_time

    # One second beyond the tolerable drift; the large seeder count must not matter.
    outdated_check = base_time - TOLERABLE_TIME_DRIFT - 1
    outdated_health = HealthInfo(INFOHASH, last_check=outdated_check, seeders=100)
    should_replace = outdated_health.should_update(torrent_state)
    assert not should_replace
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
from tribler.core.components.metadata_store.db.serialization import REGULAR_TORRENT
from tribler.core.components.metadata_store.db.store import MetadataStore
from tribler.core.components.torrent_checker.torrent_checker import DHT
from tribler.core.components.torrent_checker.torrent_checker.dataclasses import HealthInfo, TrackerResponse
from tribler.core.components.torrent_checker.torrent_checker.dataclasses import HEALTH_FRESHNESS_SECONDS, HealthInfo, \
TrackerResponse
from tribler.core.components.torrent_checker.torrent_checker.utils import aggregate_responses_for_infohash, \
filter_non_exceptions, gather_coros, aggregate_health_by_infohash
from tribler.core.components.torrent_checker.torrent_checker.torrentchecker_session import \
Expand All @@ -36,7 +37,6 @@

TORRENT_SELECTION_POOL_SIZE = 2 # How many torrents to check (popular or random) during periodic check
USER_CHANNEL_TORRENT_SELECTION_POOL_SIZE = 5 # How many torrents to check from user's channel during periodic check
HEALTH_FRESHNESS_SECONDS = 4 * 3600 # Number of seconds before a torrent health is considered stale. Default: 4 hours
TORRENTS_CHECKED_RETURN_SIZE = 240 # Estimated torrents checked on default 4 hours idle run


Expand Down

0 comments on commit d8ec081

Please sign in to comment.