From 8acb357c73e35ab5475fe92b45e14133241b1628 Mon Sep 17 00:00:00 2001
From: Ryan Cross
Date: Tue, 14 Jan 2025 16:40:53 -0800
Subject: [PATCH 1/2] feat: add management command move_list

---
 backend/mlarchive/archive/mail.py             | 25 ++++---
 .../archive/management/commands/move_list.py  | 30 ++++++++
 backend/mlarchive/archive/utils.py            | 49 ++++++++++++-
 backend/mlarchive/tests/archive/utils.py      | 73 ++++++++++++++++++-
 4 files changed, 164 insertions(+), 13 deletions(-)
 create mode 100644 backend/mlarchive/archive/management/commands/move_list.py

diff --git a/backend/mlarchive/archive/mail.py b/backend/mlarchive/archive/mail.py
index 33fc9472..85231c05 100644
--- a/backend/mlarchive/archive/mail.py
+++ b/backend/mlarchive/archive/mail.py
@@ -467,6 +467,19 @@ def get_message_from_bytes(b, policy):
     return email.message_from_bytes(b, policy=email_policy.compat32)
 
 
+def make_hash(msgid, listname):
+    """
+    Returns the message hashcode, a SHA-1 digest of the Message-ID and listname.
+    Similar to the popular Web Email Archive, mail-archive.com
+    see: https://www.mail-archive.com/faq.html#msgid
+    """
+    msgid_bytes = msgid.encode('utf8')
+    listname_bytes = listname.encode('utf8')
+    sha = hashlib.sha1(msgid_bytes)
+    sha.update(listname_bytes)
+    b64 = base64.urlsafe_b64encode(sha.digest())
+    return b64.decode('utf8')
+
 # --------------------------------------------------
 # Classes
 # --------------------------------------------------
@@ -746,16 +759,8 @@ def get_date(self):
             raise DateError("%s, %s" % (self.msgid, self.email_message.get_unixfrom()))
 
     def get_hash(self):
-        """Returns the message hashcode, a SHA-1 digest of the Message-ID and listname.
-        Similar to the popular Web Email Archive, mail-archive.com
-        see: https://www.mail-archive.com/faq.html#msgid
-        """
-        msgid = self.msgid.encode('utf8')
-        listname = self.listname.encode('utf8')
-        sha = hashlib.sha1(msgid)
-        sha.update(listname)
-        b64 = base64.urlsafe_b64encode(sha.digest())
-        return b64.decode('utf8')
+        """Returns the message hashcode"""
+        return make_hash(msgid=self.msgid, listname=self.listname)
 
     def get_msgid(self):
         msgid = self.normalize(self.email_message.get('Message-ID', ''))
diff --git a/backend/mlarchive/archive/management/commands/move_list.py b/backend/mlarchive/archive/management/commands/move_list.py
new file mode 100644
index 00000000..3df43b48
--- /dev/null
+++ b/backend/mlarchive/archive/management/commands/move_list.py
@@ -0,0 +1,30 @@
+# Copyright The IETF Trust 2025, All Rights Reserved
+# -*- coding: utf-8 -*-
+
+
+from django.core.management.base import BaseCommand, CommandError
+from mlarchive.archive.models import EmailList
+from mlarchive.archive.utils import move_list
+
+import logging
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    help = "Move messages from source list to target list"
+
+    def add_arguments(self, parser):
+        parser.add_argument('source', help='Source list name')
+        parser.add_argument('target', help='Target list name')
+
+    def handle(self, *args, **options):
+        """Check that the source list exists, then move it to the target list."""
+        source_name = options['source']
+        # confirm source list exists
+        if not EmailList.objects.filter(name=source_name).exists():
+            raise CommandError(f'Source list does not exist: {source_name}')
+        try:
+            move_list(source_name, options['target'])
+        except Exception as e:
+            logger.error('move list failed: %s', e)
+            raise CommandError(f'Command failed. {e}') from e
diff --git a/backend/mlarchive/archive/utils.py b/backend/mlarchive/archive/utils.py
index 83eac432..f1770057 100644
--- a/backend/mlarchive/archive/utils.py
+++ b/backend/mlarchive/archive/utils.py
@@ -10,6 +10,7 @@
 import os
 import re
 import requests
+import shutil
 import subprocess
 from collections import defaultdict
 
@@ -20,7 +21,8 @@
 from django.http import HttpResponse
 from django.utils.encoding import smart_bytes
 
-from mlarchive.archive.models import EmailList, Subscriber
+from mlarchive.archive.models import EmailList, Subscriber, Redirect
+from mlarchive.archive.mail import MessageWrapper
 # from mlarchive.archive.signals import _export_lists, _list_save_handler
 
 
@@ -404,3 +406,48 @@ def purge_incoming():
             file_mtime = datetime.datetime.fromtimestamp(os.path.getmtime(file_path))
             if file_mtime < cutoff_date:
                 os.remove(file_path)
+
+
+def move_list(source, target):
+    '''Move messages from source list to target list. Includes:
+    - create the new list if it doesn't exist
+    - moving files on disk
+    - updating database and search index
+    - creating entries in the Redirect table to map original urls
+      to new urls
+    '''
+    if source == target:
+        # moving a list onto itself would only create self-referencing redirects
+        raise Exception(f'Source and target lists are identical: {source}')
+    try:
+        source_list = EmailList.objects.get(name=source)
+    except EmailList.DoesNotExist:
+        raise Exception(f'Email list does not exist: {source}')
+    target_list, created = EmailList.objects.get_or_create(
+        name=target,
+        defaults={'private': source_list.private})
+    if created and target_list.private:
+        for member in source_list.members.all():
+            target_list.members.add(member)
+    # create directory if needed
+    path = os.path.join(settings.ARCHIVE_DIR, target)
+    if not os.path.exists(path):
+        os.mkdir(path)
+        os.chmod(path, 0o2777)
+    # move message files
+    for msg in source_list.message_set.all():
+        _ = len(msg.pymsg)    # evaluate msg.pymsg
+        source_path = msg.get_file_path()
+        old_url = msg.get_absolute_url()
+        # get new hashcode
+        mw = MessageWrapper(message=msg.pymsg, listname=target)
+        hashcode = mw.get_hash()
+        msg.hashcode = hashcode
+        msg.email_list = target_list
+        msg.save()
+        # move file on disk
+        target_path = msg.get_file_path()
+        shutil.move(source_path, target_path)
+        # create redirect
+        new_url = msg.get_absolute_url()
+        Redirect.objects.create(old=old_url, new=new_url)
diff --git a/backend/mlarchive/tests/archive/utils.py b/backend/mlarchive/tests/archive/utils.py
index d740c8f6..45e55e38 100644
--- a/backend/mlarchive/tests/archive/utils.py
+++ b/backend/mlarchive/tests/archive/utils.py
@@ -13,12 +13,16 @@
 from django.conf import settings
 from django.core.cache import cache
 from django.contrib.auth.models import AnonymousUser
+from django.http import QueryDict
 from mlarchive.archive.utils import (get_noauth, get_lists, get_lists_for_user,
     lookup_user, process_members, check_inactive, EmailList, purge_incoming,
     create_mbox_file, _get_lists_as_xml, get_subscribers, Subscriber,
     get_mailman_lists, get_membership_3, get_subscriber_counts, get_fqdn,
-    update_mbox_files, _export_lists)
-from mlarchive.archive.models import User, Message
+    update_mbox_files, _export_lists, move_list)
+from mlarchive.archive.models import User, Message, Redirect
+from mlarchive.archive.mail import make_hash
+from mlarchive.archive.forms import AdvancedSearchForm
+from mlarchive.archive.backends.elasticsearch import search_from_form
 from factories import EmailListFactory
 
 
@@ -429,3 +433,68 @@ def test_purge_incoming(tmpdir, settings):
     assert len(os.listdir(path)) == 1
     assert os.path.exists(new_file_path)
     assert not os.path.exists(old_file_path)
+
+
+@pytest.mark.django_db(transaction=True)
+def test_move_list(rf, search_api_messages):
+    source = 'acme'
+    target = 'acme-archived'
+    msg = Message.objects.filter(email_list__name=source).last()
+    path = msg.get_file_path()
+    old_url = msg.get_absolute_url()
+    list_dir = os.path.dirname(path)
+    new_list_dir = os.path.join(os.path.dirname(list_dir), target)
+    # assert pre-conditions
+    assert os.path.exists(path)
+    assert len(os.listdir(list_dir)) == 4
+    assert not os.path.exists(os.path.join(list_dir, target))
+    assert Message.objects.filter(email_list__name=source).count() == 4
+    assert Message.objects.filter(email_list__name=target).count() == 0
+    # pre index state
+    data = QueryDict('email_list=acme')
+    request = rf.get('/arch/search/?' + data.urlencode())
+    request.user = AnonymousUser()
+    form = AdvancedSearchForm(data=data, request=request)
+    search = search_from_form(form)
+    results = search.execute()
+    assert len(results) == 4
+    ids = [h.msgid for h in results]
+    assert sorted(ids) == ['api001', 'api002', 'api003', 'api004']
+    # move messages
+    move_list(source, target)
+    # check files moved
+    assert not os.path.exists(path)
+    assert len(os.listdir(list_dir)) == 0
+    assert os.path.exists(new_list_dir)
+    assert len(os.listdir(new_list_dir)) == 4
+    # check new hash
+    new_hash = make_hash(msgid=msg.msgid, listname=target)
+    msg.refresh_from_db()
+    assert msg.hashcode == new_hash
+    new_path = msg.get_file_path()
+    assert new_hash in new_path
+    assert os.path.exists(new_path)
+    # check redirect table
+    new_url = msg.get_absolute_url()
+    assert new_url != old_url
+    assert Redirect.objects.filter(old=old_url, new=new_url).exists()
+    # check index updated
+    data = QueryDict('email_list=acme')
+    request = rf.get('/arch/search/?' + data.urlencode())
+    request.user = AnonymousUser()
+    form = AdvancedSearchForm(data=data, request=request)
+    search = search_from_form(form)
+    results = search.execute()
+    assert len(results) == 0
+    data = QueryDict('email_list=acme-archived')
+    request = rf.get('/arch/search/?' + data.urlencode())
+    request.user = AnonymousUser()
+    form = AdvancedSearchForm(data=data, request=request)
+    search = search_from_form(form)
+    results = search.execute()
+    assert len(results) == 4
+    ids = [h.msgid for h in results]
+    assert sorted(ids) == ['api001', 'api002', 'api003', 'api004']
+    # check db updated
+    assert Message.objects.filter(email_list__name=source).count() == 0
+    assert Message.objects.filter(email_list__name=target).count() == 4

From 387a81156240e8a18950107c52a5f0f111e09f20 Mon Sep 17 00:00:00 2001
From: Ryan Cross
Date: Wed, 15 Jan 2025 08:43:59 -0800
Subject: [PATCH 2/2] fix: fix issue with move_list test

---
 backend/mlarchive/tests/archive/conftest.py | 13 +++++++++++++
 backend/mlarchive/tests/archive/utils.py    | 10 +++++++---
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/backend/mlarchive/tests/archive/conftest.py b/backend/mlarchive/tests/archive/conftest.py
index e539fa56..06484ac7 100644
--- a/backend/mlarchive/tests/archive/conftest.py
+++ b/backend/mlarchive/tests/archive/conftest.py
@@ -280,9 +280,22 @@ def latin1_messages():
     assert Message.objects.count() > 0
 
 
+def remove_all_files(directory):
+    """Delete the regular files directly inside directory, if it exists."""
+    if not os.path.isdir(directory):
+        return
+    for filename in os.listdir(directory):
+        file_path = os.path.join(directory, filename)
+        if os.path.isfile(file_path):
+            os.remove(file_path)
+
+
 @pytest.fixture()
 def search_api_messages():
     """Load messages for search_api tests"""
+    # clear archive message directory
+    arch_path = os.path.join(settings.ARCHIVE_DIR, 'acme')
+    remove_all_files(arch_path)
     content = io.StringIO()
     path = os.path.join(settings.BASE_DIR, 'tests', 'data', 'search_api.mbox')
     call_command('clear_index', interactive=False, stdout=content)
diff --git a/backend/mlarchive/tests/archive/utils.py b/backend/mlarchive/tests/archive/utils.py
index 45e55e38..baec9945 100644
--- a/backend/mlarchive/tests/archive/utils.py
+++ b/backend/mlarchive/tests/archive/utils.py
@@ -435,6 +435,10 @@ def test_purge_incoming(tmpdir, settings):
     assert not os.path.exists(old_file_path)
 
 
+def list_only_files(directory):
+    return [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
+
+
 @pytest.mark.django_db(transaction=True)
 def test_move_list(rf, search_api_messages):
     source = 'acme'
@@ -446,7 +450,7 @@ def test_move_list(rf, search_api_messages):
     new_list_dir = os.path.join(os.path.dirname(list_dir), target)
     # assert pre-conditions
     assert os.path.exists(path)
-    assert len(os.listdir(list_dir)) == 4
+    assert len(list_only_files(list_dir)) == 4
     assert not os.path.exists(os.path.join(list_dir, target))
     assert Message.objects.filter(email_list__name=source).count() == 4
     assert Message.objects.filter(email_list__name=target).count() == 0
@@ -464,9 +468,9 @@ def test_move_list(rf, search_api_messages):
     move_list(source, target)
     # check files moved
     assert not os.path.exists(path)
-    assert len(os.listdir(list_dir)) == 0
+    assert len(list_only_files(list_dir)) == 0
     assert os.path.exists(new_list_dir)
-    assert len(os.listdir(new_list_dir)) == 4
+    assert len(list_only_files(new_list_dir)) == 4
     # check new hash
     new_hash = make_hash(msgid=msg.msgid, listname=target)
     msg.refresh_from_db()