Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add management command move_list #3914

Merged
merged 2 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions backend/mlarchive/archive/mail.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,19 @@ def get_message_from_bytes(b, policy):
return email.message_from_bytes(b, policy=email_policy.compat32)


def make_hash(msgid, listname):
    """
    Build the hashcode for a message.

    The hashcode is the URL-safe base64 text of the SHA-1 digest taken over
    the UTF-8 encoded Message-ID immediately followed by the UTF-8 encoded
    list name. Similar to the popular Web Email Archive, mail-archive.com
    see: https://www.mail-archive.com/faq.html#msgid
    """
    # Hashing the concatenation in one pass gives the same digest as
    # hashing msgid and then update()-ing with listname.
    payload = msgid.encode('utf8') + listname.encode('utf8')
    digest = hashlib.sha1(payload).digest()
    return base64.urlsafe_b64encode(digest).decode('utf8')

# --------------------------------------------------
# Classes
# --------------------------------------------------
Expand Down Expand Up @@ -746,16 +759,8 @@ def get_date(self):
raise DateError("%s, %s" % (self.msgid, self.email_message.get_unixfrom()))

def get_hash(self):
    """Returns the message hashcode.

    Delegates to make_hash() with this object's msgid and listname so the
    hashing scheme (SHA-1 of Message-ID + listname, URL-safe base64) is
    defined in exactly one place.
    """
    return make_hash(msgid=self.msgid, listname=self.listname)

def get_msgid(self):
msgid = self.normalize(self.email_message.get('Message-ID', ''))
Expand Down
31 changes: 31 additions & 0 deletions backend/mlarchive/archive/management/commands/move_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright The IETF Trust 2025, All Rights Reserved
# -*- coding: utf-8 -*-


from django.core.management.base import BaseCommand, CommandError
from mlarchive.archive.models import EmailList
from mlarchive.archive.utils import move_list

import logging
logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Management command: move all messages from one list to another.

    Usage: manage.py move_list <source> <target>

    The actual work is done by mlarchive.archive.utils.move_list; this class
    only validates the source list name and converts failures into
    CommandError so the command reports a clean error message.
    """
    help = "Move messages from source list to target list"

    def add_arguments(self, parser):
        """Register the positional 'source' and 'target' list name arguments."""
        parser.add_argument('source', help='Source list name')
        parser.add_argument('target', help='Target list name')

    def handle(self, *args, **options):
        """Validate the source list and run move_list.

        Raises:
            CommandError: if the source list does not exist, or if
                move_list raises any exception (chained as the cause).
        """
        source_name = options['source']
        target_name = options['target']
        # confirm source list exists; exists() avoids loading a row we
        # would only throw away
        if not EmailList.objects.filter(name=source_name).exists():
            raise CommandError(f'Source list does not exist: {source_name}')
        try:
            move_list(source_name, target_name)
        except Exception as e:
            # Top-level boundary: record the traceback, then surface a
            # CommandError that keeps the original exception as its cause.
            logger.exception('move list failed: %s', e)
            raise CommandError(f'Command failed. {e}') from e
46 changes: 45 additions & 1 deletion backend/mlarchive/archive/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import os
import re
import requests
import shutil
import subprocess
from collections import defaultdict

Expand All @@ -20,7 +21,8 @@
from django.http import HttpResponse
from django.utils.encoding import smart_bytes

from mlarchive.archive.models import EmailList, Subscriber
from mlarchive.archive.models import EmailList, Subscriber, Redirect
from mlarchive.archive.mail import MessageWrapper
# from mlarchive.archive.signals import _export_lists, _list_save_handler


Expand Down Expand Up @@ -404,3 +406,45 @@ def purge_incoming():
file_mtime = datetime.datetime.fromtimestamp(os.path.getmtime(file_path))
if file_mtime < cutoff_date:
os.remove(file_path)


def move_list(source, target):
    '''Move messages from source list to target list. Includes:
    - create the new list if it doesn't exist
    - moving files on disk
    - updating database and search index
    - creating entries in the Redirect table to map original urls
      to new urls

    source and target are list names. A newly created target list inherits
    the source list's private flag and, when private, its members.

    Raises Exception if source and target are the same name or if the
    source list does not exist.
    '''
    # Moving a list onto itself would leave hashcodes and urls unchanged and
    # create Redirect rows that point at themselves.
    if source == target:
        raise Exception(f'Source and target list are the same: {source}')
    try:
        source_list = EmailList.objects.get(name=source)
    except EmailList.DoesNotExist as err:
        raise Exception(f'Email list does not exist: {source}') from err
    target_list, created = EmailList.objects.get_or_create(
        name=target,
        defaults={'private': source_list.private})
    if created and target_list.private:
        # copy the membership in a single call instead of one add() per member
        target_list.members.add(*source_list.members.all())
    # create directory if needed
    path = os.path.join(settings.ARCHIVE_DIR, target)
    if not os.path.exists(path):
        os.mkdir(path)
        os.chmod(path, 0o2777)
    # move message files
    for msg in source_list.message_set.all():
        # evaluate msg.pymsg now, before the list/hashcode (and therefore
        # the file path) change -- presumably it is loaded lazily from the
        # file on disk; verify against the Message model
        _ = len(msg.pymsg)
        source_path = msg.get_file_path()
        old_url = msg.get_absolute_url()
        # get new hashcode (the hash depends on the list name)
        mw = MessageWrapper(message=msg.pymsg, listname=target)
        msg.hashcode = mw.get_hash()
        msg.email_list = target_list
        msg.save()
        # move file on disk; get_file_path() now reflects the new
        # list and hashcode
        target_path = msg.get_file_path()
        shutil.move(source_path, target_path)
        # create redirect
        new_url = msg.get_absolute_url()
        Redirect.objects.create(old=old_url, new=new_url)
10 changes: 10 additions & 0 deletions backend/mlarchive/tests/archive/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,9 +280,19 @@ def latin1_messages():
assert Message.objects.count() > 0


def remove_all_files(directory):
    """Delete every regular file directly inside directory.

    Subdirectories (and anything inside them) are left untouched. A
    directory that does not exist is treated as already empty, so callers
    can use this to reset a location that may not have been created yet.
    """
    if not os.path.isdir(directory):
        return
    # Collect the paths first so nothing is deleted while the directory is
    # still being scanned.
    with os.scandir(directory) as entries:
        file_paths = [entry.path for entry in entries if entry.is_file()]
    for file_path in file_paths:
        os.remove(file_path)


@pytest.fixture()
def search_api_messages():
"""Load messages for search_api tests"""
# clear archive message directory
arch_path = os.path.join(settings.ARCHIVE_DIR, 'acme')
remove_all_files(arch_path)
content = io.StringIO()
path = os.path.join(settings.BASE_DIR, 'tests', 'data', 'search_api.mbox')
call_command('clear_index', interactive=False, stdout=content)
Expand Down
77 changes: 75 additions & 2 deletions backend/mlarchive/tests/archive/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,16 @@
from django.conf import settings
from django.core.cache import cache
from django.contrib.auth.models import AnonymousUser
from django.http import QueryDict
from mlarchive.archive.utils import (get_noauth, get_lists, get_lists_for_user,
lookup_user, process_members, check_inactive, EmailList, purge_incoming,
create_mbox_file, _get_lists_as_xml, get_subscribers, Subscriber,
get_mailman_lists, get_membership_3, get_subscriber_counts, get_fqdn,
update_mbox_files, _export_lists)
from mlarchive.archive.models import User, Message
update_mbox_files, _export_lists, move_list)
from mlarchive.archive.models import User, Message, Redirect
from mlarchive.archive.mail import make_hash
from mlarchive.archive.forms import AdvancedSearchForm
from mlarchive.archive.backends.elasticsearch import search_from_form
from factories import EmailListFactory


Expand Down Expand Up @@ -429,3 +433,72 @@ def test_purge_incoming(tmpdir, settings):
assert len(os.listdir(path)) == 1
assert os.path.exists(new_file_path)
assert not os.path.exists(old_file_path)


def list_only_files(directory):
    """Return the names of the entries in directory that are regular files."""
    names = []
    for name in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, name)):
            names.append(name)
    return names


def _search_msgids(rf, list_name):
    """Return the sorted msgids the search index reports for list_name.

    Builds an anonymous AdvancedSearchForm request restricted to the given
    email list and executes it against the search backend.
    """
    data = QueryDict(f'email_list={list_name}')
    request = rf.get('/arch/search/?' + data.urlencode())
    request.user = AnonymousUser()
    form = AdvancedSearchForm(data=data, request=request)
    results = search_from_form(form).execute()
    return sorted(h.msgid for h in results)


@pytest.mark.django_db(transaction=True)
def test_move_list(rf, search_api_messages):
    """move_list moves files, updates db rows and hashcodes, records
    redirects and updates the search index."""
    source = 'acme'
    target = 'acme-archived'
    expected_ids = ['api001', 'api002', 'api003', 'api004']
    msg = Message.objects.filter(email_list__name=source).last()
    path = msg.get_file_path()
    old_url = msg.get_absolute_url()
    list_dir = os.path.dirname(path)
    new_list_dir = os.path.join(os.path.dirname(list_dir), target)
    # assert pre-conditions
    assert os.path.exists(path)
    assert len(list_only_files(list_dir)) == 4
    # the target directory is a sibling of the source list directory
    assert not os.path.exists(new_list_dir)
    assert Message.objects.filter(email_list__name=source).count() == 4
    assert Message.objects.filter(email_list__name=target).count() == 0
    # pre index state
    assert _search_msgids(rf, source) == expected_ids
    # move messages
    move_list(source, target)
    # check files moved
    assert not os.path.exists(path)
    assert len(list_only_files(list_dir)) == 0
    assert os.path.exists(new_list_dir)
    assert len(list_only_files(new_list_dir)) == 4
    # check new hash
    new_hash = make_hash(msgid=msg.msgid, listname=target)
    msg.refresh_from_db()
    assert msg.hashcode == new_hash
    new_path = msg.get_file_path()
    assert new_hash in new_path
    assert os.path.exists(new_path)
    # check redirect table
    new_url = msg.get_absolute_url()
    assert new_url != old_url
    assert Redirect.objects.filter(old=old_url, new=new_url).exists()
    # check index updated
    assert _search_msgids(rf, source) == []
    assert _search_msgids(rf, target) == expected_ids
    # check db updated
    assert Message.objects.filter(email_list__name=source).count() == 0
    assert Message.objects.filter(email_list__name=target).count() == 4
Loading