From patchwork Thu Jun 4 14:18:16 2026 Return-Path: Received: from ringo (2a01cb00021ec0002e23edbec21b0e73.ipv6.abo.wanadoo.fr [IPv6:2a01:cb00:21e:c000:2e23:edbe:c21b:e73]) by patches.jarry.cc (Postfix) with ESMTP id DDBF61BC4352 for ; Thu, 04 Jun 2026 16:18:29 +0200 (CEST) From: Robin Jarry To: pw@patches.jarry.cc Subject: [PATCH v3 07/16] forge: add utilities for mailing-list sync Date: Thu, 4 Jun 2026 16:18:16 +0200 Message-ID: <20260604141826.2998337-8-robin@jarry.cc> X-Mailer: git-send-email 2.54.0 In-Reply-To: <20260604141826.2998337-1-robin@jarry.cc> References: <20260604141826.2998337-1-robin@jarry.cc> MIME-Version: 1.0 List-ID: X-Patchwork-Submitter: Robin Jarry X-Patchwork-Id: 104 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Add generic utility functions reusable across forge backends: sanitize_pr_body() strips HTML comments and AI-generated sections from pull request descriptions. sender_identity() resolves a (name, email) tuple for forge users, falling back to the project's sender_email. find_series_by_pr() and next_version() query SeriesMetadata to locate previously synced series and determine respin version numbers. reply_to_msgid() finds the message-id to thread replies under. ingest_emails() parses an mbox and creates Series/Patch/Cover objects directly in the database via parse_mail(), then stores forge metadata on the series. send_emails() forwards raw patch bytes via SMTP preserving the original message format. Also add an ignore_hints parameter to parse_mail() so that X-Patchwork-Hint: ignore headers do not prevent direct ingestion. 
Signed-off-by: Robin Jarry --- patchwork/forge/util.py | 217 +++++++++++++++ patchwork/parser.py | 5 +- patchwork/tests/forge/__init__.py | 0 patchwork/tests/forge/test_util.py | 406 +++++++++++++++++++++++++++++ 4 files changed, 626 insertions(+), 2 deletions(-) create mode 100644 patchwork/forge/util.py create mode 100644 patchwork/tests/forge/__init__.py create mode 100644 patchwork/tests/forge/test_util.py diff --git a/patchwork/forge/util.py b/patchwork/forge/util.py new file mode 100644 index 000000000000..0793d368b7bd --- /dev/null +++ b/patchwork/forge/util.py @@ -0,0 +1,217 @@ +# Patchwork - automated patch tracking system +# Copyright (C) 2026 Robin Jarry +# +# SPDX-License-Identifier: GPL-2.0-or-later + +""" +Generic utilities for mailing-list synchronization. +""" + +import email +import io +import logging +import mailbox +import os +import re + +from django.core.mail import get_connection +from django.db import transaction + +from patchwork.models import Cover +from patchwork.models import Patch +from patchwork.models import Series +from patchwork.models import SeriesMetadata +from patchwork.parser import DuplicateMailError +from patchwork.parser import clean_header +from patchwork.parser import parse_mail + +logger = logging.getLogger(__name__) + +HTML_COMMENT_RE = re.compile(r'(?s)') + +AI_SECTION_HEADERS = [ + 'summary by coderabbit', + 'summary by copilot', + 'walkthrough', + 'generated by', +] + + +def sanitize_pr_body(body): + """ + Strip HTML comments and AI-generated sections from a pull request body + before using it as a cover letter description. 
+ """ + if not body: + return '' + body = HTML_COMMENT_RE.sub('', body) + lines = body.split('\n') + result = [] + skip = False + for line in lines: + lower = line.strip().lower() + if lower.startswith('#'): + heading = lower.lstrip('# ') + for marker in AI_SECTION_HEADERS: + if heading.startswith(marker): + skip = True + break + if skip: + continue + skip = False + if not skip: + result.append(line) + return '\n'.join(result).strip() + + +def sender_identity(user, forge_config): + """ + Return a (name, email) tuple for a forge user. Falls back to the project's + sender_email when the user has no public email address. + """ + name = user.name or user.login + if user.email: + addr = user.email + else: + addr = email.utils.parseaddr(forge_config.sender_email)[1] + name += ' (via Patchwork)' + return name, addr + + +def find_series_by_pr(backend, forge_config, pr_number): + """ + Find all series linked to a pull request via SeriesMetadata, + ordered by ascending version (oldest first). + """ + return ( + Series.objects.filter( + project=forge_config.project, + metadata__key=backend.meta_key_pr(), + metadata__value=backend.pr_ref(forge_config, pr_number), + ) + .select_related('cover_letter') + .order_by('version') + ) + + +def reply_to_msgid(series): + """ + Return the message-id to use as In-Reply-To when sending replies to a + series. Prefers the cover letter, falls back to first patch. + """ + if series.cover_letter: + return series.cover_letter.msgid + patches = list(series.patches.order_by('number')[:1]) + if patches: + return patches[0].msgid + return '' + + +def next_version(backend, forge_config, event): + """ + Determine the version number and threading info for a respin. + + Returns (version, in_reply_to, previous_ref) where version is the next + version number, in_reply_to is the message-id of the original series cover + letter, and previous_ref is the previous HEAD SHA for range-diff + generation. 
+ """ + series = find_series_by_pr(backend, forge_config, event.pr_number) + first = series.first() + last = series.last() + + if not first or not last: + return 1, '', '' + + return last.version + 1, reply_to_msgid(first), event.pr_before + + +def bytes_to_mbox(buf): + """ + Create a mailbox.mbox object from bytes. + """ + # The builtin constructor only accepts file paths. Trick it by passing + # /dev/null and replace the opened file with BytesIO. + mbox = mailbox.mbox(os.devnull, create=False) + mbox._file.close() + mbox._file = io.BytesIO(buf) + return mbox + + +@transaction.atomic +def ingest_emails(mbox, backend, forge_config, event): + """ + Parse raw emails from a mailbox.mbox and create Series/Patch/Cover objects + in the database via parse_mail(). After ingestion, store forge metadata on + the series using backend.series_metadata(). + """ + list_id = forge_config.project.listid + series = None + + for msg in mbox: + try: + result = parse_mail(msg, list_id, ignore_hints=True) + except DuplicateMailError: + logger.warning( + 'patch already ingested: %s: %s', + clean_header(msg.get('Message-ID')), + clean_header(msg.get('Subject')), + ) + continue + except ValueError: + logger.exception( + 'failed to ingest patch: %s: %s', + clean_header(msg.get('Message-ID')), + clean_header(msg.get('Subject')), + ) + continue + + if isinstance(result, (Cover, Patch)): + series = result.series + + if series: + metadata = backend.series_metadata(forge_config, event) + for key, value in metadata.items(): + if value: + SeriesMetadata.objects.update_or_create( + series=series, + key=key, + defaults={'value': value}, + ) + + +def _msg_header_addresses(msg, *headers): + values = [] + for h in headers: + for v in msg.get_all(h, []): + txt = clean_header(v) + if txt: + values.append(txt) + + addrs = set() + for _, addr in email.utils.getaddresses(values): + addrs.add(addr) + return list(addrs) + + +def send_emails(mbox, forge_config): + """ + Send raw emails from a mailbox.mbox via 
SMTP. Reads Sender, From, To and Cc + addresses from the email headers. + """ + with get_connection(fail_silently=False) as conn: + for key, msg in mbox.iteritems(): + senders = _msg_header_addresses(msg, 'sender') + recipients = _msg_header_addresses(msg, 'from', 'to', 'cc') + logger.info( + 'sending patch: %s -> %s: %s', + ','.join(senders), + ','.join(recipients), + clean_header(msg.get('subject', '')), + ) + # XXX: only works if email backend is smtp + errs = conn.connection.sendmail( + senders[0], recipients, mbox.get_bytes(key) + ) + for rcpt, err in errs.items(): + logger.warning('send patch to %s failed: %s', rcpt, err) diff --git a/patchwork/parser.py b/patchwork/parser.py index 13d043069944..f3d454f96828 100644 --- a/patchwork/parser.py +++ b/patchwork/parser.py @@ -1310,12 +1310,13 @@ def find_comment_addressed_by_header(mail): return False if 'X-Patchwork-Action-Required' in mail else None -def parse_mail(mail, list_id=None): +def parse_mail(mail, list_id=None, ignore_hints=False): """Parse a mail and add to the database. Args: mail (`mbox.Mail`): Mail to parse and add. list_id (str): Mailing list ID + ignore_hints (bool): Ignore X-Patchwork-Hint headers. 
Returns: patch/cover letter/comment @@ -1338,7 +1339,7 @@ def parse_mail(mail, list_id=None): raise ValueError("Missing 'Message-Id' header") hint = clean_header(mail.get('X-Patchwork-Hint', '')) - if hint and hint.lower() == 'ignore': + if hint and hint.lower() == 'ignore' and not ignore_hints: logger.info("Ignoring email due to 'ignore' hint") return diff --git a/patchwork/tests/forge/__init__.py b/patchwork/tests/forge/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/patchwork/tests/forge/test_util.py b/patchwork/tests/forge/test_util.py new file mode 100644 index 000000000000..c5b18c855f82 --- /dev/null +++ b/patchwork/tests/forge/test_util.py @@ -0,0 +1,406 @@ +# Patchwork - automated patch tracking system +# Copyright (C) 2026 Robin Jarry +# +# SPDX-License-Identifier: GPL-2.0-or-later + +from unittest.mock import MagicMock +from unittest.mock import patch as mock_patch + +from django.test import TestCase + +from patchwork.forge import ForgeEvent +from patchwork.forge import ForgeUser +from patchwork.forge.util import bytes_to_mbox +from patchwork.forge.util import ingest_emails +from patchwork.forge.util import next_version +from patchwork.forge.util import reply_to_msgid +from patchwork.forge.util import sanitize_pr_body +from patchwork.forge.util import send_emails +from patchwork.forge.util import sender_identity +from patchwork.models import ForgeConfig +from patchwork.models import SeriesMetadata +from patchwork.tests.utils import create_cover +from patchwork.tests.utils import create_patches +from patchwork.tests.utils import create_project +from patchwork.tests.utils import create_series + + +class SanitizePRBodyTest(TestCase): + def test_strip_html_comments(self): + body = 'Hello\n\nWorld' + self.assertEqual(sanitize_pr_body(body), 'Hello\n\nWorld') + + def test_strip_multiline_html_comment(self): + body = 'Before\n\nAfter' + self.assertEqual(sanitize_pr_body(body), 'Before\n\nAfter') + + def 
test_strip_coderabbit_section(self): + body = ( + 'Real content\n\n' + '## Summary by CodeRabbit\n\n' + 'AI generated stuff\n' + 'more AI stuff\n' + ) + self.assertEqual(sanitize_pr_body(body), 'Real content') + + def test_strip_copilot_section(self): + body = 'Fix bug\n\n## Summary by Copilot\n\nAI stuff' + self.assertEqual(sanitize_pr_body(body), 'Fix bug') + + def test_strip_walkthrough_section(self): + body = 'Real\n\n## Walkthrough\n\nAI stuff' + self.assertEqual(sanitize_pr_body(body), 'Real') + + def test_preserve_normal_headings(self): + body = '## Description\n\nThis is fine\n\n## Notes\n\nAlso fine' + self.assertEqual(sanitize_pr_body(body), body) + + def test_empty_body(self): + self.assertEqual(sanitize_pr_body(''), '') + + def test_none_body(self): + self.assertEqual(sanitize_pr_body(None), '') + + +class SenderIdentityTest(TestCase): + def test_user_with_name_and_email(self): + user = ForgeUser(login='octocat', name='Octo Cat', email='o@c.com') + config = ForgeConfig(from_email='pw@example.com') + self.assertEqual( + sender_identity(user, config), ('Octo Cat', 'o@c.com') + ) + + def test_user_with_email_only(self): + user = ForgeUser(login='octocat', name='', email='o@c.com') + config = ForgeConfig(from_email='pw@example.com') + self.assertEqual( + sender_identity(user, config), + ('octocat', 'o@c.com'), + ) + + def test_user_without_email(self): + user = ForgeUser(login='octocat', name='Octo Cat', email='') + config = ForgeConfig(from_email='pw@example.com') + self.assertEqual( + sender_identity(user, config), + ('Octo Cat (via Patchwork)', 'pw@example.com'), + ) + + def test_user_without_email_fallback(self): + user = ForgeUser(login='octocat', name='', email='') + config = ForgeConfig(from_email='') + name, addr = sender_identity(user, config) + self.assertEqual(name, 'octocat (via Patchwork)') + self.assertTrue(addr) + + +class ReplyToMsgidTest(TestCase): + def test_series_with_cover_letter(self): + project = create_project() + series = 
create_series(project=project) + cover = create_cover(series=series) + self.assertEqual(reply_to_msgid(series), cover.msgid) + + def test_series_without_cover_letter(self): + project = create_project() + series = create_series(project=project) + patches = create_patches(count=1, series=series) + self.assertEqual(reply_to_msgid(series), patches[0].msgid) + + def test_empty_series(self): + series = create_series() + self.assertEqual(reply_to_msgid(series), '') + + +class NextVersionTest(TestCase): + def test_no_previous_series(self): + project = create_project() + backend = MagicMock() + backend.pr_ref.return_value = 'https://github.com/o/r/pull/1' + backend.meta_key_pr.return_value = 'github_pr' + forge_config = MagicMock() + forge_config.project = project + event = ForgeEvent(pr_number=1, pr_before='abc123') + + version, in_reply_to, previous_ref = next_version( + backend, forge_config, event + ) + self.assertEqual(version, 1) + self.assertEqual(in_reply_to, '') + self.assertEqual(previous_ref, '') + + def test_with_previous_series(self): + project = create_project() + series_v1 = create_series(project=project, version=1) + cover_v1 = create_cover(series=series_v1) + pr_ref = 'https://github.com/o/r/pull/42' + SeriesMetadata.objects.create( + series=series_v1, key='github_pr', value=pr_ref + ) + + backend = MagicMock() + backend.pr_ref.return_value = pr_ref + backend.meta_key_pr.return_value = 'github_pr' + forge_config = MagicMock() + forge_config.project = project + event = ForgeEvent(pr_number=42, pr_before='def456') + + version, in_reply_to, previous_ref = next_version( + backend, forge_config, event + ) + self.assertEqual(version, 2) + self.assertEqual(in_reply_to, cover_v1.msgid) + self.assertEqual(previous_ref, 'def456') + + def test_with_multiple_versions(self): + project = create_project() + series_v1 = create_series(project=project, version=1) + cover_v1 = create_cover(series=series_v1) + series_v2 = create_series(project=project, version=2) + 
create_cover(series=series_v2) + pr_ref = 'https://github.com/o/r/pull/42' + SeriesMetadata.objects.create( + series=series_v1, key='github_pr', value=pr_ref + ) + SeriesMetadata.objects.create( + series=series_v2, key='github_pr', value=pr_ref + ) + + backend = MagicMock() + backend.pr_ref.return_value = pr_ref + backend.meta_key_pr.return_value = 'github_pr' + forge_config = MagicMock() + forge_config.project = project + event = ForgeEvent(pr_number=42, pr_before='ghi789') + + version, in_reply_to, previous_ref = next_version( + backend, forge_config, event + ) + self.assertEqual(version, 3) + self.assertEqual(in_reply_to, cover_v1.msgid) + self.assertEqual(previous_ref, 'ghi789') + + +class BytesToMboxTest(TestCase): + MBOX_DATA = ( + b'From nobody Thu Jan 1 00:00:00 1970\n' + b'From: Test \n' + b'Subject: [PATCH 1/2] first patch\n' + b'Message-ID: \n' + b'\n' + b'First patch body.\n' + b'\n' + b'From nobody Thu Jan 1 00:00:00 1970\n' + b'From: Test \n' + b'Subject: [PATCH 2/2] second patch\n' + b'Message-ID: \n' + b'\n' + b'Second patch body.\n' + ) + + def test_parse_messages(self): + mbox = bytes_to_mbox(self.MBOX_DATA) + messages = list(mbox) + self.assertEqual(len(messages), 2) + self.assertIn('patch1@example.com', messages[0].get('Message-ID')) + self.assertIn('patch2@example.com', messages[1].get('Message-ID')) + + def test_get_bytes_preserves_content(self): + mbox = bytes_to_mbox(self.MBOX_DATA) + for key, msg in mbox.iteritems(): + raw = mbox.get_bytes(key) + self.assertIn(b'From: Test ', raw) + self.assertIn(msg.get('Subject').encode(), raw) + + def test_empty_input(self): + mbox = bytes_to_mbox(b'') + self.assertEqual(len(list(mbox)), 0) + + +class IngestEmailsTest(TestCase): + def _make_mbox(self, messages): + buf = b'' + for msg in messages: + buf += b'From nobody Thu Jan 1 00:00:00 1970\n' + buf += msg + b'\n' + return bytes_to_mbox(buf) + + def test_ingest_creates_metadata(self): + project = create_project() + series = 
create_series(project=project) + patch = create_patches(count=1, series=series)[0] + + mbox = self._make_mbox( + [ + b'From: Test \n' + b'Subject: [PATCH] fix thing\n' + b'Message-ID: \n' + b'\nBody\n', + ] + ) + + backend = MagicMock() + backend.series_metadata.return_value = { + 'github_pr': 'https://github.com/o/r/pull/1', + 'github_branch': 'fix-thing', + } + forge_config = MagicMock() + forge_config.project = project + + with mock_patch('patchwork.forge.util.parse_mail', return_value=patch): + ingest_emails(mbox, backend, forge_config, ForgeEvent()) + + backend.series_metadata.assert_called_once() + self.assertTrue( + SeriesMetadata.objects.filter( + series=series, key='github_pr' + ).exists() + ) + self.assertTrue( + SeriesMetadata.objects.filter( + series=series, key='github_branch' + ).exists() + ) + + def test_ingest_skips_duplicates(self): + from patchwork.parser import DuplicateMailError + + mbox = self._make_mbox( + [ + b'From: Test \n' + b'Subject: [PATCH] fix thing\n' + b'Message-ID: \n' + b'\nBody\n', + ] + ) + + backend = MagicMock() + forge_config = MagicMock() + forge_config.project = create_project() + + with mock_patch( + 'patchwork.forge.util.parse_mail', + side_effect=DuplicateMailError(msgid=''), + ): + ingest_emails(mbox, backend, forge_config, ForgeEvent()) + + backend.series_metadata.assert_not_called() + + def test_ingest_skips_value_errors(self): + mbox = self._make_mbox( + [ + b'From: Test \n' + b'Subject: [PATCH] fix thing\n' + b'Message-ID: \n' + b'\nBody\n', + ] + ) + + backend = MagicMock() + forge_config = MagicMock() + forge_config.project = create_project() + + with mock_patch( + 'patchwork.forge.util.parse_mail', + side_effect=ValueError('bad email'), + ): + ingest_emails(mbox, backend, forge_config, ForgeEvent()) + + backend.series_metadata.assert_not_called() + + def test_ingest_no_metadata_on_empty_values(self): + project = create_project() + series = create_series(project=project) + patch = create_patches(count=1, 
series=series)[0] + + mbox = self._make_mbox( + [ + b'From: Test \n' + b'Subject: [PATCH] fix thing\n' + b'Message-ID: \n' + b'\nBody\n', + ] + ) + + backend = MagicMock() + backend.series_metadata.return_value = { + 'github_pr': 'https://github.com/o/r/pull/1', + 'github_branch': '', + } + forge_config = MagicMock() + forge_config.project = project + + with mock_patch('patchwork.forge.util.parse_mail', return_value=patch): + ingest_emails(mbox, backend, forge_config, ForgeEvent()) + + self.assertTrue( + SeriesMetadata.objects.filter( + series=series, key='github_pr' + ).exists() + ) + self.assertFalse( + SeriesMetadata.objects.filter( + series=series, key='github_branch' + ).exists() + ) + + +class SendEmailsTest(TestCase): + MBOX_DATA = ( + b'From nobody Thu Jan 1 00:00:00 1970\n' + b'From: Author \n' + b'Sender: Bot \n' + b'To: list@example.com\n' + b'Cc: reviewer@example.com\n' + b'Subject: [PATCH 1/1] fix thing\n' + b'Message-ID: \n' + b'\n' + b'Patch body.\n' + ) + + def test_sends_via_smtp(self): + mbox = bytes_to_mbox(self.MBOX_DATA) + forge_config = MagicMock() + + mock_conn = MagicMock() + mock_conn.connection.sendmail.return_value = {} + + with mock_patch( + 'patchwork.forge.util.get_connection' + ) as mock_get_conn: + mock_get_conn.return_value.__enter__ = MagicMock( + return_value=mock_conn + ) + mock_get_conn.return_value.__exit__ = MagicMock(return_value=False) + send_emails(mbox, forge_config) + + mock_conn.connection.sendmail.assert_called_once() + call_args = mock_conn.connection.sendmail.call_args + sender = call_args[0][0] + recipients = call_args[0][1] + raw_bytes = call_args[0][2] + self.assertEqual(sender, 'bot@example.com') + self.assertIn('author@example.com', recipients) + self.assertIn('list@example.com', recipients) + self.assertIn('reviewer@example.com', recipients) + self.assertIn(b'[PATCH 1/1] fix thing', raw_bytes) + + def test_sends_raw_bytes(self): + mbox = bytes_to_mbox(self.MBOX_DATA) + forge_config = MagicMock() + + mock_conn = 
MagicMock() + mock_conn.connection.sendmail.return_value = {} + + with mock_patch( + 'patchwork.forge.util.get_connection' + ) as mock_get_conn: + mock_get_conn.return_value.__enter__ = MagicMock( + return_value=mock_conn + ) + mock_get_conn.return_value.__exit__ = MagicMock(return_value=False) + send_emails(mbox, forge_config) + + raw_bytes = mock_conn.connection.sendmail.call_args[0][2] + self.assertIn(b'Sender: Bot ', raw_bytes) + self.assertIn(b'Message-ID: ', raw_bytes)