diff --git a/changelog.d/18195.feature b/changelog.d/18195.feature
new file mode 100644
index 000000000..7f7903bd7
--- /dev/null
+++ b/changelog.d/18195.feature
@@ -0,0 +1 @@
+Add plain-text handling for rich-text topics as per [MSC3765](https://github.com/matrix-org/matrix-spec-proposals/pull/3765).
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index e36461486..47b84b02e 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -262,6 +262,11 @@ class EventContentFields:
 
     TOMBSTONE_SUCCESSOR_ROOM: Final = "replacement_room"
 
+    # Used in m.room.topic events.
+    TOPIC: Final = "topic"
+    M_TOPIC: Final = "m.topic"
+    M_TEXT: Final = "m.text"
+
 
 class EventUnsignedContentFields:
     """Fields found inside the 'unsigned' data on events"""
@@ -270,6 +275,13 @@ class EventUnsignedContentFields:
     MEMBERSHIP: Final = "membership"
 
 
+class MTextFields:
+    """Fields found inside m.text content blocks."""
+
+    BODY: Final = "body"
+    MIMETYPE: Final = "mimetype"
+
+
 class RoomTypes:
     """Understood values of the room_type field of m.room.create events."""
 
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 2dfeb47c2..c486c8127 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -27,8 +27,6 @@ from typing import Any, Dict, List, Optional, Union
 import attr
 
 from synapse._pydantic_compat import (
-    BaseModel,
-    Extra,
     StrictBool,
     StrictInt,
     StrictStr,
@@ -47,6 +45,7 @@ from synapse.config.server import (
     parse_listener_def,
 )
 from synapse.types import JsonDict
+from synapse.util.pydantic_models import ParseModel
 
 _DEPRECATED_WORKER_DUTY_OPTION_USED = """
 The '%s' configuration option is deprecated and will be removed in a future
@@ -90,30 +89,7 @@ def _instance_to_list_converter(obj: Union[str, List[str]]) -> List[str]:
     return obj
 
 
-class ConfigModel(BaseModel):
-    """A custom version of Pydantic's BaseModel which
-
-    - ignores unknown fields and
-    - does not allow fields to be overwritten after construction,
-
-    but otherwise uses Pydantic's default behaviour.
-
-    For now, ignore unknown fields. In the future, we could change this so that unknown
-    config values cause a ValidationError, provided the error messages are meaningful to
-    server operators.
-
-    Subclassing in this way is recommended by
-    https://pydantic-docs.helpmanual.io/usage/model_config/#change-behaviour-globally
-    """
-
-    class Config:
-        # By default, ignore fields that we don't recognise.
-        extra = Extra.ignore
-        # By default, don't allow fields to be reassigned after parsing.
-        allow_mutation = False
-
-
-class InstanceTcpLocationConfig(ConfigModel):
+class InstanceTcpLocationConfig(ParseModel):
     """The host and port to talk to an instance via HTTP replication."""
 
     host: StrictStr
@@ -129,7 +105,7 @@ class InstanceTcpLocationConfig(ConfigModel):
         return f"{self.host}:{self.port}"
 
 
-class InstanceUnixLocationConfig(ConfigModel):
+class InstanceUnixLocationConfig(ParseModel):
     """The socket file to talk to an instance via HTTP replication."""
 
     path: StrictStr
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 0f004d02d..b063e301e 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -51,6 +51,7 @@ from synapse.api.constants import (
     HistoryVisibility,
     JoinRules,
     Membership,
+    MTextFields,
     RoomCreationPreset,
     RoomEncryptionAlgorithms,
     RoomTypes,
@@ -1303,7 +1304,13 @@ class RoomCreationHandler:
             topic = room_config["topic"]
             topic_event, topic_context = await create_event(
                 EventTypes.Topic,
-                {"topic": topic},
+                {
+                    EventContentFields.TOPIC: topic,
+                    EventContentFields.M_TOPIC: {
+                        # The mimetype property defaults to `text/plain` if omitted.
+                        EventContentFields.M_TEXT: [{MTextFields.BODY: topic}]
+                    },
+                },
                 True,
             )
             events_to_send.append((topic_event, topic_context))
diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py
index 8f90c1706..aa3326080 100644
--- a/synapse/handlers/stats.py
+++ b/synapse/handlers/stats.py
@@ -36,6 +36,7 @@ from synapse.metrics import event_processing_positions
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.databases.main.state_deltas import StateDelta
 from synapse.types import JsonDict
+from synapse.util.events import get_plain_text_topic_from_event_content
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -299,7 +300,9 @@ class StatsHandler:
             elif delta.event_type == EventTypes.Name:
                 room_state["name"] = event_content.get("name")
             elif delta.event_type == EventTypes.Topic:
-                room_state["topic"] = event_content.get("topic")
+                room_state["topic"] = get_plain_text_topic_from_event_content(
+                    event_content
+                )
             elif delta.event_type == EventTypes.RoomAvatar:
                 room_state["avatar"] = event_content.get("url")
             elif delta.event_type == EventTypes.CanonicalAlias:
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index b7cc0433e..b7fbfdc0c 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -78,6 +78,7 @@ from synapse.types import (
 from synapse.types.handlers import SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES
 from synapse.types.state import StateFilter
 from synapse.util import json_encoder
+from synapse.util.events import get_plain_text_topic_from_event_content
 from synapse.util.iterutils import batch_iter, sorted_topologically
 from synapse.util.stringutils import non_null_str_or_none
 
@@ -3102,7 +3103,10 @@ class PersistEventsStore:
     def _store_room_topic_txn(self, txn: LoggingTransaction, event: EventBase) -> None:
         if isinstance(event.content.get("topic"), str):
             self.store_event_search_txn(
-                txn, event, "content.topic", event.content["topic"]
+                txn,
+                event,
+                "content.topic",
+                get_plain_text_topic_from_event_content(event.content) or "",
             )
 
     def _store_room_name_txn(self, txn: LoggingTransaction, event: EventBase) -> None:
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index 1d5c5e72f..47dfdf64e 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -49,6 +49,7 @@ from synapse.storage.database import (
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
 from synapse.storage.engines import PostgresEngine, Sqlite3Engine
 from synapse.types import JsonDict
+from synapse.util.events import get_plain_text_topic_from_event_content
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -212,7 +213,7 @@ class SearchBackgroundUpdateStore(SearchWorkerStore):
                         value = content["body"]
                     elif etype == "m.room.topic":
                         key = "content.topic"
-                        value = content["topic"]
+                        value = get_plain_text_topic_from_event_content(content) or ""
                     elif etype == "m.room.name":
                         key = "content.name"
                         value = content["name"]
diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py
index 79c49e7fd..74830b712 100644
--- a/synapse/storage/databases/main/stats.py
+++ b/synapse/storage/databases/main/stats.py
@@ -48,6 +48,7 @@ from synapse.storage.databases.main.events_worker import InvalidEventError
 from synapse.storage.databases.main.state_deltas import StateDeltasStore
 from synapse.types import JsonDict
 from synapse.util.caches.descriptors import cached
+from synapse.util.events import get_plain_text_topic_from_event_content
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -611,7 +612,9 @@ class StatsStore(StateDeltasStore):
             elif event.type == EventTypes.Name:
                 room_state["name"] = event.content.get("name")
             elif event.type == EventTypes.Topic:
-                room_state["topic"] = event.content.get("topic")
+                room_state["topic"] = get_plain_text_topic_from_event_content(
+                    event.content
+                )
             elif event.type == EventTypes.RoomAvatar:
                 room_state["avatar"] = event.content.get("url")
             elif event.type == EventTypes.CanonicalAlias:
diff --git a/synapse/types/rest/__init__.py b/synapse/types/rest/__init__.py
index 183831e79..a02836dee 100644
--- a/synapse/types/rest/__init__.py
+++ b/synapse/types/rest/__init__.py
@@ -18,26 +18,8 @@
 # [This file includes modifications made by New Vector Limited]
 #
 #
-from synapse._pydantic_compat import BaseModel, Extra
+from synapse.util.pydantic_models import ParseModel
 
 
-class RequestBodyModel(BaseModel):
-    """A custom version of Pydantic's BaseModel which
-
-    - ignores unknown fields and
-    - does not allow fields to be overwritten after construction,
-
-    but otherwise uses Pydantic's default behaviour.
-
-    Ignoring unknown fields is a useful default. It means that clients can provide
-    unstable field not known to the server without the request being refused outright.
-
-    Subclassing in this way is recommended by
-    https://pydantic-docs.helpmanual.io/usage/model_config/#change-behaviour-globally
-    """
-
-    class Config:
-        # By default, ignore fields that we don't recognise.
-        extra = Extra.ignore
-        # By default, don't allow fields to be reassigned after parsing.
-        allow_mutation = False
+class RequestBodyModel(ParseModel):
+    pass
diff --git a/synapse/util/events.py b/synapse/util/events.py
index ad9b94657..480826870 100644
--- a/synapse/util/events.py
+++ b/synapse/util/events.py
@@ -13,6 +13,11 @@
 #
 #
 
+from typing import Any, List, Optional
+
+from synapse._pydantic_compat import Field, StrictStr, ValidationError, validator
+from synapse.types import JsonDict
+from synapse.util.pydantic_models import ParseModel
 from synapse.util.stringutils import random_string
 
 
@@ -27,3 +32,100 @@ def generate_fake_event_id() -> str:
         A string intended to look like an event ID, but with no actual meaning.
     """
     return "$" + random_string(43)
+
+
+class MTextRepresentation(ParseModel):
+    """
+    See `TextualRepresentation` in the Matrix specification.
+    """
+
+    body: StrictStr
+    mimetype: Optional[StrictStr]
+
+
+class MTopic(ParseModel):
+    """
+    `m.room.topic` -> `content` -> `m.topic`
+
+    Textual representation of the room topic in different mimetypes. Added in Matrix v1.15.
+
+    See `TopicContentBlock` in the Matrix specification.
+    """
+
+    m_text: Optional[List[MTextRepresentation]] = Field(alias="m.text")
+    """
+    An ordered array of textual representations in different mimetypes.
+    """
+
+    # Because "Receivers SHOULD use the first representation in the array that they
+    # understand.", we ignore invalid representations in the `m.text` field and use
+    # what we can.
+    @validator("m_text", pre=True)
+    def ignore_invalid_representations(
+        cls, m_text: Any
+    ) -> Optional[List[MTextRepresentation]]:
+        if not isinstance(m_text, list):
+            raise ValueError("m.text must be a list")
+        representations = []
+        for element in m_text:
+            try:
+                representations.append(MTextRepresentation.parse_obj(element))
+            except ValidationError:
+                continue
+        return representations
+
+
+class TopicContent(ParseModel):
+    """
+    Represents the `content` field of an `m.room.topic` event
+    """
+
+    topic: StrictStr
+    """
+    The topic in plain text.
+    """
+
+    m_topic: Optional[MTopic] = Field(alias="m.topic")
+    """
+    Textual representation of the room topic in different mimetypes.
+    """
+
+    # We ignore invalid `m.topic` fields as we can always fall back to the plain-text
+    # `topic` field.
+    @validator("m_topic", pre=True)
+    def ignore_invalid_m_topic(cls, m_topic: Any) -> Optional[MTopic]:
+        try:
+            return MTopic.parse_obj(m_topic)
+        except ValidationError:
+            return None
+
+
+def get_plain_text_topic_from_event_content(content: JsonDict) -> Optional[str]:
+    """
+    Given the `content` of an `m.room.topic` event, returns the plain-text topic
+    representation. Prefers pulling plain-text from the newer `m.topic` field if
+    available with a fallback to `topic`.
+
+    Args:
+        content: The `content` field of an `m.room.topic` event.
+
+    Returns:
+        A string representing the plain text topic, or `None` if the content is invalid.
+    """
+
+    try:
+        topic_content = TopicContent.parse_obj(content)
+    except ValidationError:
+        return None
+
+    # Find the first `text/plain` topic ("Receivers SHOULD use the first
+    # representation in the array that they understand.")
+    if topic_content.m_topic and topic_content.m_topic.m_text:
+        for representation in topic_content.m_topic.m_text:
+            # The mimetype property defaults to `text/plain` if omitted.
+            if not representation.mimetype or representation.mimetype == "text/plain":
+                return representation.body
+
+    # Fallback to the plain-old `topic` field if there isn't any `text/plain` topic
+    # representation available.
+    return topic_content.topic
diff --git a/synapse/util/pydantic_models.py b/synapse/util/pydantic_models.py
new file mode 100644
index 000000000..ba9e7bb7d
--- /dev/null
+++ b/synapse/util/pydantic_models.py
@@ -0,0 +1,39 @@
+#
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
+# Copyright (C) 2024 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# See the GNU Affero General Public License for more details:
+# <https://www.gnu.org/licenses/agpl-3.0.html>.
+#
+#
+
+from synapse._pydantic_compat import BaseModel, Extra
+
+
+class ParseModel(BaseModel):
+    """A custom version of Pydantic's BaseModel which
+
+    - ignores unknown fields and
+    - does not allow fields to be overwritten after construction,
+
+    but otherwise uses Pydantic's default behaviour.
+
+    For now, ignore unknown fields. In the future, we could change this so that unknown
+    config values cause a ValidationError, provided the error messages are meaningful to
+    server operators.
+
+    Subclassing in this way is recommended by
+    https://pydantic-docs.helpmanual.io/usage/model_config/#change-behaviour-globally
+    """
+
+    class Config:
+        # By default, ignore fields that we don't recognise.
+        extra = Extra.ignore
+        # By default, don't allow fields to be reassigned after parsing.
+        allow_mutation = False
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index 6c93ead3b..3ba7584c6 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -757,6 +757,59 @@ class RoomsCreateTestCase(RoomBase):
         assert channel.resource_usage is not None
         self.assertEqual(37, channel.resource_usage.db_txn_count)
 
+    def test_post_room_topic(self) -> None:
+        # POST with topic key, expect new room id
+        channel = self.make_request("POST", "/createRoom", b'{"topic":"shenanigans"}')
+        self.assertEqual(HTTPStatus.OK, channel.code)
+        self.assertTrue("room_id" in channel.json_body)
+        room_id = channel.json_body["room_id"]
+
+        # GET topic event, expect content from topic key
+        channel = self.make_request("GET", "/rooms/%s/state/m.room.topic" % (room_id,))
+        self.assertEqual(HTTPStatus.OK, channel.code)
+        self.assertEqual(
+            {"topic": "shenanigans", "m.topic": {"m.text": [{"body": "shenanigans"}]}},
+            channel.json_body,
+        )
+
+    def test_post_room_topic_initial_state(self) -> None:
+        # POST with m.room.topic in initial state, expect new room id
+        channel = self.make_request(
+            "POST",
+            "/createRoom",
+            b'{"initial_state":[{"type": "m.room.topic", "content": {"topic": "foobar"}}]}',
+        )
+        self.assertEqual(HTTPStatus.OK, channel.code)
+        self.assertTrue("room_id" in channel.json_body)
+        room_id = channel.json_body["room_id"]
+
+        # GET topic event, expect content from initial state
+        channel = self.make_request("GET", "/rooms/%s/state/m.room.topic" % (room_id,))
+        self.assertEqual(HTTPStatus.OK, channel.code)
+        self.assertEqual(
+            {"topic": "foobar"},
+            channel.json_body,
+        )
+
+    def test_post_room_topic_overriding_initial_state(self) -> None:
+        # POST with m.room.topic in initial state and topic key, expect new room id
+        channel = self.make_request(
+            "POST",
+            "/createRoom",
+            b'{"initial_state":[{"type": "m.room.topic", "content": {"topic": "foobar"}}], "topic":"shenanigans"}',
+        )
+        self.assertEqual(HTTPStatus.OK, channel.code)
+        self.assertTrue("room_id" in channel.json_body)
+        room_id = channel.json_body["room_id"]
+
+        # GET topic event, expect content from topic key
+        channel = self.make_request("GET", "/rooms/%s/state/m.room.topic" % (room_id,))
+        self.assertEqual(HTTPStatus.OK, channel.code)
+        self.assertEqual(
+            {"topic": "shenanigans", "m.topic": {"m.text": [{"body": "shenanigans"}]}},
+            channel.json_body,
+        )
+
     def test_post_room_visibility_key(self) -> None:
         # POST with visibility config key, expect new room id
         channel = self.make_request("POST", "/createRoom", b'{"visibility":"private"}')
diff --git a/tests/util/test_events.py b/tests/util/test_events.py
new file mode 100644
index 000000000..d61ca6b3a
--- /dev/null
+++ b/tests/util/test_events.py
@@ -0,0 +1,118 @@
+#
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
+# Copyright (C) 2025 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# See the GNU Affero General Public License for more details:
+# <https://www.gnu.org/licenses/agpl-3.0.html>.
+#
+# Originally licensed under the Apache License, Version 2.0:
+# <http://www.apache.org/licenses/LICENSE-2.0>.
+#
+# [This file includes modifications made by New Vector Limited]
+#
+#
+
+
+from synapse.util.events import get_plain_text_topic_from_event_content
+
+from tests import unittest
+
+
+class EventsTestCase(unittest.TestCase):
+    def test_get_plain_text_topic_no_topic(self) -> None:
+        # No legacy or rich topic, expect None
+        topic = get_plain_text_topic_from_event_content({})
+        self.assertEqual(None, topic)
+
+    def test_get_plain_text_topic_no_rich_topic(self) -> None:
+        # Only legacy topic, expect legacy topic
+        topic = get_plain_text_topic_from_event_content({"topic": "shenanigans"})
+        self.assertEqual("shenanigans", topic)
+
+    def test_get_plain_text_topic_rich_topic_without_representations(self) -> None:
+        # Legacy topic and rich topic without representations, expect legacy topic
+        topic = get_plain_text_topic_from_event_content(
+            {"topic": "shenanigans", "m.topic": {"m.text": []}}
+        )
+        self.assertEqual("shenanigans", topic)
+
+    def test_get_plain_text_topic_rich_topic_without_plain_text_representation(
+        self,
+    ) -> None:
+        # Legacy topic and rich topic without plain text representation, expect legacy topic
+        topic = get_plain_text_topic_from_event_content(
+            {
+                "topic": "shenanigans",
+                "m.topic": {
+                    "m.text": [
+                        {"mimetype": "text/html", "body": "<strong>foobar</strong>"}
+                    ]
+                },
+            }
+        )
+        self.assertEqual("shenanigans", topic)
+
+    def test_get_plain_text_topic_rich_topic_with_plain_text_representation(
+        self,
+    ) -> None:
+        # Legacy topic and rich topic with plain text representation, expect plain text representation
+        topic = get_plain_text_topic_from_event_content(
+            {
+                "topic": "shenanigans",
+                "m.topic": {"m.text": [{"mimetype": "text/plain", "body": "foobar"}]},
+            }
+        )
+        self.assertEqual("foobar", topic)
+
+    def test_get_plain_text_topic_rich_topic_with_implicit_plain_text_representation(
+        self,
+    ) -> None:
+        # Legacy topic and rich topic with implicit plain text representation, expect plain text representation
+        topic = get_plain_text_topic_from_event_content(
+            {"topic": "shenanigans", "m.topic": {"m.text": [{"body": "foobar"}]}}
+        )
+        self.assertEqual("foobar", topic)
+
+    def test_get_plain_text_topic_rich_topic_with_invalid_plain_text_representation(
+        self,
+    ) -> None:
+        # Legacy topic and rich topic with invalid plain text representation, expect legacy topic
+        topic = get_plain_text_topic_from_event_content(
+            {"topic": "shenanigans", "m.topic": {"m.text": [{"body": 1337}]}}
+        )
+        self.assertEqual("shenanigans", topic)
+
+    def test_get_plain_text_topic_rich_topic_with_invalid_and_second_valid_plain_text_representation(
+        self,
+    ) -> None:
+        # Legacy topic and rich topic with invalid and second valid plain text representation, expect second plain text representation
+        topic = get_plain_text_topic_from_event_content(
+            {
+                "topic": "shenanigans",
+                "m.topic": {"m.text": [{"body": 1337}, {"body": "foobar"}]},
+            }
+        )
+        self.assertEqual("foobar", topic)
+
+    def test_get_plain_text_topic_rich_topic_with_plain_text_and_other_representation(
+        self,
+    ) -> None:
+        # Legacy topic and rich topic with plain text representation, expect plain text representation
+        topic = get_plain_text_topic_from_event_content(
+            {
+                "topic": "shenanigans",
+                "m.topic": {
+                    "m.text": [
+                        {"mimetype": "text/html", "body": "<strong>foobar</strong>"},
+                        {"mimetype": "text/plain", "body": "foobar"},
+                    ]
+                },
+            }
+        )
+        self.assertEqual("foobar", topic)