diff --git a/changelog.d/18297.misc b/changelog.d/18297.misc new file mode 100644 index 000000000..5032d4817 --- /dev/null +++ b/changelog.d/18297.misc @@ -0,0 +1 @@ +Apply file hashing and existing quarantines to media downloaded for URL previews. diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py index 859b30e02..18c5a8ece 100644 --- a/synapse/media/media_repository.py +++ b/synapse/media/media_repository.py @@ -378,7 +378,6 @@ class MediaRepository: media_length=content_length, user_id=auth_user, sha256=sha256, - # TODO: Better name? quarantined_by="system" if should_quarantine else None, ) diff --git a/synapse/media/url_previewer.py b/synapse/media/url_previewer.py index 2e65a0478..8ef2b3f0c 100644 --- a/synapse/media/url_previewer.py +++ b/synapse/media/url_previewer.py @@ -41,7 +41,7 @@ from synapse.api.errors import Codes, SynapseError from synapse.http.client import SimpleHttpClient from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.media._base import FileInfo, get_filename_from_headers -from synapse.media.media_storage import MediaStorage +from synapse.media.media_storage import MediaStorage, SHA256TransparentIOWriter from synapse.media.oembed import OEmbedProvider from synapse.media.preview_html import decode_body, parse_html_to_open_graph from synapse.metrics.background_process_metrics import run_as_background_process @@ -593,17 +593,26 @@ class UrlPreviewer: file_info = FileInfo(server_name=None, file_id=file_id, url_cache=True) async with self.media_storage.store_into_file(file_info) as (f, fname): + sha256writer = SHA256TransparentIOWriter(f) if url.startswith("data:"): if not allow_data_urls: raise SynapseError( 500, "Previewing of data: URLs is forbidden", Codes.UNKNOWN ) - download_result = await self._parse_data_url(url, f) + download_result = await self._parse_data_url(url, sha256writer.wrap()) else: - download_result = await self._download_url(url, f) + download_result = 
await self._download_url(url, sha256writer.wrap()) try: + sha256 = sha256writer.hexdigest() + should_quarantine = await self.store.get_is_hash_quarantined(sha256) + + if should_quarantine: + logger.warning( + "Media has been automatically quarantined as it matched existing quarantined media" + ) + time_now_ms = self.clock.time_msec() await self.store.store_local_media( @@ -614,6 +623,8 @@ class UrlPreviewer: media_length=download_result.length, user_id=user, url_cache=url, + sha256=sha256, + quarantined_by="system" if should_quarantine else None, ) except Exception as e: