aka PEP 585, added in Python 3.9 - https://peps.python.org/pep-0585/ - https://docs.astral.sh/ruff/rules/non-pep585-annotation/
297 lines
10 KiB
Python
297 lines
10 KiB
Python
#
|
|
# This file is licensed under the Affero General Public License (AGPL) version 3.
|
|
#
|
|
# Copyright 2020 The Matrix.org Foundation C.I.C.
|
|
# Copyright (C) 2023 New Vector, Ltd
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as
|
|
# published by the Free Software Foundation, either version 3 of the
|
|
# License, or (at your option) any later version.
|
|
#
|
|
# See the GNU Affero General Public License for more details:
|
|
# <https://www.gnu.org/licenses/agpl-3.0.html>.
|
|
#
|
|
# Originally licensed under the Apache License, Version 2.0:
|
|
# <http://www.apache.org/licenses/LICENSE-2.0>.
|
|
#
|
|
# [This file includes modifications made by New Vector Limited]
|
|
#
|
|
#
|
|
import logging
|
|
import math
|
|
import resource
|
|
import sys
|
|
from typing import TYPE_CHECKING, Mapping, Sized
|
|
|
|
from prometheus_client import Gauge
|
|
|
|
from twisted.internet import defer
|
|
|
|
from synapse.metrics import SERVER_NAME_LABEL
|
|
from synapse.types import JsonDict
|
|
from synapse.util.constants import (
|
|
MILLISECONDS_PER_SECOND,
|
|
ONE_HOUR_SECONDS,
|
|
ONE_MINUTE_SECONDS,
|
|
)
|
|
|
|
if TYPE_CHECKING:
|
|
from synapse.server import HomeServer
|
|
|
|
# Log under the fixed homeserver app logger name (not derived from `__name__`).
logger = logging.getLogger("synapse.app.homeserver")

INITIAL_DELAY_BEFORE_FIRST_PHONE_HOME_SECONDS = 5 * ONE_MINUTE_SECONDS
"""
Delay before the first stats report is sent: 5 minutes, because the server can be
quite busy during its first few minutes.
"""

PHONE_HOME_INTERVAL_SECONDS = 3 * ONE_HOUR_SECONDS
"""
Interval between phone home stats reports: 3 hours.
"""

# Resource-usage samples for the processes being monitored, stored as
# `(timestamp, rusage)` pairs. Currently holds either 0 or 1 entries.
_stats_process: list[tuple[int, "resource.struct_rusage"]] = []
|
|
|
|
# Prometheus gauges exposing the monthly active user (MAU) control metrics.
# Every gauge carries the server name label; the per-service gauge is
# additionally labelled with the application service.
current_mau_gauge = Gauge(
    name="synapse_admin_mau_current",
    documentation="Current MAU",
    labelnames=[SERVER_NAME_LABEL],
)
current_mau_by_service_gauge = Gauge(
    name="synapse_admin_mau_current_mau_by_service",
    documentation="Current MAU by service",
    labelnames=["app_service", SERVER_NAME_LABEL],
)
max_mau_gauge = Gauge(
    name="synapse_admin_mau_max",
    documentation="MAU Limit",
    labelnames=[SERVER_NAME_LABEL],
)
registered_reserved_users_mau_gauge = Gauge(
    name="synapse_admin_mau_registered_reserved_users",
    documentation="Registered users with reserved threepids",
    labelnames=[SERVER_NAME_LABEL],
)
|
|
|
|
|
|
def phone_stats_home(
    hs: "HomeServer",
    stats: JsonDict,
    stats_process: list[tuple[int, "resource.struct_rusage"]] = _stats_process,
) -> "defer.Deferred[None]":
    """Gather usage statistics and report them, as a background process.

    The actual work is done by the nested `_phone_stats_home` coroutine, which is
    handed to `hs.run_as_background_process` under the name "phone_stats_home".

    Args:
        hs: the HomeServer object to use for gathering usage data.
        stats: the dict which is filled in (mutated in place) with the statistics
            and then sent to the configured endpoint.
        stats_process: list holding the previous `(timestamp, rusage)` sample at
            index 0. It is updated in place with the sample taken by this call.
            Defaults to the shared module-level `_stats_process` list (this shared
            mutable default is intentional).

    Returns:
        Whatever `hs.run_as_background_process` returns for the reporting task.
    """

    async def _phone_stats_home(
        hs: "HomeServer",
        stats: JsonDict,
        stats_process: list[tuple[int, "resource.struct_rusage"]] = _stats_process,
    ) -> None:
        """Collect usage statistics and send them to the configured endpoint.

        Args:
            hs: the HomeServer object to use for gathering usage data.
            stats: the dict in which to store the statistics sent to the configured
                endpoint. Mostly used in tests to figure out the data that is supposed to
                be sent.
            stats_process: statistics about resource usage of the process.
        """

        logger.info("Gathering stats for reporting")
        now = int(hs.get_clock().time())
        # Ensure the homeserver has started.
        assert hs.start_time is not None
        # Never report a negative uptime.
        uptime = max(0, int(now - hs.start_time))

        #
        # Performance statistics. Keep this early in the function to maintain reliability of `test_performance_100` test.
        #
        new = (now, resource.getrusage(resource.RUSAGE_SELF))
        if stats_process:
            old = stats_process[0]
            stats_process[0] = new
        else:
            # No baseline sample has been recorded yet. Seed it with the current
            # sample instead of failing with an IndexError; the CPU average for
            # this first report is then 0.
            old = new
            stats_process.append(new)

        # Peak resident set size as reported by `getrusage` (`ru_maxrss`).
        # NOTE: the unit is platform-dependent (e.g. kilobytes on Linux), so this
        # is not necessarily a byte count.
        stats["memory_rss"] = new[1].ru_maxrss

        # Get CPU time in % of a single core, not % of all cores
        used_cpu_time = (new[1].ru_utime + new[1].ru_stime) - (
            old[1].ru_utime + old[1].ru_stime
        )
        if used_cpu_time == 0 or new[0] == old[0]:
            # Either no CPU was used or no time has elapsed between samples
            # (which would otherwise divide by zero).
            stats["cpu_average"] = 0
        else:
            stats["cpu_average"] = math.floor(used_cpu_time / (new[0] - old[0]) * 100)

        #
        # General statistics
        #

        store = hs.get_datastores().main
        common_metrics = await hs.get_common_usage_metrics_manager().get_metrics()

        stats["homeserver"] = hs.config.server.server_name
        stats["server_context"] = hs.config.server.server_context
        stats["timestamp"] = now
        stats["uptime_seconds"] = uptime
        version = sys.version_info
        stats["python_version"] = f"{version.major}.{version.minor}.{version.micro}"
        stats["total_users"] = await store.count_all_users()

        total_nonbridged_users = await store.count_nonbridged_users()
        stats["total_nonbridged_users"] = total_nonbridged_users

        # One `daily_user_type_<name>` entry per user type returned by the store.
        daily_user_type_results = await store.count_daily_user_type()
        for name, count in daily_user_type_results.items():
            stats["daily_user_type_" + name] = count

        room_count = await store.get_room_count()
        stats["total_room_count"] = room_count

        stats["daily_active_users"] = common_metrics.daily_active_users
        stats["monthly_active_users"] = await store.count_monthly_users()
        daily_active_e2ee_rooms = await store.count_daily_active_e2ee_rooms()
        stats["daily_active_e2ee_rooms"] = daily_active_e2ee_rooms
        stats["daily_e2ee_messages"] = await store.count_daily_e2ee_messages()
        daily_sent_e2ee_messages = await store.count_daily_sent_e2ee_messages()
        stats["daily_sent_e2ee_messages"] = daily_sent_e2ee_messages
        stats["daily_active_rooms"] = await store.count_daily_active_rooms()
        stats["daily_messages"] = await store.count_daily_messages()
        daily_sent_messages = await store.count_daily_sent_messages()
        stats["daily_sent_messages"] = daily_sent_messages

        # One `r30v2_users_<name>` entry per bucket returned by the store.
        r30v2_results = await store.count_r30v2_users()
        for name, count in r30v2_results.items():
            stats["r30v2_users_" + name] = count

        stats["cache_factor"] = hs.config.caches.global_factor
        stats["event_cache_size"] = hs.config.caches.event_cache_size

        #
        # Database version
        #

        # This only reports info about the *main* database.
        stats["database_engine"] = store.db_pool.engine.module.__name__
        stats["database_server_version"] = store.db_pool.engine.server_version

        #
        # Logging configuration
        #
        synapse_logger = logging.getLogger("synapse")
        log_level = synapse_logger.getEffectiveLevel()
        stats["log_level"] = logging.getLevelName(log_level)

        logger.info(
            "Reporting stats to %s: %s", hs.config.metrics.report_stats_endpoint, stats
        )
        try:
            await hs.get_proxied_http_client().put_json(
                hs.config.metrics.report_stats_endpoint, stats
            )
        except Exception as e:
            # Reporting is best-effort: a failed upload is logged, never raised.
            logger.warning("Error reporting stats: %s", e)

    return hs.run_as_background_process(
        "phone_stats_home", _phone_stats_home, hs, stats, stats_process
    )
|
|
|
|
|
|
def start_phone_stats_home(hs: "HomeServer") -> None:
    """
    Start the background tasks which report phone home stats.

    This schedules, on the homeserver's clock:
      * a recurring `generate_user_daily_visits` call on the main datastore;
      * a recurring `reap_monthly_active_users` call on the main datastore, plus
        one immediate call;
      * if MAU limiting or MAU stats are enabled in the server config, an
        immediate and then recurring refresh of the MAU Prometheus gauges;
      * if `report_stats` is enabled in the metrics config, the recurring
        `phone_stats_home` report, the initialisation of the resource-usage
        baseline, and a first report after the initial delay.

    Args:
        hs: the HomeServer whose clock, datastores and config are used.
    """
    server_name = hs.hostname
    clock = hs.get_clock()

    # Shared across every report; `phone_stats_home` fills it in place.
    stats: JsonDict = {}

    def performance_stats_init() -> None:
        """Reset `_stats_process` to a single fresh resource-usage sample."""
        _stats_process.clear()
        _stats_process.append(
            (int(hs.get_clock().time()), resource.getrusage(resource.RUSAGE_SELF))
        )

    # Rather than update on per session basis, batch up the requests.
    # If you increase the loop period, the accuracy of user_daily_visits
    # table will decrease
    clock.looping_call(
        hs.get_datastores().main.generate_user_daily_visits,
        5 * ONE_MINUTE_SECONDS * MILLISECONDS_PER_SECOND,
    )

    # monthly active user limiting functionality
    clock.looping_call(
        hs.get_datastores().main.reap_monthly_active_users,
        ONE_HOUR_SECONDS * MILLISECONDS_PER_SECOND,
    )
    hs.get_datastores().main.reap_monthly_active_users()

    def generate_monthly_active_users() -> "defer.Deferred[None]":
        """Refresh the MAU gauges as a background process."""

        async def _generate_monthly_active_users() -> None:
            # Defaults used when neither MAU limiting nor MAU stats are enabled.
            current_mau_count = 0
            current_mau_count_by_service: Mapping[str, int] = {}
            reserved_users: Sized = ()
            store = hs.get_datastores().main
            if hs.config.server.limit_usage_by_mau or hs.config.server.mau_stats_only:
                current_mau_count = await store.get_monthly_active_count()
                current_mau_count_by_service = (
                    await store.get_monthly_active_count_by_service()
                )
                reserved_users = await store.get_registered_reserved_users()
            current_mau_gauge.labels(**{SERVER_NAME_LABEL: server_name}).set(
                float(current_mau_count)
            )

            for app_service, count in current_mau_count_by_service.items():
                current_mau_by_service_gauge.labels(
                    app_service=app_service, **{SERVER_NAME_LABEL: server_name}
                ).set(float(count))

            registered_reserved_users_mau_gauge.labels(
                **{SERVER_NAME_LABEL: server_name}
            ).set(float(len(reserved_users)))
            max_mau_gauge.labels(**{SERVER_NAME_LABEL: server_name}).set(
                float(hs.config.server.max_mau_value)
            )

        return hs.run_as_background_process(
            "generate_monthly_active_users",
            _generate_monthly_active_users,
        )

    if hs.config.server.limit_usage_by_mau or hs.config.server.mau_stats_only:
        generate_monthly_active_users()
        # Same period as the daily-visits loop, written with the shared
        # constants rather than bare numbers.
        clock.looping_call(
            generate_monthly_active_users,
            5 * ONE_MINUTE_SECONDS * MILLISECONDS_PER_SECOND,
        )
    # End of monthly active user settings

    if hs.config.metrics.report_stats:
        logger.info("Scheduling stats reporting for 3 hour intervals")
        clock.looping_call(
            phone_stats_home,
            PHONE_HOME_INTERVAL_SECONDS * MILLISECONDS_PER_SECOND,
            hs,
            stats,
        )

        # We need to defer this init for the cases that we daemonize
        # otherwise the process ID we get is that of the non-daemon process
        clock.call_later(
            0,
            performance_stats_init,
        )

        # We wait 5 minutes to send the first set of stats as the server can
        # be quite busy the first few minutes
        clock.call_later(
            INITIAL_DELAY_BEFORE_FIRST_PHONE_HOME_SECONDS,
            phone_stats_home,
            hs,
            stats,
        )
|