From 306d8b23bd405802ef668e99420d37f6f8a2ecec Mon Sep 17 00:00:00 2001 From: shcherbak Date: Fri, 29 May 2026 13:57:33 +0300 Subject: [PATCH] Feat/gcp json formatter (#19775) Co-authored-by: Andrew Morgan --- changelog.d/19775.misc | 1 + docs/structured_logging.md | 41 ++++++++++++++++ synapse/logging/__init__.py | 8 +++- synapse/logging/_terse_json.py | 29 ++++++++++++ tests/logging/test_terse_json.py | 80 +++++++++++++++++++++++++++++++- 5 files changed, 156 insertions(+), 3 deletions(-) create mode 100644 changelog.d/19775.misc diff --git a/changelog.d/19775.misc b/changelog.d/19775.misc new file mode 100644 index 0000000000..3ff0fd34b9 --- /dev/null +++ b/changelog.d/19775.misc @@ -0,0 +1 @@ +Add `GcpJsonFormatter` logging formatter for use with Google Cloud Logging and GKE deployments. \ No newline at end of file diff --git a/docs/structured_logging.md b/docs/structured_logging.md index 761d6466dd..c6916670aa 100644 --- a/docs/structured_logging.md +++ b/docs/structured_logging.md @@ -78,3 +78,44 @@ loggers: The above logging config will set Synapse as 'INFO' logging level by default, with the SQL layer at 'WARNING', and will log JSON formatted messages to a remote endpoint at 10.1.2.3:9999. + +## Google Cloud Logging (GKE) + +When running Synapse on GKE, use `synapse.logging.GcpJsonFormatter`. It outputs +JSON to stdout with a `severity` field that Google Cloud Logging maps to the +correct per-entry severity. Without this, GKE assigns `ERROR` to everything +written to stderr regardless of the actual Python log level. + +Example output: + +```json +{"severity":"INFO","message":"Processed request: 3.481sec 200 GET /sync","logger":"synapse.access.http.8008","time":"2026-05-12T13:40:37.829Z"} +``` + +Configuration: + +```yaml +version: 1 +disable_existing_loggers: false + +formatters: + gcp_json: + class: synapse.logging.GcpJsonFormatter + +handlers: + console: + class: logging.StreamHandler + formatter: gcp_json + stream: ext://sys.stdout + +loggers: + synapse.storage.SQL: + level: WARNING + twisted: + handlers: [console] + propagate: false + +root: + level: INFO + handlers: [console] +``` diff --git a/synapse/logging/__init__.py b/synapse/logging/__init__.py index 15b92d7ef3..4058561e5e 100644 --- a/synapse/logging/__init__.py +++ b/synapse/logging/__init__.py @@ -22,10 +22,14 @@ import logging from synapse.logging._remote import RemoteHandler -from synapse.logging._terse_json import JsonFormatter, TerseJsonFormatter +from synapse.logging._terse_json import ( + GcpJsonFormatter, + JsonFormatter, + TerseJsonFormatter, +) # These are imported to allow for nicer logging configuration files. -__all__ = ["RemoteHandler", "JsonFormatter", "TerseJsonFormatter"] +__all__ = ["RemoteHandler", "JsonFormatter", "TerseJsonFormatter", "GcpJsonFormatter"] # Debug logger for https://github.com/matrix-org/synapse/issues/9533 etc issue9533_logger = logging.getLogger("synapse.9533_debug") diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index d9ff70b252..afa3288809 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -25,6 +25,7 @@ Log formatters that output terse JSON. import json import logging +from datetime import datetime, timezone _encoder = json.JSONEncoder(ensure_ascii=False, separators=(",", ":")) @@ -93,3 +94,31 @@ class TerseJsonFormatter(JsonFormatter): } return self._format(record, event) + + +class GcpJsonFormatter(logging.Formatter): + """JSON formatter compatible with Google Cloud Logging structured logging. + + Outputs `severity` (not `level`) so GCL correctly maps each log record to + the right severity instead of inheriting ERROR from stderr. + """ + + def format(self, record: logging.LogRecord) -> str: + msg = record.getMessage() + if record.exc_info: + if not record.exc_text: + record.exc_text = self.formatException(record.exc_info) + if record.exc_text: + msg = f"{msg}\n{record.exc_text}" + + event = { + "severity": record.levelname, + "message": msg, + "logger": record.name, + "time": datetime.fromtimestamp(record.created, tz=timezone.utc).strftime( + "%Y-%m-%dT%H:%M:%S.%f" + )[:-3] + + "Z", + } + + return _encoder.encode(event) diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py index a857737ddf..433408fee6 100644 --- a/tests/logging/test_terse_json.py +++ b/tests/logging/test_terse_json.py @@ -28,7 +28,11 @@ from twisted.web.http import HTTPChannel from twisted.web.server import Request from synapse.http.site import SynapseRequest -from synapse.logging._terse_json import JsonFormatter, TerseJsonFormatter +from synapse.logging._terse_json import ( + GcpJsonFormatter, + JsonFormatter, + TerseJsonFormatter, +) from synapse.logging.context import LoggingContext, LoggingContextFilter from synapse.types import JsonDict @@ -251,3 +255,77 @@ class TerseJsonTestCase(LoggerCleanupMixin, TestCase): self.assertEqual(log["log"], "Hello there, wally!") self.assertEqual(log["exc_type"], "ValueError") self.assertEqual(log["exc_value"], "That's wrong, you wally!") + + +class GcpJsonFormatterTestCase(LoggerCleanupMixin, TestCase): + def setUp(self) -> None: + self.output = StringIO() + + def get_log_line(self) -> JsonDict: + data = self.output.getvalue() + logs = data.splitlines() + self.assertEqual(len(logs), 1) + self.assertEqual(data.count("\n"), 1) + return json.loads(logs[0]) + + def test_gcp_json_output(self) -> None: + """ + GcpJsonFormatter produces exactly the four fields GCL expects. + """ + handler = logging.StreamHandler(self.output) + handler.setFormatter(GcpJsonFormatter()) + logger = self.get_logger(handler) + + logger.info("Hello there, %s!", "wally") + + log = self.get_log_line() + + self.assertIncludes( + log.keys(), {"severity", "message", "logger", "time"}, exact=True + ) + self.assertEqual(log["message"], "Hello there, wally!") + self.assertEqual(log["severity"], "INFO") + self.assertTrue(log["time"].endswith("Z")) + + def test_severity_levels(self) -> None: + """ + Python log levels are mapped to their GCL severity equivalents. + """ + cases = [ + (logging.DEBUG, "DEBUG"), + (logging.INFO, "INFO"), + (logging.WARNING, "WARNING"), + (logging.ERROR, "ERROR"), + (logging.CRITICAL, "CRITICAL"), + ] + for level, expected_severity in cases: + self.output = StringIO() + handler = logging.StreamHandler(self.output) + handler.setFormatter(GcpJsonFormatter()) + logger = self.get_logger(handler) + logger.setLevel(level) + logger.log(level, "test") + log = self.get_log_line() + self.assertEqual(log["severity"], expected_severity, f"level={level}") + + def test_gcp_json_with_exception(self) -> None: + """ + Exception info is appended to the message field, not separate keys. + """ + handler = logging.StreamHandler(self.output) + handler.setFormatter(GcpJsonFormatter()) + logger = self.get_logger(handler) + + try: + raise ValueError("That's wrong, you wally!") + except ValueError: + logger.exception("Hello there, %s!", "wally") + + log = self.get_log_line() + + self.assertIncludes( + log.keys(), {"severity", "message", "logger", "time"}, exact=True + ) + self.assertIn("Hello there, wally!", log["message"]) + self.assertIn("ValueError", log["message"]) + self.assertIn("That's wrong, you wally!", log["message"])