From 87c2091b0aa637c2a0ada60b811514cac302153c Mon Sep 17 00:00:00 2001 From: Min RK Date: Wed, 15 Feb 2023 11:17:33 +0100 Subject: [PATCH] redact tokens, etc. from request logs replaces `?token=abc123` with `?token=[secret]` in logs --- jupyter_server/log.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/jupyter_server/log.py b/jupyter_server/log.py index 21ea9d1d8d..1ca87b279c 100644 --- a/jupyter_server/log.py +++ b/jupyter_server/log.py @@ -6,12 +6,39 @@ # the file LICENSE, distributed as part of this software. # ----------------------------------------------------------------------------- import json +from urllib.parse import urlparse, urlunparse from tornado.log import access_log from .auth import User from .prometheus.log_functions import prometheus_log_method +# url params to be scrubbed if seen +# any url param that *contains* one of these +# will be scrubbed from logs +_SCRUB_PARAM_KEYS = {"token", "auth", "key", "code", "state", "xsrf"} + + +def _scrub_uri(uri: str) -> str: + """scrub auth info from uri""" + parsed = urlparse(uri) + if parsed.query: + # check for potentially sensitive url params + # use manual list + split rather than parsing + # to minimally perturb original + parts = parsed.query.split("&") + changed = False + for i, s in enumerate(parts): + key, sep, value = s.partition("=") + for substring in _SCRUB_PARAM_KEYS: + if substring in key: + parts[i] = f"{key}{sep}[secret]" + changed = True + if changed: + parsed = parsed._replace(query="&".join(parts)) + return urlunparse(parsed) + return uri + def log_request(handler): """log a bit more information about each request than tornado's default @@ -43,7 +70,7 @@ def log_request(handler): "status": status, "method": request.method, "ip": request.remote_ip, - "uri": request.uri, + "uri": _scrub_uri(request.uri), "request_time": request_time, } # log username @@ -59,7 +86,7 @@ def log_request(handler): msg = "{status} {method} {uri} ({username}@{ip}) {request_time:.2f}ms" if status >= 400: # noqa[PLR2004] # log bad referers - ns["referer"] = request.headers.get("Referer", "None") + ns["referer"] = _scrub_uri(request.headers.get("Referer", "None")) msg = msg + " referer={referer}" if status >= 500 and status != 502: # noqa[PLR2004] # Log a subset of the headers if it caused an error.