Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Redact tokens, etc. in url parameters from request logs #1212

Merged
merged 1 commit into from
Feb 15, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions jupyter_server/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,39 @@
# the file LICENSE, distributed as part of this software.
# -----------------------------------------------------------------------------
import json
from urllib.parse import urlparse, urlunparse

from tornado.log import access_log

from .auth import User
from .prometheus.log_functions import prometheus_log_method

# url params to be scrubbed if seen
# any url param that *contains* one of these
# will be scrubbed from logs
_SCRUB_PARAM_KEYS = {"token", "auth", "key", "code", "state", "xsrf"}


def _scrub_uri(uri: str) -> str:
"""scrub auth info from uri"""
parsed = urlparse(uri)
if parsed.query:
# check for potentially sensitive url params
# use manual list + split rather than parsing
# to minimally perturb original
parts = parsed.query.split("&")
changed = False
for i, s in enumerate(parts):
key, sep, value = s.partition("=")
for substring in _SCRUB_PARAM_KEYS:
if substring in key:
parts[i] = f"{key}{sep}[secret]"
changed = True
if changed:
parsed = parsed._replace(query="&".join(parts))
return urlunparse(parsed)
return uri


def log_request(handler):
"""log a bit more information about each request than tornado's default
Expand Down Expand Up @@ -43,7 +70,7 @@ def log_request(handler):
"status": status,
"method": request.method,
"ip": request.remote_ip,
"uri": request.uri,
"uri": _scrub_uri(request.uri),
"request_time": request_time,
}
# log username
Expand All @@ -59,7 +86,7 @@ def log_request(handler):
msg = "{status} {method} {uri} ({username}@{ip}) {request_time:.2f}ms"
if status >= 400: # noqa[PLR2004]
# log bad referers
ns["referer"] = request.headers.get("Referer", "None")
ns["referer"] = _scrub_uri(request.headers.get("Referer", "None"))
msg = msg + " referer={referer}"
if status >= 500 and status != 502: # noqa[PLR2004]
# Log a subset of the headers if it caused an error.
Expand Down