From 9ccbc162c4eb63973886c8f81c97ae92dcf4a8e6 Mon Sep 17 00:00:00 2001 From: Deimos Date: Tue, 10 Sep 2019 20:00:40 -0600 Subject: [PATCH] Add is_bot label to Pyramid request metric This will make it possible to separate out crawler traffic from real users. --- tildes/tildes/request_methods.py | 19 +++++++++++++++++++ tildes/tildes/tweens.py | 3 ++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tildes/tildes/request_methods.py b/tildes/tildes/request_methods.py index ec9fb76..4262fc6 100644 --- a/tildes/tildes/request_methods.py +++ b/tildes/tildes/request_methods.py @@ -19,6 +19,24 @@ def get_redis_connection(request: Request) -> Redis: return Redis(unix_socket_path=socket) +def is_bot(request: Request) -> bool: + """Return whether the request is by a known bot (e.g. search engine crawlers).""" + bot_user_agent_substrings = ( + "bingbot", + "Googlebot", + "qotnews scraper", + "Qwantify", + "YandexBot", + ) + + if request.user_agent: + return any( + [substring in request.user_agent for substring in bot_user_agent_substrings] + ) + + return False + + def is_safe_request_method(request: Request) -> bool: """Return whether the request method is "safe" (is GET or HEAD).""" return request.method in {"GET", "HEAD"} @@ -124,6 +142,7 @@ def current_listing_normal_url( def includeme(config: Configurator) -> None: """Attach the request methods to the Pyramid request object.""" + config.add_request_method(is_bot, "is_bot", reify=True) config.add_request_method(is_safe_request_method, "is_safe_method", reify=True) # Add the request.redis request method to access a redis connection. This is done in diff --git a/tildes/tildes/tweens.py b/tildes/tildes/tweens.py index 8c5a385..977093d 100644 --- a/tildes/tildes/tweens.py +++ b/tildes/tildes/tweens.py @@ -42,7 +42,7 @@ def metrics_tween_factory(handler: Callable, registry: Registry) -> Callable: request_histogram = Histogram( "tildes_pyramid_requests_seconds", "Request processing times", - labelnames=["route", "status_code", "method", "logged_in"], + labelnames=["route", "status_code", "method", "logged_in", "is_bot"], ) def metrics_tween(request: Request) -> Response: @@ -60,6 +60,7 @@ def metrics_tween_factory(handler: Callable, registry: Registry) -> Callable: status_code=response.status_code, method=request.method, logged_in=str(bool(request.user)).lower(), + is_bot=str(request.is_bot).lower(), ).observe(duration) return response