Browse Source

Add is_bot label to Pyramid request metric

This will make it possible to separate out crawler traffic from real
users.
merge-requests/85/head
Deimos 5 years ago
parent
commit
9ccbc162c4
  1. 19
      tildes/tildes/request_methods.py
  2. 3
      tildes/tildes/tweens.py

19
tildes/tildes/request_methods.py

@ -19,6 +19,24 @@ def get_redis_connection(request: Request) -> Redis:
return Redis(unix_socket_path=socket) return Redis(unix_socket_path=socket)
def is_bot(request: Request) -> bool:
    """Return whether the request is by a known bot (e.g. search engine crawlers).

    Detection is a case-sensitive substring match of the request's User-Agent
    against a fixed list of known crawler identifiers. A request whose
    User-Agent is missing or empty is never treated as a bot.
    """
    bot_user_agent_substrings = (
        "bingbot",
        "Googlebot",
        "qotnews scraper",
        "Qwantify",
        "YandexBot",
    )

    user_agent = request.user_agent
    if not user_agent:
        return False

    # Generator expression (not a list) so any() can stop at the first match.
    return any(substring in user_agent for substring in bot_user_agent_substrings)
def is_safe_request_method(request: Request) -> bool:
    """Return whether the request method is "safe" (is GET or HEAD)."""
    return request.method in {"GET", "HEAD"}
@ -124,6 +142,7 @@ def current_listing_normal_url(
def includeme(config: Configurator) -> None: def includeme(config: Configurator) -> None:
"""Attach the request methods to the Pyramid request object.""" """Attach the request methods to the Pyramid request object."""
config.add_request_method(is_bot, "is_bot", reify=True)
config.add_request_method(is_safe_request_method, "is_safe_method", reify=True) config.add_request_method(is_safe_request_method, "is_safe_method", reify=True)
# Add the request.redis request method to access a redis connection. This is done in # Add the request.redis request method to access a redis connection. This is done in

3
tildes/tildes/tweens.py

@ -42,7 +42,7 @@ def metrics_tween_factory(handler: Callable, registry: Registry) -> Callable:
request_histogram = Histogram( request_histogram = Histogram(
"tildes_pyramid_requests_seconds", "tildes_pyramid_requests_seconds",
"Request processing times", "Request processing times",
labelnames=["route", "status_code", "method", "logged_in"],
labelnames=["route", "status_code", "method", "logged_in", "is_bot"],
) )
def metrics_tween(request: Request) -> Response: def metrics_tween(request: Request) -> Response:
@ -60,6 +60,7 @@ def metrics_tween_factory(handler: Callable, registry: Registry) -> Callable:
status_code=response.status_code, status_code=response.status_code,
method=request.method, method=request.method,
logged_in=str(bool(request.user)).lower(), logged_in=str(bool(request.user)).lower(),
is_bot=str(request.is_bot).lower(),
).observe(duration) ).observe(duration)
return response return response

Loading…
Cancel
Save