Removed traffic filtering, now handled by Anubis

This commit is contained in:
2025-04-19 22:34:54 +02:00
parent 68142f4d7e
commit ff8831e4ca
3 changed files with 0 additions and 201 deletions

View File

@@ -34,7 +34,6 @@ INSTALLED_APPS = [
]
MIDDLEWARE = [
"kakigoori.traffic_filtering.TrafficFiltering",
"django.middleware.security.SecurityMiddleware",
"django.contrib.sessions.middleware.SessionMiddleware",
"django.middleware.common.CommonMiddleware",

View File

@@ -1,71 +0,0 @@
import json
import logging
import os
import re
from dataclasses import dataclass
from enum import Enum
from pathlib import Path

from django.http import HttpRequest, HttpResponseForbidden
class TrafficRuleAction(Enum):
    """Possible outcomes of evaluating one traffic rule against a request.

    Each member's value equals its name; the middleware resolves the
    ``action`` string of a JSON rule by member name.
    """

    # Reject the request.
    DENY = "DENY"
    # Accept the request.
    ALLOW = "ALLOW"
    # The rule did not apply to the request.
    NO_ACTION = "NO_ACTION"
@dataclass
class TrafficRule:
    """A single User-Agent based traffic rule.

    Attributes:
        name: Label identifying the rule.
        user_agent_regex: Compiled pattern searched for anywhere in the
            request's User-Agent header.
        action: Action returned when the pattern matches.
    """

    name: str
    user_agent_regex: re.Pattern
    action: TrafficRuleAction

    def test_rule(self, request: HttpRequest) -> TrafficRuleAction:
        """Evaluate this rule against the request's User-Agent header.

        Args:
            request: Incoming request; only ``request.META`` is read.

        Returns:
            ``TrafficRuleAction.DENY`` when the User-Agent header is missing
            or empty (regardless of this rule's own action), ``self.action``
            when the pattern is found in the header, and
            ``TrafficRuleAction.NO_ACTION`` otherwise.
        """
        user_agent = request.META.get("HTTP_USER_AGENT")
        # A missing and an empty header are treated alike: both are denied.
        if not user_agent:
            return TrafficRuleAction.DENY

        # No stdout output here: this runs once per rule for every request.
        if self.user_agent_regex.search(user_agent):
            return self.action
        return TrafficRuleAction.NO_ACTION
_logger = logging.getLogger(__name__)


class TrafficFiltering:
    """Django middleware that filters requests using User-Agent rules.

    The rules are read once, at construction time, from ``traffic_rules.json``
    located in the same directory as this module, and are evaluated in file
    order for every request.
    """

    def __init__(self, get_response):
        """Store the next handler and load the traffic rules.

        Args:
            get_response: Callable invoked with the request when no rule
                denies it.

        Raises:
            OSError: If ``traffic_rules.json`` cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
            KeyError: If a rule lacks a required key or names an unknown
                action.
            re.error: If a rule's ``user_agent_regex`` does not compile.
        """
        self.get_response = get_response

        rules_path = Path(__file__).resolve().parent / "traffic_rules.json"
        with open(rules_path, encoding="utf-8") as f:
            traffic_rules_json = json.load(f)

        # Build a fresh per-instance list. Appending to a class-level list
        # would duplicate every rule each time the middleware is instantiated.
        self.traffic_rules = [
            # noinspection PyTypeChecker
            TrafficRule(
                rule["name"],
                re.compile(rule["user_agent_regex"]),
                TrafficRuleAction[rule["action"]],
            )
            for rule in traffic_rules_json["rules"]
        ]

    def __call__(self, request: HttpRequest):
        """Apply the rules in order and either reject or forward the request.

        The first rule returning ``DENY`` produces an ``HttpResponseForbidden``;
        the first rule returning ``ALLOW`` stops rule evaluation. If no rule
        denies the request, it is passed to ``get_response``.

        Args:
            request: The incoming request.

        Returns:
            ``HttpResponseForbidden`` when a rule denies the request,
            otherwise whatever ``get_response`` returns.
        """
        for traffic_rule in self.traffic_rules:
            action = traffic_rule.test_rule(request)
            _logger.debug(
                "Traffic rule %s returned %s", traffic_rule.name, action
            )
            if action is TrafficRuleAction.DENY:
                return HttpResponseForbidden()
            if action is TrafficRuleAction.ALLOW:
                # An explicit allow short-circuits the remaining rules.
                break
            # NO_ACTION: fall through to the next rule.

        return self.get_response(request)

View File

@@ -1,129 +0,0 @@
{
"rules": [
{
"name": "Amazonbot",
"user_agent_regex": "Amazonbot",
"action": "DENY"
},
{
"name": "googlebot",
"user_agent_regex": "\\+http://www\\.google\\.com/bot\\.html",
"action": "ALLOW"
},
{
"name": "kagi",
"user_agent_regex": "\\+https://kagi\\.com/bot",
"action": "ALLOW"
},
{
"name": "marginalia",
"user_agent_regex": "search\\.marginalia\\.nu",
"action": "ALLOW"
},
{
"name": "mojeekbot",
"user_agent_regex": "http\\://www\\.mojeek\\.com/bot\\.html",
"action": "ALLOW"
},
{
"name": "us-artificial-intelligence-scraper",
"user_agent_regex": "\\+https\\://github\\.com/US-Artificial-Intelligence/scraper",
"action": "DENY"
},
{
"name": "lightpanda",
"user_agent_regex": "^Lightpanda/.*$",
"action": "DENY"
},
{
"name": "headless-chrome",
"user_agent_regex": "HeadlessChrome",
"action": "DENY"
},
{
"name": "headless-chromium",
"user_agent_regex": "HeadlessChromium",
"action": "DENY"
},
{
"name": "imagesift",
"user_agent_regex": "\\+imagesift\\.com",
"action": "DENY"
},
{
"name": "dotbot",
"user_agent_regex": "\\+https\\://opensiteexplorer\\.org/dotbot",
"action": "DENY"
},
{
"name": "SemrushBot",
"user_agent_regex": "\\+http\\://www\\.semrush\\.com/bot\\.html",
"action": "DENY"
},
{
"name": "Facebook",
"user_agent_regex": "\\+https?://(?:www|developers)\\.facebook\\.com",
"action": "DENY"
},
{
"name": "Bytedance",
"user_agent_regex": "Bytespider",
"action": "DENY"
},
{
"name": "MJ12Bot",
"user_agent_regex": "http://mj12bot\\.com/",
"action": "DENY"
},
{
"name": "Dataprovider.com",
"user_agent_regex": "Dataprovider\\.com",
"action": "DENY"
},
{
"name": "Dataprovider.com",
"user_agent_regex": "Dataprovider\\.com",
"action": "DENY"
},
{
"name": "BitSightBot",
"user_agent_regex": "BitSightBot",
"action": "DENY"
},
{
"name": "babbar.tech",
"user_agent_regex": "\\+http\\://babbar\\.tech/crawler",
"action": "DENY"
},
{
"name": "censys",
"user_agent_regex": "\\+https://about\\.censys\\.io/",
"action": "DENY"
},
{
"name": "censys",
"user_agent_regex": "\\+https://about\\.censys\\.io/",
"action": "DENY"
},
{
"name": "Baidu",
"user_agent_regex": "Baiduspider",
"action": "DENY"
},
{
"name": "Expanse",
"user_agent_regex": "scaninfo@paloaltonetworks\\.com",
"action": "DENY"
},
{
"name": "InternetMeasurement",
"user_agent_regex": "InternetMeasurement",
"action": "DENY"
},
{
"name": "OtherAI (ai.robots.txt)",
"user_agent_regex": "(AI2Bot|Ai2Bot\\-Dolma|Amazonbot|anthropic\\-ai|Applebot|Applebot\\-Extended|Brightbot\\ 1\\.0|Bytespider|CCBot|ChatGPT\\-User|Claude\\-Web|ClaudeBot|cohere\\-ai|cohere\\-training\\-data\\-crawler|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google\\-Extended|GoogleOther|GoogleOther\\-Image|GoogleOther\\-Video|GPTBot|iaskspider/2\\.0|ICC\\-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo\\ Bot|Meta\\-ExternalAgent|Meta\\-ExternalFetcher|OAI\\-SearchBot|omgili|omgilibot|PanguBot|PerplexityBot|PetalBot|Scrapy|SemrushBot\\-OCOB|SemrushBot\\-SWA|Sidetrade\\ indexer\\ bot|Timpibot|VelenPublicWebCrawler|Webzio\\-Extended|YouBot)",
"action": "DENY"
}
]
}