Removed traffic filtering, now handled by Anubis
@@ -34,7 +34,6 @@ INSTALLED_APPS = [
 ]
 
 MIDDLEWARE = [
-    "kakigoori.traffic_filtering.TrafficFiltering",
     "django.middleware.security.SecurityMiddleware",
     "django.contrib.sessions.middleware.SessionMiddleware",
     "django.middleware.common.CommonMiddleware",
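For context, a minimal sketch of the MIDDLEWARE setting as it stands after this change. Only the entries visible in the hunk above are shown; the rest of the settings file is assumed unchanged.

# Resulting MIDDLEWARE after removing the TrafficFiltering entry
# (only the entries visible in the hunk above are listed).
MIDDLEWARE = [
    "django.middleware.security.SecurityMiddleware",
    "django.contrib.sessions.middleware.SessionMiddleware",
    "django.middleware.common.CommonMiddleware",
    # ... remaining middleware entries unchanged
]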
@@ -1,71 +0,0 @@
import json
import os
import re
from dataclasses import dataclass
from enum import Enum
from pathlib import Path

from django.http import HttpRequest, HttpResponseForbidden


class TrafficRuleAction(Enum):
    DENY = "DENY"
    ALLOW = "ALLOW"
    NO_ACTION = "NO_ACTION"


@dataclass
class TrafficRule:
    name: str
    user_agent_regex: re.Pattern
    action: TrafficRuleAction

    def test_rule(self, request: HttpRequest):
        user_agent = request.META.get("HTTP_USER_AGENT") or None
        if not user_agent:
            return TrafficRuleAction.DENY

        print(user_agent)

        if self.user_agent_regex.search(user_agent):
            return self.action

        return TrafficRuleAction.NO_ACTION


class TrafficFiltering:
    traffic_rules = []

    def __init__(self, get_response):
        self.get_response = get_response

        with open(
            os.path.join(Path(__file__).resolve().parent, "traffic_rules.json")
        ) as f:
            traffic_rules_json = json.load(f)

        for rule in traffic_rules_json["rules"]:
            # noinspection PyTypeChecker
            self.traffic_rules.append(
                TrafficRule(
                    rule["name"],
                    re.compile(rule["user_agent_regex"]),
                    TrafficRuleAction[rule["action"]],
                )
            )

    def __call__(self, request: HttpRequest):
        for traffic_rule in self.traffic_rules:
            print(f"Checking for {traffic_rule.name}")
            action = traffic_rule.test_rule(request)
            print(action)
            match action:
                case TrafficRuleAction.DENY:
                    return HttpResponseForbidden()
                case TrafficRuleAction.ALLOW:
                    break
                case TrafficRuleAction.NO_ACTION:
                    continue

        response = self.get_response(request)
        return response
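As a quick illustration of how the removed middleware behaved, here is a minimal sketch that exercises it in isolation with Django's RequestFactory. The dummy_view stub and the sample user agent are illustrative; the import path kakigoori.traffic_filtering follows the dotted path removed from MIDDLEWARE above, and Django settings are assumed to be configured (e.g. via DJANGO_SETTINGS_MODULE).

# Sketch only: exercising the removed TrafficFiltering middleware directly.
from django.http import HttpResponse
from django.test import RequestFactory

from kakigoori.traffic_filtering import TrafficFiltering


def dummy_view(request):
    # Stand-in for the rest of the middleware chain / the actual view.
    return HttpResponse("ok")


factory = RequestFactory()
middleware = TrafficFiltering(dummy_view)

# A user agent matching the "Amazonbot" DENY rule gets an HttpResponseForbidden.
denied = middleware(factory.get("/", HTTP_USER_AGENT="Amazonbot/0.1"))
print(denied.status_code)  # expected: 403

# A request with no User-Agent header is also denied by TrafficRule.test_rule().
no_ua = middleware(factory.get("/"))
print(no_ua.status_code)  # expected: 403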
@@ -1,129 +0,0 @@
{
  "rules": [
    {
      "name": "Amazonbot",
      "user_agent_regex": "Amazonbot",
      "action": "DENY"
    },
    {
      "name": "googlebot",
      "user_agent_regex": "\\+http://www\\.google\\.com/bot\\.html",
      "action": "ALLOW"
    },
    {
      "name": "kagi",
      "user_agent_regex": "\\+https://kagi\\.com/bot",
      "action": "ALLOW"
    },
    {
      "name": "marginalia",
      "user_agent_regex": "search\\.marginalia\\.nu",
      "action": "ALLOW"
    },
    {
      "name": "mojeekbot",
      "user_agent_regex": "http\\://www\\.mojeek\\.com/bot\\.html",
      "action": "ALLOW"
    },
    {
      "name": "us-artificial-intelligence-scraper",
      "user_agent_regex": "\\+https\\://github\\.com/US-Artificial-Intelligence/scraper",
      "action": "DENY"
    },
    {
      "name": "lightpanda",
      "user_agent_regex": "^Lightpanda/.*$",
      "action": "DENY"
    },
    {
      "name": "headless-chrome",
      "user_agent_regex": "HeadlessChrome",
      "action": "DENY"
    },
    {
      "name": "headless-chromium",
      "user_agent_regex": "HeadlessChromium",
      "action": "DENY"
    },
    {
      "name": "imagesift",
      "user_agent_regex": "\\+imagesift\\.com",
      "action": "DENY"
    },
    {
      "name": "imagesift",
      "user_agent_regex": "\\+https\\://opensiteexplorer\\.org/dotbot",
      "action": "DENY"
    },
    {
      "name": "SemrushBot",
      "user_agent_regex": "\\+http\\://www\\.semrush\\.com/bot\\.html",
      "action": "DENY"
    },
    {
      "name": "Facebook",
      "user_agent_regex": "\\+https?://(?:www|developers).facebook.com",
      "action": "DENY"
    },
    {
      "name": "Bytedance",
      "user_agent_regex": "Bytespider",
      "action": "DENY"
    },
    {
      "name": "MJ12Bot",
      "user_agent_regex": "http://mj12bot\\.com/",
      "action": "DENY"
    },
    {
      "name": "Dataprovider.com",
      "user_agent_regex": "Dataprovider\\.com",
      "action": "DENY"
    },
    {
      "name": "Dataprovider.com",
      "user_agent_regex": "Dataprovider\\.com",
      "action": "DENY"
    },
    {
      "name": "BitSightBot",
      "user_agent_regex": "BitSightBot",
      "action": "DENY"
    },
    {
      "name": "babbar.tech",
      "user_agent_regex": "\\+http\\://babbar\\.tech/crawler",
      "action": "DENY"
    },
    {
      "name": "censys",
      "user_agent_regex": "\\+https://about\\.censys\\.io/",
      "action": "DENY"
    },
    {
      "name": "censys",
      "user_agent_regex": "\\+https://about\\.censys\\.io/",
      "action": "DENY"
    },
    {
      "name": "Baidu",
      "user_agent_regex": "Baiduspider",
      "action": "DENY"
    },
    {
      "name": "Expanse",
      "user_agent_regex": "scaninfo@paloaltonetworks\\.com",
      "action": "DENY"
    },
    {
      "name": "InternetMeasurement",
      "user_agent_regex": "InternetMeasurement",
      "action": "DENY"
    },
    {
      "name": "OtherAI (ai.robots.txt)",
      "user_agent_regex": "(AI2Bot|Ai2Bot\\-Dolma|Amazonbot|anthropic\\-ai|Applebot|Applebot\\-Extended|Brightbot\\ 1\\.0|Bytespider|CCBot|ChatGPT\\-User|Claude\\-Web|ClaudeBot|cohere\\-ai|cohere\\-training\\-data\\-crawler|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google\\-Extended|GoogleOther|GoogleOther\\-Image|GoogleOther\\-Video|GPTBot|iaskspider/2\\.0|ICC\\-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo\\ Bot|Meta\\-ExternalAgent|Meta\\-ExternalFetcher|OAI\\-SearchBot|omgili|omgilibot|PanguBot|PerplexityBot|PetalBot|Scrapy|SemrushBot\\-OCOB|SemrushBot\\-SWA|Sidetrade\\ indexer\\ bot|Timpibot|VelenPublicWebCrawler|Webzio\\-Extended|YouBot)",
      "action": "DENY"
    }
  ]
}
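To make the matching semantics concrete, a small self-contained sketch of how one of these rules is applied, mirroring the re.search() call in TrafficRule.test_rule() above. The sample user agent strings are illustrative, not taken from real traffic.

import re

# The "googlebot" ALLOW rule from the JSON above, as Python would compile it.
rule_pattern = re.compile(r"\+http://www\.google\.com/bot\.html")

# Illustrative user agent strings.
ua_googlebot = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
ua_browser = "Mozilla/5.0 (X11; Linux x86_64) Firefox/125.0"

# search() looks anywhere in the header, exactly as TrafficRule.test_rule() does.
print(bool(rule_pattern.search(ua_googlebot)))  # True  -> the rule's ALLOW action applies
print(bool(rule_pattern.search(ua_browser)))    # False -> NO_ACTION, the next rule is tried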