Removed traffic filtering, now handled by Anubis
@@ -34,7 +34,6 @@ INSTALLED_APPS = [
 ]
 
 MIDDLEWARE = [
-    "kakigoori.traffic_filtering.TrafficFiltering",
     "django.middleware.security.SecurityMiddleware",
     "django.contrib.sessions.middleware.SessionMiddleware",
     "django.middleware.common.CommonMiddleware",
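Note on the settings change: Django instantiates each dotted path in MIDDLEWARE once at startup and chains the resulting callables around the view, so dropping the entry is all that is needed to take the filter out of the request path. A minimal sketch of the middleware contract the removed class implemented (the class name below is illustrative, not from the repo):

class PassthroughMiddleware:
    """Skeleton of Django's middleware protocol: one instance, called per request."""

    def __init__(self, get_response):
        self.get_response = get_response  # next middleware in the chain, or the view

    def __call__(self, request):
        # TrafficFiltering short-circuited here with HttpResponseForbidden()
        # for denied user agents; with it gone, requests pass straight through.
        return self.get_response(request)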
@@ -1,71 +0,0 @@
-import json
-import os
-import re
-from dataclasses import dataclass
-from enum import Enum
-from pathlib import Path
-
-from django.http import HttpRequest, HttpResponseForbidden
-
-
-class TrafficRuleAction(Enum):
-    DENY = "DENY"
-    ALLOW = "ALLOW"
-    NO_ACTION = "NO_ACTION"
-
-
-@dataclass
-class TrafficRule:
-    name: str
-    user_agent_regex: re.Pattern
-    action: TrafficRuleAction
-
-    def test_rule(self, request: HttpRequest):
-        user_agent = request.META.get("HTTP_USER_AGENT") or None
-        if not user_agent:
-            return TrafficRuleAction.DENY
-
-        print(user_agent)
-
-        if self.user_agent_regex.search(user_agent):
-            return self.action
-
-        return TrafficRuleAction.NO_ACTION
-
-
-class TrafficFiltering:
-    traffic_rules = []
-
-    def __init__(self, get_response):
-        self.get_response = get_response
-
-        with open(
-            os.path.join(Path(__file__).resolve().parent, "traffic_rules.json")
-        ) as f:
-            traffic_rules_json = json.load(f)
-
-        for rule in traffic_rules_json["rules"]:
-            # noinspection PyTypeChecker
-            self.traffic_rules.append(
-                TrafficRule(
-                    rule["name"],
-                    re.compile(rule["user_agent_regex"]),
-                    TrafficRuleAction[rule["action"]],
-                )
-            )
-
-    def __call__(self, request: HttpRequest):
-        for traffic_rule in self.traffic_rules:
-            print(f"Checking for {traffic_rule.name}")
-            action = traffic_rule.test_rule(request)
-            print(action)
-            match action:
-                case TrafficRuleAction.DENY:
-                    return HttpResponseForbidden()
-                case TrafficRuleAction.ALLOW:
-                    break
-                case TrafficRuleAction.NO_ACTION:
-                    continue
-
-        response = self.get_response(request)
-        return response
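The removed middleware evaluated rules in file order, first match wins: a missing User-Agent was denied outright, a DENY match returned 403, an ALLOW match stopped further checks, and a non-matching rule fell through to the next one. A standalone sketch of that decision follows (the evaluate helper is hypothetical, written only to mirror the logic above against the rules file shown below):

import json
import re

def evaluate(user_agent, rules):
    """Return the action of the first rule whose regex matches the User-Agent."""
    if not user_agent:
        return "DENY"  # requests without a User-Agent were rejected
    for rule in rules:
        if re.search(rule["user_agent_regex"], user_agent):
            return rule["action"]  # "DENY" -> 403, "ALLOW" -> stop checking
    return "NO_ACTION"  # nothing matched; the request proceeds normally

with open("traffic_rules.json") as f:
    rules = json.load(f)["rules"]

# Any agent containing "Bytespider" hits the "Bytedance" rule and is denied.
print(evaluate("Mozilla/5.0 (compatible; Bytespider)", rules))                # DENY
print(evaluate("Mozilla/5.0 (+http://www.google.com/bot.html)", rules))       # ALLOW

One quirk worth noting if the code is ever revived: traffic_rules was a class-level list, so every instantiation of TrafficFiltering appended to the same shared list; with Django's single middleware instance per process that was harmless, but repeated instantiation would duplicate the rules.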
@@ -1,129 +0,0 @@
-{
-  "rules": [
-    {
-      "name": "Amazonbot",
-      "user_agent_regex": "Amazonbot",
-      "action": "DENY"
-    },
-    {
-      "name": "googlebot",
-      "user_agent_regex": "\\+http://www\\.google\\.com/bot\\.html",
-      "action": "ALLOW"
-    },
-    {
-      "name": "kagi",
-      "user_agent_regex": "\\+https://kagi\\.com/bot",
-      "action": "ALLOW"
-    },
-    {
-      "name": "marginalia",
-      "user_agent_regex": "search\\.marginalia\\.nu",
-      "action": "ALLOW"
-    },
-    {
-      "name": "mojeekbot",
-      "user_agent_regex": "http\\://www\\.mojeek\\.com/bot\\.html",
-      "action": "ALLOW"
-    },
-    {
-      "name": "us-artificial-intelligence-scraper",
-      "user_agent_regex": "\\+https\\://github\\.com/US-Artificial-Intelligence/scraper",
-      "action": "DENY"
-    },
-    {
-      "name": "lightpanda",
-      "user_agent_regex": "^Lightpanda/.*$",
-      "action": "DENY"
-    },
-    {
-      "name": "headless-chrome",
-      "user_agent_regex": "HeadlessChrome",
-      "action": "DENY"
-    },
-    {
-      "name": "headless-chromium",
-      "user_agent_regex": "HeadlessChromium",
-      "action": "DENY"
-    },
-    {
-      "name": "imagesift",
-      "user_agent_regex": "\\+imagesift\\.com",
-      "action": "DENY"
-    },
-    {
-      "name": "imagesift",
-      "user_agent_regex": "\\+https\\://opensiteexplorer\\.org/dotbot",
-      "action": "DENY"
-    },
-    {
-      "name": "SemrushBot",
-      "user_agent_regex": "\\+http\\://www\\.semrush\\.com/bot\\.html",
-      "action": "DENY"
-    },
-    {
-      "name": "Facebook",
-      "user_agent_regex": "\\+https?://(?:www|developers).facebook.com",
-      "action": "DENY"
-    },
-    {
-      "name": "Bytedance",
-      "user_agent_regex": "Bytespider",
-      "action": "DENY"
-    },
-    {
-      "name": "MJ12Bot",
-      "user_agent_regex": "http://mj12bot\\.com/",
-      "action": "DENY"
-    },
-    {
-      "name": "Dataprovider.com",
-      "user_agent_regex": "Dataprovider\\.com",
-      "action": "DENY"
-    },
-    {
-      "name": "Dataprovider.com",
-      "user_agent_regex": "Dataprovider\\.com",
-      "action": "DENY"
-    },
-    {
-      "name": "BitSightBot",
-      "user_agent_regex": "BitSightBot",
-      "action": "DENY"
-    },
-    {
-      "name": "babbar.tech",
-      "user_agent_regex": "\\+http\\://babbar\\.tech/crawler",
-      "action": "DENY"
-    },
-    {
-      "name": "censys",
-      "user_agent_regex": "\\+https://about\\.censys\\.io/",
-      "action": "DENY"
-    },
-    {
-      "name": "censys",
-      "user_agent_regex": "\\+https://about\\.censys\\.io/",
-      "action": "DENY"
-    },
-    {
-      "name": "Baidu",
-      "user_agent_regex": "Baiduspider",
-      "action": "DENY"
-    },
-    {
-      "name": "Expanse",
-      "user_agent_regex": "scaninfo@paloaltonetworks\\.com",
-      "action": "DENY"
-    },
-    {
-      "name": "InternetMeasurement",
-      "user_agent_regex": "InternetMeasurement",
-      "action": "DENY"
-    },
-    {
-      "name": "OtherAI (ai.robots.txt)",
-      "user_agent_regex": "(AI2Bot|Ai2Bot\\-Dolma|Amazonbot|anthropic\\-ai|Applebot|Applebot\\-Extended|Brightbot\\ 1\\.0|Bytespider|CCBot|ChatGPT\\-User|Claude\\-Web|ClaudeBot|cohere\\-ai|cohere\\-training\\-data\\-crawler|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google\\-Extended|GoogleOther|GoogleOther\\-Image|GoogleOther\\-Video|GPTBot|iaskspider/2\\.0|ICC\\-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo\\ Bot|Meta\\-ExternalAgent|Meta\\-ExternalFetcher|OAI\\-SearchBot|omgili|omgilibot|PanguBot|PerplexityBot|PetalBot|Scrapy|SemrushBot\\-OCOB|SemrushBot\\-SWA|Sidetrade\\ indexer\\ bot|Timpibot|VelenPublicWebCrawler|Webzio\\-Extended|YouBot)",
-      "action": "DENY"
-    }
-  ]
-}
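Every entry in the deleted rules file followed the same three-key schema the middleware expected: name, user_agent_regex (a Python re pattern), and action (one of DENY, ALLOW, NO_ACTION). A small validation sketch along these lines, not something that existed in the repo, could catch a malformed entry before deployment:

import json
import re

VALID_ACTIONS = {"DENY", "ALLOW", "NO_ACTION"}  # the TrafficRuleAction members

with open("traffic_rules.json") as f:
    rules = json.load(f)["rules"]

for rule in rules:
    assert rule.keys() == {"name", "user_agent_regex", "action"}, rule
    assert rule["action"] in VALID_ACTIONS, rule["name"]
    re.compile(rule["user_agent_regex"])  # raises re.error on an invalid pattern

print(f"{len(rules)} rules OK")

A check like this would not flag the file's duplicate Dataprovider.com and censys entries, nor the second "imagesift" entry whose regex actually targets opensiteexplorer.org's dotbot, since those are valid rules, just oddly named.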