From 12f0057cf55652cec33c26c8a4262552bfea7631 Mon Sep 17 00:00:00 2001 From: Remilia Da Costa Faro Date: Tue, 25 Mar 2025 17:41:51 -0400 Subject: [PATCH] Added ai.robots.txt rules for traffic filtering --- kakigoori/traffic_rules.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kakigoori/traffic_rules.json b/kakigoori/traffic_rules.json index 35bb1e3..c25aeba 100644 --- a/kakigoori/traffic_rules.json +++ b/kakigoori/traffic_rules.json @@ -119,6 +119,11 @@ "name": "InternetMeasurement", "user_agent_regex": "InternetMeasurement", "action": "DENY" + }, + { + "name": "OtherAI (ai.robots.txt)", + "user_agent_regex": "(AI2Bot|Ai2Bot\\-Dolma|Amazonbot|anthropic\\-ai|Applebot|Applebot\\-Extended|Brightbot\\ 1\\.0|Bytespider|CCBot|ChatGPT\\-User|Claude\\-Web|ClaudeBot|cohere\\-ai|cohere\\-training\\-data\\-crawler|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google\\-Extended|GoogleOther|GoogleOther\\-Image|GoogleOther\\-Video|GPTBot|iaskspider/2\\.0|ICC\\-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo\\ Bot|Meta\\-ExternalAgent|Meta\\-ExternalFetcher|OAI\\-SearchBot|omgili|omgilibot|PanguBot|PerplexityBot|PetalBot|Scrapy|SemrushBot\\-OCOB|SemrushBot\\-SWA|Sidetrade\\ indexer\\ bot|Timpibot|VelenPublicWebCrawler|Webzio\\-Extended|YouBot)", + "action": "DENY" } ] } \ No newline at end of file