From f0fd5f530dcf3fcaadafa0b9c178ece274eb3c69 Mon Sep 17 00:00:00 2001
From: ErrorOliver
Date: Thu, 6 Nov 2025 20:47:57 +0000
Subject: [PATCH] AI crawl block

---
Hand-edited after export: "Perplexity-User" now uses an ASCII hyphen, and the
empty line between the crawler list and its rules was replaced by a comment.
The post-image blob id on the "index" line below is therefore no longer exact.

 src/webpage/robots.txt | 67 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 66 insertions(+), 1 deletion(-)

diff --git a/src/webpage/robots.txt b/src/webpage/robots.txt
index 08bfbaa..f0bd4a5 100644
--- a/src/webpage/robots.txt
+++ b/src/webpage/robots.txt
@@ -1,4 +1,69 @@
-User-agent: *
+# Block all known AI crawlers and assistants
+# from using content for training AI models.
+# Source: https://robotstxt.com/ai
+User-Agent: GPTBot
+User-Agent: ClaudeBot
+User-Agent: Claude-User
+User-Agent: Claude-SearchBot
+User-Agent: CCBot
+User-Agent: Google-Extended
+User-Agent: Applebot-Extended
+User-Agent: Facebookbot
+User-Agent: Meta-ExternalAgent
+User-Agent: Meta-ExternalFetcher
+User-Agent: diffbot
+User-Agent: PerplexityBot
+User-Agent: Perplexity-User
+User-Agent: Omgili
+User-Agent: Omgilibot
+User-Agent: webzio-extended
+User-Agent: ImagesiftBot
+User-Agent: Bytespider
+User-Agent: TikTokSpider
+User-Agent: Amazonbot
+User-Agent: Youbot
+User-Agent: SemrushBot-OCOB
+User-Agent: Petalbot
+User-Agent: VelenPublicWebCrawler
+User-Agent: TurnitinBot
+User-Agent: Timpibot
+User-Agent: OAI-SearchBot
+User-Agent: ICC-Crawler
+User-Agent: AI2Bot
+User-Agent: AI2Bot-Dolma
+User-Agent: DataForSeoBot
+User-Agent: AwarioBot
+User-Agent: AwarioSmartBot
+User-Agent: AwarioRssBot
+User-Agent: Google-CloudVertexBot
+User-Agent: PanguBot
+User-Agent: Kangaroo Bot
+User-Agent: Sentibot
+User-Agent: img2dataset
+User-Agent: Meltwater
+User-Agent: Seekr
+User-Agent: peer39_crawler
+User-Agent: cohere-ai
+User-Agent: cohere-training-data-crawler
+User-Agent: DuckAssistBot
+User-Agent: Scrapy
+User-Agent: Cotoyogi
+User-Agent: aiHitBot
+User-Agent: Factset_spyderbot
+User-Agent: FirecrawlAgent
+# Rules for every crawler above; no blank line here (older parsers end the group).
+Disallow: /
+DisallowAITraining: /
+
+# Block any non-specified AI crawlers (e.g., new
+# or unknown bots) from using content for training
+# AI models, while allowing the website to be
+# indexed and accessed by bots. These directives
+# are still experimental and may not be supported
+# by all AI crawlers.
+User-Agent: *
+DisallowAITraining: /
+Content-Usage: ai=n
 Disallow: /channel/
 Allow: /invite/
 Allow: /oauth2/