# llm.txt for aimann.no
# Purpose: Provide guidance to Large Language Model (LLM) and AI-based crawlers
# on how to index our site. This file explicitly ALLOWS major AI crawlers
# full access to content and points them to our sitemap. It complements
# the standard robots.txt for search engines.
#
# Data-source: aimann.no/ai-dataset.json
# (placeholder for a future AI dataset; not active yet)
# --- OpenAI Crawlers ---
User-agent: GPTBot # OpenAI GPT model crawler (training data)
User-agent: OAI-SearchBot # OpenAI Search indexer for ChatGPT browsing
User-agent: ChatGPT-User # ChatGPT user-triggered browser agent (v1.x)
User-agent: ChatGPT-User/2.0 # ChatGPT user-triggered agent (v2.0)
Allow: /
# --- Anthropic (Claude) Crawlers ---
User-agent: anthropic-ai # Anthropic general web crawler (training)
User-agent: ClaudeBot # Claude AI citation fetcher
User-agent: claude-web # Claude-focused web browsing agent
Allow: /
# --- Perplexity.ai Crawlers ---
User-agent: PerplexityBot # Perplexity AI search index crawler
User-agent: Perplexity-User # Perplexity user-triggered fetch agent
Allow: /
# --- Google AI Crawlers ---
User-agent: Google-Extended # Google’s AI content crawler (Gemini, Bard)
Allow: /
# --- Microsoft (Bing) Crawler ---
User-agent: BingBot # Microsoft Bing bot (powers Bing Search & Chat)
Allow: /
# --- Amazon AI Crawler ---
User-agent: Amazonbot # Amazon Alexa/AI crawler
Allow: /
# --- Apple AI Crawlers ---
User-agent: Applebot # Apple Siri/Spotlight crawler
User-agent: Applebot-Extended # Apple opt-in extended AI crawler
Allow: /
# --- Meta (Facebook/Instagram) Crawlers ---
User-agent: FacebookBot # Facebook/Meta content crawler
User-agent: meta-externalagent # Fallback external content agent for Meta
Allow: /
# --- LinkedIn Crawler ---
User-agent: LinkedInBot # LinkedIn preview and indexing bot
Allow: /
# --- DuckDuckGo AI Crawler ---
User-agent: DuckAssistBot # DuckDuckGo DuckAssist answer crawler
Allow: /
# --- Cohere AI Crawler ---
User-agent: cohere-ai # Cohere.ai data crawler for language models
Allow: /
# --- Research & Open Data Crawlers ---
User-agent: AI2Bot # Allen Institute (Semantic Scholar) crawler
User-agent: CCBot # Common Crawl bot (open web dataset)
User-agent: Diffbot # Diffbot data extraction crawler
User-agent: omgili # Omgili bot (forums & discussions crawler)
Allow: /
# --- Emerging AI Search Startups ---
User-agent: TimpiBot # Timpi (decentralized search) crawler
User-agent: YouBot # You.com search assistant crawler
User-agent: MistralAI-User # Mistral AI (Le Chat) citation fetcher
Allow: /
# Sitemap location
# The directive and its value must share one line, and the value must be a
# fully qualified URL. NOTE(review): assumes the site is served over HTTPS.
Sitemap: https://aimann.no/sitemap.xml