# robots.txt for https://renovativ.ai
# See https://www.robotstxt.org/robotstxt.html
# See RFC 9309: https://www.rfc-editor.org/rfc/rfc9309

# Default group: applies to every crawler that does not match a more specific
# User-agent group elsewhere in this file. Everything is crawlable except the
# API, admin, and private areas.
# Note: per RFC 9309 a crawler that matches a named group obeys ONLY that
# group; the rules below are not inherited by named groups.
User-agent: *
Allow: /
Disallow: /api/
Disallow: /admin/
Disallow: /private/

# Sitemap location (group-independent: applies to all crawlers regardless of
# which User-agent group they match)
Sitemap: https://renovativ.ai/sitemap.xml

# ============================================
# AI Crawler Rules
# ============================================

# OpenAI GPTBot - https://platform.openai.com/docs/gptbot
# A named group replaces the "User-agent: *" group for this crawler
# (RFC 9309), so the default exclusions must be repeated here; otherwise
# /admin/ and /private/ would be open to it.
User-agent: GPTBot
Allow: /
Disallow: /api/
Disallow: /admin/
Disallow: /private/

# OpenAI SearchBot - https://platform.openai.com/docs/bots
# A named group replaces the "User-agent: *" group for this crawler
# (RFC 9309), so the default exclusions are repeated here; with only
# "Allow: /" this bot could crawl /api/, /admin/ and /private/.
User-agent: OAI-SearchBot
Allow: /
Disallow: /api/
Disallow: /admin/
Disallow: /private/

# Anthropic crawlers - https://support.anthropic.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web
# ClaudeBot is the crawler token Anthropic documents; Claude-Web is kept for
# backward compatibility with older agents.
# A named group replaces the "User-agent: *" group for these crawlers
# (RFC 9309), so the default exclusions are repeated here.
User-agent: ClaudeBot
User-agent: Claude-Web
Allow: /
Disallow: /api/
Disallow: /admin/
Disallow: /private/

# Google Extended (AI training) - https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers
# A named group replaces the "User-agent: *" group for this token
# (RFC 9309), so the default exclusions must be repeated here; otherwise
# /admin/ and /private/ would not be excluded for it.
User-agent: Google-Extended
Allow: /
Disallow: /api/
Disallow: /admin/
Disallow: /private/

# PerplexityBot - https://docs.perplexity.ai/docs/perplexity-crawlers
# A named group replaces the "User-agent: *" group for this crawler
# (RFC 9309), so the default exclusions are repeated here; with only
# "Allow: /" this bot could crawl /api/, /admin/ and /private/.
User-agent: PerplexityBot
Allow: /
Disallow: /api/
Disallow: /admin/
Disallow: /private/

# Bing (Bingbot is Bing's general search crawler, BingPreview its page
# snapshot agent) - https://blogs.bing.com/webmaster/2023/09/29/how-to-control-ai-crawlers
# A named group replaces the "User-agent: *" group for these crawlers
# (RFC 9309), so the default exclusions are repeated here; with only
# "Allow: /" they could crawl /api/, /admin/ and /private/.
User-agent: BingPreview
User-agent: Bingbot
Allow: /
Disallow: /api/
Disallow: /admin/
Disallow: /private/

# Common Crawl - https://commoncrawl.org/big-picture/frequently-asked-questions/
# A named group replaces the "User-agent: *" group for this crawler
# (RFC 9309), so the default exclusions are repeated here; with only
# "Allow: /" this bot could crawl /api/, /admin/ and /private/.
User-agent: CCBot
Allow: /
Disallow: /api/
Disallow: /admin/
Disallow: /private/

# Facebook/Meta - https://developers.facebook.com/docs/sharing/webmasters/crawler/
# A named group replaces the "User-agent: *" group for this crawler
# (RFC 9309), so the default exclusions are repeated here; with only
# "Allow: /" this bot could crawl /api/, /admin/ and /private/.
User-agent: FacebookBot
Allow: /
Disallow: /api/
Disallow: /admin/
Disallow: /private/

# ============================================
# Content Signals (https://contentsignals.org/)
# ============================================
# ai-train: Whether content may be used for AI training
# search: Whether content may be used for search indexing
# ai-input: Whether content may be used as AI input/context
#
# Content-Signal is a group-level directive, so it needs its own User-agent
# line; without one it is read as a trailing line of whichever group precedes
# it (blank lines do not end a group). RFC 9309 merges multiple groups that
# name the same user agent, so this group adds the signal to the default
# "User-agent: *" rules without replacing them.
# NOTE(review): crawlers matched by a named group do not see this signal;
# repeat the line inside those groups if the signal should apply to them too.

User-agent: *
Content-Signal: ai-train=yes, search=yes, ai-input=yes