# As a condition of accessing this website, you agree to abide by the following
# content signals:

# (a)  If a Content-Signal = yes, you may collect content for the corresponding
#      use.
# (b)  If a Content-Signal = no, you may not collect content for the
#      corresponding use.
# (c)  If the website operator does not include a Content-Signal for a
#      corresponding use, the website operator neither grants nor restricts
#      permission via Content-Signal with respect to the corresponding use.

# The content signals and their meanings are:

# search:   building a search index and providing search results (e.g., returning
#           hyperlinks and short excerpts from your website's contents). Search does not
#           include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
#           augmented generation, grounding, or other real-time taking of content for
#           generative AI search answers).
# ai-train: training or fine-tuning AI models.

# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.

# BEGIN Cloudflare Managed content

# Default group for all crawlers: declares the content signals (search use
# permitted, AI-training use not permitted) and allows crawling of every path.
User-agent: *
Content-Signal: search=yes,ai-train=no
Allow: /

# Each group below denies the named crawler access to the entire site.
User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CloudflareBrowserRenderingCrawler
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# END Cloudflare Managed Content

# ====================================================
# CareerCompass Robots.txt
# Updated: 2026-03-26 | GEO-Optimized for AI Search
# ====================================================

# === Default: applies to every crawler that has no more specific group ===
# Old numeric career URLs (/career/1, /career/274, etc.) redirect to slug-based
# URLs (/career/software-engineer) via JavaScript. Blocking them saves crawl
# budget and fixes "Page with redirect" in GSC.
#
# Paths are prefix matches, so one rule per leading digit covers every numeric
# ID. NOTE(review): the same prefixes would also match a slug that starts with
# a digit (e.g. a hypothetical /career/3d-artist) - confirm no such slugs exist.
#
# The Disallow rules are listed before the catch-all Allow so that parsers
# which stop at the first matching rule (rather than choosing the longest
# match, as RFC 9309 specifies) still honour them.
#
# Crawl-delay is a non-standard directive: crawlers that support it wait this
# many seconds between requests; Googlebot ignores it.
User-agent: *
Disallow: /career/0
Disallow: /career/1
Disallow: /career/2
Disallow: /career/3
Disallow: /career/4
Disallow: /career/5
Disallow: /career/6
Disallow: /career/7
Disallow: /career/8
Disallow: /career/9
Allow: /
Crawl-delay: 1

# === Google Search Crawlers ===
# A crawler obeys only the most specific group that names it, so these tokens
# do not inherit anything from the "*" group. They share one group here so the
# numeric career URL block (/career/1, /career/274, etc.) applies to all of
# them, not just to Googlebot.
#
# NOTE: Googlebot-Smartphone is not a token Google documents - the smartphone
# crawler obeys "Googlebot". It is kept only because listing it is harmless.
#
# Disallow rules come before the catch-all Allow so first-match parsers reach
# them; longest-match parsers give the same result either way.
User-agent: Googlebot
User-agent: Googlebot-Smartphone
User-agent: Googlebot-Image
User-agent: Googlebot-Video
User-agent: Googlebot-News
Disallow: /career/0
Disallow: /career/1
Disallow: /career/2
Disallow: /career/3
Disallow: /career/4
Disallow: /career/5
Disallow: /career/6
Disallow: /career/7
Disallow: /career/8
Disallow: /career/9
Allow: /

# === Google Ads Crawlers ===
# AdsBot ignores the "*" group and must be named explicitly. Both variants
# share a single group that grants unrestricted access.
User-agent: AdsBot-Google
User-agent: AdsBot-Google-Mobile
Allow: /

# === AI assistants, AI search and Bing ===
# Tokens covered by this group:
#   Google-Extended          - controls use of content for Gemini training and
#                              grounding. It does NOT affect Google Search or
#                              AI Overviews; those follow the Googlebot rules.
#   GPTBot                   - OpenAI training crawler
#   OAI-SearchBot            - ChatGPT search
#   ChatGPT-User             - user-initiated fetches from ChatGPT
#   ClaudeBot, anthropic-ai  - Anthropic / Claude
#   PerplexityBot            - Perplexity AI search
#   Bingbot, msnbot          - Microsoft Bing / Copilot
#
# A crawler obeys only the most specific group that names it, so these tokens
# do not inherit the "*" group. They share one group here so the numeric career
# URL block (/career/1, /career/274, etc.) applies to them as well.
#
# NOTE(review): Google-Extended, GPTBot and ClaudeBot are also given
# "Disallow: /" in the Cloudflare-managed section earlier in this file, and
# that section declares "ai-train=no". Parsers that merge groups resolve the
# Allow/Disallow tie in favour of Allow, but parsers that use only the first
# matching group will treat these crawlers as blocked. Align the Cloudflare
# setting with the intent of this group.
User-agent: Google-Extended
User-agent: GPTBot
User-agent: OAI-SearchBot
User-agent: ChatGPT-User
User-agent: ClaudeBot
User-agent: anthropic-ai
User-agent: PerplexityBot
User-agent: Bingbot
User-agent: msnbot
Disallow: /career/0
Disallow: /career/1
Disallow: /career/2
Disallow: /career/3
Disallow: /career/4
Disallow: /career/5
Disallow: /career/6
Disallow: /career/7
Disallow: /career/8
Disallow: /career/9
Allow: /

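# === Training-only crawlers (blocks data harvesting without blocking AI search) ===
# CCBot (Common Crawl, a training-dataset crawler) is already disallowed by the
# Cloudflare-managed section earlier in this file. Uncomment the group below
# only if that managed section is removed and CCBot should stay blocked:
# User-agent: CCBot
# Disallow: /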

# === Sitemaps ===
# Sitemap lines are not part of any User-agent group; they are read by every
# crawler that supports the directive. Each value is an absolute URL.
Sitemap: https://www.uddisha.com/sitemap.xml
Sitemap: https://www.uddisha.com/sitemap-blogs.xml