{
  "description": "Weekly benchmark scan targets. 50 domains selected for long-term agent-readiness tracking. Edited annually.",
  "lastReviewed": "2026-04-20",
  "sources": {
    "tranco_top": "40 domains sampled from the Tranco Top 1M list (tranco-list.eu), filtered to scan-suitable sites (excludes CDNs, login-walled, and asset-only hosts).",
    "curated_agent_relevant": "10 domains curated for agent / AI infrastructure relevance."
  },
  "domains": [
    "google.com",
    "youtube.com",
    "wikipedia.org",
    "amazon.com",
    "reddit.com",
    "linkedin.com",
    "bing.com",
    "yahoo.com",
    "duckduckgo.com",
    "microsoft.com",
    "apple.com",
    "twitter.com",
    "instagram.com",
    "pinterest.com",
    "nytimes.com",
    "bbc.com",
    "cnn.com",
    "theguardian.com",
    "washingtonpost.com",
    "reuters.com",
    "bloomberg.com",
    "wsj.com",
    "medium.com",
    "substack.com",
    "quora.com",
    "imdb.com",
    "netflix.com",
    "spotify.com",
    "ebay.com",
    "walmart.com",
    "target.com",
    "adobe.com",
    "salesforce.com",
    "zoom.us",
    "slack.com",
    "notion.so",
    "figma.com",
    "dropbox.com",
    "atlassian.com",
    "shopify.com",
    "anthropic.com",
    "openai.com",
    "github.com",
    "stackoverflow.com",
    "vercel.com",
    "supabase.com",
    "huggingface.co",
    "langchain.com",
    "modelcontextprotocol.io",
    "everyailaw.com"
  ],
  "excluded_patterns": [
    "*.gstatic.com — CDN, no public content",
    "facebook.com — aggressively blocks bots, unstable scans",
    "instagram.com — note: currently included; review if scans fail repeatedly",
    "tiktok.com — content lives behind app walls",
    "x.com — rebrand-in-flux, redirect chains"
  ]
}
