{
  "description": "Benchmark and capability claims for Claude Fable 5 / Mythos 5, each traceable to Anthropic's 2026-06-09 announcement. Chart URLs are Anthropic's originals.",
  "announcement": "https://www.anthropic.com/news/claude-fable-5-mythos-5",
  "overview_chart": "https://www-cdn.anthropic.com/images/4zrzovbb/website/1e65982497d7d4891219ed0e83141625a291b860-2600x2870.png",
  "claims": [
    {"domain": "software_engineering", "claim": "Stripe: 50-million-line Ruby codebase migration completed in one day (team estimate: two months)", "source": "announcement"},
    {"domain": "software_engineering", "claim": "Highest score among frontier models on Cognition FrontierCode, at medium effort", "chart": "https://www-cdn.anthropic.com/images/4zrzovbb/website/d3c3efe0e8ab310856368cee2b2161439db6676a-1920x1080.png", "chart_2": "https://www-cdn.anthropic.com/images/4zrzovbb/website/036229d8f9be9a5a911dbbd863b3c6cc09a79a70-3840x2160.png"},
    {"domain": "knowledge_work", "claim": "Hebbia: first model to break 90% on core analytics benchmark — a 10-point jump over Opus; highest score on Hebbia Finance Benchmark"},
    {"domain": "knowledge_work", "claim": "IMC: aced trading-analysis evaluations nearly across the board"},
    {"domain": "vision", "claim": "Completed Pokémon FireRed vision-only with a minimal harness; rebuilds web-app source from screenshots; extracts precise numbers from scientific figures"},
    {"domain": "memory_long_context", "claim": "With file-based memory in Slay the Spire: performance improved 3x more than Opus 4.8; reached final act 3x more often"},
    {"domain": "physics_research", "claim": "Notation Capital: strongest model on frontier physics research using one-third of the reasoning tokens; in 36 hours came close to GPT-5.5's four-day result"},
    {"domain": "vibe_coding", "claim": "Vibe: highest-performing model on ViBench, nearly saturating base use cases"},
    {"domain": "mythos_drug_design", "claim": "Mythos 5: ~10x acceleration on aspects of drug design; 9 of 14 protein targets yielded strong candidates", "chart": "https://www-cdn.anthropic.com/images/4zrzovbb/website/6a97019c4d8ea13fdd7200455f6dd9e8c267ba0b-1920x1080.png"},
    {"domain": "mythos_molecular_biology", "claim": "Scientists preferred Mythos hypotheses over those from Opus-class models ~80% of the time in blinded comparisons; one E. coli mechanism hypothesis was independently corroborated"},
    {"domain": "mythos_genomics", "claim": "Week-long autonomous run: single-cell data for millions of cells across 138 species; custom model outperformed the one from a recent Science publication while being 100x smaller"},
    {"domain": "mythos_biology_evals", "claim": "Outperformed dedicated protein language models on AAV property prediction using biological reasoning alone", "chart": "https://www-cdn.anthropic.com/images/4zrzovbb/website/3437ad5c0853a7bd273ed5e56289a4f38dcd9731-3840x2160.png"}
  ],
  "safety": [
    {"claim": "Safeguard classifiers (cyber, bio/chem, distillation) trigger on <5% of sessions; fallback to Opus 4.8 when triggered", "chart": "https://www-cdn.anthropic.com/images/4zrzovbb/website/faf941fe1ebfd09139d39b8e4ad9048121979284-3840x2160.png"},
    {"claim": "No universal jailbreaks in 1,000+ external bug-bounty hours; zero compliance with harmful single-turn requests across 30 public jailbreak techniques", "chart": "https://www-cdn.anthropic.com/images/4zrzovbb/website/6bede3f6101d15bd899922917ea6246adda4515b-1920x1080.png"},
    {"claim": "Mythos 5 misalignment (incl. deception) measured low, similar to Opus 4.8", "chart": "https://www-cdn.anthropic.com/images/4zrzovbb/website/2502a0daf85b741641cff36757d7243ef48f8be8-3840x2160.png"}
  ],
  "last_verified": "2026-06-10"
}
