#!/usr/bin/env python3
"""
reliability_sim.py - Proof that AI reliability compounds DOWNWARD over a chain of tasks.

A "job" = a chain of N independent tasks. Let AI run the whole job autonomously and the
job only succeeds when EVERY task succeeds: P(job) = r**N. That collapses fast.

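A Tallyfy-style control layer gates each task: a checkpoint catches a failure and re-runs
it, up to a few attempts (the model assumes the checkpoint detects every failure).
Per-task success climbs to 1-(1-r)**attempts, so P(job) = (1-(1-r)**attempts)**N, which
still shrinks as N grows but far more slowly than r**N.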

Pure standard library. Seeded, so anyone re-running it gets the same numbers.
"""
import random

# Seed the module-level generator once so every run draws the same sequence
# of random numbers and therefore prints the same simulated percentages.
random.seed(42)
# Number of simulated jobs per estimate; used as the default `trials`
# argument of the simulation functions below.
TRIALS = 100_000


def chain_success(n, r, trials=TRIALS):
    """Estimate the success rate of an ungated chain of `n` tasks.

    Each simulated job runs its tasks in order; a task passes when a fresh
    `random.random()` draw is below `r`. The job is abandoned at the first
    failing task and counts as a success only if no task failed.

    Returns the fraction of the `trials` simulated jobs that succeeded.
    """
    successes = 0
    for _job in range(trials):
        for _task in range(n):
            if not (random.random() < r):
                # First failure ends this job; later tasks are never drawn.
                break
        else:
            # Loop ran to completion (also the case when n == 0).
            successes += 1
    return successes / trials


def gated_success(n, r, attempts=3, trials=TRIALS):
    """Estimate the success rate of a chain of `n` tasks with per-task retries.

    A task passes as soon as one of up to `attempts` draws of
    `random.random()` falls below `r`; if none does, the task fails and the
    whole job fails immediately without running the remaining tasks.

    Returns the fraction of the `trials` simulated jobs that succeeded.
    """

    def task_passes():
        # Stop drawing at the first successful attempt.
        for _attempt in range(attempts):
            if random.random() < r:
                return True
        return False

    # `all` stops at the first failed task, so no further draws are made
    # for a job that has already failed.
    completed = sum(
        1
        for _job in range(trials)
        if all(task_passes() for _task in range(n))
    )
    return completed / trials


# --- Report -----------------------------------------------------------------
# Runs at module level, in this fixed order, so the seeded random stream is
# consumed identically on every run and the printed numbers are reproducible.
R = 0.90  # per-task success probability used for every row
ATTEMPTS = 3  # tries allowed per task in the gated scenario
TAKEAWAY_TASKS = 10  # chain length quoted in the closing summary

print(f"Per-task AI reliability: {R:.0%}    Trials per row: {TRIALS:,}\n")
print("AI alone, chained end-to-end  (one failed task kills the whole job)")
# Header and data rows use the same column widths (6 / 14 / 10) so the
# "simulated" column lines up under its heading.
print(f"{'tasks':>6}   {'predicted r^n':>14}   {'simulated':>10}")
for n in (1, 3, 5, 10, 20):
    print(f"{n:>6}   {R ** n:>14.1%}   {chain_success(n, R):>10.1%}")

print(
    "\nWith per-task checkpoints + retry  "
    f"(the Tallyfy pattern, up to {ATTEMPTS} tries/task)"
)
for n in (10, 20):
    print(f"{n:>6} tasks   job success {gated_success(n, R, attempts=ATTEMPTS):.1%}")

# The takeaway figures are derived from the closed-form probabilities rather
# than hard-coded, so they stay correct if R or ATTEMPTS is changed, and they
# consume no extra random draws.
blind_rate = R ** TAKEAWAY_TASKS
gated_rate = (1 - (1 - R) ** ATTEMPTS) ** TAKEAWAY_TASKS
print(
    f"\nTakeaway: a {TAKEAWAY_TASKS}-step job run blind succeeds only "
    f"~{blind_rate:.0%} of the time."
)
print(f"Define, track and gate each task and the same job clears ~{gated_rate:.0%}.")