{"bench":"chakravyuh-bench-v0","ranked_by":"f1","n_entries":3,"entries":[{"method":"Chakravyuh-Qwen2.5-LoRA v2","submitter":"chakravyuh-team","detection":0.993,"fpr":0.067,"f1":0.99,"bench_version":"chakravyuh-bench-v0","n_evaluated":174,"artifact_url":"https://huggingface.co/ujjwalpardeshi/chakravyuh-analyzer-lora-v2","notes":"Bootstrap 95% CIs at logs/bootstrap_v2.json: detection [0.979, 1.000], FPR [0.000, 0.167], F1 [0.976, 1.000].","submitted_at":"2026-04-25T10:49:50+00:00","seeded":true},{"method":"Chakravyuh-Qwen2.5-LoRA v1 (reward-hacked)","submitter":"chakravyuh-team","detection":1.0,"fpr":0.36,"f1":0.96,"bench_version":"chakravyuh-bench-v0","n_evaluated":135,"artifact_url":"https://github.com/UjjwalPardeshi/Chakravyuh/blob/main/logs/eval_v2.json","notes":"Diagnostic baseline: this is the textbook reward-hacking fingerprint. Kept on the board to motivate v2.","submitted_at":"2026-04-25T10:49:50+00:00","seeded":true},{"method":"Scripted baseline (rule-based)","submitter":"chakravyuh-team","detection":0.701,"fpr":0.29,"f1":0.795,"bench_version":"chakravyuh-bench-v0","n_evaluated":174,"artifact_url":"https://github.com/UjjwalPardeshi/Chakravyuh/blob/main/data/chakravyuh-bench-v0/baselines.json","notes":"11-signal taxonomy + legit-SMS allowlist. Threshold = 0.50.","submitted_at":"2026-04-25T10:49:50+00:00","seeded":true}]}