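Each target repository carries a ground-truth manifest pinned to a commit SHA. An entry with is_vulnerable: false is a false-positive trap, i.e. code that is labeled with a vulnerability class but is actually safe; acceptable_cwes lists the CWE IDs accepted for a finding, which absorbs reasonable CWE ambiguity.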
ground-truth.json — example entries
{
  "schema_version": "1.0",
  "repo_id": "realvuln-pygoat",
  "repo_url": "https://github.com/adeyosemanputra/pygoat",
  // Pinning the manifest to one commit keeps the ground truth from drifting.
  "commit_sha": "a1b2c3…",
  "type": 1,
  "language": "python",
  "framework": "django",
  "authorship": "human_authored",
  "authorship_confidence": "high",
  "authorship_evidence": "pre-LLM project, established 2018",
  "findings": [
    // Entry marked as a real vulnerability (is_vulnerable: true).
    {
      "id": "pygoat-014",
      "is_vulnerable": true,
      "vulnerability_class": "sql_injection",
      "primary_cwe": "CWE-89",
      // CWE labels tolerated for this finding, to absorb reasonable CWE ambiguity.
      "acceptable_cwes": ["CWE-89", "CWE-564", "CWE-943"],
      "file": "introduction/views.py",
      "location": {
        "start_line": 42,
        "end_line": 48,
        "function": "sql_lab"
      },
      "severity": "high",
      "evidence": {
        "source": "manual_review",
        "cve_id": null,
        "description": "SQL injection via unsanitized parameter"
      }
    },
    // False-positive trap: is_vulnerable is false and the evidence describes the code as safe.
    // NOTE(review): unlike pygoat-014, no file/location/severity/acceptable_cwes appear here —
    // presumably elided for brevity in this example; confirm against the schema.
    {
      "id": "pygoat-fp-003",
      "is_vulnerable": false,
      "vulnerability_class": "sql_injection",
      "primary_cwe": "CWE-89",
      "evidence": {
        "source": "manual_review",
        "description": "ORM filter() — auto-parameterized, safe"
      }
    }
  ]
}