{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "Harvested Plays Catalog — Parquet Schema",
  "description": "Schema for the harvested-plays.parquet dataset. One row per harvested Solution Play.",
  "license": "CC0-1.0",
  "columns": [
    { "name": "play_slug", "type": "string", "description": "Solution Play slug (e.g. enterprise-rag)" },
    { "name": "variety", "type": "string", "description": "Cloud variety (azure, aws, gcp, multi-cloud)" },
    { "name": "source_repo", "type": "string", "description": "GitHub repo URL the play was harvested from" },
    { "name": "upstream_commit_sha", "type": "string", "description": "Upstream commit SHA at harvest time (40-char hex)" },
    { "name": "pipeline_version", "type": "string", "description": "Harvest pipeline version used (e.g. 1.0.0)" },
    { "name": "confidence_aggregate", "type": "float", "description": "Aggregate confidence score (0.0–1.0)" },
    { "name": "llm_steps_count", "type": "int32", "description": "Number of LLM extraction and scaffolding steps" },
    { "name": "avm_modules_count", "type": "int32", "description": "Number of AVM modules in the composed infrastructure" },
    { "name": "waf_aggregate_score", "type": "float", "nullable": true, "description": "WAF compliance aggregate score (null if not run)" },
    { "name": "cost_estimate_usd", "type": "float", "nullable": true, "description": "Monthly cost estimate in USD (null if not computed)" },
    { "name": "harvested_at", "type": "timestamp", "description": "Timestamp of harvest completion (ISO 8601 when rendered as text)" },
    { "name": "primary_cloud", "type": "string", "description": "Primary cloud provider detected in repo facts" },
    { "name": "domain", "type": "string", "description": "Domain classification (e.g. rag, microservices, data-platform)" }
  ]
}
