
#!/usr/bin/env python3
"""
Media verifier for #OpTanzania
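- Recursively scans a directory for images/videos (matched by file extension)
- Computes SHA256 for every file; extracts EXIF via exiftool (images) and
  format/stream info via ffprobe (videos) when those tools are installed
- Copies each file into the output directory, named by its SHA256 hash
- Emits a CSV + JSONL with verification metadata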
"""
import os, sys, json, csv, subprocess, argparse, hashlib, shutil
from datetime import datetime
from pathlib import Path

# Lower-case file suffixes (leading dot included) treated as still images.
IMAGE_EXT = {
    ".jpg", ".jpeg", ".png",
    ".webp", ".tiff", ".bmp",
}
# Lower-case file suffixes (leading dot included) treated as videos.
VIDEO_EXT = {
    ".mp4", ".mov", ".mkv",
    ".webm", ".avi", ".m4v",
}

def sha256_file(path):
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is opened in binary mode and consumed in 64 KiB pieces so the
    whole content never has to be held in memory at once.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while True:
            block = stream.read(65536)
            if not block:
                # An empty read signals end of file.
                break
            digest.update(block)
    return digest.hexdigest()

def exiftool_available():
    """Report whether an ``exiftool`` executable can be located via PATH."""
    located = shutil.which("exiftool")
    return located is not None

def ffprobe_available():
    """Report whether an ``ffprobe`` executable can be located via PATH."""
    located = shutil.which("ffprobe")
    return located is not None

def exiftool_read(path):
    """Run ``exiftool -json`` on *path* and return the first parsed record.

    Best-effort: any failure (tool missing, non-zero exit status, output that
    is not JSON, empty result list) is reported as ``{"exif_error": <message>}``
    instead of being raised.
    """
    command = ["exiftool", "-json", path]
    try:
        completed = subprocess.run(command, capture_output=True, text=True, check=True)
        records = json.loads(completed.stdout)
        return records[0]
    except Exception as err:
        return {"exif_error": str(err)}

def ffprobe_read(path):
    """Run ``ffprobe`` on *path* and return its JSON report as a dict.

    The command requests both format and stream sections in JSON form.
    Best-effort: any failure (tool missing, non-zero exit status, output that
    is not JSON) is reported as ``{"ffprobe_error": <message>}`` instead of
    being raised.
    """
    command = [
        "ffprobe",
        "-v", "quiet",
        "-print_format", "json",
        "-show_format",
        "-show_streams",
        path,
    ]
    try:
        completed = subprocess.run(command, capture_output=True, text=True, check=True)
        return json.loads(completed.stdout)
    except Exception as err:
        return {"ffprobe_error": str(err)}

def main(argv=None):
    """Scan a directory tree for media files and write verification reports.

    For every file under ``--in`` whose lower-cased suffix is in IMAGE_EXT or
    VIDEO_EXT, this computes the SHA-256 digest, gathers exiftool/ffprobe
    metadata when the matching tool is found, copies the file into ``--out``
    as ``<sha256><ext>``, and appends one JSON line per file. After the scan,
    a CSV with the columns ``path,type,sha256`` is written to ``--csv``; the
    JSONL file sits next to it with the same stem.

    Args:
        argv: Optional list of command-line arguments. When None (the
            default), argparse reads ``sys.argv[1:]`` as before.
    """
    # Local import: only `datetime` itself is imported at file level.
    from datetime import timezone

    ap = argparse.ArgumentParser()
    ap.add_argument("--in", dest="inp", required=True, help="Input directory")
    ap.add_argument("--out", dest="out", required=True, help="Output directory for verified copies")
    ap.add_argument("--csv", dest="csv_path", default="processed/timeline_csv/verified_media.csv")
    args = ap.parse_args(argv)

    os.makedirs(args.out, exist_ok=True)
    # A bare filename such as "report.csv" has an empty dirname, and
    # os.makedirs("") raises FileNotFoundError, so only create real parents.
    csv_dir = os.path.dirname(args.csv_path)
    if csv_dir:
        os.makedirs(csv_dir, exist_ok=True)

    rows = []
    jsonl_path = os.path.splitext(args.csv_path)[0] + ".jsonl"
    with open(jsonl_path, "w", encoding="utf-8") as jlf:
        for root, _, files in os.walk(args.inp):
            for fn in files:
                p = Path(root) / fn
                ext = p.suffix.lower()
                if ext not in IMAGE_EXT and ext not in VIDEO_EXT:
                    continue
                # Aware clock instead of the deprecated datetime.utcnow();
                # tzinfo is dropped so the text stays "<naive ISO>Z".
                stamp = datetime.now(timezone.utc).replace(tzinfo=None)
                meta = {
                    "path": str(p),
                    "sha256": sha256_file(p),
                    "verified_at": stamp.isoformat() + "Z",
                    "type": "video" if ext in VIDEO_EXT else "image",
                }
                if ext in IMAGE_EXT and exiftool_available():
                    meta["exif"] = exiftool_read(str(p))
                if ext in VIDEO_EXT and ffprobe_available():
                    meta["ffprobe"] = ffprobe_read(str(p))
                # Copy to verified dir (by hash)
                out_name = meta["sha256"] + ext
                out_path = Path(args.out) / out_name
                if not out_path.exists():
                    # Stream the copy (videos can be large) into a temporary
                    # name and move it into place afterwards, so an
                    # interrupted run never leaves a truncated file under the
                    # final hash name that later runs would skip as "present".
                    part_path = out_path.with_name(out_name + ".part")
                    shutil.copyfile(p, part_path)
                    os.replace(part_path, out_path)
                rows.append([meta["path"], meta["type"], meta["sha256"]])
                jlf.write(json.dumps(meta) + "\n")

    # The CSV is written after the JSONL file has been closed.
    with open(args.csv_path, "w", newline="", encoding="utf-8") as cf:
        cw = csv.writer(cf)
        cw.writerow(["path", "type", "sha256"])
        cw.writerows(rows)

    print(f"[verifier] Wrote {args.csv_path} and {jsonl_path}")
    print(f"[verifier] Verified files stored in {args.out}")

# Run the verifier only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
