import requests, csv, os, time, random, subprocess
from urllib.parse import unquote
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime

# Request headers sent with every search query (see fetch_page): a
# desktop-browser User-Agent string.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 Safari/537.36"
    ),
}

def banner():
    """Print the tool's boxed start-up banner to stdout."""
    rows = (
        "╔══════════════════════════════════════════════╗",
        "║     🛡️  JEDI SECURITY - DORK ENGINE 2.0 🛡️     ║",
        "╠══════════════════════════════════════════════╣",
        "║ Engine:     Yahoo (Multithreaded)            ║",
        "║ Functions:  Filetype Dork + Auto Downloader  ║",
        "║ Built by:   Sue // JediSec                   ║",
        "╚══════════════════════════════════════════════╝",
    )
    # One blank line before the box and one after it (print adds the final
    # newline itself).
    print("\n" + "\n".join(rows) + "\n")

def extract_real_url(href):
    """Return the target URL embedded in a search-redirect link.

    The redirect links handled here carry the destination as a
    percent-encoded value after ``RU=``, terminated by ``/RK=``.

    Args:
        href: The redirect link taken from a result anchor.

    Returns:
        The percent-decoded destination URL, or ``None`` when *href* is not a
        string or contains no ``RU=`` marker.
    """
    if not isinstance(href, str):
        return None

    # Prefer the slash-delimited path segment: a bare "RU=" can also appear at
    # the tail of an earlier opaque token, which would yield a garbage URL.
    # The bare marker is kept as a fallback for links without the slash.
    for marker in ("/RU=", "RU="):
        _, found, tail = href.partition(marker)
        if found:
            return unquote(tail.split("/RK=")[0])
    return None

def _has_file_suffix(url, suffix):
    """Return True if *url*, or its part before any query/fragment, ends with *suffix* (case-insensitive)."""
    suffix = suffix.lower()
    lowered = url.lower()
    path_only = lowered.split("#", 1)[0].split("?", 1)[0]
    return lowered.endswith(suffix) or path_only.endswith(suffix)


def fetch_page(domain, filetype, keyword, page):
    """Scrape one Yahoo results page for links to files of the given type.

    Args:
        domain: Domain used in the ``site:`` operator.
        filetype: Extension (without the dot) used in the ``filetype:``
            operator and to filter the resulting links.
        keyword: Extra search terms; may be empty.
        page: Zero-based page index; ten results per page are assumed.

    Returns:
        A set of decoded target URLs whose extension matches *filetype*.
        The set is empty when the request fails, the server answers with an
        HTTP error status, or nothing matches.
    """
    results = set()
    query = f"site:{domain} filetype:{filetype} {keyword}".strip()
    # Result offset sent as "b": 1 for the first page, 11 for the second, ...
    offset = page * 10 + 1
    base_url = "https://search.yahoo.com/search"
    params = {"p": query, "b": str(offset)}

    try:
        res = requests.get(base_url, headers=HEADERS, params=params, timeout=10)
        # Without this an error page (rate limit, block, server error) would
        # be parsed like a result page and silently reported as "0 links".
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")
        anchors = soup.find_all("a", href=True)
    except Exception as e:
        # Best effort: one failed page must not abort the whole run.
        print(f"[!] Error page {page+1}: {e}")
        return results

    suffix = f".{filetype}"
    for a in anchors:
        href = a["href"]
        # Only redirect links from the search host carry a result target.
        if not href.startswith("https://r.search.yahoo.com/"):
            continue
        real = extract_real_url(href)
        # Match the extension case-insensitively and tolerate a trailing
        # query string or fragment (e.g. "report.PDF", "a.pdf?download=1").
        if real and _has_file_suffix(real, suffix):
            results.add(real)

    print(f"[+] Page {page+1}: {len(results)} links")
    return results

def yahoo_dork_multithread(domain, filetype, keyword, max_pages=50, threads=10):
    """Fetch result pages concurrently and merge the links they yield.

    One ``fetch_page`` job is submitted per page index in
    ``range(max_pages)`` to a pool of *threads* workers.

    Returns:
        The union of the sets returned by every page job.

    Raises:
        Whatever a page job raised, re-raised when its result is collected.
    """
    merged = set()
    pool = ThreadPoolExecutor(max_workers=threads)
    with pool:
        pending = []
        for page_index in range(max_pages):
            job = pool.submit(fetch_page, domain, filetype, keyword, page_index)
            pending.append(job)
        # Collect in completion order; the order is irrelevant for a set.
        for finished in as_completed(pending):
            merged.update(finished.result())
    return merged

def save_csv(domain, urls):
    """Merge *urls* into the per-domain CSV file and report what was added.

    The file is named after *domain* with dots replaced by underscores
    (``nasa.gov`` -> ``nasa_gov.csv``) and lives in the current working
    directory. It holds one URL per row, de-duplicated and sorted.

    Args:
        domain: Domain the URLs were collected for.
        urls: Iterable of URL strings found in this run.
    """
    fn = f"{domain.replace('.', '_')}.csv"
    existing = set()
    if os.path.exists(fn):
        with open(fn, newline="") as f:
            existing = {row[0] for row in csv.reader(f) if row}

    # Only URLs that were not already on disk count as "new"; reporting
    # len(urls) would overstate the result on every repeated run.
    new_urls = set(urls) - existing
    combined = existing | new_urls
    with open(fn, "w", newline="") as f:
        csv.writer(f).writerows([url] for url in sorted(combined))

    print(f"\n[✓] Saved {len(new_urls)} new URLs.")
    print(f"[📁] File: {fn} — Total: {len(combined)}\n")

def _remove_partial(path):
    """Delete *path* if present so a failed attempt leaves no stub file behind."""
    try:
        os.remove(path)
    except OSError:
        # Nothing to clean up (or not removable); the caller still reports
        # the download as failed.
        pass


def download_file(url, output_dir, failed_log):
    """Download *url* into *output_dir*, falling back to ``wget`` on failure.

    The local file name is the last path component of the URL with any query
    string removed. A file that already exists is left untouched.

    Args:
        url: Address of the file to fetch.
        output_dir: Existing directory that receives the file.
        failed_log: Path of a text file; the URL is appended to it when both
            download attempts fail.

    Returns:
        None. Progress and failures are reported on stdout.
    """
    local_name = os.path.join(output_dir, os.path.basename(url.split("?")[0]))
    if os.path.exists(local_name):
        return

    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(url, stream=True, timeout=15) as r:
            if r.status_code != 200:
                raise Exception(f"HTTP {r.status_code}")
            with open(local_name, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
    except Exception as e:
        # Keep the reason for the final failure message; ``e`` itself is
        # unbound once this clause ends.
        error = e
    else:
        print(f"[⬇] {os.path.basename(local_name)}")
        return

    # A truncated file would be mistaken for a finished download on the next
    # run (see the existence check above), so drop it before retrying.
    _remove_partial(local_name)

    try:
        # Argument list instead of a shell string: the URL comes from scraped
        # search results and must never be interpreted by a shell. "--" stops
        # option parsing so the URL cannot be read as a wget flag.
        proc = subprocess.run(
            ["wget", "-q", "-O", local_name, "--", url], timeout=20
        )
        # Judge success by the exit status: "-O" may leave an (empty) output
        # file behind even when the transfer failed.
        if proc.returncode == 0 and os.path.isfile(local_name):
            print(f"[⬇][wget] {os.path.basename(local_name)}")
            return
    except (OSError, ValueError, subprocess.SubprocessError):
        # wget missing, unusable arguments, or timed out: fall through and
        # record the URL as failed.
        pass

    _remove_partial(local_name)
    print(f"[X] Failed: {url} ({error})")
    with open(failed_log, "a") as fail_log:
        fail_log.write(url + "\n")

def start_downloads(csv_path, threads=10):
    """Download every URL listed in *csv_path* and optionally zip the result.

    Files go to ``downloads/<csv base name>_<YYYYmmdd_HHMM>/`` below the
    current working directory. After the downloads finish the user is asked
    interactively whether the folder should be zipped.

    Args:
        csv_path: CSV file whose first column holds one URL per row.
        threads: Number of concurrent download workers.
    """
    # Imported locally so this function does not depend on the file's
    # top-level import list.
    import shutil

    timestamp = datetime.now().strftime("%Y%m%d_%H%M")
    domain = os.path.basename(csv_path).split(".")[0]
    output_dir = os.path.join("downloads", f"{domain}_{timestamp}")
    failed_log = os.path.join(output_dir, "failed_downloads.txt")

    os.makedirs(output_dir, exist_ok=True)
    with open(csv_path, newline='') as f:
        urls = [row[0] for row in csv.reader(f) if row]

    print(f"[📦] Starting threaded downloads of {len(urls)} files into {output_dir}/\n")

    with ThreadPoolExecutor(max_workers=threads) as executor:
        futures = [executor.submit(download_file, url, output_dir, failed_log) for url in urls]
        for future in as_completed(futures):
            # Report unexpected worker errors instead of dropping them; one
            # bad download must still not abort the rest.
            error = future.exception()
            if error is not None:
                print(f"[!] Download worker error: {error}")

    print(f"\n[✓] Downloads complete. Files in: {output_dir}/")
    if os.path.exists(failed_log):
        print(f"[⚠] Failed links logged in: {failed_log}")

    ask_zip = input("🗜️  Zip downloaded folder? (y/n): ").strip().lower()
    if ask_zip == "y":
        zip_name = f"{output_dir}.zip"
        try:
            # Standard-library archiver: no dependency on an external "zip"
            # binary. base_dir keeps the "downloads/<folder>/" prefix inside
            # the archive.
            shutil.make_archive(output_dir, "zip", base_dir=output_dir)
        except OSError as e:
            print(f"[!] Could not create zip archive: {e}")
        else:
            print(f"[📦] Zipped as: {zip_name}")

def _ask_positive_int(prompt, default):
    """Prompt for a whole number >= 1; return *default* on blank or invalid input."""
    raw = input(prompt).strip()
    if not raw:
        return default
    try:
        value = int(raw)
    except ValueError:
        print(f"[!] '{raw}' is not a whole number; using {default}.")
        return default
    if value < 1:
        # Zero or negative workers make ThreadPoolExecutor raise, and zero
        # pages would search nothing.
        print(f"[!] Value must be at least 1; using {default}.")
        return default
    return value


def main():
    """Interactive entry point: collect search settings, scrape, save, download.

    Prompts for domain, file type, optional keyword, page count (default 25)
    and thread count (default 10). Found URLs are merged into the per-domain
    CSV file; the user may then start downloading the files listed there.
    """
    banner()
    domain   = input(" 🌐 Domain (e.g., nasa.gov): ").strip()
    # Accept ".pdf" as well as "pdf"; the dot is added where it is needed.
    filetype = input(" 📄 Filetype (e.g., pdf, docx): ").strip().lstrip(".")
    keyword  = input(" 🔍 Keyword (optional): ").strip()
    pages    = _ask_positive_int(" 📑 Pages to scrape (e.g., 50): ", 25)
    threads  = _ask_positive_int(" ⚙️ Threads (e.g., 10): ", 10)

    # Without both values the "site:" / "filetype:" query is meaningless.
    if not domain or not filetype:
        print("⚠ Domain and filetype are required.")
        return

    print("\n[🔎] Launching multithreaded dorker...\n")
    results = yahoo_dork_multithread(domain, filetype, keyword, pages, threads)

    if results:
        save_csv(domain, results)
        ask = input(" ⬇ Download files from CSV? (y/n): ").strip().lower()
        if ask == "y":
            # Same file name that save_csv derives from the domain.
            csv_path = f"{domain.replace('.', '_')}.csv"
            start_downloads(csv_path, threads=threads)
    else:
        print("⚠ No results found.")

# Run the interactive tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
