import os
import time
import json
import urllib.parse
import requests
import subprocess
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor

# HTTP headers attached to each search request.
HEADERS = {
    "User-Agent": "Mozilla/5.0",
}
# Seconds to sleep between refresh cycles (5 minutes).
REFRESH_INTERVAL = 5 * 60
# Maximum number of worker threads processing sources in parallel.
MAX_THREADS = 5

def fetch_links_from_duckduckgo(query):
    """Run *query* against DuckDuckGo's HTML endpoint and collect anchor targets.

    POSTs the query to ``https://html.duckduckgo.com/html/`` and gathers the
    ``href`` of every ``<a>`` element found in the response body.

    Args:
        query: Search string sent as the ``q`` form field.

    Returns:
        A list of unique ``href`` strings in no particular order. The list is
        empty when the request fails, the server answers with an HTTP error
        status, or parsing fails; the error is printed rather than raised.
    """
    url = "https://html.duckduckgo.com/html/"
    payload = {"q": query}
    all_links = set()

    try:
        # The context manager releases the session's pooled connections,
        # which would otherwise stay open until garbage collection.
        with requests.Session() as session:
            response = session.post(url, headers=HEADERS, data=payload, timeout=15)
            # Do not harvest links from 4xx/5xx error pages as if they were
            # search results.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
        # NOTE(review): hrefs are kept verbatim and include every anchor on
        # the page, not only result entries - confirm callers expect that.
        all_links = {a['href'] for a in soup.find_all("a", href=True)}
    except Exception as e:
        # Deliberately broad: this is a best-effort fetch and a failure for
        # one query must not abort the caller.
        print(f"[!] Error fetching links for {query}: {e}")

    return list(all_links)

def save_to_json(domain, links):
    """Merge *links* into the per-domain JSON file and return the merged list.

    The file is ``json/<domain>.json`` relative to the current working
    directory, with every ``.`` in *domain* replaced by ``_``. Links already
    stored in the file are kept; the union is written back sorted.

    Args:
        domain: Label used to derive the file name.
        links: Iterable of link strings to add.

    Returns:
        The sorted list of all links now stored in the file.
    """
    os.makedirs("json", exist_ok=True)
    filename = os.path.join("json", f"{domain.replace('.', '_')}.json")

    try:
        with open(filename, 'r', encoding="utf-8") as f:
            stored = json.load(f)
    except FileNotFoundError:
        stored = []
    except ValueError as e:
        # Covers json.JSONDecodeError and UnicodeDecodeError: a damaged file
        # must not make every later update for this domain fail.
        print(f"[!] Ignoring unreadable {filename}: {e}")
        stored = []

    if not isinstance(stored, list):
        print(f"[!] Ignoring unexpected content in {filename}")
        stored = []

    # Keep only strings so that hashing and sorting below cannot raise on
    # stray values (numbers, nested lists, ...) in a hand-edited file.
    existing = {item for item in stored if isinstance(item, str)}
    combined = sorted(existing.union(links))

    with open(filename, 'w', encoding="utf-8") as f:
        json.dump(combined, f, indent=2)

    print(f"[✓] Updated {filename} ({len(combined)} links total)")
    return combined

def download_links(domain, links):
    """Download each link that is not yet present into ``downloads/<domain>/``.

    The target folder is ``downloads/<domain>`` relative to the current
    working directory, with every ``.`` in *domain* replaced by ``_``. Each
    link is fetched with the external ``wget`` program. Links are skipped when
    they are not absolute http/https/ftp URLs, when their path has no file
    name component, or when a file of that name already exists in the folder.

    Args:
        domain: Label used to derive the target folder name.
        links: Iterable of URL strings to download.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    folder = os.path.join("downloads", domain.replace('.', '_'))
    os.makedirs(folder, exist_ok=True)

    for url in links:
        try:
            parsed = urllib.parse.urlparse(url)
        except ValueError:
            # Malformed URL (e.g. an unbalanced IPv6 bracket); nothing to fetch.
            continue

        # Links are scraped from untrusted HTML: only hand real remote URLs
        # to wget, never mailto:/javascript:/relative hrefs or option-like
        # strings starting with '-'.
        if parsed.scheme not in ("http", "https", "ftp") or not parsed.netloc:
            continue

        filename = os.path.basename(parsed.path)
        if not filename:
            continue

        file_path = os.path.join(folder, filename)
        if os.path.exists(file_path):
            print(f"  [⚠] Skipping already downloaded: {filename}")
            continue

        try:
            # "--" ends option parsing so the URL can never be read as a flag.
            subprocess.run([
                "wget", "--no-clobber", "--timeout=10", "--tries=3",
                "-P", folder, "--", url
            ], check=True)
            print(f"  [↓] Downloaded: {filename}")
        except subprocess.CalledProcessError:
            print(f"  [✗] Failed: {url}")
        except FileNotFoundError:
            # The wget executable itself is missing; retrying per link is
            # pointless, so stop here instead of raising out of the worker.
            print("  [✗] wget not found on PATH; skipping downloads")
            return

def process_source(entry):
    """Handle one source line: search, persist the links, then download them.

    The first whitespace-separated token of *entry* is used as the domain
    label for storage; the whole line (whitespace-normalised) is the search
    query. Lines with fewer than two tokens are ignored.

    Args:
        entry: One line of text describing a source.

    Returns:
        None.
    """
    tokens = entry.split()
    if len(tokens) >= 2:
        label = tokens[0]
        search_text = " ".join(tokens)
        print(f"[→] Processing: {search_text}")

        found = fetch_links_from_duckduckgo(search_text)
        if not found:
            return

        # Download the full merged set, not only the newly found links.
        merged = save_to_json(label, found)
        download_links(label, merged)

def main():
    """Run the watcher loop: reload sources, process them in parallel, sleep.

    Each cycle re-reads ``sources.txt`` from the current working directory
    (blank lines are dropped), processes every remaining line with
    ``process_source`` on a thread pool of ``MAX_THREADS`` workers, then
    sleeps ``REFRESH_INTERVAL`` seconds. The loop runs until interrupted.

    Returns:
        None. Returns only when ``sources.txt`` does not exist.
    """
    print("\n=== 🛰️ Multi-threaded DuckDuckGo Dork Watcher ===")

    while True:
        try:
            with open("sources.txt", "r") as f:
                sources = [line.strip() for line in f if line.strip()]
        except FileNotFoundError:
            print("[!] Please create a 'sources.txt' file with one dork query per line.")
            return

        print(f"\n[⏱] Refreshing {len(sources)} sources with {MAX_THREADS} threads...")
        with ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
            pending = [
                (entry, executor.submit(process_source, entry))
                for entry in sources
            ]

        # Leaving the executor context waited for every worker. Inspect each
        # future so an exception raised in a worker is reported instead of
        # being silently discarded.
        for entry, future in pending:
            error = future.exception()
            if error is not None:
                print(f"[!] Source '{entry}' failed: {error!r}")

        print(f"[💤] Sleeping for {REFRESH_INTERVAL} seconds...\n")
        time.sleep(REFRESH_INTERVAL)

# Start the watcher only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
