import requests, os, threading, json, csv, re
from bs4 import BeautifulSoup
from tqdm import tqdm
from fake_useragent import UserAgent
from urllib.parse import quote_plus, urlparse
from concurrent.futures import ThreadPoolExecutor

# === CONFIGURATION === #
# NOTE(review): the queries below target government, military and corporate
# domains. Confirm you are authorised to collect and store whatever they
# surface, and that doing so complies with the search engine's terms of
# service and applicable law, before running this script.
# Search queries submitted one at a time to bing_search().
DORKS = [
    'site:.mil filetype:pdf',
    'site:.gov filetype:xls',
    'intitle:"index of" "confidential"',
    'site:raytheon.com filetype:docx',
]
# Upper bound of the `first=` offset requested per dork; bing_search() steps
# through it 10 at a time, so 100 means 10 result-page requests per dork.
RESULTS_PER_DORK = 100
# Worker count for the download thread pool.
THREADS = 10
# When False, results are only exported and nothing is downloaded.
DOWNLOAD_FILES = True
# File extensions (without the dot) a link must end in to be collected.
ALLOWED_TYPES = ['pdf', 'doc', 'docx', 'xls', 'xlsx', 'zip']
# Root folder for results.csv / results.json; downloads go in a subfolder.
EXPORT_DIR = 'output'
DOWNLOAD_DIR = os.path.join(EXPORT_DIR, 'downloads')
# UserAgent().random is evaluated once here, so every request made during a
# run carries the same User-Agent string.
HEADERS = {'User-Agent': UserAgent().random}
# ====================== #

# Creating DOWNLOAD_DIR also creates EXPORT_DIR, since it is its parent.
os.makedirs(DOWNLOAD_DIR, exist_ok=True)
# Links collected by bing_search(); a set so duplicates collapse. The main
# script later rebinds this name to a list.
found_urls = set()
# NOTE(review): not referenced anywhere in the visible code — possibly a
# leftover; confirm before removing.
export_data = []

def bing_search(dork):
    """Page through Bing results for *dork* and collect matching file links.

    Requests result pages at offsets 0, 10, 20, ... below RESULTS_PER_DORK.
    Every ``<a href>`` on a page whose href contains ``.<ext>`` (for an
    extension in ALLOWED_TYPES, case-insensitive) at the end of the string
    or directly before a ``?`` is added to the module-level ``found_urls``
    set. Nothing is returned.

    Any exception raised while fetching or parsing a page is printed and
    the function moves on to the next page.
    """
    for offset in range(0, RESULTS_PER_DORK, 10):
        encoded_query = quote_plus(dork)
        page_url = "https://www.bing.com/search?q=" + encoded_query + "&first=" + str(offset)
        try:
            response = requests.get(page_url, headers=HEADERS, timeout=10)
            anchors = BeautifulSoup(response.text, 'html.parser').find_all('a', href=True)
            hrefs = (anchor['href'] for anchor in anchors)
            # set.update consumes the generator lazily, so links accepted
            # before a mid-page failure stay in the set.
            found_urls.update(
                href
                for href in hrefs
                if re.search(r'\.(' + '|'.join(ALLOWED_TYPES) + r')($|\?)', href, re.IGNORECASE)
            )
        except Exception as e:
            print(f"[ERROR] Bing query failed: {e}")

def download_file(url):
    """Download *url* to ``DOWNLOAD_DIR/<host>/<extension>/<file name>``.

    The host folder, extension folder and file name are all derived from the
    parsed URL (host and path only, never the query string) and reduced to a
    conservative character set. This keeps a crafted URL from steering the
    write outside DOWNLOAD_DIR: with plain string splitting, a query such as
    ``?f=../../etc`` yields an absolute "extension" that makes
    ``os.path.join`` discard the download root entirely.

    Only HTTP 200 responses are saved. Any failure (bad URL, network error,
    non-200 status, filesystem error) is reported on stdout and swallowed so
    one bad link does not abort the whole batch. Returns None.
    """
    try:
        parsed = urlparse(url)

        # Last path segment only; strip anything that is not a plain
        # file-name character, and leading dots so "." / ".." / hidden
        # names cannot be produced.
        raw_name = os.path.basename(parsed.path)
        safe_name = re.sub(r'[^\w.%-]', '_', raw_name).lstrip('.')
        if not safe_name:
            print(f"[DL FAIL] {url} - no usable file name in URL")
            return

        ext = os.path.splitext(safe_name)[1].lstrip('.') or 'unknown'
        # Dots become underscores as before; ports/userinfo separators
        # (":" and "@") are neutralised as well.
        domain = re.sub(r'[^\w-]', '_', parsed.netloc)

        folder = os.path.join(DOWNLOAD_DIR, domain, ext)
        filename = os.path.join(folder, safe_name)

        # Context manager releases the streamed connection back to the pool
        # even when the body is not read or writing fails.
        with requests.get(url, headers=HEADERS, timeout=15, stream=True) as r:
            if r.status_code != 200:
                print(f"[DL FAIL] {url} - HTTP {r.status_code}")
                return
            os.makedirs(folder, exist_ok=True)
            with open(filename, 'wb') as f:
                for chunk in r.iter_content(8192):
                    f.write(chunk)
    except Exception as e:
        print(f"[DL FAIL] {url} - {e}")

# === START === #
# Phase 1: run every configured query; bing_search() fills found_urls.
print("[🔍] Starting Bing Dork Scan...")
for dork in tqdm(DORKS, desc="Dorking"):
    bing_search(dork)

# Freeze the collected set into a sorted list so the exports (and download
# order) are reproducible between runs instead of following set order.
found_urls = sorted(found_urls)
print(f"[✔️] Found {len(found_urls)} unique files.")

# Phase 2: export results.
# Explicit UTF-8 so a non-ASCII URL cannot raise UnicodeEncodeError under a
# narrower platform default encoding.
with open(os.path.join(EXPORT_DIR, 'results.csv'), 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['URL'])
    writer.writerows([url] for url in found_urls)

with open(os.path.join(EXPORT_DIR, 'results.json'), 'w', encoding='utf-8') as jf:
    json.dump({'results': found_urls}, jf, indent=2)

# Phase 3: optional download.
if DOWNLOAD_FILES:
    print(f"[⬇️] Downloading {len(found_urls)} files...")
    with ThreadPoolExecutor(max_workers=THREADS) as executor:
        # list() drains the lazy map iterator so tqdm advances as each
        # download finishes; download_file handles its own errors.
        list(tqdm(executor.map(download_file, found_urls), total=len(found_urls)))

print("[🏁] Done.")