import os
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
import urllib.parse
import time
import re
from urllib.parse import urljoin, urlparse
import sys
from datetime import datetime

# Matrix-themed interface
def matrix_print(text, speed=0.05):
    """Print *text* one character at a time, typewriter style.

    Args:
        text: The characters to display (normally a string).
        speed: Seconds to pause after each character is written.

    Each character is flushed immediately so the effect is visible even when
    stdout is buffered; a newline is emitted once the text is finished.
    """
    for glyph in text:
        print(glyph, end="", flush=True)
        time.sleep(speed)
    print()

def clear_screen():
    """Clear the terminal window.

    Runs the platform's clear command through the shell: ``cls`` on Windows
    (``os.name == "nt"``) and ``clear`` everywhere else. The command's exit
    status is ignored, so a missing command does not stop the program.
    """
    os.system('cls' if os.name == 'nt' else 'clear')

def display_banner():
    """Wipe the terminal and draw the program's title box.

    The box is printed through ``matrix_print`` with a short per-character
    delay (0.01 s) so it appears quickly but still animated.
    """
    clear_screen()
    title_box = """
    ╔════════════════════════════════════════════╗
    ║        MATRIX WEB SCRAPER v1.1             ║
    ║        Powered by xAI Technology           ║
    ╚════════════════════════════════════════════╝
    """
    matrix_print(title_box, speed=0.01)

# Search engine configuration. Yandex is the only engine used; the URL-encoded
# query is appended directly to ``base_url``.
SEARCH_ENGINE = dict(
    name='Yandex',
    base_url='https://yandex.com/search/?text=',
)

# Menu number (as typed by the user) -> file extension. The extension is used
# both in the search query and as the name of the download sub-folder.
DORKS = {
    str(number): extension
    for number, extension in enumerate(
        ('pdf', 'doc', 'docx', 'jpeg', 'gif', 'mp4'),
        start=1,
    )
}

# Headers sent with every HTTP request. ``ua.random`` is read once, at import
# time, so the same User-Agent value is reused for the whole run.
# NOTE(review): presumably a randomly picked browser User-Agent string from
# fake_useragent — confirm against the installed version.
ua = UserAgent()
HEADERS = {"User-Agent": ua.random}

def get_user_input():
    """Interactively ask for the target domain and the file types to fetch.

    Shows the banner, then reads two answers from standard input: the domain
    and a comma-separated list of menu numbers (the keys of ``DORKS``).

    Returns:
        tuple: ``(domain, dorks)`` where ``domain`` is the lower-cased host
        name and ``dorks`` is the list of selected file extensions, in the
        order they were chosen and without duplicates.

    Exits the process with status 1 (``sys.exit``) when the domain is empty
    or when none of the entered numbers matches a known file type.
    """
    display_banner()
    matrix_print("Enter the target domain (e.g., example.com):")
    answer = input("> ").strip().lower()
    # Tolerate a pasted URL such as "https://example.com/docs": drop the scheme
    # and everything after the host. The value is later used in the ``site:``
    # query, compared with link hosts and used as a directory name, and all
    # three break if it still contains "https://" or a trailing "/".
    domain = answer.split("://", 1)[-1].split("/", 1)[0]
    if not domain:
        matrix_print("Error: Domain cannot be empty.")
        sys.exit(1)

    matrix_print("\nSelect file type(s) (comma-separated, e.g., 1,2,3):")
    matrix_print("1. PDF\n2. DOC\n3. DOCX\n4. JPEG\n5. GIF\n6. MP4")
    dork_choices = input("> ").strip().split(',')
    selected = (DORKS.get(choice.strip()) for choice in dork_choices)
    # dict.fromkeys keeps first-seen order while dropping repeats, so an
    # answer like "1,1,2" does not run the same search twice.
    dorks = list(dict.fromkeys(d for d in selected if d is not None))
    if not dorks:
        matrix_print("Error: No valid file types selected.")
        sys.exit(1)

    return domain, dorks

def create_directory_structure(domain, file_types, base_dir="/storage/emulated/0/Download"):
    """Create the download folder for *domain* with one sub-folder per file type.

    Args:
        domain: Name of the folder created directly under *base_dir*.
        file_types: Iterable of sub-folder names (the file extensions).
        base_dir: Root directory for all downloads. Defaults to the Android
            shared ``Download`` folder that the script has always used.

    Returns:
        str: Path of the domain folder, ``"<base_dir>/<domain>"``.

    Raises:
        OSError: If a directory cannot be created, for example because of
            missing permissions or because a regular file is in the way.
    """
    base_path = f"{base_dir}/{domain}"
    # exist_ok=True replaces the separate "exists?" check, which was open to a
    # race between the test and the creation.
    os.makedirs(base_path, exist_ok=True)
    for file_type in file_types:
        os.makedirs(os.path.join(base_path, file_type), exist_ok=True)
    return base_path

def resolve_url(url):
    """Return the address *url* finally points to after following redirects.

    A HEAD request (5 second timeout, shared ``HEADERS``) is sent with
    redirects enabled and the URL of the last response is returned. If the
    request fails with any ``requests.RequestException`` the input is
    returned unchanged.
    """
    try:
        final_response = requests.head(
            url,
            headers=HEADERS,
            allow_redirects=True,
            timeout=5,
        )
    except requests.RequestException:
        return url
    return final_response.url

def _is_on_domain(href, domain):
    """Return True when *href* is an absolute URL on *domain* or a subdomain of it."""
    try:
        host = urlparse(href).hostname
    except ValueError:
        # Malformed links (e.g. a broken IPv6 literal) are simply not a match.
        return False
    if not host:
        # Relative links and non-network schemes have no host.
        return False
    domain = domain.lower()
    # Exact host or dot-separated suffix only: a plain substring test would
    # also accept "notexample.com" or "example.com.evil.org".
    return host == domain or host.endswith("." + domain)


def scrape_urls(query, domain, max_pages=10):
    """Collect links to *domain* from successive Yandex result pages.

    Args:
        query: Search query text; it is URL-encoded before being sent.
        domain: Host name the links must belong to (subdomains included).
        max_pages: Maximum number of result pages to request.

    Returns:
        set: The redirect-resolved URLs found. Scraping stops early when a
        page yields no URL that was not already known, or when a page fails
        to load or parse; whatever was collected so far is still returned.
    """
    urls = set()
    seen_hrefs = set()  # raw links already handled, so none is resolved twice
    for page in range(1, max_pages + 1):
        try:
            # NOTE(review): Yandex result pages are usually addressed with a
            # zero-based ``p`` parameter; confirm that ``page`` is honoured.
            search_url = f"{SEARCH_ENGINE['base_url']}{urllib.parse.quote(query)}&page={page}"
            matrix_print(f"Scraping Yandex page {page}...")
            response = requests.get(search_url, headers=HEADERS, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            page_urls = set()
            for link in soup.find_all('a', href=True):
                href = link['href']
                if href in seen_hrefs or not _is_on_domain(href, domain):
                    continue
                seen_hrefs.add(href)
                resolved_url = resolve_url(href)
                if resolved_url:
                    page_urls.add(resolved_url)
        except Exception as e:
            # Deliberately broad: any network or parsing problem ends the
            # pagination but keeps the results gathered so far.
            matrix_print(f"Error scraping Yandex page {page}: {str(e)}")
            break

        new_urls = page_urls - urls
        if not new_urls:  # Nothing new (empty or repeated page): stop paginating
            matrix_print(f"No more results found on page {page}.")
            break
        urls.update(new_urls)
        matrix_print(f"Found {len(new_urls)} URLs on page {page}")
        if page < max_pages:
            time.sleep(2)  # Delay to avoid rate limiting
    return urls

def download_file(url, base_path, file_type):
    """Download *url* into ``<base_path>/<file_type>/``.

    Args:
        url: Address of the file to fetch.
        base_path: Domain download folder (must already contain a
            *file_type* sub-folder).
        file_type: File extension; selects the sub-folder and is used for the
            fallback file name.

    The file name is the last component of the URL path; when the path has
    none, ``file_<timestamp>.<file_type>`` is used. Nothing is returned and
    nothing is raised for ordinary failures: a non-200 status or any error is
    reported with ``matrix_print`` and the function returns.
    """
    try:
        # With stream=True the connection stays open until the body has been
        # consumed, so the response is used as a context manager to make sure
        # it is always released, including on errors and non-200 replies.
        with requests.get(url, headers=HEADERS, timeout=10, stream=True) as response:
            if response.status_code != 200:
                matrix_print(f"Failed to download {url}: Status {response.status_code}")
                return

            filename = os.path.basename(urlparse(url).path)
            if not filename:
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                filename = f"file_{timestamp}.{file_type}"
            file_path = os.path.join(base_path, file_type, filename)

            # Write to a temporary ".part" file and rename it only after the
            # whole body has arrived, so an interrupted transfer never leaves
            # a truncated file that looks like a finished download.
            part_path = f"{file_path}.part"
            try:
                with open(part_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
                os.replace(part_path, file_path)
            except BaseException:
                # Also runs on Ctrl-C; the original exception is re-raised.
                try:
                    os.remove(part_path)
                except OSError:
                    pass
                raise
        matrix_print(f"Downloaded: {filename}")
    except Exception as e:
        matrix_print(f"Error downloading {url}: {str(e)}")

def _has_extension(url, extension):
    """Return True when *url* (or its path component) ends with ``.extension``."""
    suffix = f".{extension}"
    if url.lower().endswith(suffix):
        return True
    try:
        path = urlparse(url).path
    except ValueError:
        return False
    # Looking at the path alone ignores a query string or fragment, so
    # "report.pdf?download=1" is still recognised as a PDF.
    return path.lower().endswith(suffix)


def main():
    """Run the whole workflow: prompt, create folders, search, download.

    For every selected file type a ``site:<domain> filetype:<ext>`` query is
    scraped and each resulting URL that carries the matching extension is
    downloaded into the corresponding sub-folder.
    """
    domain, dorks = get_user_input()
    base_path = create_directory_structure(domain, dorks)
    matrix_print(f"\nScraping files for {domain} using Yandex...")

    for dork in dorks:
        # NOTE(review): Yandex's own file-type operator appears to be
        # ``mime:``; confirm that ``filetype:`` is honoured.
        query = f"site:{domain} filetype:{dork}"
        matrix_print(f"\nSearching for {dork.upper()} files...")
        all_urls = scrape_urls(query, domain)
        matrix_print(f"\nTotal unique URLs for {dork.upper()}: {len(all_urls)}")
        for url in all_urls:
            if _has_extension(url, dork):
                download_file(url, base_path, dork)

    matrix_print("\nScraping and downloading completed.")
    # Report the folder that was actually created instead of rebuilding the path.
    matrix_print(f"Files saved in: {base_path}")

# Script entry point: run the workflow and map its outcome to an exit status.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C counts as a normal, user-requested stop (status 0).
        matrix_print("\nOperation cancelled by user.")
        sys.exit(0)
    except Exception as error:
        # Anything not handled further down is reported and ends with status 1.
        matrix_print(f"Fatal error: {error!s}")
        sys.exit(1)
