#!/usr/bin/env python3
"""Command-line scraper for yiffer.xyz: browse/search comics and download them as CBZ archives."""
import urllib.request
import urllib.parse
import re
import json
import os
import sys
import tempfile
import zipfile
import argparse
import concurrent.futures

BASE_URL = "https://yiffer.xyz"
PICS_BASE_URL = "https://pics.yiffer.xyz"


def fetch_html(url):
    """Fetch *url* and return the response body decoded as UTF-8.

    Returns None on HTTP 404; any other HTTPError propagates to the caller.
    A browser-like User-Agent is sent because the default urllib agent is
    commonly rejected.
    """
    req = urllib.request.Request(
        url,
        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'},
    )
    try:
        with urllib.request.urlopen(req) as response:
            return response.read().decode('utf-8')
    except urllib.error.HTTPError as e:
        if e.code == 404:
            return None
        raise


def parse_turbo_stream(html):
    """Extract and parse the Remix turbo-stream JSON structure.

    The page embeds its data as JS calls of the form
    ``streamController.enqueue("<json-string>");`` where the argument is a
    JSON string literal whose *content* is itself a JSON array. Returns the
    concatenation of all such arrays (a flat list mixing strings, ints and
    dicts), or [] when *html* is falsy or nothing parses.
    """
    if not html:
        return []
    enqueues = re.findall(r'streamController\.enqueue\((.*?)\);', html, re.DOTALL)
    arr = []
    for chunk in enqueues:
        try:
            # First loads decodes the JS/JSON string literal...
            decoded_str = json.loads(chunk)
            # ...second loads parses the JSON array it contains.
            part_arr = json.loads(decoded_str)
            arr.extend(part_arr)
        except (ValueError, TypeError):
            # Best-effort: skip chunks that are not double-encoded JSON arrays.
            continue
    return arr


def browse_comics(page=1, search=None):
    """Browse or search comics from yiffer.xyz.

    Returns ``(comics, total_comics)`` where *comics* is a list of dicts with
    keys ``id``, ``name`` and ``pages``, and *total_comics* is the site's
    total result count (0 when unavailable). Returns ([], 0) on parse failure.

    The turbo-stream array is pointer-based: dict values are integer indices
    back into the same flat array, and dict keys are "_<index of the key
    string>" — hence the f"_{...}" key construction below.
    """
    url = f"{BASE_URL}/browse?page={page}"
    if search:
        url += f"&search={urllib.parse.quote(search)}"
    print(f"Fetching: {url}")
    html = fetch_html(url)
    arr = parse_turbo_stream(html)
    try:
        comics_idx = arr.index("comicsAndAds")
        comics_arr = arr[comics_idx + 1]
    except (ValueError, IndexError):
        print("Error: Could not find comics listing in the stream.")
        return [], 0
    try:
        id_key = f"_{arr.index('id')}"
        name_key = f"_{arr.index('name')}"
        pages_key = f"_{arr.index('numberOfPages')}"
    except ValueError:
        print("Error: Could not locate required keys in the stream.")
        return [], 0
    total_comics = 0
    try:
        tc_idx = arr.index("totalNumComics")
        val = arr[tc_idx + 1]
        if isinstance(val, int):
            total_comics = val
    except (ValueError, IndexError):
        pass  # total count is optional; 0 means "unknown"
    comics = []
    for ptr in comics_arr:
        item_obj = arr[ptr]
        # Ads share the listing array; keep only entries that look like comics.
        if isinstance(item_obj, dict) and id_key in item_obj and name_key in item_obj:
            c_id = arr[item_obj[id_key]]
            c_name = arr[item_obj[name_key]]
            c_pages = arr[item_obj[pages_key]] if pages_key in item_obj else 0
            comics.append({"id": c_id, "name": c_name, "pages": c_pages})
    return comics, total_comics


def get_comic_data(comic_name):
    """Get metadata and page tokens for a specific comic.

    Returns a dict ``{"id", "name", "pages"}`` where ``"pages"`` is the list
    of per-page image tokens used to build picture URLs, or None when the
    comic is missing or the stream cannot be parsed.
    """
    url = f"{BASE_URL}/c/{urllib.parse.quote(comic_name)}"
    html = fetch_html(url)
    if not html:
        print(f"Comic '{comic_name}' not found.")
        return None
    arr = parse_turbo_stream(html)
    if not arr:
        print("Failed to parse comic stream.")
        return None
    try:
        id_idx = arr.index("id")
        comic_id = arr[id_idx + 1]
    except (ValueError, IndexError):
        print("Could not find comic ID.")
        return None
    try:
        name_idx = arr.index("name")
        comic_name_real = arr[name_idx + 1]
    except (ValueError, IndexError):
        # Canonical name is optional; fall back to the caller's spelling.
        comic_name_real = comic_name
    try:
        pages_idx = arr.index("pages")
        pages_arr = arr[pages_idx + 1]
    except (ValueError, IndexError):
        print("Could not find pages array.")
        return None
    try:
        # Pointer-based keys: "_<index of the 'token' string>" (see browse_comics).
        token_key = f"_{arr.index('token')}"
    except ValueError:
        print("Could not find token string in array.")
        return None
    tokens = []
    for ptr in pages_arr:
        page_obj = arr[ptr]
        if token_key in page_obj:
            tokens.append(arr[page_obj[token_key]])
    return {"id": comic_id, "name": comic_name_real, "pages": tokens}


def download_image(url, output_path):
    """Download *url* and save the bytes to *output_path*.

    Returns ``(True, "Success")`` on success or ``(False, <error message>)``
    on any failure — errors are reported rather than raised so that one bad
    page does not abort a whole concurrent batch.
    """
    try:
        req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        with urllib.request.urlopen(req) as response:
            with open(output_path, 'wb') as f:
                f.write(response.read())
        return True, "Success"
    except Exception as e:
        return False, str(e)


def download_comic(comic_name, output_dir, max_workers=5):
    """Download all pages of a comic directly into a CBZ file.

    Pages are fetched concurrently into a temporary directory; the CBZ is
    written only when every page downloaded successfully. Existing non-empty
    CBZ files are skipped so the command is safely re-runnable.
    """
    print(f"Fetching metadata for '{comic_name}'...")
    comic_data = get_comic_data(comic_name)
    if not comic_data:
        return
    c_id = comic_data['id']
    c_name = comic_data['name']
    tokens = comic_data['pages']
    num_pages = len(tokens)
    print(f"Found comic: {c_name} (ID: {c_id}) with {num_pages} pages.")
    # Strip characters that are invalid in Windows filenames.
    safe_name = re.sub(r'[\\/*?:"<>|]', "", c_name)
    os.makedirs(output_dir, exist_ok=True)
    cbz_filepath = os.path.join(output_dir, f"{safe_name}.cbz")
    if os.path.exists(cbz_filepath) and os.path.getsize(cbz_filepath) > 0:
        print(f"Skipping: {cbz_filepath} already exists.")
        return
    print(f"Downloading {num_pages} pages into temporary directory before zipping...")
    with tempfile.TemporaryDirectory() as tmp_dir:
        tasks = []
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            for i, token in enumerate(tokens, 1):
                url = f"{PICS_BASE_URL}/comics/{c_id}/{token}.jpg"
                filename = f"{i:03d}.jpg"
                filepath = os.path.join(tmp_dir, filename)
                tasks.append((i, filename, executor.submit(download_image, url, filepath)))
            all_successful = True
            for i, filename, future in tasks:
                success, msg = future.result()
                # BUG FIX: these progress lines previously printed the literal
                # text "(unknown)" instead of the page's filename.
                if success:
                    print(f"[{i:03d}/{num_pages:03d}] Downloaded: {filename}")
                else:
                    print(f"[{i:03d}/{num_pages:03d}] FAILED: {filename} - {msg}")
                    all_successful = False
        if all_successful:
            print(f"Packaging into {cbz_filepath}...")
            # ZIP_STORED (no compression): JPEGs are already compressed and
            # CBZ readers expect plain stored archives.
            with zipfile.ZipFile(cbz_filepath, 'w', zipfile.ZIP_STORED) as cbz:
                for i in range(1, num_pages + 1):
                    filename = f"{i:03d}.jpg"
                    cbz.write(os.path.join(tmp_dir, filename), arcname=filename)
            print(f"\nDownload complete! Saved to {cbz_filepath}")
        else:
            print("\nDownload incomplete due to failures. Skipping CBZ creation.")


def main():
    """Parse CLI arguments and dispatch to the browse/download commands."""
    parser = argparse.ArgumentParser(description="Scraper for Yiffer.xyz comics")
    subparsers = parser.add_subparsers(dest="command", help="Command to run")
    # Browse/Search Command
    cmd_browse = subparsers.add_parser("browse", help="Browse or search for comics")
    cmd_browse.add_argument("--page", type=int, default=1, help="Page number to view (default 1)")
    cmd_browse.add_argument("--search", type=str, default="", help="Search query")
    # Download Command
    cmd_download = subparsers.add_parser("download", help="Download a specific comic by name")
    cmd_download.add_argument("name", type=str, help="Exact name of the comic to download (case-insensitive, exact match on URL)")
    cmd_download.add_argument("--output", type=str, default="downloads", help="Output directory to save the comic (default 'downloads')")
    cmd_download.add_argument("--workers", type=int, default=5, help="Number of concurrent downloads (default 5)")
    # Bulk Download Command
    cmd_recent = subparsers.add_parser("download-recent", help="Download a batch of the most recent comics")
    cmd_recent.add_argument("--count", type=int, default=5, help="Number of recent comics to download (default 5)")
    cmd_recent.add_argument("--output", type=str, default="downloads", help="Output directory to save the comics")
    cmd_recent.add_argument("--workers", type=int, default=5, help="Number of concurrent downloads (default 5)")
    args = parser.parse_args()
    if args.command == "browse":
        print(f"--- Yiffer.xyz Browse (Page {args.page}) {'Search: ' + args.search if args.search else ''} ---")
        comics, total = browse_comics(args.page, args.search)
        total = int(total) if total else 0
        if total > 0:
            print(f"Total comics found: {total}")
        print("-" * 60)
        for i, c in enumerate(comics, 1):
            print(f"{i:2d}. {c['name']} ({c['pages']} pages) [ID: {c['id']}]")
        print("-" * 60)
    elif args.command == "download":
        download_comic(args.name, args.output, args.workers)
    elif args.command == "download-recent":
        # Always fetch page 1 to get the most recent ones
        print(f"Fetching the {args.count} most recent comics...")
        comics, _ = browse_comics(1, "")
        if not comics:
            print("Could not retrieve comics list.")
            return
        comics_to_download = comics[:args.count]
        for idx, comic in enumerate(comics_to_download, 1):
            print(f"\n--- [{idx}/{len(comics_to_download)}] Downloading: {comic['name']} ---")
            download_comic(comic['name'], args.output, args.workers)
    else:
        parser.print_help()


if __name__ == "__main__":
    main()