#!/usr/bin/env python3
"""
Twitter In-Depth Tweet Analysis

Fetches a tweet and up to 25 of its most-liked replies, downloads attached
photos, and generates a markdown report.

Usage:
    twitter-analyze <tweet_url_or_id>

Examples:
    twitter-analyze https://x.com/karpathy/status/2004607146781278521
    twitter-analyze 2004607146781278521

Output:
    analyses/@username-YYYY-MM-DD-first-words-of-tweet.md
"""

import sys
import os
import re
import asyncio
import argparse
import time
import random
from datetime import datetime, timezone
from pathlib import Path

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Put the directory one level above this script's folder at the front of
# sys.path so modules located there take precedence on import.
sys.path.insert(0, str(Path(__file__).parent.parent))

# This does not really "activate" a venv: it only prepends the venv's
# site-packages directory to sys.path, presumably so the third-party imports
# below resolve when the script is run with the system interpreter.
# NOTE(review): the path hard-codes "python3.12"; a venv created with another
# Python version would not be picked up — confirm this is intended.
venv_path = Path(__file__).parent.parent / "venv" / "lib" / "python3.12" / "site-packages"
sys.path.insert(0, str(venv_path))

from twikit import Client
from twikit.errors import TooManyRequests
import filetype

# Directory one level above this script's folder (presumably the project root).
BASE_DIR = Path(__file__).parent.parent
# Cookie file loaded into the twikit client by analyze_tweet().
COOKIES_FILE = BASE_DIR / "PHASES" / "PHASE-01-INGEST" / "cookies.json"
# Reports are written here; downloaded images go under its "images" subfolder.
ANALYSES_DIR = BASE_DIR / "analyses"


def extract_tweet_id(input_str):
    """Return the numeric tweet ID contained in *input_str*.

    Accepts either a status URL (any string containing ``/status/<digits>``)
    or a bare numeric ID. Leading/trailing whitespace is ignored so that
    values pasted with a trailing newline or space still work.

    Only ASCII digits are accepted: ``str.isdigit()`` and ``\\d`` also match
    characters such as superscripts or Arabic-Indic digits, which are not
    valid tweet IDs.

    Args:
        input_str: Tweet URL or tweet ID as typed by the user.

    Returns:
        The tweet ID as a string of ASCII digits.

    Raises:
        ValueError: If no tweet ID can be found in *input_str*.
    """
    candidate = input_str.strip()

    match = re.search(r'/status/([0-9]+)', candidate)
    if match:
        return match.group(1)

    if re.fullmatch(r'[0-9]+', candidate):
        return candidate

    raise ValueError(f"Cannot extract tweet ID from: {input_str}")


def sanitize_for_filename(text, max_length=30):
    """Turn free text into a short, hyphen-separated filename fragment.

    The text is lower-cased, every character that is not a word character,
    whitespace or a hyphen is dropped, and each run of whitespace and/or
    hyphens becomes a single ``-``. Hyphens at either end are removed, so
    text that starts with a stripped symbol (e.g. an emoji followed by a
    space) no longer produces a leading ``-``.

    Args:
        text: Source text, e.g. the body of a tweet.
        max_length: Maximum number of characters in the result.

    Returns:
        The sanitized fragment; may be an empty string when *text* contains
        no usable characters.
    """
    clean = re.sub(r'[^\w\s-]', '', text.lower())
    # Collapse whitespace *and* existing hyphens together so "a - b" becomes
    # "a-b" rather than "a---b".
    clean = re.sub(r'[\s-]+', '-', clean)
    # Strip before truncating so a leading hyphen does not eat into the
    # length budget; strip again afterwards in case the cut lands on one.
    return clean.strip('-')[:max_length].rstrip('-')


def create_session_with_retries():
    """Build a ``requests`` session whose GET requests are retried.

    Up to three retries with a backoff factor of 1 are attempted for the
    status codes 429, 500, 502, 503 and 504. The same adapter serves both
    ``http://`` and ``https://`` URLs, and the session identifies itself
    with a fixed User-Agent header.

    Returns:
        The configured ``requests.Session``.
    """
    retry_policy = Retry(
        total=3,
        backoff_factor=1,
        status_forcelist=(429, 500, 502, 503, 504),
        allowed_methods=["GET"],
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    http = requests.Session()
    for scheme in ("http://", "https://"):
        http.mount(scheme, retrying_adapter)
    http.headers.update({"User-Agent": "twitter-pipeline/1.0"})
    return http


def detect_extension(content):
    """Guess a file extension (with leading dot) from raw bytes.

    Delegates to ``filetype.guess``; when the type cannot be determined the
    extension defaults to ``.jpg``.
    """
    guessed = filetype.guess(content)
    return f".{guessed.extension}" if guessed else ".jpg"


def download_image(session, url, filepath_base):
    """Fetch *url* and store it next to *filepath_base* with a detected suffix.

    Args:
        session: Object providing ``get(url, timeout=...)`` (a requests session).
        url: Address of the image to download.
        filepath_base: ``Path`` whose suffix is replaced by the extension
            detected from the downloaded bytes.

    Returns:
        ``(True, written_path, None)`` on success, or
        ``(False, None, "<ExceptionName>: <message>")`` if anything fails;
        no exception is propagated to the caller.
    """
    try:
        reply = session.get(url, timeout=30)
        reply.raise_for_status()
        payload = reply.content

        # The suffix is chosen from the bytes actually received.
        target = filepath_base.with_suffix(detect_extension(payload))
        with open(target, 'wb') as sink:
            sink.write(payload)
    except Exception as exc:
        return False, None, f"{type(exc).__name__}: {exc}"

    return True, target, None


async def call_with_rate_limit_handling(awaitable_factory, max_retries=3):
    """
    Await ``awaitable_factory()`` and retry it when twikit reports a rate limit.

    On ``TooManyRequests`` the coroutine sleeps until the reset time carried by
    the exception (plus 10-30s of jitter). If no usable reset time is present
    it sleeps 15 minutes plus 10-60s of jitter. The exception is re-raised once
    the final attempt has failed. Other exceptions are not handled here.

    Returns the awaited result, or None if the loop body never runs
    (``max_retries`` below 1).
    """
    for attempt in range(1, max_retries + 1):
        try:
            return await awaitable_factory()
        except TooManyRequests as limit_err:
            # Out of attempts: let the caller see the rate-limit error.
            if attempt == max_retries:
                raise

            current = int(time.time())
            reset_at = getattr(limit_err, 'rate_limit_reset', None)

            if not reset_at or reset_at <= current:
                # No reset time in the future is known: wait a full window.
                wait = 15 * 60 + random.randint(10, 60)
            else:
                wait = reset_at - current + random.randint(10, 30)

            print(f"  Rate limited! Waiting {wait}s (attempt {attempt}/{max_retries})...")
            await asyncio.sleep(wait)

    return None


_MAX_REPLIES = 25  # replies kept in the report after sorting by likes
_MAX_MEDIA = 4  # only the first four media entries of a tweet are inspected


def _tweet_dates(tweet):
    """Return ``(date_iso, date_full)`` for *tweet*, expressed in UTC.

    Uses ``tweet.created_at_datetime`` when available, otherwise parses the
    ``tweet.created_at`` string. If that string cannot be parsed, today's UTC
    date is used for ``date_iso`` and the raw string for ``date_full``.
    """
    dt = getattr(tweet, 'created_at_datetime', None)
    if not dt:
        try:
            dt = datetime.strptime(tweet.created_at, '%a %b %d %H:%M:%S %z %Y')
        except (TypeError, ValueError):
            # Only parse failures are expected here; anything else should
            # surface instead of being hidden by a bare ``except``.
            return datetime.now(timezone.utc).strftime('%Y-%m-%d'), tweet.created_at

    dt_utc = dt.astimezone(timezone.utc)
    return dt_utc.strftime('%Y-%m-%d'), dt_utc.strftime('%B %d, %Y at %H:%M UTC')


async def _fetch_top_replies(client, tweet_id):
    """Return up to ``_MAX_REPLIES`` replies to *tweet_id*, most-liked first.

    Replies come from a ``conversation_id:`` search; a second result page is
    requested when the first one yields fewer than ``_MAX_REPLIES`` replies.
    Fetch failures are reported on stdout and whatever was collected so far
    is returned (best effort).
    """
    replies = []
    try:
        query = f"conversation_id:{tweet_id}"
        results = await call_with_rate_limit_handling(
            lambda: client.search_tweet(query, 'Top', count=20)
        )

        if results:
            # The search also returns the original tweet; leave it out.
            replies.extend(r for r in results if r.id != tweet_id)

            if len(replies) < _MAX_REPLIES and hasattr(results, 'next'):
                try:
                    more_results = await call_with_rate_limit_handling(results.next)
                    if more_results:
                        # No cap here: truncation happens after sorting so the
                        # most-liked replies of both pages are the ones kept.
                        replies.extend(r for r in more_results if r.id != tweet_id)
                except Exception as e:
                    # Best effort: keep the first page, but say what happened
                    # (and let CancelledError/KeyboardInterrupt propagate).
                    print(f"  Could not fetch more replies: {e}")

        print(f"  Found {len(replies)} replies")
    except Exception as e:
        print(f"  Could not fetch replies: {e}")

    replies.sort(key=lambda r: r.favorite_count, reverse=True)
    return replies[:_MAX_REPLIES]


def _download_photos(tweet, tweet_id, images_dir):
    """Download the tweet's photos into *images_dir*.

    Returns ``(downloaded_images, has_video)`` where ``downloaded_images`` is
    a list of dicts with ``filename``, ``path`` and ``url`` keys, and
    ``has_video`` tells whether a video or animated GIF was seen (those are
    not downloaded).
    """
    downloaded_images = []
    has_video = False
    if not tweet.media:
        return downloaded_images, has_video

    username = tweet.user.screen_name
    print(f"Downloading {len(tweet.media)} media files...")

    # The context manager closes the session's pooled connections when done.
    with create_session_with_retries() as session:
        for idx, media in enumerate(tweet.media[:_MAX_MEDIA], 1):
            media_type = type(media).__name__
            if media_type == 'Photo':
                filepath_base = images_dir / f"{username}-tweetid-{tweet_id}-image{idx}"
                success, final_path, error = download_image(
                    session, media.media_url, filepath_base
                )
                if success:
                    downloaded_images.append({
                        'filename': final_path.name,
                        'path': str(final_path),
                        'url': media.media_url,
                    })
                else:
                    print(f"  Failed to download: {error}")
            elif media_type in ('Video', 'AnimatedGif'):
                has_video = True

    return downloaded_images, has_video


def _render_markdown(tweet, tweet_id, date_iso, date_full, replies,
                     downloaded_images, has_video):
    """Build the markdown report text for *tweet* and its *replies*."""
    username = tweet.user.screen_name

    md = []
    md.append(f"# @{username} - {date_iso}\n")
    md.append(f"**Author:** {tweet.user.name} ([@{username}](https://x.com/{username}))\n")
    md.append(f"**Posted:** {date_full}\n")
    md.append(f"**Tweet URL:** https://x.com/{username}/status/{tweet_id}\n")
    md.append("")

    md.append("## Original Tweet\n")
    md.extend(f"> {line}" for line in tweet.text.split('\n'))
    md.append("")

    md.append(f"**Stats:** {tweet.favorite_count:,} likes · {tweet.retweet_count:,} retweets · {tweet.reply_count:,} replies\n")
    md.append("")

    # Media section
    md.append("## Media\n")
    if downloaded_images:
        md.append(f"**Images:** {len(downloaded_images)}\n")
        for img in downloaded_images:
            md.append(f"- `{img['filename']}`")
            md.append(f"  - Path: `{img['path']}`")
            md.append(f"  - URL: {img['url']}")
            md.append("  - **Description:** [TO BE FILLED BY CLAUDE]")
            md.append("")
    # Checked independently of the images so a tweet that mixes photos and
    # video still mentions the video.
    if has_video:
        md.append("**Video:** Yes (not downloaded)\n")
    if not downloaded_images and not has_video:
        md.append("**Media:** None\n")
    md.append("")

    md.append("---\n")

    # Top comments
    md.append(f"## Top Comments ({len(replies)} of {tweet.reply_count:,} replies)\n")
    md.append("*Sorted by likes. Note: Replies to replies not included (future enhancement).*\n")
    md.append("")

    if replies:
        for idx, reply in enumerate(replies, 1):
            md.append(f"### {idx}. @{reply.user.screen_name} ({reply.favorite_count:,} likes)\n")
            reply_content = reply.text.replace('\n', '\n> ')
            md.append(f"> {reply_content}\n")
            md.append("")
    else:
        md.append("*No replies fetched or available.*\n")

    md.append("---\n")
    md.append(f"*Generated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}*\n")

    return '\n'.join(md)


async def analyze_tweet(tweet_id):
    """Fetch a tweet and its top replies, then write a markdown report.

    Steps: load cookies into a twikit client, fetch the tweet, fetch replies
    via a conversation search, download the tweet's photos, and write the
    report to ``ANALYSES_DIR``.

    Args:
        tweet_id: Tweet ID as a string of digits.

    Returns:
        Path of the markdown file that was written.

    Exits the process with status 1 if the tweet cannot be fetched or is not
    found. Reply and image failures are reported but do not abort the run.
    """
    client = Client('en-US')
    client.load_cookies(str(COOKIES_FILE))

    print(f"Fetching tweet {tweet_id}...")

    try:
        tweets = await call_with_rate_limit_handling(
            lambda: client.get_tweets_by_ids([tweet_id])
        )
    except Exception as e:
        print(f"Error fetching tweet: {e}")
        sys.exit(1)
    if not tweets:
        print("Error: Tweet not found")
        sys.exit(1)
    tweet = tweets[0]

    username = tweet.user.screen_name
    date_iso, date_full = _tweet_dates(tweet)

    # Fall back to the tweet ID when the text has nothing usable for a
    # filename (e.g. emoji-only tweets), avoiding names like "@user-date-.md".
    first_words = sanitize_for_filename(tweet.text) or tweet_id
    output_path = ANALYSES_DIR / f"@{username}-{date_iso}-{first_words}.md"

    # Create images directory for this analysis
    images_dir = ANALYSES_DIR / "images" / f"{username}-{tweet_id}"
    images_dir.mkdir(parents=True, exist_ok=True)

    print("Fetching replies...")
    replies = await _fetch_top_replies(client, tweet_id)

    downloaded_images, has_video = _download_photos(tweet, tweet_id, images_dir)

    report = _render_markdown(
        tweet, tweet_id, date_iso, date_full, replies, downloaded_images, has_video
    )

    # Write file
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(report)

    print(f"\nDone! Output: {output_path}")
    print(f"  - {len(downloaded_images)} images downloaded to {images_dir}")
    print(f"  - {len(replies)} comments included")

    return output_path


def main():
    """Command-line entry point.

    Reads the single positional ``tweet`` argument (URL or ID), resolves it to
    a tweet ID, prints a short banner and runs ``analyze_tweet`` on the asyncio
    event loop. Exits with status 1 if no tweet ID can be extracted.
    """
    cli = argparse.ArgumentParser(description='In-depth tweet analysis')
    cli.add_argument('tweet', help='Tweet URL or ID')
    raw_target = cli.parse_args().tweet

    try:
        tweet_id = extract_tweet_id(raw_target)
    except ValueError as err:
        print(f"Error: {err}")
        sys.exit(1)

    # Banner: title, the resolved ID, then a blank line.
    print("Twitter In-Depth Analysis", f"  Tweet ID: {tweet_id}", "", sep="\n")

    asyncio.run(analyze_tweet(tweet_id))


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()