import requests
import subprocess
import uuid
import os
import json

from bs4 import BeautifulSoup

def fetch_page_html(url: str) -> str:
    """Fetch raw HTML for ``url`` with a plain HTTP GET, raising on HTTP errors."""
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return response.text

def extract_text_from_html(html: str) -> str:
    """Strip markup and return the page's visible text, one text node per line."""
    soup = BeautifulSoup(html, "html.parser")
    return soup.get_text(separator="\n")

def fetch_page_html_scrapy(url: str) -> str:
    """Fetch a page's HTML by running the ``funder`` Scrapy spider as a subprocess.

    Each call writes items to a unique temp file so concurrent runs cannot
    collide.
    """
    job_id = str(uuid.uuid4())
    output_file = f"/tmp/{job_id}.json"

    # The .json extension already tells Scrapy's feed exporter to emit JSON,
    # so the deprecated "-t json" flag is unnecessary.
    command = [
        "scrapy", "crawl", "funder",
        "-a", f"url={url}",
        "-o", output_file,
    ]

    # Run from the Scrapy project directory so scrapy.cfg is picked up.
    project_root = os.path.join(os.path.dirname(__file__), "funderspider")
    result = subprocess.run(command, cwd=project_root, capture_output=True, text=True)

    if result.returncode != 0:
        raise RuntimeError(f"Scrapy failed: {result.stderr}")

    # Parse the scraped items, removing the temp file even if parsing fails.
    try:
        with open(output_file, "r") as f:
            data = json.load(f)
    finally:
        if os.path.exists(output_file):
            os.remove(output_file)

    if not data or 'html' not in data[0]:
        raise ValueError("Scrapy did not return valid HTML.")

    return data[0]['html']
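
# A minimal usage sketch, not part of the module's API. Assumptions:
# "https://example.org" is a placeholder URL, and falling back to the
# Scrapy spider when the plain GET fails is one reasonable policy.
if __name__ == "__main__":
    target = "https://example.org"  # placeholder; swap in a real page
    try:
        html = fetch_page_html(target)
    except requests.RequestException:
        # Plain GET failed (timeout, HTTP error, blocked client); let the
        # Scrapy spider try instead.
        html = fetch_page_html_scrapy(target)
    print(extract_text_from_html(html))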