"""Agentic RAG Platform — Hugging Face Spaces Demo.

This is a standalone Gradio app that showcases the Agentic RAG Platform.
When the ``API_BASE_URL`` environment variable points at a backend, questions
are sent to it for live responses. Otherwise, or if that request fails, the app
runs in demo mode with pre-built sample responses.
"""

import json
import logging
import os

import gradio as gr

# ── Configuration ────────────────────────────────────────────────────────────
# Public project links that are interpolated into the UI text.
GITHUB_URL = "https://github.com/rishu2207/agentic-rag-platform"
PORTFOLIO_URL = "https://rishu2207.github.io/agentic-rag-platform"

# Base URL of the live backend. An unset variable yields "", which keeps the
# app in demo mode.
API_BASE_URL = os.environ.get("API_BASE_URL", "")

# ── Pre-built demo responses (used when no live backend is available) ──────
def _reasoning_trace(*steps):
    """Expand (node, detail) pairs into the step dicts shown as agent reasoning."""
    return [{"node": node, "detail": detail} for node, detail in steps]


# Canned Markdown answer for the "RAG vs. fine-tuning" example question.
_RAG_VS_FINE_TUNING_ANSWER = (
    "**Retrieval-Augmented Generation (RAG)** is a framework that enhances "
    "large language models by retrieving relevant documents from an external "
    "knowledge base before generating a response.\n\n"
    "### Key Advantages over Fine-tuning:\n\n"
    "1. **No retraining needed** — RAG can access up-to-date information "
    "without expensive model retraining cycles.\n"
    "2. **Grounded answers** — Responses are anchored to retrieved source "
    "documents, reducing hallucination.\n"
    "3. **Domain adaptability** — Swap the retrieval corpus to adapt to new "
    "domains instantly.\n"
    "4. **Cost-effective** — Avoids GPU-intensive fine-tuning while maintaining "
    "high-quality outputs.\n\n"
    "### When to choose RAG over fine-tuning:\n"
    "- Your knowledge base changes frequently\n"
    "- You need traceable, citation-backed answers\n"
    "- You have limited compute for training\n"
    "- Domain coverage is broad but shallow\n\n"
    "**Sources:** Lewis et al. (2020) *\"Retrieval-Augmented Generation for "
    "Knowledge-Intensive NLP Tasks\"* [arXiv:2005.11401]"
)

# Canned Markdown answer for the "reciprocal rank fusion" example question.
_RRF_ANSWER = (
    "**Reciprocal Rank Fusion (RRF)** is a simple yet effective method for "
    "combining ranked lists from different retrieval systems.\n\n"
    "### How it works:\n\n"
    "Given a document *d* appearing at rank *r* in a ranked list, the RRF "
    "score is:\n\n"
    "```\n"
    "RRF(d) = Σ 1 / (k + r_i)\n"
    "```\n\n"
    "where *k* is a constant (typically 60) and *r_i* is the rank of document "
    "*d* in the *i*-th ranking.\n\n"
    "### In this platform:\n"
    "1. **BM25** retrieves top-k by keyword relevance\n"
    "2. **k-NN** retrieves top-k by cosine similarity (Jina v3 1024-dim)\n"
    "3. **RRF pipeline** in OpenSearch merges both rankings\n"
    "4. The fused list is returned as the final retrieval result\n\n"
    "### Limitations:\n"
    "- RRF ignores the actual scores, only using ranks\n"
    "- Equal weighting may not be optimal for all query types\n"
    "- The constant *k* is a hyperparameter that needs tuning\n\n"
    "**Sources:** Cormack et al. (2009) *\"Reciprocal Rank Fusion outperforms "
    "Condorcet and individual Rank Learning Methods\"*"
)

# Maps each canned question to its answer, reasoning trace and source URLs.
# Insertion order is significant: lookups iterate the dict front to back.
DEMO_RESPONSES = {
    "What is retrieval-augmented generation and when is it a better choice than fine-tuning?": {
        "answer": _RAG_VS_FINE_TUNING_ANSWER,
        "reasoning_steps": _reasoning_trace(
            ("guardrail", "Score: 95/100 — on-topic AI/ML research question"),
            ("retrieve", "Hybrid search returned 5 chunks (BM25 + kNN with RRF)"),
            ("grade_documents", "4/5 chunks graded as relevant"),
            ("generate", "Generated grounded answer with 1 citation"),
        ),
        "sources": ["https://arxiv.org/abs/2005.11401", "https://arxiv.org/abs/2312.10997"],
    },
    "How does reciprocal rank fusion combine BM25 and dense-vector scores?": {
        "answer": _RRF_ANSWER,
        "reasoning_steps": _reasoning_trace(
            ("guardrail", "Score: 92/100 — valid information retrieval question"),
            ("retrieve", "Hybrid search returned 5 chunks"),
            ("grade_documents", "5/5 chunks graded as relevant"),
            ("generate", "Generated detailed answer with formula"),
        ),
        "sources": ["https://arxiv.org/abs/2210.11934"],
    },
}

# Markdown shown when no backend is configured and no canned response matches.
# Written one rendered line per entry; empty entries are paragraph breaks.
DEFAULT_ANSWER = "\n".join(
    [
        "⚠️ **Demo Mode** — This Space is running without a live backend.",
        "",
        "In production, this query would be processed by the full Agentic RAG pipeline:",
        "",
        "1. **Guardrail** validates the query is in-scope",
        "2. **Hybrid search** retrieves passages via BM25 + dense vectors",
        "3. **Document grading** filters irrelevant chunks",
        "4. **Query rewriting** retries when evidence is weak",
        "5. **Answer generation** produces grounded responses with citations",
        "",
        "👉 Try one of the **example questions** below for a full demo response!",
        "",
        f"🔗 [View the full project on GitHub]({GITHUB_URL})",
    ]
)


def find_closest_demo(query: str) -> dict | None:
    """Find the closest matching demo response."""
    q = query.lower().strip()
    for key, val in DEMO_RESPONSES.items():
        if any(word in q for word in key.lower().split()[:4]):
            return val
    return None


_LOGGER = logging.getLogger(__name__)


def _format_sources(sources) -> str:
    """Render source URLs as a Markdown bullet list ("" when there are none)."""
    lines = []
    for src in sources or []:
        url = str(src)
        # Label the link with the last path segment; a trailing slash would
        # otherwise produce an empty label, so fall back to the full URL.
        label = url.rstrip("/").rsplit("/", 1)[-1] or url
        lines.append(f"- [{label}]({url})")
    return "\n".join(lines)


def _format_reasoning(steps) -> str:
    """Render reasoning steps as "**node**: detail" lines ("" when there are none)."""
    return "\n".join(
        f"**{step.get('node', '?')}**: {step.get('detail', '')}"
        for step in steps or []
    )


async def ask_question(
    query: str,
    top_k: int = 5,
    use_hybrid: bool = True,
) -> tuple[str, str, str]:
    """Process a question — live API if available, otherwise demo mode.

    Args:
        query: The user's question. Empty, whitespace-only or ``None`` input
            short-circuits with a prompt to enter a question.
        top_k: Number of chunks to request from the backend.
        use_hybrid: Whether the backend should use hybrid search.

    Returns:
        A ``(answer, sources, reasoning)`` tuple of Markdown strings.

    The live backend is only contacted when ``API_BASE_URL`` is set. Any
    failure there (missing ``httpx``, network error, non-200 status, malformed
    body) is logged and the function falls back to demo mode instead of
    raising, so the UI always gets a response.
    """
    if not query or not query.strip():
        return "Please enter a question.", "", ""

    # Try live API first
    if API_BASE_URL:
        try:
            import httpx

            payload = {
                "query": query,
                # The UI slider may deliver a float; send a plain integer.
                "top_k": int(top_k),
                "use_hybrid": use_hybrid,
            }
            # Tolerate a base URL configured with a trailing slash.
            url = f"{API_BASE_URL.rstrip('/')}/ask-agentic"
            async with httpx.AsyncClient(timeout=60.0) as client:
                res = await client.post(url, json=payload)
                if res.status_code == 200:
                    data = res.json()
                    answer = data.get("answer") or "No answer returned."
                    sources = _format_sources(data.get("sources")) or "No sources."
                    reasoning = (
                        _format_reasoning(data.get("reasoning_steps"))
                        or "No reasoning steps."
                    )
                    return answer, sources, reasoning
                _LOGGER.warning(
                    "Backend returned HTTP %s; falling back to demo mode",
                    res.status_code,
                )
        except Exception:
            # Deliberate best-effort: never surface backend failures to the
            # user, but leave a trace so they can be diagnosed.
            _LOGGER.warning(
                "Live backend request failed; falling back to demo mode",
                exc_info=True,
            )

    # Demo mode
    demo = find_closest_demo(query)
    if demo:
        sources = _format_sources(demo.get("sources")) or "No sources."
        reasoning = (
            _format_reasoning(demo.get("reasoning_steps")) or "No reasoning steps."
        )
        return demo["answer"], sources, reasoning

    return DEFAULT_ANSWER, "Demo mode — no live sources", "Demo mode — pipeline not connected"


# ── Gradio Interface ─────────────────────────────────────────────────────────
# Markdown header rendered at the top of the page: a short project blurb
# followed by a component/technology table. This is runtime text, so the
# whitespace inside the literal is significant to the rendered Markdown.
DESCRIPTION = """
# 🔬 Agentic RAG Platform

**Production-grade AI research assistant** over ArXiv cs.AI papers.

This demo showcases the project architecture. Try the example questions below
for sample agentic RAG responses, or connect a live backend for real-time answers.

| Component | Technology |
|-----------|-----------|
| **Backend** | FastAPI + Python 3.12 |
| **Agent** | LangGraph (7-node state machine) |
| **Search** | OpenSearch hybrid (BM25 + kNN + RRF) |
| **LLM** | Ollama / OpenAI (pluggable) |
| **Frontend** | Next.js 14 + this Gradio demo |
| **Observability** | Langfuse v3 |

"""

# Markdown footer rendered at the bottom of the page: a horizontal rule,
# project links (GITHUB_URL and PORTFOLIO_URL are interpolated) and the
# author/license credit.
ARTICLE = f"""
---
**Links:** [GitHub Repository]({GITHUB_URL}) · [Portfolio Site]({PORTFOLIO_URL}) · [LinkedIn](https://www.linkedin.com/in/rishu2207/)

Built by **Rishabh Sharma** — MIT License
"""

# Theme is assembled up front so the Blocks() call itself stays short.
_soft_theme = gr.themes.Soft(
    primary_hue=gr.themes.colors.purple,
    secondary_hue=gr.themes.colors.teal,
    neutral_hue=gr.themes.colors.slate,
)

# Questions offered as one-click examples under the output panels.
_example_questions = [
    "What is retrieval-augmented generation and when is it a better choice than fine-tuning?",
    "How does reciprocal rank fusion combine BM25 and dense-vector scores?",
    "What are the latest self-correcting or agentic RAG workflows?",
    "How do you evaluate retrieval quality and faithfulness of a RAG system?",
]

with gr.Blocks(title="Agentic RAG Platform", theme=_soft_theme) as demo:
    gr.Markdown(DESCRIPTION)

    # Question box (wide column) next to the submit button (narrow column).
    with gr.Row():
        with gr.Column(scale=3):
            query_input = gr.Textbox(
                label="🔍 Your Research Question",
                placeholder="Ask about AI/ML research...",
                lines=2,
                max_lines=4,
            )
        with gr.Column(scale=1):
            submit_btn = gr.Button("Ask Agent →", variant="primary", size="lg")

    # Retrieval knobs, collapsed by default.
    with gr.Accordion("⚙️ Advanced Options", open=False):
        with gr.Row():
            top_k = gr.Slider(1, 10, value=5, step=1, label="Chunks to retrieve")
            use_hybrid = gr.Checkbox(value=True, label="Hybrid search (BM25 + vectors)")

    # Answer on the left; sources and reasoning stacked on the right.
    with gr.Row():
        with gr.Column(scale=2):
            answer_output = gr.Markdown(label="📝 Answer", value="*Ask a question to get started!*")
        with gr.Column(scale=1):
            sources_output = gr.Markdown(label="📚 Sources", value="")
            reasoning_output = gr.Markdown(label="🧠 Agent Reasoning", value="")

    # Each example is a one-element row because there is a single input.
    gr.Examples(
        examples=[[question] for question in _example_questions],
        inputs=[query_input],
    )

    # The button click and pressing Enter in the textbox run the same handler
    # with the same inputs and outputs.
    for _register in (submit_btn.click, query_input.submit):
        _register(
            fn=ask_question,
            inputs=[query_input, top_k, use_hybrid],
            outputs=[answer_output, sources_output, reasoning_output],
        )

    gr.Markdown(ARTICLE)

if __name__ == "__main__":
    demo.launch()