#!/usr/bin/env python3
"""
Chandra OCR web demo.

Upload a PDF or image file; the document is analyzed and the result is shown
as structured JSON, an HTML preview, Markdown, and metadata.
"""

import json
import tempfile
from pathlib import Path
from flask import Flask, request, render_template_string, jsonify
from bs4 import BeautifulSoup
import subprocess
import os

# Flask application instance: the `@app.route` handlers in this file register
# on it, and the `__main__` guard at the bottom starts it with `app.run`.
app = Flask(__name__)

# Single-page UI served at "/". It is rendered with render_template_string, so
# the markup must stay free of Jinja delimiters (double braces, brace-percent,
# brace-hash). The inline script posts the chosen file to /upload and fills the
# four result tabs from the JSON response.
HTML_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
    <title>Chandra OCR Demo</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .container {
            background-color: white;
            padding: 30px;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        h1 {
            color: #333;
            border-bottom: 2px solid #4CAF50;
            padding-bottom: 10px;
        }
        .upload-section {
            margin: 20px 0;
            padding: 20px;
            border: 2px dashed #ccc;
            border-radius: 4px;
            text-align: center;
        }
        input[type="file"] {
            margin: 10px 0;
        }
        button {
            background-color: #4CAF50;
            color: white;
            padding: 10px 20px;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            font-size: 16px;
        }
        button:hover {
            background-color: #45a049;
        }
        .results {
            margin-top: 30px;
        }
        .tab {
            overflow: hidden;
            border: 1px solid #ccc;
            background-color: #f1f1f1;
        }
        .tab button {
            background-color: inherit;
            float: left;
            border: none;
            outline: none;
            cursor: pointer;
            padding: 14px 16px;
            transition: 0.3s;
            color: black;
        }
        .tab button:hover {
            background-color: #ddd;
        }
        .tab button.active {
            background-color: #4CAF50;
            color: white;
        }
        .tabcontent {
            display: none;
            padding: 20px;
            border: 1px solid #ccc;
            border-top: none;
        }
        pre {
            background-color: #f4f4f4;
            padding: 15px;
            border-radius: 4px;
            overflow-x: auto;
        }
        .html-preview {
            border: 1px solid #ddd;
            padding: 15px;
            background-color: white;
        }
        .loading {
            display: none;
            text-align: center;
            padding: 20px;
        }
        .spinner {
            border: 4px solid #f3f3f3;
            border-top: 4px solid #4CAF50;
            border-radius: 50%;
            width: 40px;
            height: 40px;
            animation: spin 1s linear infinite;
            margin: 0 auto;
        }
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>🔍 Chandra OCR Demo</h1>
        <p>PDF 또는 이미지 파일을 업로드하면 문서를 분석해서 JSON 및 HTML 형태로 변환합니다.</p>

        <div class="upload-section">
            <h3>파일 업로드</h3>
            <form id="uploadForm" enctype="multipart/form-data">
                <input type="file" name="file" id="fileInput" accept=".pdf,.png,.jpg,.jpeg" required>
                <br><br>
                <button type="submit">분석 시작</button>
            </form>
        </div>

        <div class="loading" id="loading">
            <div class="spinner"></div>
            <p>문서를 분석하는 중입니다. 잠시만 기다려주세요...</p>
        </div>

        <div class="results" id="results" style="display: none;">
            <h2>분석 결과</h2>
            <div class="tab">
                <button class="tablinks active" onclick="openTab(event, 'JSON')">구조화된 JSON</button>
                <button class="tablinks" onclick="openTab(event, 'HTML')">HTML 미리보기</button>
                <button class="tablinks" onclick="openTab(event, 'Markdown')">Markdown</button>
                <button class="tablinks" onclick="openTab(event, 'Metadata')">메타데이터</button>
            </div>

            <div id="JSON" class="tabcontent" style="display: block;">
                <pre id="jsonContent"></pre>
            </div>

            <div id="HTML" class="tabcontent">
                <div class="html-preview" id="htmlContent"></div>
            </div>

            <div id="Markdown" class="tabcontent">
                <pre id="markdownContent"></pre>
            </div>

            <div id="Metadata" class="tabcontent">
                <pre id="metadataContent"></pre>
            </div>
        </div>
    </div>

    <script>
        function openTab(evt, tabName) {
            var i, tabcontent, tablinks;
            tabcontent = document.getElementsByClassName("tabcontent");
            for (i = 0; i < tabcontent.length; i++) {
                tabcontent[i].style.display = "none";
            }
            tablinks = document.getElementsByClassName("tablinks");
            for (i = 0; i < tablinks.length; i++) {
                tablinks[i].className = tablinks[i].className.replace(" active", "");
            }
            document.getElementById(tabName).style.display = "block";
            evt.currentTarget.className += " active";
        }

        document.getElementById('uploadForm').addEventListener('submit', async function(e) {
            e.preventDefault();

            const fileInput = document.getElementById('fileInput');
            const file = fileInput.files[0];

            if (!file) {
                alert('파일을 선택해주세요.');
                return;
            }

            const formData = new FormData();
            formData.append('file', file);

            document.getElementById('loading').style.display = 'block';
            document.getElementById('results').style.display = 'none';

            try {
                const response = await fetch('/upload', {
                    method: 'POST',
                    body: formData
                });

                const result = await response.json();

                if (result.error) {
                    alert('오류: ' + result.error);
                    return;
                }

                document.getElementById('jsonContent').textContent = JSON.stringify(result.structured_json, null, 2);
                // NOTE: result.html is inserted as markup without sanitization.
                document.getElementById('htmlContent').innerHTML = result.html;
                document.getElementById('markdownContent').textContent = result.markdown;
                document.getElementById('metadataContent').textContent = JSON.stringify(result.metadata, null, 2);

                document.getElementById('results').style.display = 'block';
            } catch (error) {
                alert('오류가 발생했습니다: ' + error.message);
            } finally {
                // Runs on success, on a server-reported error and on a thrown
                // exception, so the spinner can never be left visible.
                document.getElementById('loading').style.display = 'none';
            }
        });
    </script>
</body>
</html>
"""


def _cell_span(cell, attr):
    """Return the integer value of a ``rowspan``/``colspan`` attribute.

    A missing attribute yields 1. An empty or non-numeric value also yields 1
    instead of raising, so one malformed cell cannot abort the conversion.
    """
    try:
        return int(cell.get(attr, 1))
    except (TypeError, ValueError):
        return 1


def _cell_to_dict(cell):
    """Describe one ``<td>``/``<th>`` as ``{"text", "rowspan", "colspan"}``."""
    return {
        "text": cell.get_text(strip=True),
        "rowspan": _cell_span(cell, 'rowspan'),
        "colspan": _cell_span(cell, 'colspan'),
    }


def _table_to_dict(table):
    """Convert one ``<table>`` element into ``{"headers": [...], "rows": [...]}``."""
    headers = []
    thead = table.find('thead')
    if thead is not None:
        for tr in thead.find_all('tr'):
            headers.append([_cell_to_dict(th) for th in tr.find_all('th')])

    tbody = table.find('tbody')
    if tbody is not None:
        body_rows = tbody.find_all('tr')
    else:
        # The 'html.parser' backend does not insert a missing <tbody>, so a
        # table written without one keeps its rows as direct children.
        body_rows = table.find_all('tr', recursive=False)

    rows = []
    for tr in body_rows:
        row = []
        for td in tr.find_all(['td', 'th']):
            cell = _cell_to_dict(td)
            # The key is only present when the cell contains a checkbox input.
            if td.find('input', {'type': 'checkbox'}) is not None:
                cell["has_checkbox"] = True
            row.append(cell)
        rows.append(row)

    return {"headers": headers, "rows": rows}


def html_to_structured_json(html_content):
    """Convert an HTML document into a JSON-serializable summary dict.

    Args:
        html_content: HTML markup as a string. ``None`` or an empty string is
            treated as an empty document.

    Returns:
        A dict with four keys:
          * ``"title"``: text of the first ``h1``-``h4`` element, or ``""``.
          * ``"tables"``: one ``{"headers", "rows"}`` dict per ``<table>``.
            Header rows come from ``<thead>``; body rows come from ``<tbody>``
            or, when there is none, from the table's direct ``<tr>`` children.
            Each cell is ``{"text", "rowspan", "colspan"}`` plus
            ``"has_checkbox": True`` for body cells containing a checkbox.
          * ``"paragraphs"``: non-empty text of every ``<p>``.
          * ``"lists"``: one ``{"type", "items"}`` dict per ``<ol>``/``<ul>``,
            where ``type`` is ``"ordered"`` or ``"unordered"`` and ``items``
            holds the text of the list's direct ``<li>`` children.
    """
    soup = BeautifulSoup(html_content or '', 'html.parser')

    result = {
        "title": "",
        "tables": [],
        "paragraphs": [],
        "lists": [],
    }

    # Title: the first heading in document order.
    title = soup.find(['h1', 'h2', 'h3', 'h4'])
    if title is not None:
        result["title"] = title.get_text(strip=True)

    # Tables.
    result["tables"] = [_table_to_dict(table) for table in soup.find_all('table')]

    # Paragraphs: skip those that are empty after stripping whitespace.
    paragraph_texts = (p.get_text(strip=True) for p in soup.find_all('p'))
    result["paragraphs"] = [text for text in paragraph_texts if text]

    # Lists: only direct <li> children, so a nested list's items are not
    # repeated as separate entries of the outer list.
    for list_tag in soup.find_all(['ol', 'ul']):
        result["lists"].append({
            "type": "ordered" if list_tag.name == 'ol' else "unordered",
            "items": [
                li.get_text(strip=True)
                for li in list_tag.find_all('li', recursive=False)
            ],
        })

    return result


@app.route('/')
def index():
    """Serve the upload page rendered from ``HTML_TEMPLATE``."""
    page = render_template_string(HTML_TEMPLATE)
    return page


def _safe_upload_name(filename):
    """Reduce a client-supplied filename to a bare, safe basename.

    Directory components (with either ``/`` or ``\\`` separators) are dropped
    so the name cannot point outside the directory it is joined to.

    Returns:
        The basename, or ``None`` when nothing usable remains (empty input,
        ``"."`` or ``".."``).
    """
    if not filename:
        return None
    name = Path(filename.replace('\\', '/')).name
    if name in ('', '.', '..'):
        return None
    return name


@app.route('/upload', methods=['POST'])
def upload_file():
    """Run the ``chandra`` CLI on an uploaded file and return its results.

    Expects a multipart form with a ``file`` field. The upload is written to
    a temporary directory, ``chandra <input> <output_dir> --method hf`` is run
    with a 300 second timeout, and the generated ``<stem>.html``, ``<stem>.md``
    and ``<stem>_metadata.json`` are read from ``<output_dir>/<stem>/``.

    Returns:
        On success, a JSON object with ``html``, ``markdown``, ``metadata`` and
        ``structured_json``. On failure, ``{"error": message}`` with status
        400 (missing or unusable upload) or 500 (processing failure, timeout,
        missing result files, or any unexpected exception).
    """
    if 'file' not in request.files:
        return jsonify({'error': '파일이 업로드되지 않았습니다.'}), 400

    upload = request.files['file']
    # A falsy check also covers a filename of None, not just ''.
    if not upload.filename:
        return jsonify({'error': '파일이 선택되지 않았습니다.'}), 400

    # The filename is client-controlled: joining it to the temp directory
    # unchanged would let "../x" or an absolute path escape that directory.
    safe_name = _safe_upload_name(upload.filename)
    if safe_name is None:
        return jsonify({'error': '유효하지 않은 파일 이름입니다.'}), 400

    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            work_dir = Path(temp_dir)

            # Separate subdirectories so an upload literally named "output"
            # cannot collide with the output directory.
            input_dir = work_dir / "input"
            output_dir = work_dir / "output"
            input_dir.mkdir()
            output_dir.mkdir()

            input_path = input_dir / safe_name
            upload.save(input_path)

            cmd = [
                "chandra",
                str(input_path),
                str(output_dir),
                "--method", "hf",
            ]

            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=300,
            )

            if result.returncode != 0:
                # NOTE(review): stderr is echoed to the client verbatim.
                return jsonify({'error': f'처리 중 오류가 발생했습니다: {result.stderr}'}), 500

            # Result files are looked up at <output_dir>/<stem>/<stem>.*
            stem = input_path.stem
            result_dir = output_dir / stem

            html_file = result_dir / f"{stem}.html"
            md_file = result_dir / f"{stem}.md"
            metadata_file = result_dir / f"{stem}_metadata.json"

            # Check all three up front so a missing file is reported with the
            # same message instead of a raw FileNotFoundError that would
            # expose the temporary path.
            if not all(p.exists() for p in (html_file, md_file, metadata_file)):
                return jsonify({'error': '결과 파일을 찾을 수 없습니다.'}), 500

            html_content = html_file.read_text(encoding='utf-8')
            markdown_content = md_file.read_text(encoding='utf-8')
            metadata = json.loads(metadata_file.read_text(encoding='utf-8'))

            structured_json = html_to_structured_json(html_content)

            return jsonify({
                'html': html_content,
                'markdown': markdown_content,
                'metadata': metadata,
                'structured_json': structured_json,
            })

    except subprocess.TimeoutExpired:
        return jsonify({'error': '처리 시간이 초과되었습니다.'}), 500
    except Exception as e:
        # Request boundary: report any unexpected failure as a JSON error.
        return jsonify({'error': str(e)}), 500


if __name__ == '__main__':
    # Print the startup banner, then start the server bound to 0.0.0.0:5000
    # with debug mode off.
    rule = "=" * 60
    banner_lines = (
        rule,
        "Chandra OCR 웹 데모 시작",
        rule,
        "브라우저에서 http://localhost:5000 으로 접속하세요.",
        rule,
    )
    for line in banner_lines:
        print(line)
    app.run(host='0.0.0.0', port=5000, debug=False)
