import { Langfuse } from 'langfuse';

/**
 * Seeds a Langfuse dataset with three code-assistance Q&A pairs and records a
 * simulated evaluation run against them.
 *
 * Steps: create the dataset, add one dataset item per test case, open a single
 * "Dataset Evaluation" trace, log one generation plus one "response-quality"
 * score per test case, then log an "overall-dataset-quality" score on the
 * trace. No model is actually called: each generation's output is the test
 * case's expected output, and the similarity is a fixed placeholder value.
 *
 * Connection settings are read from LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY
 * and LANGFUSE_BASEURL when set; otherwise the local development defaults
 * below are used.
 *
 * @returns {Promise<void>} Resolves once the client has been shut down. Errors
 *   raised inside the run are logged and reported through
 *   `process.exitCode = 1` rather than being rethrown.
 */
async function testDataset() {
    // Placeholder similarity used for every case and for the overall score,
    // since no real model output is compared here.
    const SIMULATED_SIMILARITY = 0.92;

    // Initialise the Langfuse client.
    // NOTE(review): the fallback keys below are committed to source control.
    // They appear to belong to a local instance (localhost:3002); rotate them
    // and rely on the environment variables for anything shared.
    const langfuse = new Langfuse({
        publicKey: process.env.LANGFUSE_PUBLIC_KEY || 'pk-lf-cd5256f7-a511-4dd5-bfca-c317aa60576c',
        secretKey: process.env.LANGFUSE_SECRET_KEY || 'sk-lf-7cc24a7f-f4a7-4c00-bfa5-df8ee7032777',
        baseUrl: process.env.LANGFUSE_BASEURL || 'http://localhost:3002',
        flushAt: 1,
        debug: true
    });

    try {
        // Create the dataset
        const datasetName = 'code-assistance-dataset';
        const dataset = await langfuse.createDataset({
            name: datasetName,
            description: 'Dataset for testing code assistance prompts'
        });
        console.log('Dataset created:', dataset);

        // Define the test cases
        const testCases = [
            {
                id: 'test-case-1',
                input: "How can I fix a memory leak in my Node.js application?",
                expectedOutput: "To fix a memory leak in Node.js, follow these steps:\n1. Use Chrome DevTools to take heap snapshots\n2. Monitor memory usage with process.memoryUsage()\n3. Check for event listeners and remove them when not needed\n4. Close database connections properly\n5. Use WeakMap or WeakSet for references"
            },
            {
                id: 'test-case-2',
                input: "What's the difference between Promise.all and Promise.allSettled?",
                expectedOutput: "Promise.all and Promise.allSettled handle multiple promises differently:\n1. Promise.all:\n- Returns when all promises resolve\n- Rejects immediately if any promise rejects\n2. Promise.allSettled:\n- Always returns when all promises complete\n- Returns status and value/reason for each promise"
            },
            {
                id: 'test-case-3',
                input: "How do I implement error handling in async/await?",
                expectedOutput: "To implement error handling in async/await:\n1. Use try/catch blocks\n2. The catch block handles any errors or rejections\n3. Can add finally block for cleanup\n4. Can create error handling middleware for Express"
            }
        ];

        // Add one item to the dataset per test case
        for (const testCase of testCases) {
            const item = await langfuse.createDatasetItem({
                datasetName,
                input: testCase.input,
                expectedOutput: testCase.expectedOutput,
                metadata: {
                    testCaseId: testCase.id
                }
            });
            console.log(`Added item to dataset: ${testCase.id}`, item);
        }

        // Single trace that groups the whole dataset evaluation
        const trace = await langfuse.trace({
            name: 'Dataset Evaluation',
            tags: ['dataset-evaluation'],
            metadata: {
                datasetName,
                totalCases: testCases.length
            }
        });

        // Run each test case
        for (const testCase of testCases) {
            console.log(`Testing case: ${testCase.id}`);

            // Simulated LLM call: the expected output stands in for the model output
            const generation = await trace.generation({
                name: `code-assistance-${testCase.id}`,
                model: 'gpt-4',
                modelParameters: {
                    temperature: 0.7,
                    max_tokens: 1000
                },
                input: testCase.input,
                output: testCase.expectedOutput,
                metadata: {
                    datasetName,
                    testCaseId: testCase.id
                }
            });

            // Score the generation itself so each result stays attached to its
            // own test case instead of piling identically named scores onto
            // the shared trace.
            await generation.score({
                name: 'response-quality',
                value: SIMULATED_SIMILARITY,
                metadata: {
                    metric: 'similarity',
                    testCaseId: testCase.id,
                    datasetName
                }
            });

            console.log(`Completed test case: ${testCase.id} with score: ${SIMULATED_SIMILARITY}`);
        }

        // Overall result for the whole run, recorded on the trace
        await trace.score({
            name: 'overall-dataset-quality',
            value: SIMULATED_SIMILARITY,
            metadata: {
                metric: 'average-similarity',
                datasetName,
                totalCases: testCases.length
            }
        });

        console.log('Dataset evaluation completed');

        // Send any queued Langfuse data
        await langfuse.flush();

    } catch (error) {
        console.error('Error:', error);
        // Still resolve, but make the failure visible to the shell / CI
        // through a non-zero exit status.
        process.exitCode = 1;
    } finally {
        // Close the Langfuse connection whether or not the run succeeded
        await langfuse.shutdownAsync();
    }
}

// Run the script. A rejection that reaches this point is logged and also
// turned into a non-zero exit status, so a failed run is not reported as a
// success to the shell or CI.
testDataset().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});