package pdfcontent

import (
	"fmt"

	"pdf-wizard/internal/pdfprocessor"
)

// ExtractPDFContent renders the text of the PDF at pdfPath as markdown with
// form field markers injected at the fields' positions.
//
// The function:
//  1. Reads the form fields with pdfprocessor.ExtractFormFields and labels
//     each one `[field idx="N" type="T"]`, where N is the field's index in
//     the returned list and T is its type.
//  2. Reads the positioned page text with ExtractText.
//  3. Rewrites each field's PosY from PDF user space (Y measured from the
//     bottom of the page) to the top-origin space of the extracted text,
//     using the field's vertical center.
//  4. Hands pages and fields to ToMarkdown and returns its result.
//
// NOTE(review): ExtractText is expected to wrap pdftotext's bounding-box
// output, and multi-column flattening presumably happens inside ToMarkdown;
// neither is visible from this function, so confirm against those helpers.
//
// An error is returned when either extraction step fails or when ExtractText
// yields no pages.
func ExtractPDFContent(pdfPath string) (string, error) {
	rawFields, err := pdfprocessor.ExtractFormFields(pdfPath)
	if err != nil {
		return "", fmt.Errorf("extracting form fields: %w", err)
	}

	// The final length is known up front, so size the slice once.
	fields := make([]FormFieldPos, 0, len(rawFields))
	for i, rf := range rawFields {
		fields = append(fields, FormFieldPos{
			Name:   rf.Name,
			PosX:   rf.PosX,
			PosY:   rf.PosY,
			Width:  rf.Width,
			Height: rf.Height,
			Page:   rf.Page,
			Label:  fmt.Sprintf(`[field idx="%d" type="%s"]`, i, rf.Type),
		})
	}

	pages, err := ExtractText(pdfPath)
	if err != nil {
		return "", fmt.Errorf("extracting text: %w", err)
	}
	if len(pages) == 0 {
		return "", fmt.Errorf("no text content found in PDF")
	}

	// Flip each field's Y axis in a single pass, looking its page up directly
	// instead of scanning every page for every field. Page numbers are
	// 1-based and pages[n-1] is taken to be page n. Fields that point at a
	// page outside the extracted range keep their original PosY.
	for i := range fields {
		f := &fields[i]
		if f.Page < 1 || f.Page > len(pages) {
			continue
		}
		// PosY is the field's bottom edge in PDF coordinates, so its center
		// is PosY + Height/2; the top-origin center is pageHeight minus that.
		f.PosY = pages[f.Page-1].Height - f.PosY - f.Height/2
	}

	return ToMarkdown(pages, fields), nil
}