package pdfcontent

import (
	"math"
	"regexp"
	"slices"
	"sort"
	"strings"
)

// orderedListRe matches a string that is exactly one ASCII digit or letter
// followed by a period, e.g. "1.", "a." or "B.".
// NOTE(review): multi-character markers such as "10." do not match — confirm
// that this is intended.
var orderedListRe = regexp.MustCompile(`^[0-9a-zA-Z]\.$`)
// parenNumberRe matches a string that is exactly a decimal number wrapped in
// parentheses, e.g. "(1)" or "(12)".
var parenNumberRe = regexp.MustCompile(`^\([0-9]+\)$`)

func isList(text string) bool {
	return orderedListRe.MatchString(text) || parenNumberRe.MatchString(text)
}

func isBulletChar(s string) bool {
	// Common bullet/list-marker characters from PDFs
	for _, c := range []string{"•", "‣", "⁃", "◦", "●", "▪", "▫", "-", "–", "—", "✦", "✧", "✩", "✪", "★", "☆", "¤", "§", "¶"} {
		if s == c {
			return true
		}
	}
	// Symbol font bullets (U+E000–U+F8FF private use range, commonly used by pdftotext)
	if len(s) == 1 {
		r := []rune(s)[0]
		if r >= 0xE000 && r <= 0xF8FF {
			return true
		}
	}
	// Multi-byte private-use / CJK extension characters that are single-grapheme bullets
	if len([]rune(s)) == 1 && len(s) >= 3 {
		r := []rune(s)[0]
		if (r >= 0xE000 && r <= 0xF8FF) || (r >= 0xF0000 && r <= 0xFFFFD) {
			return true
		}
	}
	return false
}

// FormatRule is one step of the markdown writer. It receives the full item
// slice and the index of the item being rendered, may write to sb, and
// returns the number of items it consumed. writeMarkdownItems treats a
// positive result as "handled" and advances by that many items; on 0 it
// tries the next rule for the same item.
type FormatRule func(items []TextItem, index int, sb *strings.Builder) int

// FormatRules lists the rules in the order writeMarkdownItems tries them for
// each item. The first rule that returns a positive count ends processing of
// that item, so rules that only emit a prefix and return 0 must come before
// the rules that consume the item's text.
var FormatRules = []FormatRule{
	lineBreakFormatRule,
	removeFieldLinesFormatRule,
	bulletListFormatRule,
	headingFormatRule,
	boldFormatRule,
	inlineTextFormatRule,
}

// lineBreakFormatRule writes the vertical whitespace that belongs in front of
// items[index] and decides nothing else about the item.
//
// It returns 1 only for column separators (items with Y < 0), which are
// rendered as a blank line and consumed. In every other case it returns 0,
// after optionally writing "\n", "\n\n" or a page rule, so that the following
// rules still render the item's text.
func lineBreakFormatRule(items []TextItem, index int, sb *strings.Builder) int {
	item := items[index]

	// Y < 0 marks a separator between columns: produce a blank line and
	// consume the item so no other rule sees it.
	if item.Y < 0 {
		sb.WriteString("\n\n")
		return 1
	}

	// The first item has nothing before it to break away from.
	if index == 0 {
		return 0
	}
	lastItem := items[index-1]

	// The previous item was a separator, which already wrote the gap.
	if lastItem.Y < 0 {
		return 0
	}

	if lastItem.Page != item.Page {
		// Walk back over the previous page's trailing items to find the
		// largest OrigY, i.e. how far down that page the content reached.
		prevPage := lastItem.Page
		lastOrigY := lastItem.OrigY
		for j := index - 1; j >= 0; j-- {
			if items[j].Page != prevPage {
				break
			}
			if items[j].OrigY > lastOrigY {
				lastOrigY = items[j].OrigY
			}
		}
		// If the content stopped above 75% of the page height (more than a
		// quarter of the page left blank), emit a horizontal rule between
		// the pages; otherwise a plain paragraph break.
		pageHeight := lastItem.PageHeight
		if pageHeight > 0 && lastOrigY < pageHeight*0.75 {
			sb.WriteString("\n\n---\n\n")
		} else {
			sb.WriteString("\n\n")
		}
		return 0
	}

	// origYDiff is signed on purpose: only a move further down the original
	// page (positive value) counts as a gap.
	origYDiff := item.OrigY - lastItem.OrigY
	flatYDiff := math.Abs(item.Y - lastItem.Y)
	threshold := item.FontSize
	formFieldInvolved := item.FormField || lastItem.FormField

	// Paragraph break: large flattened Y gap, or large OrigY gap. The OrigY
	// test is skipped when a form field is involved, because form fields can
	// have a different OrigY while sharing the flattened Y of their text.
	if flatYDiff > threshold*2 || (origYDiff > threshold*2 && !formFieldInvolved) {
		sb.WriteString("\n\n")
		return 0
	}

	// Line break: items sit on different flattened lines, and either a form
	// field is involved or the items are more than 12 units apart in OrigY.
	// Plain text items that are close in OrigY stay on the same line.
	if flatYDiff > threshold && (formFieldInvolved || origYDiff > 12) {
		sb.WriteString("\n")
		return 0
	}

	// Same flattened line but different original Y: items from different
	// PDF lines were merged onto one flattened line (e.g. by column
	// flattening or block merging) and must be separated again.
	// Exceptions that keep the items together:
	//   - form fields, which share the flattened Y of their matched text;
	//   - bullets and list markers, which anchor to the text that follows;
	//   - two short items (<= 10 bytes each) whose OrigX differ by less
	//     than 100, which are likely one PDF line with slight Y drift.
	driftedLine := flatYDiff < threshold && origYDiff > math.Max(threshold*0.4, 3) && !formFieldInvolved
	if driftedLine && !isBulletChar(item.Text) && !isList(item.Text) {
		shortPair := len(item.Text) <= 10 && len(lastItem.Text) <= 10 &&
			math.Abs(item.OrigX-lastItem.OrigX) < 100
		if !shortPair {
			sb.WriteString("\n")
		}
	}
	return 0
}

// inlineTextFormatRule is the catch-all rule: it writes the item's text
// followed by a single space and always consumes exactly one item.
func inlineTextFormatRule(items []TextItem, index int, sb *strings.Builder) int {
	sb.WriteString(items[index].Text)
	sb.WriteString(" ")
	return 1
}

// bulletListFormatRule renders list markers.
//
//   - A bullet glyph is replaced by an indented markdown "- " and consumed.
//   - An ordered marker such as "a." or "1." is written with its indent and a
//     trailing space, and consumed.
//   - A parenthesized number such as "(1)" only gets its indent written; the
//     rule returns 0 so a later rule writes the marker text itself.
//
// Any other item is left untouched (returns 0).
func bulletListFormatRule(items []TextItem, index int, sb *strings.Builder) int {
	entry := items[index]
	marker := strings.TrimSpace(entry.Text)

	// indent converts the marker's distance from X=72 into leading spaces:
	// two spaces per 18 units, rounded, capped at 12 spaces. Markers at or
	// left of X=72 get no indent.
	indent := func() string {
		offset := math.Floor(entry.OrigX - 72)
		if offset <= 0 {
			return ""
		}
		width := int(math.Round(offset/18.0)) * 2
		if width > 12 {
			width = 12
		}
		return strings.Repeat(" ", width)
	}

	switch {
	case isBulletChar(marker):
		sb.WriteString(indent() + "- ")
		return 1
	case orderedListRe.MatchString(marker):
		sb.WriteString(indent() + marker + " ")
		return 1
	case parenNumberRe.MatchString(marker):
		sb.WriteString(indent())
		return 0
	}
	return 0
}

// headingFormatRule renders items whose font size maps to a heading level
// (see getTextItemHeading). The first item of a heading line gets the "#"
// prefix; an item that directly follows another item of the same level, Y
// and page is treated as a continuation and written without a prefix.
// Returns 1 when the item was a heading, 0 otherwise.
func headingFormatRule(items []TextItem, index int, sb *strings.Builder) int {
	cur := items[index]
	level := getTextItemHeading(cur)
	if level == 0 {
		return 0
	}

	continuation := false
	if index > 0 {
		prev := items[index-1]
		continuation = getTextItemHeading(prev) == level && prev.Y == cur.Y && prev.Page == cur.Page
	}

	if !continuation {
		sb.WriteString(strings.Repeat("#", level) + " ")
	}
	sb.WriteString(cur.Text + " ")
	return 1
}

// getTextItemHeading maps an item's font size to a markdown heading level:
// 1 for sizes above 18, 2 for sizes above 16, and 0 (not a heading) otherwise.
func getTextItemHeading(item TextItem) int {
	switch size := item.FontSize; {
	case size > 18.0:
		return 1
	case size > 16.0:
		return 2
	default:
		return 0
	}
}

// boldFontSize is the font size an item must exceed to be rendered in bold.
const boldFontSize = 15.0

// boldFormatRule wraps a run of large-font items in a single "**...**" span.
// Starting at index, it gathers the following items (at most 100 of them)
// that are also above boldFontSize and share the first item's page and Y,
// writes their texts joined by single spaces, and returns the run length.
// Items at or below boldFontSize are not handled (returns 0).
func boldFormatRule(items []TextItem, index int, sb *strings.Builder) int {
	first := items[index]
	if first.FontSize <= boldFontSize {
		return 0
	}

	// Extend the run while the next item is bold and on the same line/page.
	stop := index + 1
	for ; stop < len(items) && stop <= index+100; stop++ {
		cand := items[stop]
		if cand.FontSize <= boldFontSize || cand.Page != first.Page || cand.Y != first.Y {
			break
		}
	}

	sb.WriteString("**")
	for i := index; i < stop; i++ {
		if i > index {
			sb.WriteString(" ")
		}
		sb.WriteString(items[i].Text)
	}
	sb.WriteString("** ")

	return stop - index
}

// removeFieldLinesFormatRule drops fill-in underline text. An item whose
// trimmed text contains more than two underscores, or the sequence "_.", is
// consumed without writing anything; every other item is left for later rules.
func removeFieldLinesFormatRule(items []TextItem, index int, sb *strings.Builder) int {
	text := strings.TrimSpace(items[index].Text)
	switch {
	case strings.Count(text, "_") > 2, strings.Contains(text, "_."):
		return 1
	}
	return 0
}

// FormFieldPos represents a form field's position on a page for markdown injection.
// ToMarkdown groups fields by Page, matches each one to nearby text via
// PosX/PosY, and emits Label as the field's text.
type FormFieldPos struct {
	Name   string  // PDF field name
	PosX   float64 // X position (bottom-left of field rect)
	PosY   float64 // Y position (bottom-left of field rect)
	Width  float64 // field width; ToMarkdown treats fields with Width and Height both <= 25 as checkboxes
	Height float64 // field height; see Width
	Page   int     // 1-based page number (ToMarkdown looks fields up by page index + 1)
	Label  string  // Annotation label like "+AA"; written into the markdown in place of the field
}

// ToMarkdown converts pdftotext page data and form fields into markdown.
//
// Pipeline, per page:
//  1. Convert blocks/lines/words to TextItems (with OrigY preserved).
//  2. Flatten the merged blocks onto consecutive "flat" lines; multi-column
//     blocks are emitted column by column, delimited by Y=-1 separator items.
//  3. Match each form field on the page to the nearest text item by its
//     original position and append it at the resulting flattened Y.
//  4. Merge the form fields into their column blocks.
//
// The items of all pages are then rendered in one pass. fields are assigned
// to pages through their 1-based Page number. The result is the markdown text.
func ToMarkdown(pages []Page, fields []FormFieldPos) string {
	// Group fields by page (1-based).
	pageFields := make(map[int][]FormFieldPos)
	for _, f := range fields {
		pageFields[f.Page] = append(pageFields[f.Page], f)
	}

	// Estimate the body font size as the mean word height (YMax-YMin) over
	// the whole document, falling back to 12 when there are no words. It is
	// used for form-field matching/merging and bullet repositioning.
	sum := 0.0
	count := 0
	for _, page := range pages {
		for _, flow := range page.Flows {
			for _, block := range flow.Blocks {
				for _, line := range block.Lines {
					for _, w := range line.Words {
						sum += w.YMax - w.YMin
						count++
					}
				}
			}
		}
	}
	bodyFontSize := 12.0
	if count > 0 {
		bodyFontSize = sum / float64(count)
	}

	var sb strings.Builder
	var allItems []TextItem

	for pi, page := range pages {
		pageNum := pi + 1
		// separator is the Y=-1 marker placed between column blocks.
		separator := TextItem{Text: "", Y: -1, Page: pageNum, PageHeight: page.Height}

		// Convert blocks to grouped lines preserving block boundaries.
		pageItems, blockLines := blocksToGroupedLines(page, pageNum, 50, 90)
		avgFontSize := averageFontSize(pageItems)

		// Merge narrow blocks with the same column layout; wide blocks are
		// flattened normally, multi-column ones are split below.
		merged := mergeSameColumnBlocks(blockLines, page.Width)
		var flattened []TextItem
		lineHeight := math.Ceil(avgFontSize) + 4
		flatLine := 0

		// Pre-compute the first and last OrigY of each block for gap
		// detection between consecutive blocks (0 for empty blocks).
		firstOrigYs := make([]float64, len(merged))
		lastOrigYs := make([]float64, len(merged))
		for bi, m := range merged {
			if len(m.Lines) == 0 {
				continue
			}
			if first := m.Lines[0]; len(first) > 0 {
				firstOrigYs[bi] = first[0].OrigY
			}
			if last := m.Lines[len(m.Lines)-1]; len(last) > 0 {
				lastOrigYs[bi] = last[len(last)-1].OrigY
			}
		}

		for bi, m := range merged {
			if isMultiColumnBlock(m) {
				// Separate the multi-column block from preceding content.
				if len(flattened) > 0 {
					flattened = append(flattened, separator)
				}
				if columns := splitBlockColumns(m); len(columns) >= 2 {
					for ci, colLines := range columns {
						if ci > 0 {
							flattened = append(flattened, separator)
						}
						for _, line := range colLines {
							flattened = append(flattened, assignLinePositions(line, float64(flatLine)*lineHeight)...)
							flatLine++
						}
					}
					flatLine++ // paragraph gap after column block
					// Separate from following blocks.
					flattened = append(flattened, separator)
					continue
				}
				// Fewer than two columns came back (not truly
				// multi-column): fall through and flatten normally.
			}

			// Normal block: flatten and shift onto the current flat line.
			for _, it := range FlattenLines(m.Lines, page.Width, avgFontSize) {
				it.Y += float64(flatLine) * lineHeight
				flattened = append(flattened, it)
			}
			flatLine += len(m.Lines)
			// Add a paragraph gap (+1) only if this is the last block or the
			// next block starts more than one line height further down in
			// OrigY (i.e. it is not a continuation).
			if bi == len(merged)-1 || firstOrigYs[bi+1]-lastOrigYs[bi] > lineHeight {
				flatLine++
			}
		}

		// Build index of text items for form field matching.
		posIndex := buildPositionIndex(flattened)

		// Inject form fields matched to the nearest text item by original X+Y.
		for _, f := range pageFields[pageNum] {
			flattenedY := matchFormFieldToText(f.PosX, f.PosY, posIndex, bodyFontSize)
			// Checkbox form fields (small, roughly square) have their PDF
			// X-position at the left edge of the checkbox, which often aligns
			// with the "(" character in "( ) Label" patterns. Due to
			// coordinate system differences between pdfcpu and pdftotext, the
			// checkbox X may be slightly less than "("'s X, causing it to sort
			// before "(" in mergeByY. Offset checkbox OrigX right by 2pt to
			// ensure it appears inside the parentheses.
			origX := f.PosX
			if f.Width <= 25 && f.Height <= 25 {
				origX += 2
			}
			flattened = append(flattened, TextItem{
				Text:       f.Label,
				X:          math.Floor(f.PosX),
				Y:          flattenedY,
				XMax:       math.Floor(f.PosX + f.Width),
				YMax:       flattenedY + (f.Height / 2),
				OrigX:      origX,
				OrigY:      f.PosY,
				FontSize:   bodyFontSize,
				Page:       f.Page,
				PageHeight: page.Height,
				FormField:  true,
			})
		}

		// Merge form fields into their correct Y positions within each
		// column block, preserving Y=-1 separators between columns.
		flattened = mergeFormFields(flattened, bodyFontSize)

		// Keep Y=-1 column separators; remove other Page=0 items. Only this
		// page's items need filtering: earlier pages were filtered when they
		// were appended.
		allItems = append(allItems, slices.DeleteFunc(flattened, func(item TextItem) bool {
			return item.Page == 0 && item.Y >= 0
		})...)
	}

	writeMarkdownItems(&sb, allItems, bodyFontSize)
	return sb.String()
}

// buildPositionIndex returns the items that may serve as anchors when placing
// form fields: every item that is not itself a form field and has non-empty
// text (which also excludes the empty-text column separators). The items are
// copied in their original order; the result is nil when nothing qualifies.
func buildPositionIndex(items []TextItem) []TextItem {
	var anchors []TextItem
	for i := range items {
		if candidate := items[i]; !candidate.FormField && candidate.Text != "" {
			anchors = append(anchors, candidate)
		}
	}
	return anchors
}

// matchFormFieldToText returns the flattened Y at which a form field should
// be placed, derived from the text items in idx.
//
// Candidates are the items within maxOrigYRange of the field's original Y,
// excluding words that mix more than two underscores with other characters
// (pure-underscore words stay eligible as anchors for underline fields).
// Matching is done in two passes:
//
//  1. find the smallest distance between the field's Y and a candidate's
//     vertical centre (OrigY + FontSize/2);
//  2. among candidates within yTolerance of that distance, take the one
//     whose OrigX is closest to the field's X and use its flattened Y.
//
// The result is then adjusted:
//   - no candidate within maxMatchDist: the matched Y (or floor(fieldOrigY)
//     when there was no candidate at all) is returned as is;
//   - best distance above inlineDist and the field lies below the best
//     candidate's OrigY: the field moves one line height further down;
//   - matched text more than maxInlineX away in X: likewise one line down.
//
// With an empty idx the function returns floor(fieldOrigY).
func matchFormFieldToText(fieldOrigX, fieldOrigY float64, idx []TextItem, bodyFontSize float64) float64 {
	fallbackY := math.Floor(fieldOrigY)
	if len(idx) == 0 {
		return fallbackY
	}

	const (
		// maxMatchDist: beyond this centre distance the field is not
		// considered to belong to any text line.
		maxMatchDist = 30.0
		// inlineDist: maximum centre distance for sharing a line with the
		// matched text.
		inlineDist = 15.0
		// maxInlineX: maximum X distance for sharing a line with the matched
		// text (e.g. a checkbox at the right edge matching a label at the
		// left edge goes to the next line instead).
		maxInlineX = 100.0
		// yTolerance: candidates this close to the best centre distance
		// compete on X distance, so a slightly worse Y match with a much
		// better X match can win.
		yTolerance = 3.0
		// maxOrigYRange: candidates further than this from the field in
		// OrigY are ignored, which keeps distant page sections out.
		maxOrigYRange = 30.0
	)
	lineHeight := math.Ceil(bodyFontSize) + 4

	// eligible applies the candidate filters shared by both passes.
	eligible := func(item TextItem) bool {
		uc := strings.Count(item.Text, "_")
		if uc > 2 && len(item.Text) > uc {
			return false
		}
		return !(math.Abs(fieldOrigY-item.OrigY) > maxOrigYRange)
	}
	// centerDist is the distance from the field to the item's vertical centre.
	centerDist := func(item TextItem) float64 {
		textCenterY := item.OrigY + item.FontSize/2
		return math.Abs(fieldOrigY - textCenterY)
	}

	// Pass 1: best centre distance, remembering the OrigY of the first
	// candidate that achieves it. Recording it here (rather than searching
	// again afterwards) guarantees the baseline comes from an eligible item.
	bestDist := math.MaxFloat64
	bestBaseline := math.MaxFloat64
	for _, item := range idx {
		if !eligible(item) {
			continue
		}
		if d := centerDist(item); d < bestDist {
			bestDist = d
			bestBaseline = item.OrigY
		}
	}

	// Pass 2: among near-best candidates, prefer the closest in X.
	closestY := fallbackY
	closestOrigX := math.MaxFloat64
	for _, item := range idx {
		if !eligible(item) || centerDist(item) > bestDist+yTolerance {
			continue
		}
		if math.Abs(fieldOrigX-item.OrigX) < math.Abs(fieldOrigX-closestOrigX) {
			closestY = item.Y
			closestOrigX = item.OrigX
		}
	}

	if bestDist > maxMatchDist {
		// Nothing within matching range: keep the field in the flattened
		// coordinate space using whatever Y pass 2 produced.
		return closestY
	}

	// Too far for inline placement: bump to the next flattened line, but
	// only when the field is below the matched text. Fields positioned above
	// the text (e.g. a checkbox before a column header) stay inline.
	if bestDist > inlineDist && fieldOrigY > bestBaseline {
		return closestY + lineHeight
	}

	// Same Y but far apart in X: place the field on the next flattened line.
	if math.Abs(fieldOrigX-closestOrigX) > maxInlineX {
		return closestY + lineHeight
	}

	return closestY
}

// repositionBullets moves bullet/list-marker items that are stranded at the
// end of one flattened line to the flattened line of the text that follows
// them, so they are rendered in front of their own text rather than after the
// previous item's wrapped text.
//
// A marker is moved when it is on the same flattened line as the previous
// non-separator item (|ΔY| < bodyFontSize), on a different flattened line from
// the next non-separator item, and within 2*bodyFontSize of that item in
// OrigY. The marker takes the next item's Y and is placed directly before it,
// after any Y<0 separators that were in between.
//
// The input slice is not modified; a reordered copy is returned.
func repositionBullets(items []TextItem, bodyFontSize float64) []TextItem {
	items = append([]TextItem(nil), items...) // work on a copy
	threshold := bodyFontSize

	for i := 0; i < len(items); i++ {
		cur := items[i]
		if !isBulletChar(cur.Text) && !isList(cur.Text) {
			continue
		}

		// Find the previous non-separator item.
		prevIdx := i - 1
		for prevIdx >= 0 && items[prevIdx].Y < 0 {
			prevIdx--
		}
		if prevIdx < 0 {
			continue // nothing precedes the marker
		}
		prev := items[prevIdx]

		// Find the next non-separator item (the marker's associated text).
		nextIdx := i + 1
		for nextIdx < len(items) && items[nextIdx].Y < 0 {
			nextIdx++
		}
		if nextIdx >= len(items) {
			continue // no text after the marker
		}
		next := items[nextIdx]

		sameLineAsPrev := math.Abs(cur.Y-prev.Y) < threshold
		differentLineFromNext := math.Abs(cur.Y-next.Y) >= threshold
		origYMatchesNext := math.Abs(next.OrigY-cur.OrigY) <= threshold*2
		if !(sameLineAsPrev && differentLineFromNext && origYMatchesNext) {
			continue
		}

		// Shift the separators in items[i+1:nextIdx] one slot to the left and
		// drop the marker into the freed slot right before its text. The
		// slice length is unchanged, so no reallocation is needed.
		cur.Y = next.Y
		copy(items[i:], items[i+1:nextIdx])
		items[nextIdx-1] = cur
	}
	return items
}

// fixOrphanedBullets repairs rendered markdown in which bullet markers were
// separated from the text they belong to.
//
// Pass 1 removes orphan markers and counts them: lines consisting only of
// "-", and a trailing "-" that follows at least two spaces at the end of a
// line longer than four bytes (so inline dashes like "or - and" are kept).
//
// Pass 2 hands the counted markers back out. Once the first "- " line has
// been seen, a non-empty, non-bullet line receives a "- " prefix (keeping its
// leading spaces) while markers remain, except that a line shorter than
// continuationMaxLen directly after a bullet is taken as that bullet's
// wrapped continuation and left alone.
//
// When no orphan is found the input is returned unchanged.
func fixOrphanedBullets(md string) string {
	// continuationMaxLen: after a bullet line, shorter lines count as wrapped
	// continuations of that bullet; longer ones are likely new bullet items.
	const continuationMaxLen = 20

	// Pass 1: strip orphan markers.
	var kept []string
	orphans := 0
	for _, raw := range strings.Split(md, "\n") {
		if strings.TrimSpace(raw) == "-" {
			orphans++
			continue
		}

		trimmed := strings.TrimRight(raw, " \t\r")
		if len(trimmed) > 4 && strings.HasSuffix(trimmed, "  -") {
			head := strings.TrimRight(trimmed[:len(trimmed)-1], " \t")
			if head != "" && !strings.HasSuffix(head, "-") {
				orphans++
				kept = append(kept, head)
				continue
			}
		}

		kept = append(kept, raw)
	}

	if orphans == 0 {
		return md
	}

	// Pass 2: prepend the collected markers where they are needed.
	out := make([]string, 0, len(kept))
	used := 0
	afterBullet := false // the last non-empty line was a bullet
	inBullets := false   // a "- " line has been seen

	for _, raw := range kept {
		text := strings.TrimSpace(raw)

		switch {
		case text == "":
			out = append(out, raw)

		case text == "-" || strings.HasPrefix(text, "- "):
			afterBullet, inBullets = true, true
			out = append(out, raw)

		case !inBullets:
			// Not inside a bullet section yet: never add markers.
			out = append(out, raw)

		case afterBullet && len(text) < continuationMaxLen:
			// Short line right after a bullet: wrapped continuation.
			afterBullet = false
			out = append(out, raw)

		case used < orphans:
			// New bullet item: reuse one of the collected markers.
			used++
			lead := raw[:len(raw)-len(strings.TrimLeft(raw, " "))]
			out = append(out, lead+"- "+text)
			afterBullet = true

		default:
			out = append(out, raw)
		}
	}

	return strings.Join(out, "\n")
}

// writeMarkdownItems renders items as markdown into sb.
//
// The items are expected to already be in output order. Stranded bullet
// markers are first moved next to their text (repositionBullets). Then, for
// each position, FormatRules are tried in order: the first rule that reports
// a positive count decides how many items are skipped; if no rule consumes
// the item it is skipped on its own. Finally the accumulated text is passed
// through fixOrphanedBullets and sb is replaced with the result.
// Nothing is written for an empty item list.
func writeMarkdownItems(sb *strings.Builder, items []TextItem, bodyFontSize float64) {
	if len(items) == 0 {
		return
	}

	ordered := repositionBullets(items, bodyFontSize)

	pos := 0
	for pos < len(ordered) {
		advance := 1
		for _, apply := range FormatRules {
			if n := apply(ordered, pos, sb); n > 0 {
				advance = n
				break
			}
		}
		pos += advance
	}

	// Post-process: fix orphaned bullets that couldn't be repositioned.
	fixed := fixOrphanedBullets(sb.String())
	sb.Reset()
	sb.WriteString(fixed)
}

// splitColumnBlocks groups items into runs delimited by separator items
// (Y < 0). Separators themselves are dropped and empty runs are skipped, so
// consecutive, leading or trailing separators produce no blocks. The result
// is nil when there are no non-separator items.
func splitColumnBlocks(items []TextItem) [][]TextItem {
	var (
		out [][]TextItem
		run []TextItem
	)
	// flush closes the current run, if it holds anything.
	flush := func() {
		if len(run) > 0 {
			out = append(out, run)
			run = nil
		}
	}

	for _, it := range items {
		if it.Y < 0 {
			flush()
			continue
		}
		run = append(run, it)
	}
	flush()
	return out
}

// mergeFormFields inserts form field items (marked FormField=true, appended
// at the end) into their correct Y positions within each column block,
// preserving Y=-1 separators.
//
// Steps:
//  1. Split items into form fields and text items; if there are no form
//     fields, items is returned unchanged.
//  2. Cut the text items into blocks at every Y<0 separator, keeping a
//     placeholder entry for each separator.
//  3. Compute per-block bounds (flattened Y, OrigY, OrigX) and, for a block
//     that is followed by a separator and a block lying entirely to its right,
//     the X at which that right-hand block starts (rightSplitX).
//  4. Score every block for each form field and merge the field into the
//     best-scoring block via mergeByY, remapping the field's Y to a text line
//     of that block when its current Y matches none.
//  5. Concatenate the blocks again, re-creating the separators.
//
// fontSize is used as the "same line" tolerance (half of it) and is passed on
// to mergeByY.
//
// NOTE(review): a form field for which every block is rejected (more than 30
// away in OrigY, or at/after the block's rightSplitX) is not added to any
// block and is therefore absent from the result — confirm this is intended.
// NOTE(review): re-created separators carry only Text, Y and Page; the
// PageHeight of the original separator item is not copied.
func mergeFormFields(items []TextItem, fontSize float64) []TextItem {
	// Separate form fields from text items
	var formFields []TextItem
	var textItems []TextItem
	for _, item := range items {
		if item.FormField {
			formFields = append(formFields, item)
		} else {
			textItems = append(textItems, item)
		}
	}
	if len(formFields) == 0 {
		return items
	}

	// Split text items into column blocks at Y=-1 separators.
	// Every separator closes the current block (even an empty one) and adds
	// a marker entry, so blocks and separators strictly alternate.
	type blockInfo struct {
		items []TextItem
		isSep bool
	}
	var blocks []blockInfo
	var current []TextItem
	for _, item := range textItems {
		if item.Y < 0 {
			blocks = append(blocks, blockInfo{items: current, isSep: false})
			blocks = append(blocks, blockInfo{isSep: true})
			current = nil
		} else {
			current = append(current, item)
		}
	}
	blocks = append(blocks, blockInfo{items: current, isSep: false})

	// Compute Y ranges, OrigX ranges, and column split boundaries for each block.
	// metas is index-aligned with blocks; separator entries get a zero meta.
	type blockMeta struct {
		yMin        float64
		yMax        float64
		origYMin    float64 // original Y range, unchanged by column splitting
		origYMax    float64
		xMin        float64
		xMax        float64
		lineYs      []float64
		rightSplitX float64 // fields >= rightSplitX should go to the next column
	}
	var metas []blockMeta
	for bi, b := range blocks {
		if b.isSep {
			metas = append(metas, blockMeta{})
			continue
		}
		if len(b.items) == 0 {
			// Empty block: inverted (max/-max) ranges make its OrigY distance
			// to any field huge, so the cutoff below always rejects it.
			metas = append(metas, blockMeta{yMin: math.MaxFloat64, yMax: -math.MaxFloat64, origYMin: math.MaxFloat64, origYMax: -math.MaxFloat64})
			continue
		}
		yMin, yMax := b.items[0].Y, b.items[0].Y
		origYMin, origYMax := b.items[0].OrigY, b.items[0].OrigY
		xMin, xMax := b.items[0].OrigX, b.items[0].OrigX
		ySet := make(map[float64]bool)
		for _, it := range b.items {
			if it.Y < yMin {
				yMin = it.Y
			}
			if it.Y > yMax {
				yMax = it.Y
			}
			if it.OrigY < origYMin {
				origYMin = it.OrigY
			}
			if it.OrigY > origYMax {
				origYMax = it.OrigY
			}
			if it.OrigX < xMin {
				xMin = it.OrigX
			}
			if it.OrigX > xMax {
				xMax = it.OrigX
			}
			ySet[it.Y] = true
		}
		// Distinct flattened Ys of the block. They come out of a map, so
		// their order is unspecified; nothing in this function reads lineYs.
		var lineYs []float64
		for y := range ySet {
			lineYs = append(lineYs, y)
		}

		// Infer column split boundaries from adjacent separator blocks.
		// When two column blocks are separated by Y=-1, use the adjacent
		// block's edge as the split boundary.
		// Only apply to LEFT column blocks (xMax < next block's xMin).
		rightSplitX := 0.0 // fields >= rightSplitX should go to the next block
		if bi+1 < len(blocks) && blocks[bi+1].isSep && bi+2 < len(blocks) {
			next := blocks[bi+2]
			if !next.isSep && len(next.items) > 0 {
				nextXMin := next.items[0].OrigX
				for _, it := range next.items {
					if it.OrigX < nextXMin {
						nextXMin = it.OrigX
					}
				}
				// Only apply rightSplitX to left column block
				if nextXMin > xMax {
					rightSplitX = nextXMin
				}
			}
		}

		// Cap effective xMax at right split boundary.
		// NOTE(review): rightSplitX is only ever set to a value greater than
		// xMax (see the nextXMin > xMax test above), so this condition looks
		// like it can never be true and effectiveXMax always equals xMax.
		effectiveXMax := xMax
		if rightSplitX > 0 && rightSplitX < xMax {
			effectiveXMax = rightSplitX
		}

		metas = append(metas, blockMeta{yMin: yMin, yMax: yMax, origYMin: origYMin, origYMax: origYMax, xMin: xMin, xMax: effectiveXMax, lineYs: lineYs, rightSplitX: rightSplitX})
	}

	// Assign each form field to the best block, and remap its flatY
	// to match the nearest text item's flatY within that block.
	// This is necessary because form field matching happens before column
	// splitting, so the field's flatY may not match any text line in the
	// correct column block after splitting.

	for _, ff := range formFields {
		bestIdx := -1
		bestScore := -math.MaxFloat64

		for bi, m := range metas {
			if blocks[bi].isSep {
				continue
			}

			// Y distance: use OrigY (unchanged by column splitting)
			// instead of flatY, so fields match the correct column block.
			// Zero when the field lies inside the block's OrigY range.
			yDist := 0.0
			if ff.OrigY < m.origYMin {
				yDist = m.origYMin - ff.OrigY
			} else if ff.OrigY > m.origYMax {
				yDist = ff.OrigY - m.origYMax
			}

			// Hard Y-distance cutoff: skip blocks that are too far away.
			// This prevents narrow column blocks from absorbing fields
			// that belong to adjacent full-width blocks.
			if yDist > 30 {
				continue
			}

			// Column split enforcement: if this block has a right split
			// boundary and the form field is on or past it, hard-reject.
			// This prevents left-column blocks from absorbing right-column
			// form fields when text extends across the column gap.
			if m.rightSplitX > 0 && ff.OrigX >= m.rightSplitX {
				continue
			}

			// X distance: penalize if outside block's X range (using OrigX)
			xDist := 0.0
			if ff.OrigX < m.xMin {
				xDist = m.xMin - ff.OrigX
			} else if ff.OrigX > m.xMax {
				xDist = ff.OrigX - m.xMax
			}

			// X boundary distance: prefer block whose left edge (xMin) is
			// closest to the field's X. This correctly assigns form fields
			// to their column.
			xMinDist := math.Abs(ff.OrigX - m.xMin)

			// Prefer narrow blocks (column blocks) when X fits
			blockWidth := m.xMax - m.xMin
			narrowBonus := 0.0
			if xDist == 0 && blockWidth < 300 {
				narrowBonus = 100
			}

			// Higher is better. Y distance is weighted 10x; on equal scores
			// the earlier block wins because only a strictly greater score
			// replaces the current best.
			score := -xMinDist + narrowBonus - yDist*10 - xDist
			if score > bestScore {
				bestScore = score
				bestIdx = bi
			}
		}

		if bestIdx >= 0 && !blocks[bestIdx].isSep {
			// Remap form field's flatY to the nearest text item's flatY
			// within this block. First, check if the field's matched flatY
			// already has text items at that Y — if so, keep it. Only remap
			// if the field came from a different column block (its flatY
			// doesn't match any text in this block).
			existingFlatY := ff.Y
			hasMatchAtY := false
			for _, item := range blocks[bestIdx].items {
				if math.Abs(item.Y-existingFlatY) < fontSize*0.5 {
					hasMatchAtY = true
					break
				}
			}
			if !hasMatchAtY {
				// Remap: use raw OrigY distance for matching.
				// Text center Y (OrigY + fontSize/2) can misplace fields that
				// sit between two text lines (e.g., email field between line 1
				// and line 2 of a multi-line block) — center-based matching
				// pulls them toward the line with the closer center, but the
				// field often belongs to the lower line. Raw OrigY distance
				// correctly snaps inter-line fields to the physically nearer line.
				bestFlatY := existingFlatY
				bestRawDist := math.MaxFloat64
				for _, item := range blocks[bestIdx].items {
					rawDist := math.Abs(ff.OrigY - item.OrigY)
					if rawDist < bestRawDist {
						bestRawDist = rawDist
						bestFlatY = item.Y
					}
				}
				// ff is the loop's copy of the field, so this only affects
				// the value merged below.
				ff.Y = bestFlatY
			}
			// Fields are merged one at a time, so a block may already contain
			// previously merged form fields when the next one is scored against
			// its (precomputed, text-only) meta.
			blocks[bestIdx].items = mergeByY(blocks[bestIdx].items, []TextItem{ff}, fontSize)
		}
	}

	// Reconstruct the flat item list: block items in order, with a fresh
	// separator item wherever one was removed during splitting.
	var result []TextItem
	for bi, b := range blocks {
		if b.isSep {
			// Infer page from adjacent block items: the first item of the
			// preceding block, or 0 when that block is empty.
			pg := 0
			if bi > 0 && len(blocks[bi-1].items) > 0 {
				pg = blocks[bi-1].items[0].Page
			}
			result = append(result, TextItem{Text: "", Y: -1, Page: pg})
		} else {
			result = append(result, b.items...)
		}
	}
	return result
}

// mergeByY merges form fields into text items, preserving Y order.
//
// Both inputs are sorted by Y (in place, stably) and then consumed line by
// line. A line starts at the smaller of the next text Y and the next field Y
// and contains every following text item and field whose Y is within half of
// fontSize of that start. Within a line, text items and fields are ordered by
// OrigX so they read left to right.
//
// Wide fields (XMax-X > 50) are instead placed at the end of the line when
// the field's X is to the right of ALL text items on the line. If there is
// text further right (multi-label lines like "Case No.: [F] Division: [F]"),
// the field is sorted by OrigX like everything else so it interleaves
// correctly between the labels.
//
// Once one input is exhausted, the remainder of the other is appended as is.
// A line always absorbs the item that defines its Y, so the merge terminates
// even when fontSize is zero or negative.
func mergeByY(texts []TextItem, formFFs []TextItem, fontSize float64) []TextItem {
	threshold := fontSize * 0.5

	// Stable sorts keep the incoming order of items that share a Y.
	sort.SliceStable(texts, func(i, j int) bool { return texts[i].Y < texts[j].Y })
	sort.SliceStable(formFFs, func(i, j int) bool { return formFFs[i].Y < formFFs[j].Y })

	var result []TextItem
	tIdx, fIdx := 0, 0

	for tIdx < len(texts) || fIdx < len(formFFs) {
		if fIdx >= len(formFFs) {
			result = append(result, texts[tIdx:]...)
			break
		}
		if tIdx >= len(texts) {
			result = append(result, formFFs[fIdx:]...)
			break
		}

		// The line is anchored at whichever comes first, text or field.
		currentY := texts[tIdx].Y
		if formFFs[fIdx].Y < currentY {
			currentY = formFFs[fIdx].Y
		}
		onLine := func(y float64) bool {
			return y == currentY || math.Abs(y-currentY) < threshold
		}

		// Collect the text on this line and its right-most extent, which
		// drives the trailing decision for wide fields.
		var lineItems []TextItem
		maxTextX := 0.0
		for tIdx < len(texts) && onLine(texts[tIdx].Y) {
			if texts[tIdx].XMax > maxTextX {
				maxTextX = texts[tIdx].XMax
			}
			lineItems = append(lineItems, texts[tIdx])
			tIdx++
		}

		// Collect the fields on this line: wide fields that start right of
		// all text trail the line, everything else is interleaved by OrigX.
		var trailing []TextItem
		for fIdx < len(formFFs) && onLine(formFFs[fIdx].Y) {
			ff := formFFs[fIdx]
			if ff.XMax-ff.X > 50 && ff.X > maxTextX {
				trailing = append(trailing, ff)
			} else {
				lineItems = append(lineItems, ff)
			}
			fIdx++
		}

		sort.SliceStable(lineItems, func(i, j int) bool {
			return lineItems[i].OrigX < lineItems[j].OrigX
		})
		result = append(result, lineItems...)
		result = append(result, trailing...)
	}
	return result
}