package pdfcontent

import (
	"cmp"
	"math"
	"slices"
)

// FlattenLines takes pre-grouped lines (e.g., from pdftotext -bbox-layout blocks)
// and flattens multi-column layouts to single-column reading order. The returned
// items carry flattened X/Y positions; OrigX/OrigY are carried over unchanged for
// form field matching.
//
// Algorithm:
//  1. For each line, find large horizontal gaps (at least 15% of the page width).
//  2. Detect "column break regions": X ranges where such gaps line up on
//     consecutive lines.
//  3. Split multi-column lines at the break region into left/right parts.
//  4. Within each run of column lines, output all left parts top-to-bottom, then
//     all right parts top-to-bottom; single-column lines keep their positions.
//
// Inputs with fewer than three lines, or without any break region, are only
// re-positioned, never reordered. An empty input yields nil.
func FlattenLines(lines [][]TextItem, pageWidth float64, avgFontSize float64) []TextItem {
	switch {
	case len(lines) == 0:
		return nil
	case len(lines) < 3:
		// Too few lines to look for a repeating column break.
		return assignFlattenedPositions(lines, avgFontSize)
	}

	// Collect every sufficiently wide gap, tagged with the line it came from.
	gapThreshold := pageWidth * 0.15
	var collected []lineGap
	for idx := range lines {
		for _, found := range findLineGaps(lines[idx], gapThreshold) {
			collected = append(collected, lineGap{
				lineIndex: idx,
				gapStart:  found.start,
				gapEnd:    found.end,
				gapWidth:  found.width,
			})
		}
	}

	// Reorder only when the gaps form at least one column break region.
	regions := findBreakRegions(collected, lines, pageWidth)
	if len(regions) > 0 {
		return reorderWithBreaks(lines, regions, avgFontSize, pageWidth)
	}
	return assignFlattenedPositions(lines, avgFontSize)
}

// FlattenColumns detects multi-column layouts and reorders text for
// single-column reading. It groups the items into lines by Y proximity, using
// half the average font size as the tolerance, and hands those lines to
// FlattenLines. Inputs with zero or one item are returned as-is.
func FlattenColumns(items []TextItem, pageWidth float64) []TextItem {
	if len(items) < 2 {
		return items
	}

	fontSize := averageFontSize(items)
	grouped := groupIntoLines(items, fontSize*0.5)
	return FlattenLines(grouped, pageWidth, fontSize)
}

// gap is a horizontal stretch of empty space between two neighbouring items
// on a line.
type gap struct {
	start, end float64 // left and right X bounds of the gap
	width      float64 // extent of the gap (end - start)
}

// lineGap is a gap together with the line it was found on.
type lineGap struct {
	lineIndex int // index of the line that contains the gap

	gapStart, gapEnd float64 // left and right X bounds of the gap
	gapWidth         float64 // extent of the gap
}

// breakRegion is a column break that shows up at roughly the same X position
// on several lines.
type breakRegion struct {
	xMin, xMax float64 // horizontal extent of the break

	// lineIndices holds the indices of the lines that have a gap in this region.
	lineIndices []int
}

// averageFontSize returns the arithmetic mean of the items' FontSize values,
// or 12 when there are no items.
func averageFontSize(items []TextItem) float64 {
	count := len(items)
	if count == 0 {
		return 12
	}
	var total float64
	for i := range items {
		total += items[i].FontSize
	}
	return total / float64(count)
}

// groupIntoLines partitions items into lines by Y proximity.
//
// The items are copied and sorted by Y, then walked in order: an item joins
// the current line while its Y is within threshold of the line's first item,
// otherwise it starts a new line. Each returned line is sorted by X. The
// caller's slice is not modified. An empty input yields nil.
func groupIntoLines(items []TextItem, threshold float64) [][]TextItem {
	if len(items) == 0 {
		return nil
	}

	// Work on a copy so the caller's ordering is left untouched.
	sorted := slices.Clone(items)
	sortByYThenX(sorted)

	var lines [][]TextItem
	currentLine := []TextItem{sorted[0]}
	for _, item := range sorted[1:] {
		// Compare against the line's first item, not the previous one, so the
		// reference Y stays fixed while the line is being filled.
		if math.Abs(item.Y-currentLine[0].Y) < threshold {
			currentLine = append(currentLine, item)
			continue
		}
		sortByX(currentLine)
		lines = append(lines, currentLine)
		currentLine = []TextItem{item}
	}
	sortByX(currentLine)
	return append(lines, currentLine)
}

// sortByYThenX sorts items in place by ascending Y, breaking ties by
// ascending X. The sort is stable, so items equal in both coordinates keep
// their relative order.
func sortByYThenX(items []TextItem) {
	slices.SortStableFunc(items, func(a, b TextItem) int {
		if byY := cmp.Compare(a.Y, b.Y); byY != 0 {
			return byY
		}
		return cmp.Compare(a.X, b.X)
	})
}

// sortByX sorts items in place by ascending X. The sort is stable, so items
// with equal X keep their relative order.
func sortByX(items []TextItem) {
	slices.SortStableFunc(items, func(a, b TextItem) int {
		return cmp.Compare(a.X, b.X)
	})
}

// isUnderscoreText reports whether text is mostly underscores (form underline
// decorations like "________," or "________"): it must contain at least three
// underscores and they must make up more than half of its characters.
//
// Both counts are taken in runes so that multi-byte characters elsewhere in
// the text do not skew the ratio.
func isUnderscoreText(text string) bool {
	underscores, total := 0, 0
	for _, r := range text {
		total++
		if r == '_' {
			underscores++
		}
	}
	// underscores >= 3 implies total >= 3, so the division is safe.
	return underscores >= 3 && float64(underscores)/float64(total) > 0.5
}

// findLineGaps returns the horizontal gaps of at least minGap between
// neighbouring items of line, measured edge to edge (XMax of the previous
// item to X of the next one).
//
// Gaps that are covered by an underscore decoration are skipped, and gaps
// that lie closer than minGap to each other are merged into one. Lines with
// fewer than two items have no gaps and yield nil.
func findLineGaps(line []TextItem, minGap float64) []gap {
	if len(line) <= 1 {
		return nil
	}

	var rawGaps []gap
	for i := 1; i < len(line); i++ {
		gapStart, gapEnd := line[i-1].XMax, line[i].X
		gw := gapEnd - gapStart
		if gw < minGap {
			continue
		}
		// Form underline fields (e.g., "_____files_____") can span the space
		// between left and right text on the same logical line. Such a gap is
		// not a genuine column break.
		if underscoreBridgesGap(line, gapStart, gapEnd) {
			continue
		}
		rawGaps = append(rawGaps, gap{start: gapStart, end: gapEnd, width: gw})
	}
	if len(rawGaps) <= 1 {
		return rawGaps
	}

	// Merge gaps separated by less than minGap: treat them as one column break.
	merged := []gap{rawGaps[0]}
	for _, cur := range rawGaps[1:] {
		prev := &merged[len(merged)-1]
		if cur.start-prev.end < minGap {
			prev.end = cur.end
			prev.width = prev.end - prev.start
			continue
		}
		merged = append(merged, cur)
	}
	return merged
}

// underscoreBridgesGap reports whether any underscore item on the line
// overlaps the open interval (gapStart, gapEnd). Every item is inspected, so
// a long underline that starts before the gap's left neighbour is found too.
func underscoreBridgesGap(line []TextItem, gapStart, gapEnd float64) bool {
	for _, item := range line {
		if item.XMax > gapStart && item.X < gapEnd && isUnderscoreText(item.Text) {
			return true
		}
	}
	return false
}

// findBreakRegions identifies X ranges where gaps consistently appear across
// consecutive lines (indicating true column breaks).
//
// Gaps are grouped by center: a gap joins the first group whose reference
// center (the center of the gap that opened the group) is within 15% of the
// page width. A group becomes a region when its lines contain a run of at
// least two consecutive line indices. The region bounds are then taken from
// a finer gap scan (5% of page width) of the group's lines, using gaps whose
// center is within 10% of the page width of the group's center.
func findBreakRegions(allGaps []lineGap, lines [][]TextItem, pageWidth float64) []breakRegion {
	if len(allGaps) == 0 {
		return nil
	}

	type centerGroup struct {
		x       float64 // center of the gap that opened the group
		members []int   // line indices of all gaps in the group
	}

	groupTol := pageWidth * 0.15
	var groups []centerGroup
	for _, lg := range allGaps {
		mid := (lg.gapStart + lg.gapEnd) / 2
		match := -1
		for k := range groups {
			if math.Abs(mid-groups[k].x) < groupTol {
				match = k
				break
			}
		}
		if match < 0 {
			groups = append(groups, centerGroup{x: mid, members: []int{lg.lineIndex}})
			continue
		}
		groups[match].members = append(groups[match].members, lg.lineIndex)
	}

	var regions []breakRegion
	for _, grp := range groups {
		if consecutiveCount(grp.members) < 2 {
			continue
		}
		// Seeds are chosen so the first matching gap tightens both bounds;
		// when no gap matches, the seeds are kept as they are.
		lo, hi := pageWidth, 0.0
		for _, li := range grp.members {
			for _, g := range findLineGaps(lines[li], pageWidth*0.05) {
				if math.Abs((g.start+g.end)/2-grp.x) < pageWidth*0.1 {
					if g.start < lo {
						lo = g.start
					}
					if g.end > hi {
						hi = g.end
					}
				}
			}
		}
		regions = append(regions, breakRegion{xMin: lo, xMax: hi, lineIndices: grp.members})
	}
	return regions
}

// consecutiveCount returns the length of the longest run of consecutive
// integers among indices. The input may be unsorted and may contain
// duplicates; it is not modified. An empty input yields 0.
func consecutiveCount(indices []int) int {
	if len(indices) == 0 {
		return 0
	}

	// Sort and deduplicate a private copy so the caller's slice is untouched.
	sorted := slices.Clone(indices)
	slices.Sort(sorted)
	sorted = slices.Compact(sorted)

	best, run := 1, 1
	for i := 1; i < len(sorted); i++ {
		if sorted[i] != sorted[i-1]+1 {
			run = 1
			continue
		}
		run++
		if run > best {
			best = run
		}
	}
	return best
}

// reorderWithBreaks splits lines at break regions and reorders them for
// single-column output.
//
// Lines listed in any region are marked as column lines; each region's
// longest consecutive run is then grown upward and downward over adjacent
// lines for which spansBreak holds. Maximal runs of marked lines form
// "column blocks"; everything else is emitted as-is. Within a block the left
// parts are emitted top-to-bottom, followed by the right parts.
//
// Emitted items receive new Y values that reflect their position in the
// flattened output, so downstream formatting logic (paragraph detection,
// heading rules, etc.) sees the rearranged layout. Column blocks are
// surrounded by marker items with empty Text and Y == -1.
func reorderWithBreaks(lines [][]TextItem, regions []breakRegion, fontSize float64, pageWidth float64) []TextItem {
	if len(regions) == 0 {
		return assignFlattenedPositions(lines, fontSize)
	}

	// inBlock records which line indices belong to a column block.
	inBlock := make(map[int]bool)
	for _, region := range regions {
		for _, idx := range region.lineIndices {
			inBlock[idx] = true
		}
		// Grow the region's main run over neighbouring lines that also fit
		// the column layout.
		first, last := startLineRange(region.lineIndices)
		for first > 0 && spansBreak(lines[first-1], region, pageWidth) {
			first--
			inBlock[first] = true
		}
		for last < len(lines)-1 && spansBreak(lines[last+1], region, pageWidth) {
			last++
			inBlock[last] = true
		}
	}

	// Cut the page into maximal runs of column lines and of ordinary lines.
	type span struct {
		first, last int // inclusive line range
		columns     bool
	}
	var spans []span
	for i := 0; i < len(lines); {
		kind := inBlock[i]
		begin := i
		for i < len(lines) && inBlock[i] == kind {
			i++
		}
		spans = append(spans, span{first: begin, last: i - 1, columns: kind})
	}

	// lineHeight is the vertical spacing between consecutive lines in the
	// flattened output; row counts the output lines produced so far.
	lineHeight := math.Ceil(fontSize) + 4
	row := 0

	var out []TextItem
	emit := func(line []TextItem) {
		out = append(out, assignLinePositions(line, float64(row)*lineHeight)...)
		row++
	}
	blank := TextItem{Text: "", Y: -1}

	for _, sp := range spans {
		if !sp.columns {
			// Ordinary lines keep their order.
			for idx := sp.first; idx <= sp.last; idx++ {
				emit(lines[idx])
			}
			continue
		}

		// Marker before the column block.
		out = append(out, blank)

		var lefts, rights [][]TextItem
		for idx := sp.first; idx <= sp.last; idx++ {
			l, r := splitLine(lines[idx], regions)
			if len(l) > 0 {
				lefts = append(lefts, l)
			}
			if len(r) > 0 {
				rights = append(rights, r)
			}
		}

		for _, l := range lefts {
			emit(l)
		}
		// Marker between the columns, only when both have content.
		if len(lefts) > 0 && len(rights) > 0 {
			out = append(out, blank)
		}
		for _, r := range rights {
			emit(r)
		}

		// Skip one output row after the block to leave a paragraph gap, then
		// add the closing marker.
		row++
		out = append(out, blank)
	}

	return out
}

// assignFlattenedPositions gives every line a sequential flattened Y position
// while keeping line grouping intact. It is the path taken when no column
// layout is detected.
//
// Lines at (nearly) the same Y are merged first, with a tolerance of half the
// font size: pdftotext sometimes splits one PDF text line into several <line>
// elements (e.g., around underscores or form fields), which would otherwise
// scatter text such as "{name of business}" over separate flattened rows.
func assignFlattenedPositions(lines [][]TextItem, fontSize float64) []TextItem {
	rowHeight := math.Ceil(fontSize) + 4

	var out []TextItem
	for row, line := range mergeSameYLinesForFlat(lines, fontSize*0.5) {
		out = append(out, assignLinePositions(line, float64(row)*rowHeight)...)
	}
	return out
}

// mergeSameYLinesForFlat merges consecutive lines whose first item's Y lies
// within yTol of the Y that opened the current group into single combined
// lines, each sorted by X.
//
// Empty lines are skipped wherever they occur. Merged lines are built from
// copies, so neither appending to a group nor sorting it touches the caller's
// line slices. An input of zero or one line is returned unchanged.
func mergeSameYLinesForFlat(lines [][]TextItem, yTol float64) [][]TextItem {
	if len(lines) <= 1 {
		return lines
	}

	var (
		result   [][]TextItem
		current  []TextItem // nil until the first non-empty line is seen
		currentY float64
	)
	for _, line := range lines {
		if len(line) == 0 {
			continue
		}
		if current != nil && math.Abs(line[0].Y-currentY) <= yTol {
			current = append(current, line...)
			continue
		}
		if current != nil {
			sortByX(current)
			result = append(result, current)
		}
		// Clone rather than alias: a later append could otherwise write into
		// spare capacity of the caller's backing array.
		current = slices.Clone(line)
		currentY = line[0].Y
	}
	if current != nil {
		sortByX(current)
		result = append(result, current)
	}
	return result
}

// assignLinePositions returns a repositioned copy of line for single-column
// output: Y becomes flattenedY, YMax keeps each item's original height on top
// of it, and X/XMax are shifted so the leftmost item starts at 0 (rounded
// down to whole units). Text, OrigX, OrigY, FontSize, Page, PageHeight and
// FormField are carried over unchanged. An empty line yields nil.
func assignLinePositions(line []TextItem, flattenedY float64) []TextItem {
	if len(line) == 0 {
		return nil
	}

	// The leftmost X on the line is the baseline for the shift.
	baseX := line[0].X
	for i := 1; i < len(line); i++ {
		if line[i].X < baseX {
			baseX = line[i].X
		}
	}

	out := make([]TextItem, 0, len(line))
	for _, src := range line {
		out = append(out, TextItem{
			Text:       src.Text,
			X:          math.Floor(src.X - baseX),
			Y:          flattenedY,
			XMax:       math.Floor(src.XMax - baseX),
			YMax:       flattenedY + (src.YMax - src.Y),
			OrigX:      src.OrigX,
			OrigY:      src.OrigY,
			FontSize:   src.FontSize,
			Page:       src.Page,
			PageHeight: src.PageHeight,
			FormField:  src.FormField,
		})
	}
	return out
}

// startLineRange returns the first and last value of the longest run of
// consecutive integers in indices. The input may be unsorted and may contain
// duplicates; it is not modified. When several runs share the maximum length
// the one with the smallest values wins. An empty input yields (0, 0).
func startLineRange(indices []int) (int, int) {
	if len(indices) == 0 {
		return 0, 0
	}

	// Sort and deduplicate a copy; a duplicate would otherwise cut a run in two.
	sorted := slices.Clone(indices)
	slices.Sort(sorted)
	sorted = slices.Compact(sorted)

	bestStart, bestEnd := sorted[0], sorted[0]
	runStart := sorted[0]
	for i := 1; i < len(sorted); i++ {
		if sorted[i] != sorted[i-1]+1 {
			runStart = sorted[i]
			continue
		}
		// Strictly longer only, so the earliest longest run is kept.
		if sorted[i]-runStart > bestEnd-bestStart {
			bestStart, bestEnd = runStart, sorted[i]
		}
	}
	return bestStart, bestEnd
}

// spansBreak reports whether a line belongs in the column block of region r.
// It returns true in two situations:
//
//  1. The line has content on both sides of the region's midpoint AND the
//     gap measured at the region is at least 30% of the region's width
//     (a standard two-column row like "Name: +AH ... Name: +AI").
//  2. All content sits on exactly one side of the midpoint AND the content
//     is narrower than 40% of the page width (a row such as
//     "Florida Bar Number:" that only appears in one column).
//
// Full-width paragraph lines fail both checks.
func spansBreak(line []TextItem, r breakRegion, pageWidth float64) bool {
	splitX := (r.xMin + r.xMax) / 2

	var leftSide, rightSide bool
	var breakGap, lastXMax, rightmost float64
	leftmost := math.MaxFloat64

	for _, it := range line {
		switch {
		case it.XMax < splitX:
			leftSide = true
		case it.X > splitX:
			rightSide = true
		default:
			// The item itself straddles the midpoint and so touches both columns.
			leftSide, rightSide = true, true
		}

		// lastXMax == 0 doubles as "no previous item yet". The last pair of
		// neighbours that overlaps the region determines the gap.
		if lastXMax > 0 && lastXMax < r.xMax && it.X > r.xMin {
			breakGap = it.X - lastXMax
		}
		lastXMax = it.XMax

		if it.X < leftmost {
			leftmost = it.X
		}
		if it.XMax > rightmost {
			rightmost = it.XMax
		}
	}

	// Case 1: both sides populated with a significant gap at the break.
	if leftSide && rightSide && breakGap >= (r.xMax-r.xMin)*0.3 {
		return true
	}
	// Case 2: one side only, and narrow enough to be a column entry.
	return rightmost-leftmost < pageWidth*0.4 && leftSide != rightSide
}

// splitLine splits a line into left and right parts at the midpoint of the
// primary (first) break region: items whose X lies left of the midpoint go to
// left, all others to right. Item order is preserved within each part.
//
// Without any region there is nothing to split at, so the whole line is
// returned as the left part.
func splitLine(line []TextItem, regions []breakRegion) (left, right []TextItem) {
	if len(regions) == 0 {
		return append(left, line...), nil
	}

	mid := (regions[0].xMin + regions[0].xMax) / 2
	for _, item := range line {
		if item.X < mid {
			left = append(left, item)
		} else {
			right = append(right, item)
		}
	}
	return left, right
}

// mergeSameColumnBlocks combines narrow blocks (widest line under 40% of the
// page width) that are vertically close to each other, so that FlattenLines
// can later detect column breaks in them. Lines from different columns at the
// same Y stay separate lines. Wide blocks (full-width paragraphs) pass
// through unchanged and are re-inserted by Y position.
//
// The blocks slice is sorted in place (by Y, then XMin). If there are no
// narrow blocks, the sorted input is returned.
func mergeSameColumnBlocks(blocks []BlockLines, pageWidth float64) []BlockLines {
	if len(blocks) == 0 {
		return blocks
	}

	// Order blocks top-to-bottom, and left-to-right on equal Y.
	slices.SortFunc(blocks, func(a, b BlockLines) int {
		if byY := cmp.Compare(blockMinY(a), blockMinY(b)); byY != 0 {
			return byY
		}
		return cmp.Compare(a.XMin, b.XMin)
	})

	// Partition into narrow (single-column) and wide (full-width) blocks.
	narrowLimit := pageWidth * 0.4
	var narrow, wide []BlockLines
	for _, bl := range blocks {
		if blockMaxWidth(bl) < narrowLimit {
			narrow = append(narrow, bl)
			continue
		}
		wide = append(wide, bl)
	}
	if len(narrow) == 0 {
		return blocks
	}

	// Cluster narrow blocks by vertical proximity. A block joins the first
	// cluster whose Y range, widened by yClusterGap on both ends, it overlaps.
	// This keeps distant blocks (e.g., a signature area at Y=621) from merging
	// with column sections (Y=439-567).
	const yClusterGap = 30.0
	type yCluster struct {
		members     []BlockLines
		top, bottom float64 // running Y range of the members
	}
	var clusters []yCluster
	for _, bl := range narrow {
		top, bottom := blockMinY(bl), blockMaxY(bl)
		home := -1
		for k := range clusters {
			if top <= clusters[k].bottom+yClusterGap && bottom >= clusters[k].top-yClusterGap {
				home = k
				break
			}
		}
		if home < 0 {
			clusters = append(clusters, yCluster{members: []BlockLines{bl}, top: top, bottom: bottom})
			continue
		}
		c := &clusters[home]
		c.members = append(c.members, bl)
		if top < c.top {
			c.top = top
		}
		if bottom > c.bottom {
			c.bottom = bottom
		}
	}

	// Each cluster is merged on its own.
	var result []BlockLines
	for _, c := range clusters {
		result = append(result, mergeOneColumnCluster(c.members, pageWidth)...)
	}

	// Put each wide block in front of the first result block that starts
	// below it, or at the end if there is none.
	for _, wb := range wide {
		wbY := blockMinY(wb)
		at := slices.IndexFunc(result, func(r BlockLines) bool {
			return blockMinY(r) > wbY
		})
		if at < 0 {
			result = append(result, wb)
		} else {
			result = slices.Insert(result, at, wb)
		}
	}
	return result
}

// blockMaxY returns the largest Y among all items of a block. The search
// starts from 0, so a block without items (or with only non-positive Y
// values) yields 0.
func blockMaxY(bl BlockLines) float64 {
	var highest float64
	for _, line := range bl.Lines {
		for i := range line {
			if y := line[i].Y; y > highest {
				highest = y
			}
		}
	}
	return highest
}

// mergeOneColumnCluster merges the blocks of one Y-proximate cluster into a
// single block when they form a multi-column layout, and returns the cluster
// unchanged otherwise.
//
// The blocks' XMin values are clustered with a tolerance of 20. A layout
// counts as multi-column when that yields at least two column positions and
// the median distance between neighbouring positions is at least 60. The
// merged block takes the leftmost column position as its XMin. pageWidth is
// currently unused.
func mergeOneColumnCluster(cluster []BlockLines, pageWidth float64) []BlockLines {
	starts := make([]float64, len(cluster))
	for i, bl := range cluster {
		starts[i] = bl.XMin
	}
	slices.Sort(starts)

	columns := clusterFloats(starts, 20)
	if len(columns) < 2 {
		return cluster // not multi-column
	}
	if medianGap(columns) < 60 {
		return cluster // gaps too small for true columns
	}

	// Collect every line of the cluster and order lines that share a Y.
	allLines := collectAllLines(cluster)
	mergedLines := mergeSameYLines(allLines, columns, 8)

	return []BlockLines{{XMin: columns[0], Lines: mergedLines}}
}

// collectAllLines gathers the non-empty lines of all given blocks and sorts
// them by the Y of their first item. Empty lines carry no position and are
// dropped. The sort is stable, so lines with equal Y keep the order in which
// their blocks were passed in.
func collectAllLines(narrow []BlockLines) [][]TextItem {
	var all [][]TextItem
	for _, bl := range narrow {
		for _, line := range bl.Lines {
			if len(line) > 0 {
				all = append(all, line)
			}
		}
	}
	slices.SortStableFunc(all, func(a, b []TextItem) int {
		return cmp.Compare(a[0].Y, b[0].Y)
	})
	return all
}

// mergeSameYLines reorders lines so that lines sharing a Y position are
// adjacent and ordered left to right.
//
// Walking the input in order, each not-yet-placed line opens a group and
// pulls in every later unplaced line whose first item's Y is within yTol of
// its own. The group's lines are ordered by their leftmost X and appended to
// the result as separate lines; items are never concatenated across lines,
// so later column splitting can still assign each line to a column by its X
// position. Every line is expected to be non-empty. columnXMins is currently
// unused. An empty input yields nil.
func mergeSameYLines(lines [][]TextItem, columnXMins []float64, yTol float64) [][]TextItem {
	if len(lines) == 0 {
		return nil
	}

	taken := make([]bool, len(lines))
	var result [][]TextItem
	for i := range lines {
		if taken[i] {
			continue
		}
		taken[i] = true

		anchorY := lines[i][0].Y
		group := [][]TextItem{lines[i]}
		for j := i + 1; j < len(lines); j++ {
			if !taken[j] && math.Abs(lines[j][0].Y-anchorY) <= yTol {
				taken[j] = true
				group = append(group, lines[j])
			}
		}

		// Leftmost line first; a single-line group is left as it is.
		slices.SortFunc(group, func(a, b []TextItem) int {
			return cmp.Compare(minX(a), minX(b))
		})
		result = append(result, group...)
	}
	return result
}

// minX returns the smallest X among the items of a line, or 0 for an empty
// line.
func minX(line []TextItem) float64 {
	if len(line) == 0 {
		return 0
	}
	smallest := line[0].X
	for _, it := range line[1:] {
		if it.X < smallest {
			smallest = it.X
		}
	}
	return smallest
}

// blockMaxWidth returns the width of the widest line in a block, where a
// line's width is the distance from its smallest X to its largest XMax.
// Empty lines are ignored; a block without any items yields 0.
func blockMaxWidth(bl BlockLines) float64 {
	widest := 0.0
	for _, line := range bl.Lines {
		if len(line) == 0 {
			continue
		}
		left, right := line[0].X, line[0].XMax
		for _, it := range line[1:] {
			if it.X < left {
				left = it.X
			}
			if it.XMax > right {
				right = it.XMax
			}
		}
		if w := right - left; w > widest {
			widest = w
		}
	}
	return widest
}

// blockMinY returns the Y of the first item on the block's first non-empty
// line. That is the block's smallest Y when its lines are stored
// top-to-bottom, which this function assumes rather than checks. A block
// without any items yields math.MaxFloat64.
func blockMinY(bl BlockLines) float64 {
	for _, line := range bl.Lines {
		if len(line) > 0 {
			return line[0].Y
		}
	}
	return math.MaxFloat64
}

// medianGap returns the median of the differences between adjacent values.
// The input is expected to be sorted ascending and is not modified. For an
// even number of differences the two middle ones are averaged. Fewer than two
// values yield 0.
func medianGap(sorted []float64) float64 {
	if len(sorted) < 2 {
		return 0
	}

	gaps := make([]float64, len(sorted)-1)
	for i := range gaps {
		gaps[i] = sorted[i+1] - sorted[i]
	}
	slices.Sort(gaps)

	mid := len(gaps) / 2
	if len(gaps)%2 == 0 {
		return (gaps[mid-1] + gaps[mid]) / 2
	}
	return gaps[mid]
}

// clusterFloats walks ascending values and chains them into clusters: a value
// extends the current cluster while it lies within tol of the previous value,
// otherwise it opens a new cluster. The result holds one midpoint per
// cluster, computed as the mean of the cluster's first and last value. An
// empty input yields nil.
func clusterFloats(sorted []float64, tol float64) []float64 {
	if len(sorted) == 0 {
		return nil
	}

	var mids []float64
	first, last := sorted[0], sorted[0]
	for _, v := range sorted[1:] {
		if v-last <= tol {
			last = v
			continue
		}
		// Close the finished cluster and start the next one at v.
		mids = append(mids, (first+last)/2)
		first, last = v, v
	}
	return append(mids, (first+last)/2)
}

// isMultiColumnBlock reports whether a merged block looks like a two-column
// layout, judged by where its lines start.
//
// The X of each non-empty line's first item is clustered with a tolerance of
// 30. The block is multi-column when this yields exactly two clusters whose
// midpoints are at least 100 apart and each side holds at least two lines.
// Blocks with three or more scattered start positions are treated as
// full-width text with varied indentation. A side with a single line is
// treated as an outlier (e.g., "Dated:" at the left margin followed by
// right-aligned signature fields, or one indented continuation line) rather
// than a column.
func isMultiColumnBlock(bl BlockLines) bool {
	starts := make([]float64, 0, len(bl.Lines))
	for _, line := range bl.Lines {
		if len(line) > 0 {
			starts = append(starts, line[0].X)
		}
	}
	slices.Sort(starts)

	columns := clusterFloats(starts, 30)
	if len(columns) == 2 && columns[1]-columns[0] >= 100 {
		// Count the lines on each side of the midpoint between the columns.
		boundary := (columns[0] + columns[1]) / 2
		leftLines := 0
		for _, x := range starts {
			if x <= boundary {
				leftLines++
			}
		}
		rightLines := len(starts) - leftLines
		return leftLines >= 2 && rightLines >= 2
	}
	return false
}

// splitBlockColumns splits the lines of a multi-column merged block into
// columns. It handles both separate-column lines (the whole line belongs to
// one column) and combined-column lines (items from both columns on one
// line, split at the column boundary).
//
// For a block that isMultiColumnBlock rejects, the result is a single column
// holding bl.Lines unchanged. Otherwise the result has exactly two columns,
// left then right, each sorted by Y; empty lines are dropped.
func splitBlockColumns(bl BlockLines) [][][]TextItem {
	if !isMultiColumnBlock(bl) {
		return [][][]TextItem{bl.Lines}
	}

	// Cluster line-start X positions the same way isMultiColumnBlock does.
	starts := make([]float64, 0, len(bl.Lines))
	for _, line := range bl.Lines {
		if len(line) > 0 {
			starts = append(starts, line[0].X)
		}
	}
	slices.Sort(starts)
	columns := clusterFloats(starts, 30)
	if len(columns) < 2 {
		return [][][]TextItem{bl.Lines}
	}

	// The column boundary sits in the middle of the widest distance between
	// neighbouring clusters. splitIdx starts at 1 so it is always a valid
	// "right neighbour" index.
	widest, splitIdx := 0.0, 1
	for i := 1; i < len(columns); i++ {
		if d := columns[i] - columns[i-1]; d > widest {
			widest, splitIdx = d, i
		}
	}
	splitX := (columns[splitIdx-1] + columns[splitIdx]) / 2

	var leftCol, rightCol [][]TextItem
	for _, line := range bl.Lines {
		if len(line) == 0 {
			continue
		}

		// Classify items by their left edge (X), not XMax, so an item that
		// starts left of splitX but extends past it (e.g., "Contempt" at
		// X=114, XMax=282 with splitX=201) still counts as a left-side item.
		hasLeft, hasRight := false, false
		lastLeftXMax := 0.0
		firstRightX := math.MaxFloat64
		for _, item := range line {
			if item.X < splitX {
				hasLeft = true
				if item.XMax > lastLeftXMax {
					lastLeftXMax = item.XMax
				}
			} else {
				hasRight = true
				if item.X < firstRightX {
					firstRightX = item.X
				}
			}
		}

		switch {
		case hasLeft && hasRight && firstRightX-lastLeftXMax > 50:
			// A real column break: more than 50 separates the left and right
			// items, so split the line at the boundary. Both halves are
			// non-empty because both sides were seen.
			var leftLine, rightLine []TextItem
			for _, item := range line {
				if item.X < splitX {
					leftLine = append(leftLine, item)
				} else {
					rightLine = append(rightLine, item)
				}
			}
			leftCol = append(leftCol, leftLine)
			rightCol = append(rightCol, rightLine)
		case hasLeft:
			// Either everything is on the left, or continuous text merely
			// runs across splitX (e.g., only 3 between "Contempt" at XMax=282
			// and "Proceedings" at X=285). The whole line stays left.
			leftCol = append(leftCol, line)
		default:
			// Everything is on the right.
			rightCol = append(rightCol, line)
		}
	}

	sortLinesByY(leftCol)
	sortLinesByY(rightCol)
	return [][][]TextItem{leftCol, rightCol}
}

// sortLinesByY sorts lines in place by the Y coordinate of each line's first
// item. The sort is stable, so lines with equal Y keep their relative order.
// Every line must be non-empty.
func sortLinesByY(lines [][]TextItem) {
	slices.SortStableFunc(lines, func(a, b []TextItem) int {
		return cmp.Compare(a[0].Y, b[0].Y)
	})
}

// uniqueSortedFloats returns the values in ascending order with near
// duplicates removed: a value is kept only when it exceeds the previously
// kept value by more than tol. The input is not modified. An empty input
// yields nil.
func uniqueSortedFloats(vals []float64, tol float64) []float64 {
	if len(vals) == 0 {
		return nil
	}

	ordered := append([]float64(nil), vals...)
	slices.Sort(ordered)

	unique := make([]float64, 1, len(ordered))
	unique[0] = ordered[0]
	for _, v := range ordered[1:] {
		if last := unique[len(unique)-1]; v-last > tol {
			unique = append(unique, v)
		}
	}
	return unique
}