package pdfcontent

import (
	"fmt"
	"math"
	"strings"
	"testing"
)

// TestOrigYLineBreak checks that writeMarkdownItems does not render items as
// one run of text when they share a flattened Y but differ in OrigY.
func TestOrigYLineBreak(t *testing.T) {
	// Simulates the case where items from different original PDF lines
	// land on the same flattened Y (e.g., after column flattening).
	// The lineBreakFormatRule should insert a \n when OrigY differs
	// significantly even though flattened Y is the same.
	items := []TextItem{
		{Text: "it", X: 0, Y: 16, OrigY: 200, OrigX: 50, FontSize: 12, Page: 1},
		{Text: "or", X: 20, Y: 16, OrigY: 215, OrigX: 400, FontSize: 12, Page: 1},
		{Text: "affects", X: 40, Y: 16, OrigY: 200, OrigX: 100, FontSize: 12, Page: 1},
	}

	var sb strings.Builder
	writeMarkdownItems(&sb, items, 12)
	result := sb.String()
	t.Logf("Result: %q", result)

	// "or" (OrigY=215) should be on a different line from "it" (OrigY=200)
	// The origYDiff of 15 exceeds threshold*0.4 (4.8)
	if strings.Contains(result, "it or ") {
		t.Errorf("Expected line break between items with different OrigY on same flat Y. Got: %q", result)
	}
}

// TestSameOrigYNoBreak checks that two items with identical Y and OrigY are
// rendered as "Hello World" with no newline in the output.
func TestSameOrigYNoBreak(t *testing.T) {
	// Items on the same original line should NOT get line breaks
	items := []TextItem{
		{Text: "Hello", X: 0, Y: 16, OrigY: 200, OrigX: 50, FontSize: 12, Page: 1},
		{Text: "World", X: 40, Y: 16, OrigY: 200, OrigX: 100, FontSize: 12, Page: 1},
	}

	var sb strings.Builder
	writeMarkdownItems(&sb, items, 12)
	result := sb.String()
	t.Logf("Result: %q", result)

	if strings.Contains(result, "\n") {
		t.Errorf("Same-OrigY items should NOT get line breaks. Got: %q", result)
	}
	if !strings.Contains(result, "Hello World") {
		t.Errorf("Expected 'Hello World' on same line. Got: %q", result)
	}
}

// TestBulletCharDetection checks isBulletChar against a list of glyphs that
// must be accepted and a list of strings that must be rejected.
func TestBulletCharDetection(t *testing.T) {
	bullets := []string{"•", "‣", "⁃", "◦", "●", "▪", "-"}
	for _, b := range bullets {
		if !isBulletChar(b) {
			t.Errorf("Expected %q (U+%04X) to be detected as bullet", b, []rune(b)[0])
		}
	}

	nonBullets := []string{"hello", "1.", "(1)", "A", "or", ";"}
	for _, nb := range nonBullets {
		if isBulletChar(nb) {
			t.Errorf("Expected %q to NOT be detected as bullet", nb)
		}
	}
}

// TestBulletListRendering checks that a "•" item followed by text is written
// with a "- " marker and that the "•" glyph itself does not reach the output.
func TestBulletListRendering(t *testing.T) {
	// Bullet followed by text on same line
	items := []TextItem{
		{Text: "•", X: 0, Y: 0, OrigY: 100, OrigX: 72, FontSize: 12, Page: 1},
		{Text: "First", X: 10, Y: 0, OrigY: 100, OrigX: 90, FontSize: 12, Page: 1},
		{Text: "item", X: 50, Y: 0, OrigY: 100, OrigX: 130, FontSize: 12, Page: 1},
	}

	var sb strings.Builder
	writeMarkdownItems(&sb, items, 12)
	result := sb.String()
	t.Logf("Result: %q", result)

	if !strings.Contains(result, "- ") {
		t.Errorf("Expected markdown bullet '- ' for • character. Got: %q", result)
	}
	// The bullet should be consumed and replaced with "- "
	if strings.Contains(result, "•") {
		t.Errorf("Bullet character should be consumed, not passed through. Got: %q", result)
	}
}

// TestYMinusOneSeparator checks that an empty item with Y == -1 placed between
// two text items results in a double newline in the output.
func TestYMinusOneSeparator(t *testing.T) {
	// Y=-1 separator should produce double newline
	items := []TextItem{
		{Text: "Left", X: 0, Y: 0, OrigY: 100, OrigX: 50, FontSize: 12, Page: 1},
		{Text: "", Y: -1, Page: 1}, // column separator
		{Text: "Right", X: 0, Y: 16, OrigY: 100, OrigX: 400, FontSize: 12, Page: 1},
	}

	var sb strings.Builder
	writeMarkdownItems(&sb, items, 12)
	result := sb.String()
	t.Logf("Result: %q", result)

	if !strings.Contains(result, "\n\n") {
		t.Errorf("Y=-1 separator should produce double newline. Got: %q", result)
	}
}

// TestParagraphBreakLargeOrigYGap checks that a large jump in OrigY between
// consecutive items yields a double newline in the output.
func TestParagraphBreakLargeOrigYGap(t *testing.T) {
	// Large OrigY gap should produce paragraph break (double newline)
	items := []TextItem{
		{Text: "First", X: 0, Y: 0, OrigY: 100, OrigX: 50, FontSize: 12, Page: 1},
		{Text: "paragraph", X: 0, Y: 16, OrigY: 100, OrigX: 100, FontSize: 12, Page: 1},
		{Text: "Second", X: 0, Y: 48, OrigY: 200, OrigX: 50, FontSize: 12, Page: 1},
		{Text: "paragraph", X: 0, Y: 48, OrigY: 200, OrigX: 100, FontSize: 12, Page: 1},
	}

	var sb strings.Builder
	writeMarkdownItems(&sb, items, 12)
	result := sb.String()
	t.Logf("Result: %q", result)

	// Gap of 100 between OrigY=100 and OrigY=200 exceeds threshold*2 (24)
	if !strings.Contains(result, "\n\n") {
		t.Errorf("Expected paragraph break between paragraphs. Got: %q", result)
	}
}

// TestFormFieldNoOrigYBreak checks that an item flagged FormField is not split
// from the preceding label by a newline even though their OrigY values differ.
func TestFormFieldNoOrigYBreak(t *testing.T) {
	// Form fields should NOT trigger OrigY-based line breaks
	items := []TextItem{
		{Text: "Name:", X: 0, Y: 16, OrigY: 100, OrigX: 50, FontSize: 12, Page: 1},
		{Text: "[F0]", X: 40, Y: 16, OrigY: 150, OrigX: 100, FontSize: 12, Page: 1, FormField: true},
	}

	var sb strings.Builder
	writeMarkdownItems(&sb, items, 12)
	result := sb.String()
	t.Logf("Result: %q", result)

	// "Name:" and "[F0]" should stay on same line despite OrigY diff
	if strings.Contains(result, "Name:\n") {
		t.Errorf("Form field should not trigger line break with preceding text. Got: %q", result)
	}
}

// TestSmallOrigYDiffNoBreak checks that an OrigY difference of 1 between
// neighbouring items does not introduce a newline.
func TestSmallOrigYDiffNoBreak(t *testing.T) {
	// Very small OrigY differences (descenders, baseline shifts) should NOT break
	items := []TextItem{
		{Text: "The", X: 0, Y: 16, OrigY: 100, OrigX: 50, FontSize: 12, Page: 1},
		{Text: "quick", X: 20, Y: 16, OrigY: 101, OrigX: 70, FontSize: 12, Page: 1},
		{Text: "brown", X: 50, Y: 16, OrigY: 100, OrigX: 100, FontSize: 12, Page: 1},
	}

	var sb strings.Builder
	writeMarkdownItems(&sb, items, 12)
	result := sb.String()
	t.Logf("Result: %q", result)

	// All on same line, origYDiff of 1 is below threshold*0.4 (4.8)
	if strings.Contains(result, "\n") {
		t.Errorf("Small OrigY differences should not cause line breaks. Got: %q", result)
	}
}

// TestRepositionBullets checks that a bullet sharing a flattened Y with the
// previous text, but an OrigY with the following text, ends up directly before
// that following text and takes over its Y.
func TestRepositionBullets(t *testing.T) {
	// Bullet stranded at end of one flattened line, text on the next line
	// Bullet at Y=0 (same flat line as "or"), but OrigY=150 (matches "Second")
	items := []TextItem{
		{Text: "or", X: 0, Y: 0, OrigY: 100, OrigX: 300, FontSize: 12, Page: 1},
		{Text: "•", X: 40, Y: 0, OrigY: 150, OrigX: 72, FontSize: 12, Page: 1},
		{Text: "Second", X: 10, Y: 16, OrigY: 150, OrigX: 90, FontSize: 12, Page: 1},
		{Text: "item", X: 50, Y: 16, OrigY: 150, OrigX: 130, FontSize: 12, Page: 1},
	}

	result := repositionBullets(items, 12)

	// Fail cleanly instead of panicking on the index expressions below.
	if len(result) < 3 {
		t.Fatalf("repositionBullets returned %d items, need at least 3", len(result))
	}

	// After: [or, •(Y=16), Second, item]
	// Bullet removed from pos 1, inserted before Second (now pos 2)
	if result[1].Text != "•" {
		t.Errorf("Expected bullet at position 1, got %q", result[1].Text)
	}
	if result[1].Y != 16 {
		t.Errorf("Bullet Y should be 16, got %.0f", result[1].Y)
	}
	if result[2].Text != "Second" {
		t.Errorf("Expected 'Second' at position 2, got %q", result[2].Text)
	}
}

// TestRepositionBulletsNoMoveWhenSameLine checks that a bullet already on the
// same flattened Y as the text that follows it keeps its position.
func TestRepositionBulletsNoMoveWhenSameLine(t *testing.T) {
	// Bullet and text on same flattened line — should NOT move
	items := []TextItem{
		{Text: "or", X: 0, Y: 0, OrigY: 100, OrigX: 300, FontSize: 12, Page: 1},
		{Text: "•", X: 10, Y: 0, OrigY: 150, OrigX: 72, FontSize: 12, Page: 1},
		{Text: "text", X: 50, Y: 0, OrigY: 150, OrigX: 90, FontSize: 12, Page: 1},
	}

	result := repositionBullets(items, 12)

	// Fail cleanly instead of panicking on the index expression below.
	if len(result) < 2 {
		t.Fatalf("repositionBullets returned %d items, need at least 2", len(result))
	}

	// Bullet should stay at position 1
	if result[1].Text != "•" {
		t.Errorf("Bullet should not move when on same flattened line. Got %q at pos 1", result[1].Text)
	}
}

// TestRepositionBulletsAfterSeparator checks that a bullet directly following
// a Y == -1 separator item keeps its position.
func TestRepositionBulletsAfterSeparator(t *testing.T) {
	// Bullet after Y=-1 column separator — should NOT move
	items := []TextItem{
		{Text: "Left", X: 0, Y: 0, OrigY: 100, OrigX: 50, FontSize: 12, Page: 1},
		{Text: "", Y: -1, Page: 1},
		{Text: "•", X: 0, Y: 16, OrigY: 150, OrigX: 72, FontSize: 12, Page: 1},
		{Text: "text", X: 10, Y: 16, OrigY: 150, OrigX: 90, FontSize: 12, Page: 1},
	}

	result := repositionBullets(items, 12)

	// Fail cleanly instead of panicking on the index expression below.
	if len(result) < 3 {
		t.Fatalf("repositionBullets returned %d items, need at least 3", len(result))
	}

	// Bullet should stay at position 2
	if result[2].Text != "•" {
		t.Errorf("Bullet after separator should not move. Got %q at pos 2", result[2].Text)
	}
}

// TestRepositionBulletsMultiple checks that two stranded bullets in the same
// item list are each placed before their own text with the matching Y.
func TestRepositionBulletsMultiple(t *testing.T) {
	// Multiple bullets each stranded at end of previous flattened line
	items := []TextItem{
		{Text: "first", X: 0, Y: 0, OrigY: 100, OrigX: 50, FontSize: 12, Page: 1},
		{Text: "end", X: 40, Y: 0, OrigY: 100, OrigX: 200, FontSize: 12, Page: 1},
		// Bullet 1 stranded on line Y=0 (belongs to line Y=16)
		{Text: "•", X: 0, Y: 0, OrigY: 150, OrigX: 72, FontSize: 12, Page: 1},
		{Text: "second", X: 10, Y: 16, OrigY: 150, OrigX: 90, FontSize: 12, Page: 1},
		{Text: "end", X: 50, Y: 16, OrigY: 150, OrigX: 200, FontSize: 12, Page: 1},
		// Bullet 2 stranded on line Y=16 (belongs to line Y=32)
		{Text: "•", X: 0, Y: 16, OrigY: 200, OrigX: 72, FontSize: 12, Page: 1},
		{Text: "third", X: 10, Y: 32, OrigY: 200, OrigX: 90, FontSize: 12, Page: 1},
	}

	result := repositionBullets(items, 12)

	labels := make([]string, len(result))
	for i, item := range result {
		labels[i] = fmt.Sprintf("%d:%s(Y=%.0f)", i, item.Text, item.Y)
	}
	t.Logf("Result: %v", labels)

	// Checked after logging so the actual item list is visible on failure;
	// prevents an index-out-of-range panic on the assertions below.
	if len(result) < 6 {
		t.Fatalf("repositionBullets returned %d items, need at least 6", len(result))
	}

	// Expected: [first, end, •(Y=16), second, end, •(Y=32), third]
	if result[2].Text != "•" || result[2].Y != 16 {
		t.Errorf("Bullet 1 should be at pos 2 with Y=16, got %q at Y=%.0f", result[2].Text, result[2].Y)
	}
	if result[5].Text != "•" || result[5].Y != 32 {
		t.Errorf("Bullet 2 should be at pos 5 with Y=32, got %q at Y=%.0f", result[5].Text, result[5].Y)
	}
}

// TestBulletFullFlow feeds stranded bullets straight into writeMarkdownItems
// and checks that each one is rendered together with its own text.
func TestBulletFullFlow(t *testing.T) {
	// End-to-end: bullets stranded at end of previous line (same Y as
	// previous text, different OrigY). repositionBullets moves them to
	// the next line where their text lives.
	items := []TextItem{
		// Previous paragraph ends on line Y=0
		{Text: "related", X: 0, Y: 0, OrigY: 100, OrigX: 50, FontSize: 12, Page: 1},
		{Text: "if:", X: 30, Y: 0, OrigY: 100, OrigX: 80, FontSize: 12, Page: 1},
		// Bullet 1 stranded at end of line Y=0 (same flat line as "if:")
		{Text: "•", X: 60, Y: 0, OrigY: 150, OrigX: 72, FontSize: 12, Page: 1},
		// Bullet 1's text on next line Y=16
		{Text: "it", X: 10, Y: 16, OrigY: 150, OrigX: 90, FontSize: 12, Page: 1},
		{Text: "involves", X: 30, Y: 16, OrigY: 150, OrigX: 110, FontSize: 12, Page: 1},
		// Bullet 2 stranded at end of line Y=16 (same flat line as "involves")
		{Text: "•", X: 60, Y: 16, OrigY: 200, OrigX: 72, FontSize: 12, Page: 1},
		// Bullet 2's text on next line Y=32
		{Text: "it", X: 10, Y: 32, OrigY: 200, OrigX: 90, FontSize: 12, Page: 1},
		{Text: "affects", X: 30, Y: 32, OrigY: 200, OrigX: 110, FontSize: 12, Page: 1},
	}

	var sb strings.Builder
	writeMarkdownItems(&sb, items, 12)
	result := sb.String()
	t.Logf("Result: %q", result)

	// Each bullet should be followed by its text on the same line
	if !strings.Contains(result, "- it involves") {
		t.Errorf("First bullet item should render as '- it involves'. Got: %q", result)
	}
	if !strings.Contains(result, "- it affects") {
		t.Errorf("Second bullet item should render as '- it affects'. Got: %q", result)
	}
}

// TestFixOrphanedBulletsBasic checks that fixOrphanedBullets attaches bare
// bullet markers to the un-bulleted list lines and leaves the trailing
// paragraph without a bullet.
func TestFixOrphanedBulletsBasic(t *testing.T) {
	// Simulates the actual PDF output: first bullet correct, rest orphaned
	//
	// NOTE(review): the fixture's line boundaries, and the trailing space on
	// the bare "- " lines, are assumed — confirm against real extractor
	// output before relying on this test.
	md := strings.Join([]string{
		"A case is considered related if:",
		"- it involves the same parties, children, or issues and is pending when the family law case is filed; or",
		"it affects the court's jurisdiction to proceed; or",
		"an order in the related case may conflict with an order on the same issues in the new case; or",
		"an order in the new case may conflict with an order in the earlier case.",
		"- ",
		"- ",
		"- ",
		"This form is used to provide the required notice to the court.",
	}, "\n")

	result := fixOrphanedBullets(md)
	t.Logf("Result:\n%s", result)

	// Each bullet item should start with "- "
	if !strings.Contains(result, "- it affects") {
		t.Errorf("Expected '- it affects' in output. Got:\n%s", result)
	}
	if !strings.Contains(result, "- an order in the related case") {
		t.Errorf("Expected '- an order in the related case' in output. Got:\n%s", result)
	}
	if !strings.Contains(result, "- an order in the new case") {
		t.Errorf("Expected '- an order in the new case' in output. Got:\n%s", result)
	}

	// Orphan bullets should be removed
	if strings.Contains(result, " - \n") || strings.Contains(result, "\n - \n") {
		t.Errorf("Orphan bullets should be removed. Got:\n%s", result)
	}

	// "This form is..." should NOT have a bullet
	if strings.Contains(result, "- This form") {
		t.Errorf("Non-bullet paragraph should not get bullet. Got:\n%s", result)
	}
}

// TestFixOrphanedBulletsNoOrphans checks that input without bare bullet lines
// is returned unchanged.
func TestFixOrphanedBulletsNoOrphans(t *testing.T) {
	// No orphan bullets — output unchanged
	md := "- first item\n- second item\n\nRegular paragraph"

	result := fixOrphanedBullets(md)

	if result != md {
		t.Errorf("Should not modify when no orphans. Got:\n%s", result)
	}
}

// TestFixOrphanedBulletsContinuation checks that a plain line following a
// bullet item does not get a bullet marker prepended.
func TestFixOrphanedBulletsContinuation(t *testing.T) {
	// Continuation lines should NOT get bullets
	md := "- bullet one text\ncontinuation of bullet one\nnew paragraph"

	result := fixOrphanedBullets(md)
	t.Logf("Result: %q", result)

	// "continuation" should not have a bullet prepended
	if strings.Contains(result, "- continuation") {
		t.Errorf("Continuation should not get bullet. Got: %q", result)
	}
}

// TestMathMax pins the arithmetic of math.Max(threshold*0.4, 3) for a few font
// sizes. It evaluates the expression locally and does not call any package
// function.
func TestMathMax(t *testing.T) {
	// Verify the math.Max threshold works for various font sizes
	for _, fs := range []float64{8, 10, 12, 14, 16} {
		threshold := fs
		minThreshold := math.Max(threshold*0.4, 3)

		if fs == 8 {
			if minThreshold < 3.0 || minThreshold > 3.5 {
				t.Errorf("Font size %v: expected minThreshold ~3.2, got %v", fs, minThreshold)
			}
		}
		if fs == 12 {
			if minThreshold < 4.5 || minThreshold > 5.0 {
				t.Errorf("Font size %v: expected minThreshold ~4.8, got %v", fs, minThreshold)
			}
		}
	}
}