// Package annotator builds the prompt text that describes PDF form fields so a
// model can generate a user-facing question for each field.
package annotator

import (
	"fmt"
	"sort"
	"strings"

	"pdf-form-api/models"
	"pdf-form-api/renderer"
)

// FieldContext describes a single form field together with the page-level
// context (relative position and sibling fields) used when building prompts.
type FieldContext struct {
	FieldName    string
	FieldType    string
	Choices      []string
	DefaultValue string
	Page         int
	Rect         string
	// Position is a human-readable description of where the field sits among
	// the fields of its page (see positionDescription).
	Position string
	// Surrounding holds the names of the other fields on the same page.
	Surrounding []string
}

// BuildFieldContexts groups fields by page, orders each page's fields by
// descending Y coordinate, and fills in Position and Surrounding for every
// field. The result maps a page number to that page's ordered contexts.
//
// pages is only forwarded to groupByPage, which does not currently use it.
func BuildFieldContexts(fields []models.FormField, pages []renderer.PageInfo) map[int][]FieldContext {
	grouped := groupByPage(fields, pages)
	for _, pageFields := range grouped {
		// Larger Y sorts first and is later described as "the top of the page"
		// (presumably PDF user space with a bottom-left origin — confirm).
		// A stable sort keeps the input order for fields sharing the same Y.
		sort.SliceStable(pageFields, func(i, j int) bool {
			return parseY(pageFields[i].Rect) > parseY(pageFields[j].Rect)
		})

		// Collect the names once per page instead of once per field.
		names := surroundingFieldNames(pageFields)
		for i := range pageFields {
			// pageFields shares its backing array with the slice stored in the
			// map, so writing through the element pointer updates grouped.
			ctx := &pageFields[i]
			ctx.Position = positionDescription(i, len(pageFields))
			// The field itself is not one of the "other fields on this page".
			ctx.Surrounding = namesExcept(names, i)
		}
	}
	return grouped
}

// groupByPage converts each form field into a FieldContext and buckets the
// contexts by page number. A page number of 0 is treated as page 1.
//
// pages is accepted for signature compatibility but is not used.
func groupByPage(fields []models.FormField, pages []renderer.PageInfo) map[int][]FieldContext {
	grouped := make(map[int][]FieldContext)
	for _, f := range fields {
		page := f.Page
		if page == 0 {
			page = 1
		}
		grouped[page] = append(grouped[page], FieldContext{
			FieldName:    f.Name,
			FieldType:    string(f.Type),
			Choices:      f.Choices,
			DefaultValue: f.DefaultVal,
			Page:         page,
			Rect:         f.Rect,
		})
	}
	return grouped
}

// positionDescription returns a short phrase describing where the field at
// index sits among total fields that are ordered from top to bottom.
func positionDescription(index, total int) string {
	switch {
	case total <= 1:
		return "the only field on this page"
	case index == 0:
		return "at the top of the page"
	case index == total-1:
		return "at the bottom of the page"
	case index*2 < total:
		return "in the upper half of the page"
	default:
		return "in the lower half of the page"
	}
}

// surroundingFieldNames returns the names of all given fields, in order.
// It returns nil when pageFields is empty.
func surroundingFieldNames(pageFields []FieldContext) []string {
	var names []string
	for _, f := range pageFields {
		names = append(names, f.FieldName)
	}
	return names
}

// namesExcept returns a fresh slice holding every entry of names except the
// one at index skip. It returns nil when no other entries exist, so callers
// can rely on len() to detect "no other fields".
func namesExcept(names []string, skip int) []string {
	if len(names) <= 1 {
		return nil
	}
	others := make([]string, 0, len(names)-1)
	others = append(others, names[:skip]...)
	others = append(others, names[skip+1:]...)
	return others
}

// parseY extracts the second whitespace-separated token of rect and parses it
// as a float. It returns 0 when rect has fewer than two tokens or the token
// does not start with a number.
func parseY(rect string) float64 {
	parts := strings.Fields(rect)
	if len(parts) < 2 {
		return 0
	}
	var y float64
	// Sscanf tolerates trailing characters after the number (e.g. "12.5,"),
	// which a strict strconv.ParseFloat would reject.
	if _, err := fmt.Sscanf(parts[1], "%f", &y); err != nil {
		return 0
	}
	return y
}

// PagePrompt renders the text prompt for one page: a bullet entry per field
// (name, type, optional default and options, position, other fields on the
// page) followed by the instruction to return a JSON array of questions.
func PagePrompt(pageNum int, contexts []FieldContext) string {
	var sb strings.Builder
	fmt.Fprintf(&sb, "Page %d contains the following form fields:\n\n", pageNum)
	for _, ctx := range contexts {
		fmt.Fprintf(&sb, "- Field: %s\n", ctx.FieldName)
		fmt.Fprintf(&sb, " Type: %s\n", ctx.FieldType)
		if ctx.DefaultValue != "" {
			fmt.Fprintf(&sb, " Default: %s\n", ctx.DefaultValue)
		}
		if len(ctx.Choices) > 0 {
			fmt.Fprintf(&sb, " Options: %s\n", strings.Join(ctx.Choices, ", "))
		}
		fmt.Fprintf(&sb, " Position: %s\n", ctx.Position)
		if len(ctx.Surrounding) > 0 {
			fmt.Fprintf(&sb, " Other fields on this page: %s\n", strings.Join(ctx.Surrounding, ", "))
		}
		sb.WriteString("\n")
	}
	sb.WriteString("Generate a clear, natural-language question for each field that would guide a user to provide the right value.\n\n")
	sb.WriteString("Return a JSON array with one object per field in the same order:\n")
	sb.WriteString("[\n {\"name\": \"FIELD_NAME\", \"question\": \"The question here\"},\n ...\n]\n")
	return sb.String()
}

// VisionFieldMeta is the per-field metadata listed in the vision prompt.
// Label is the identifier that BuildVisionPrompt tells the model to read from
// the red annotation bar on the page image.
type VisionFieldMeta struct {
	ID           int64
	Name         string
	FieldType    string
	Choices      []string
	DefaultValue string
	Label        string
	Page         int
}

// BuildVisionPrompt renders the prompt that accompanies the annotated page
// images: a metadata listing for every field, instructions for matching
// labels to printed text, grouping rules, and the expected JSON output format
// with an example.
func BuildVisionPrompt(fields []VisionFieldMeta, pageCount int) string {
	// The example must itself be valid JSON (no trailing comma after the last
	// array element), since it is the template the model is asked to imitate.
	const example = `{ "description": "A legal form for collecting client and attorney information.", "fields": [ ` +
		`{"label": "A3F1", "question": "What is the client's first name?", "value_group": "Client first name", "wizard_page": 1}, ` +
		`{"label": "B7E2", "question": "What is the client's last name?", "value_group": "Client last name", "wizard_page": 1}, ` +
		`{"label": "C4D5", "question": "What is the client's street address?", "value_group": "", "wizard_page": 2}, ` +
		`{"label": "D8F3", "question": "What is the client's city, state, and zip?", "value_group": "", "wizard_page": 2}, ` +
		`{"label": "E1A6", "question": "What is the client's telephone number?", "value_group": "", "wizard_page": 3}, ` +
		`{"label": "D4F1", "question": "What is the client's first name?", "value_group": "Client first name", "wizard_page": 4}, ` +
		`{"label": "F5HF", "question": "What is the client's last name?", "value_group": "Client last name", "wizard_page": 4} ` +
		`] }`

	var sb strings.Builder
	fmt.Fprintf(&sb, "You are looking at %d page(s) of a PDF form. Each form field has been marked with a red annotation bar containing a unique label.\n\n", pageCount)

	// Include field metadata for reference.
	sb.WriteString("## FIELD METADATA\n\n")
	for _, f := range fields {
		fmt.Fprintf(&sb, "- Label: %s | Name: %s | Type: %s | Page: %d\n", f.Label, f.Name, f.FieldType, f.Page)
		if f.DefaultValue != "" {
			fmt.Fprintf(&sb, " Default: %s\n", f.DefaultValue)
		}
		if len(f.Choices) > 0 {
			fmt.Fprintf(&sb, " Choices: %s\n", strings.Join(f.Choices, ", "))
		}
	}
	sb.WriteString("\n")

	sb.WriteString("## HOW TO IDENTIFY EACH FIELD\n\n")
	sb.WriteString("1. Look at the red annotation bar on the image to find the label.\n")
	sb.WriteString("2. Read the printed text on the form that is directly adjacent to the red annotation. First check the text that is to the left, if it doesn't seem relevant then check top, right, then bottom.\n")
	sb.WriteString("3. Determine what information the field is requesting.\n\n")

	sb.WriteString("\n## IMPORTANT RULES\n\n")
	sb.WriteString("- The labels on the images are NON-SEQUENTIAL random identifiers. Do NOT assume any ordering from the labels.\n")
	sb.WriteString("- Each field's question must match the ACTUAL field based on the surrounding printed text on the form, NOT the label order.\n")
	sb.WriteString("- wizard_page is a UX step number for a multi-step form wizard. It is NOT the PDF page number.\n")
	sb.WriteString("- Group semantically related fields on the same wizard_page (e.g., first name + last name = page 1, street + city + state + zip = page 2, phone = page 3).\n")
	sb.WriteString("- Start wizard_page at 1 and increment by 1 for each new group.\n\n")

	sb.WriteString("## OUTPUT FORMAT\n\n")
	sb.WriteString("Return a JSON object:\n\n")
	sb.WriteString("1. \"description\": A brief 1-2 sentence description of what this form/document is for.\n\n")
	sb.WriteString("2. \"fields\": An array with one entry per labeled field. Each entry:\n")
	sb.WriteString(" - \"label\": The EXACT label from the red annotation bar (not a number you make up).\n")
	sb.WriteString(" - \"question\": A clear question to ask the user for this field's value.\n")
	sb.WriteString(" - \"value_group\": A group name for 2+ fields collecting the same value (e.g. 'Client Name'). Empty string if standalone.\n")
	sb.WriteString(" - \"wizard_page\": Integer (1, 2, 3...) for UX wizard grouping. Related fields share the same number.\n\n")

	sb.WriteString("Example:\n")
	sb.WriteString(example)
	sb.WriteString("\n\nUse the EXACT label from each red annotation bar. Every labeled field must appear exactly once in the array.\n")
	return sb.String()
}