package pdfprocessor

import (
	"bytes"
	"encoding/json"
	"fmt"
	"os"
	"strings"

	"github.com/pdfcpu/pdfcpu/pkg/api"
	"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model"
	"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/types"
)

// pdfcpuFillForm is the JSON structure pdfcpu's FillForm expects.
type pdfcpuFillForm struct {
	// Forms is serialized under the "forms" key.
	Forms []pdfcpuForm `json:"forms"`
}

// pdfcpuForm is one entry of pdfcpuFillForm.Forms. It groups the text fields
// and checkboxes to fill; an empty group is omitted from the JSON.
type pdfcpuForm struct {
	TextFields []*pdfcpuTextField     `json:"textfield,omitempty"`
	CheckBoxes []*pdfcpuCheckboxField `json:"checkbox,omitempty"`
}

// pdfcpuTextField is a text field entry: the field name and the string value
// to set.
type pdfcpuTextField struct {
	Name  string `json:"name"`
	Value string `json:"value"`
}

// pdfcpuCheckboxField is a checkbox entry: the field name and whether the box
// is checked.
type pdfcpuCheckboxField struct {
	Name  string `json:"name"`
	Value bool   `json:"value"`
}

// FillField represents a field to fill with its value and position info.
type FillField struct {
	// Name identifies the form field. FillPDF treats a name starting with
	// "__unnamed_" as a nameless widget and locates it by position instead.
	Name string
	// Type is "text", "button" (checkbox), or "radio".
	Type string
	// Value is the value to set. For "button" and "radio" fields FillPDF
	// treats "1" or "Yes" as checked and anything else as unchecked.
	Value string
	// PosX and PosY are compared by FillPDF against the LL corner of a
	// widget's rectangle when matching unnamed fields.
	PosX float64
	PosY float64
	// Width and Height are compared against the widget rectangle's extent
	// (UR minus LL) when matching unnamed fields.
	Width  float64
	Height float64
	// Page is the page number the field is on; FillPDF counts pages from 1.
	Page int
}

// FillPDF populates form fields in a PDF with the given values and returns
// the filled PDF as bytes.
func FillPDF(pdfPath string, fields []FillField) ([]byte, error) { if len(fields) == 0 { return nil, fmt.Errorf("no fields to fill") } // Separate named and unnamed fields namedTextFields := make(map[string]string) namedCheckboxFields := make(map[string]string) var unnamedFields []FillField for _, f := range fields { if strings.HasPrefix(f.Name, "__unnamed_") { unnamedFields = append(unnamedFields, f) } else if f.Type == "button" || f.Type == "radio" { namedCheckboxFields[f.Name] = f.Value } else { namedTextFields[f.Name] = f.Value } } conf := model.NewDefaultConfiguration() // Step 1: Fill named fields using pdfcpu's FillForm API pdfData, err := os.ReadFile(pdfPath) if err != nil { return nil, fmt.Errorf("reading PDF: %w", err) } var buf bytes.Buffer hasNamed := len(namedTextFields) > 0 || len(namedCheckboxFields) > 0 if hasNamed { textFields := make([]*pdfcpuTextField, 0, len(namedTextFields)) for name, value := range namedTextFields { textFields = append(textFields, &pdfcpuTextField{Name: name, Value: value}) } checkboxes := make([]*pdfcpuCheckboxField, 0, len(namedCheckboxFields)) for name, value := range namedCheckboxFields { checkboxes = append(checkboxes, &pdfcpuCheckboxField{Name: name, Value: value == "1" || value == "Yes"}) } formData := pdfcpuFillForm{ Forms: []pdfcpuForm{ {TextFields: textFields, CheckBoxes: checkboxes}, }, } formJSON, err := json.Marshal(formData) if err != nil { return nil, fmt.Errorf("marshaling form data: %w", err) } if err := api.FillForm(bytes.NewReader(pdfData), bytes.NewReader(formJSON), &buf, conf); err != nil { return nil, fmt.Errorf("filling named fields: %w", err) } } else { buf.Write(pdfData) } // Step 2: Fill unnamed fields by modifying widget annotations directly if len(unnamedFields) == 0 { return buf.Bytes(), nil } // Read back the (possibly partially filled) PDF ctx, err := api.ReadValidateAndOptimize(bytes.NewReader(buf.Bytes()), conf) if err != nil { return nil, fmt.Errorf("re-reading PDF for unnamed fields: %w", 
err) } // Build position lookup for unnamed fields: "page_x_y_w_h" -> (type, value) type posEntry struct { pType string value string } type posKey struct { page int x, y, w, h float64 } posToValue := make(map[posKey]posEntry) for _, f := range unnamedFields { posToValue[posKey{page: f.Page, x: f.PosX, y: f.PosY, w: f.Width, h: f.Height}] = posEntry{pType: f.Type, value: f.Value} } // Iterate through all widget annotations and match by position for page := 1; page <= ctx.XRefTable.PageCount; page++ { pgAnnots := ctx.XRefTable.PageAnnots[page] if len(pgAnnots) == 0 { continue } wAnnots, ok := pgAnnots[model.AnnWidget] if !ok { continue } indRefs := wAnnots.IndRefs if indRefs == nil { continue } for _, indRef := range *indRefs { d, err := ctx.XRefTable.DereferenceDict(indRef) if err != nil || len(d) == 0 { continue } // Skip widgets that already have a /T (named fields handled above) if hasName := tryExtractName(d); hasName != "" { continue } // Extract rect and match to our unnamed fields rect := extractRect(d) if rect == nil { continue } key := posKey{ page: page, x: rect.LL.X, y: rect.LL.Y, w: rect.UR.X - rect.LL.X, h: rect.UR.Y - rect.LL.Y, } if entry, ok := posToValue[key]; ok { // Set /V on the widget annotation if entry.pType == "button" || entry.pType == "radio" { // Checkboxes use PDF name objects: /Yes (checked) or /Off (unchecked) v := "Off" if entry.value == "1" || entry.value == "Yes" { v = "Yes" } d["V"] = types.Name(v) } else { d["V"] = types.StringLiteral(entry.value) } } } } // Write the modified PDF var out bytes.Buffer if err := api.Write(ctx, &out, conf); err != nil { return nil, fmt.Errorf("writing PDF: %w", err) } return out.Bytes(), nil }