package pdf import ( "bytes" "encoding/json" "fmt" "os" "strings" "github.com/pdfcpu/pdfcpu/pkg/api" "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/form" "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model" "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/types" "pdf-form-api/models" ) func ExtractFields(path string) ([]models.FormField, error) { f, err := os.Open(path) if err != nil { return nil, fmt.Errorf("opening pdf: %w", err) } defer f.Close() conf := model.NewDefaultConfiguration() ctx, err := api.ReadValidateAndOptimize(f, conf) if err != nil { return nil, fmt.Errorf("reading pdf: %w", err) } xRefTable := ctx.XRefTable if xRefTable.Form == nil { return nil, fmt.Errorf("no form found in pdf") } fields, err := extractFieldsFromAcroForm(xRefTable) if err != nil { return nil, fmt.Errorf("extracting fields: %w", err) } return fields, nil } func extractFieldsFromAcroForm(xRefTable *model.XRefTable) ([]models.FormField, error) { fieldsArr, ok := xRefTable.Form.Find("Fields") if !ok { return nil, fmt.Errorf("no fields in form") } arr, err := xRefTable.DereferenceArray(fieldsArr) if err != nil { return nil, fmt.Errorf("dereferencing fields: %w", err) } pageCount, _ := getPageCount(xRefTable) var result []models.FormField for _, item := range arr { indRef, ok := item.(types.IndirectRef) if !ok { continue } d, err := xRefTable.DereferenceDict(indRef) if err != nil || d == nil { continue } ftStr := d.NameEntry("FT") if ftStr == nil { continue } name := extractFieldName(d) if name == "" { continue } ffType := parseFieldType(*ftStr, d, xRefTable) value := extractValue(d) defValue := extractDefault(d) choices := extractOptions(xRefTable, d) title := "" tu := d.StringEntry("TU") if tu != nil { title = *tu } required := false if flags := d.IntEntry("Ff"); flags != nil { required = (*flags & 0x0001) != 0 } rect := extractRect(d) page := extractPage(d, xRefTable, pageCount) result = append(result, models.FormField{ Name: name, Type: ffType, Choices: choices, Value: value, DefaultVal: defValue, Title: title, Required: required, Page: page, Rect: rect, }) } return result, nil } func extractFieldName(d types.Dict) string { name := d.StringEntry("T") if name != nil { return *name } if raw, ok := d.Find("T"); ok { if sl, ok := raw.(types.StringLiteral); ok { s, _ := types.StringLiteralToString(sl) return s } if hl, ok := raw.(types.HexLiteral); ok { s, _ := types.HexLiteralToString(hl) return s } } return "" } func parseFieldType(ft string, d types.Dict, xRefTable *model.XRefTable) models.FieldType { switch ft { case "Tx": return models.FieldText case "Btn": if isRadioButtonGroup(d, xRefTable) { return models.FieldRadio } return models.FieldCheck case "Ch": if isListBox(d) { return models.FieldList } return models.FieldCombo case "Sig": return models.FieldSign default: return models.FieldText } } func isRadioButtonGroup(d types.Dict, xRefTable *model.XRefTable) bool { if raw, ok := d.Find("Kids"); ok { kids, err := xRefTable.DereferenceArray(raw) if err != nil { return false } return len(kids) > 0 } return false } func isListBox(d types.Dict) bool { if flags := d.IntEntry("Ff"); flags != nil { return (*flags & 0x20000) == 0 } return false } func extractValue(d types.Dict) string { val := d.StringEntry("V") if val != nil { return *val } if raw, ok := d.Find("V"); ok { if sl, ok := raw.(types.StringLiteral); ok { s, _ := types.StringLiteralToString(sl) return s } if hl, ok := raw.(types.HexLiteral); ok { s, _ := types.HexLiteralToString(hl) return s } if nm, ok := raw.(types.Name); ok { return string(nm) } } return "" } func extractDefault(d types.Dict) string { val := d.StringEntry("DV") if val != nil { return *val } if raw, ok := d.Find("DV"); ok { if sl, ok := raw.(types.StringLiteral); ok { s, _ := types.StringLiteralToString(sl) return s } if hl, ok := raw.(types.HexLiteral); ok { s, _ := types.HexLiteralToString(hl) return s } } return "" } func extractOptions(xRefTable *model.XRefTable, d types.Dict) []string { raw, ok := d.Find("Opt") if !ok { return nil } arr, err := xRefTable.DereferenceArray(raw) if err != nil { return nil } var result []string for _, item := range arr { switch v := item.(type) { case types.StringLiteral: s, err := types.StringLiteralToString(v) if err != nil { continue } result = append(result, s) case types.HexLiteral: s, err := types.HexLiteralToString(v) if err != nil { continue } result = append(result, s) case types.Array: if len(v) >= 2 { switch val := v[1].(type) { case types.StringLiteral: s, _ := types.StringLiteralToString(val) result = append(result, s) case types.HexLiteral: s, _ := types.HexLiteralToString(val) result = append(result, s) } } } } return result } func FillPDF(inputPath string, fill map[string]string) (*bytes.Buffer, error) { f, err := os.Open(inputPath) if err != nil { return nil, fmt.Errorf("opening pdf: %w", err) } defer f.Close() conf := model.NewDefaultConfiguration() ctx, err := api.ReadValidateAndOptimize(f, conf) if err != nil { return nil, fmt.Errorf("reading pdf: %w", err) } xRefTable := ctx.XRefTable // Extract fields to get types fields, err := extractFieldsFromAcroForm(xRefTable) if err != nil { return nil, fmt.Errorf("extracting fields: %w", err) } fieldTypes := make(map[string]models.FieldType) for _, fld := range fields { fieldTypes[fld.Name] = fld.Type } // Build the form fill data - only text fields and checkboxes to avoid the corrupt choice field issue formData := form.FormGroup{ Forms: []form.Form{ { Pages: make(map[string]*form.Page), }, }, } for pg := range buildPageMap(xRefTable) { formData.Forms[0].Pages[pg] = &form.Page{} } for name, val := range fill { ft := fieldTypes[name] switch ft { case models.FieldText: formData.Forms[0].TextFields = append(formData.Forms[0].TextFields, &form.TextField{ Name: name, Value: val, }) case models.FieldCheck: formData.Forms[0].CheckBoxes = append(formData.Forms[0].CheckBoxes, &form.CheckBox{ Name: name, Value: strings.EqualFold(val, "true") || strings.EqualFold(val, "yes") || strings.EqualFold(val, "on") || val == "/Yes", }) } } jsonData, err := json.Marshal(formData) if err != nil { return nil, fmt.Errorf("marshaling form data: %w", err) } f, err = os.Open(inputPath) if err != nil { return nil, fmt.Errorf("reopening pdf: %w", err) } defer f.Close() var outBuf bytes.Buffer // Try api.FillForm first - it may work for text/checkbox only if err := api.FillForm(f, bytes.NewReader(jsonData), &outBuf, conf); err == nil { return &outBuf, nil } // If api.FillForm fails (corrupt choice field), fall back to direct form manipulation return fillPDFDirectly(inputPath, fill, fieldTypes, conf) } func fillPDFDirectly(inputPath string, fill map[string]string, fieldTypes map[string]models.FieldType, conf *model.Configuration) (*bytes.Buffer, error) { f, err := os.Open(inputPath) if err != nil { return nil, fmt.Errorf("opening pdf: %w", err) } defer f.Close() ctx, err := api.ReadValidateAndOptimize(f, conf) if err != nil { return nil, fmt.Errorf("reading pdf: %w", err) } xRefTable := ctx.XRefTable fieldsArr, ok := xRefTable.Form.Find("Fields") if !ok { return nil, fmt.Errorf("no fields in form") } arr, err := xRefTable.DereferenceArray(fieldsArr) if err != nil { return nil, fmt.Errorf("dereferencing fields: %w", err) } // Map field names to their dict references fieldMap := make(map[string]types.Dict) for _, item := range arr { indRef, ok := item.(types.IndirectRef) if !ok { continue } d, err := xRefTable.DereferenceDict(indRef) if err != nil || d == nil { continue } name := extractFieldName(d) if name == "" { continue } fieldMap[name] = d } // Update field values for name, val := range fill { d, found := fieldMap[name] if !found { continue } ft := fieldTypes[name] switch ft { case models.FieldText: // Encode value as UTF-16BE for PDF encoded, err := types.EscapedUTF16String(val) if err != nil { continue } d["V"] = types.StringLiteral(*encoded) case models.FieldCheck: if strings.EqualFold(val, "true") || strings.EqualFold(val, "yes") || strings.EqualFold(val, "on") || val == "/Yes" { d["V"] = types.Name("Yes") } else { d["V"] = types.Name("Off") } case models.FieldCombo, models.FieldList: encoded, err := types.EscapedUTF16String(val) if err != nil { continue } d["V"] = types.StringLiteral(*encoded) case models.FieldRadio: d["V"] = types.Name(val) } } // Write the modified PDF var outBuf bytes.Buffer if err := api.Write(ctx, &outBuf, conf); err != nil { return nil, fmt.Errorf("writing pdf: %w", err) } return &outBuf, nil } func buildPageMap(xRefTable *model.XRefTable) map[string]bool { pageSet := make(map[string]bool) if err := xRefTable.EnsurePageCount(); err == nil && xRefTable.PageCount > 0 { for i := 1; i <= xRefTable.PageCount; i++ { pageSet[fmt.Sprintf("p%d", i)] = true } } return pageSet } func getPageCount(xRefTable *model.XRefTable) (int, error) { if err := xRefTable.EnsurePageCount(); err != nil { return 0, err } return xRefTable.PageCount, nil } func extractRect(d types.Dict) string { raw, ok := d.Find("Rect") if !ok { return "" } arr, err := derefArray(raw) if err != nil || len(arr) != 4 { return "" } parts := make([]string, 4) for i, v := range arr { if num, ok := v.(types.Float); ok { parts[i] = fmt.Sprintf("%.2f", float64(num)) } else if num, ok := v.(types.Integer); ok { parts[i] = fmt.Sprintf("%d", int64(num)) } } return strings.Join(parts, " ") } func derefArray(raw types.Object) (types.Array, error) { if arr, ok := raw.(types.Array); ok { return arr, nil } return nil, fmt.Errorf("not an array") } func buildPageRefMap(xRefTable *model.XRefTable) map[types.IndirectRef]int { refMap := make(map[types.IndirectRef]int) for i := 1; i <= xRefTable.PageCount; i++ { ref, err := xRefTable.PageDictIndRef(i) if err == nil && ref != nil { refMap[*ref] = i } } return refMap } func extractPage(d types.Dict, xRefTable *model.XRefTable, totalPageCount int) int { pageRefMap := buildPageRefMap(xRefTable) raw, ok := d.Find("P") if !ok { return 1 } if indRef, ok := raw.(types.IndirectRef); ok { if pageNum, found := pageRefMap[indRef]; found { return pageNum } } return 1 }