package shortcode import ( "regexp" "strconv" "strings" ) // Field represents a parsed [field] shortcode. type Field struct { Idx int // maps to PDF field position (required) Type string // text, email, tel, number, url, date, textarea, button, radio Label string // display label (optional) Group string // group name for grouped fields (optional) } // Handler defines a shortcode handler. type Handler interface { // Tag is the shortcode tag name (e.g. "field"). Tag() string // Parse parses attribute key-value pairs from a shortcode. // Returns the parsed result, or nil if invalid. // Also returns the replacement string to put in the output. Parse(attrs map[string]string) (interface{}, string) } var registry = make(map[string]Handler) // Register adds a shortcode handler. func Register(h Handler) { registry[h.Tag()] = h } // ShortcodesRe matches [tag attr="val" ...] patterns (non-nested). var ShortcodesRe = regexp.MustCompile(`\[([a-z]+)[^]]*]`) // legacyMarkerRe matches legacy [FN] markers like [F0], [F12], etc. var legacyMarkerRe = regexp.MustCompile(`\[F(\d+)\]`) // Parse extracts all shortcodes from the given markdown text. // Returns a map of tag -> []parsed results. func Parse(markdown string) map[string][]interface{} { result := make(map[string][]interface{}) matches := ShortcodesRe.FindAllStringSubmatchIndex(markdown, -1) for _, match := range matches { full := markdown[match[0]:match[1]] tagStr := markdown[match[2]:match[3]] handler, ok := registry[tagStr] if !ok { continue } attrs := ParseAttrs(full) parsed, _ := handler.Parse(attrs) if parsed != nil { result[tagStr] = append(result[tagStr], parsed) } } // Backward compat: extract legacy [FN] markers as field shortcodes. // Type is left empty so GetFieldsWithDefaults can apply the original PDF field type. legacyMatches := legacyMarkerRe.FindAllStringSubmatchIndex(markdown, -1) for _, match := range legacyMatches { idxStr := markdown[match[2]:match[3]] idx, err := strconv.Atoi(idxStr) if err != nil { continue } if _, exists := result["field"]; !exists { result["field"] = []interface{}{} } result["field"] = append(result["field"], &Field{ Idx: idx, }) } return result } // ParseAttrs extracts key="value" pairs from a shortcode string. func ParseAttrs(s string) map[string]string { attrs := make(map[string]string) // Remove the tag name and brackets s = s[1 : len(s)-1] // strip leading [ and trailing ] space := strings.IndexFunc(s, func(r rune) bool { return r == ' ' || r == '\t' }) if space == -1 { return attrs } s = s[space:] // Match key="value" or key=value pairs re := regexp.MustCompile(`(\w+)=("(?:[^"\\]|\\.)*"|[^ ]+)`) for _, m := range re.FindAllStringSubmatch(s, -1) { key := m[1] val := m[2] // Strip surrounding quotes if strings.HasPrefix(val, `"`) && strings.HasSuffix(val, `"`) { val = val[1 : len(val)-1] } attrs[key] = val } return attrs } // HasShortcodes returns true if the markdown contains any shortcodes. func HasShortcodes(markdown string) bool { return ShortcodesRe.MatchString(markdown) } // HasFields returns true if the markdown contains field shortcodes or legacy markers. func HasFields(markdown string) bool { return HasShortcodes(markdown) || legacyMarkerRe.MatchString(markdown) } // GetFields returns all [field] shortcodes parsed from the markdown. func GetFields(markdown string) []*Field { return GetFieldsWithDefaults(markdown, nil) } // GetFieldsWithDefaults returns all [field] shortcodes parsed from the markdown, // applying defaultTypes as fallback for fields that don't specify a type. // defaultTypes is a map of {idx: originalType} (e.g. from raw PDF extraction). func GetFieldsWithDefaults(markdown string, defaultTypes map[int]string) []*Field { parsed := Parse(markdown) var fields []*Field for _, v := range parsed["field"] { f, ok := v.(*Field) if !ok { continue } // Apply default type when shortcode doesn't specify one if f.Type == "" { if dt, ok := defaultTypes[f.Idx]; ok { f.Type = dt } else { f.Type = "text" } } fields = append(fields, f) } return fields }