package pdfprocessor import ( "fmt" "os" "os/exec" "path/filepath" "strconv" "strings" ) // PageImage holds info about a converted page image type PageImage struct { FilePath string PageNumber int Width int Height int } // ConvertPDFToImages converts each page of a PDF to a PNG image using ImageMagick. func ConvertPDFToImages(pdfPath, outputDir string, dpi int) ([]PageImage, error) { outputPattern := filepath.Join(outputDir, "page_%04d.png") cmd := exec.Command("convert", "-density", strconv.Itoa(dpi), pdfPath, "-depth", "8", "-background", "white", "-alpha", "remove", "-alpha", "off", outputPattern, ) output, err := cmd.CombinedOutput() if err != nil { return nil, fmt.Errorf("converting PDF to images: %w (output: %s)", err, string(output)) } // ImageMagick %d starts at 0. We need 1-indexed page numbers to match pdfcpu. var pages []PageImage for i := 0; ; i++ { path := filepath.Join(outputDir, fmt.Sprintf("page_%04d.png", i)) if _, err := os.Stat(path); os.IsNotExist(err) { break } pages = append(pages, PageImage{ FilePath: path, PageNumber: i + 1, // 1-indexed to match pdfcpu page numbering }) } if len(pages) == 0 { return nil, fmt.Errorf("no pages were converted from PDF") } for i := range pages { if info, err := GetImageInfo(pages[i].FilePath); err == nil { pages[i].Width = info.Width pages[i].Height = info.Height } } return pages, nil } type ImageInfo struct { Width int Height int } func GetImageInfo(filePath string) (ImageInfo, error) { cmd := exec.Command("identify", "-format", "%w %h", filePath) output, err := cmd.Output() if err != nil { return ImageInfo{}, err } parts := strings.Fields(string(output)) if len(parts) < 2 { return ImageInfo{}, fmt.Errorf("unexpected identify output: %s", string(output)) } w, _ := strconv.Atoi(parts[0]) h, _ := strconv.Atoi(parts[1]) return ImageInfo{Width: w, Height: h}, nil }