Java pdf 轉圖片

時間 2019-12-05

標籤：java、pdf、圖片　欄目：Java　简体版

原文原文鏈接

Maven 依賴（Apache PDFBox）：

<dependency>
  <groupId>org.apache.pdfbox</groupId>
  <artifactId>pdfbox</artifactId>
  <version>2.0.8</version>
</dependency>
<dependency>
  <groupId>org.apache.pdfbox</groupId>
  <artifactId>pdfbox-tools</artifactId>
  <version>2.0.8</version>
</dependency>

代碼示例：

/** Page index passed to the renderer when only the first ("home") page is wanted; page indices start at 0. */
private static final int HOME_PAGE_INDEX = 0;

/**
 * Converts the first page of a PDF to an image.
 *
 * @param pdf    PDF content as a byte array
 * @param format image format name
 * @return the encoded image bytes of the first page, as produced by
 *         {@code pdfHomePageToImage}
 */
public static byte[] getImageFromPdf(byte[] pdf, String format) {
    PDDocument document = getFromByteArray(pdf);
    return pdfHomePageToImage(document, format);
}

/**
 * Converts the first page of a PDF file to an image.
 *
 * @param pdf    PDF file
 * @param format image format name
 * @return the encoded image bytes of the first page, as produced by
 *         {@code pdfHomePageToImage}
 */
public static byte[] getImageFromPdf(File pdf, String format) {
    PDDocument document = getFromFile(pdf);
    return pdfHomePageToImage(document, format);
}

/**
 * Converts one page of a PDF file to an image.
 *
 * <p>Delegates to {@code pdfToImageWithIndex} using {@code pageIndex} as both the
 * start and the end index, and returns the first element of the resulting list.
 *
 * @param pdf       PDF file
 * @param pageIndex page index (page indices start at 0)
 * @param format    image format name
 * @return the encoded image bytes of the requested page
 */
public static byte[] getImageFromPdf(File pdf, int pageIndex, String format) {
    PDDocument document = getFromFile(pdf);
    List<byte[]> images = pdfToImageWithIndex(document, pageIndex, pageIndex, format);
    return images.get(0);
}

/**
 * Converts one page of an in-memory PDF to an image.
 *
 * <p>Delegates to {@code pdfToImageWithIndex} using {@code pageIndex} as both the
 * start and the end index, and returns the first element of the resulting list.
 *
 * @param pdf       PDF content as a byte array
 * @param pageIndex page index (page indices start at 0)
 * @param format    image format name
 * @return the encoded image bytes of the requested page
 */
public static byte[] getImageFromPdf(byte[] pdf, int pageIndex, String format) {
    PDDocument document = getFromByteArray(pdf);
    List<byte[]> images = pdfToImageWithIndex(document, pageIndex, pageIndex, format);
    return images.get(0);
}

/**
 * Converts a range of pages of a PDF file to images.
 *
 * @param pdf        PDF file
 * @param startIndex start page index (page indices start at 0)
 * @param endIndex   end page index
 * @param format     image format name
 * @return the list of encoded images returned by {@code pdfToImageWithIndex}
 */
public static List<byte[]> getImageFromPdf(File pdf, int startIndex, int endIndex, String format) {
    PDDocument document = getFromFile(pdf);
    return pdfToImageWithIndex(document, startIndex, endIndex, format);
}

/**
 * Converts a range of pages of an in-memory PDF to images.
 *
 * @param pdf        PDF content as a byte array
 * @param startIndex start page index (page indices start at 0)
 * @param endIndex   end page index
 * @param format     image format name
 * @return the list of encoded images returned by {@code pdfToImageWithIndex}
 */
public static List<byte[]> getImageFromPdf(byte[] pdf, int startIndex, int endIndex, String format) {
    PDDocument document = getFromByteArray(pdf);
    return pdfToImageWithIndex(document, startIndex, endIndex, format);
}

/**
 * Renders the pages {@code startIndex..endIndex} (both inclusive) of a document.
 *
 * <p>The end index is inclusive so that the single-page callers, which pass the same
 * value for both indices, receive exactly one image. The document is closed by
 * {@code pdfToImage} once the page at {@code endIndex} has been processed, or here
 * when the arguments are rejected.
 *
 * @param document   loaded PDF document; closed before this method returns or throws
 *                   an {@link IllegalArgumentException}
 * @param startIndex first page index to render (page indices start at 0)
 * @param endIndex   last page index to render, inclusive
 * @param format     image format name
 * @return one entry per requested page, in page order; an entry is {@code null} when
 *         {@code pdfToImage} failed to render that page
 * @throws IllegalArgumentException if {@code startIndex} is negative or greater than
 *                                  {@code endIndex}, or {@code endIndex} is past the
 *                                  last page
 */
private static List<byte[]> pdfToImageWithIndex(PDDocument document, int startIndex, int endIndex, String format) {
    int maxPageIndex = pdfTotalPages(document) - 1;

    String problem = null;
    if (startIndex > endIndex) {
        problem = "The param startIndex cannot be greater than endIndex";
    } else if (startIndex < 0) {
        problem = "The param startIndex cannot be negative";
    } else if (endIndex > maxPageIndex) {
        problem = String.format("The pdf max page index is [%s], But the endIndex you input is [%s]", maxPageIndex, endIndex);
    }
    if (problem != null) {
        // No page will be rendered, so nothing else would release the document.
        try {
            document.close();
        } catch (IOException e) {
            log.warn("IO Exception", e);
        }
        throw new IllegalArgumentException(problem);
    }

    List<byte[]> pdfImages = new ArrayList<>(endIndex - startIndex + 1);
    // Inclusive upper bound: the final call has i == endIndex, which is the
    // condition pdfToImage uses to close the document.
    for (int i = startIndex; i <= endIndex; i++) {
        pdfImages.add(pdfToImage(document, i, endIndex, format));
    }
    return pdfImages;
}

/**
 * Converts every page of an in-memory PDF to an image.
 *
 * @param pdf    PDF content as a byte array
 * @param format image format name
 * @return the list of encoded images returned by {@code pdfToImageForAllPages}
 */
public static List<byte[]> getImageFromPdfAllPages(byte[] pdf, String format) {
    PDDocument document = getFromByteArray(pdf);
    return pdfToImageForAllPages(document, format);
}

/**
 * Converts every page of a PDF file to an image.
 *
 * @param pdf    PDF file
 * @param format image format name
 * @return the list of encoded images returned by {@code pdfToImageForAllPages}
 */
public static List<byte[]> getImageFromPdfAllPages(File pdf, String format) {
    PDDocument document = getFromFile(pdf);
    return pdfToImageForAllPages(document, format);
}

/**
 * Renders every page of a document, in page order.
 *
 * <p>{@code pdfToImage} closes the document when the page index it is given equals
 * the end index it is given, so the index of the last page (not the page count) is
 * passed as the end index; otherwise the document would never be closed.
 *
 * @param document loaded PDF document; closed before this method returns
 * @param format   image format name
 * @return one entry per page; an entry is {@code null} when {@code pdfToImage}
 *         failed to render that page. Empty when the document has no pages.
 */
private static List<byte[]> pdfToImageForAllPages(PDDocument document, String format) {
    int totalPages = pdfTotalPages(document);
    List<byte[]> pdfImages = new ArrayList<>(totalPages);

    if (totalPages == 0) {
        // No page will be rendered, so nothing else would release the document.
        try {
            document.close();
        } catch (IOException e) {
            log.warn("IO Exception", e);
        }
        return pdfImages;
    }

    int lastPageIndex = totalPages - 1;
    for (int i = 0; i <= lastPageIndex; i++) {
        pdfImages.add(pdfToImage(document, i, lastPageIndex, format));
    }
    return pdfImages;
}

/**
 * Renders the first page of a document to an image.
 *
 * <p>{@code HOME_PAGE_INDEX} is passed as both the page index and the end index,
 * which is the condition under which {@code pdfToImage} closes the document.
 *
 * @param document loaded PDF document
 * @param format   image format name
 * @return the encoded image bytes returned by {@code pdfToImage}
 */
private static byte[] pdfHomePageToImage(PDDocument document, String format) {
    final byte[] firstPageImage = pdfToImage(document, HOME_PAGE_INDEX, HOME_PAGE_INDEX, format);
    return firstPageImage;
}


/**
 * Renders a single page of a document to an encoded image at 100 DPI.
 *
 * <p>Any exception raised while rendering or encoding is logged and results in a
 * {@code null} return value. The document is closed only when {@code startIndex}
 * equals {@code endIndex}, i.e. once the last page the caller asked for has been
 * processed.
 *
 * @param document   loaded PDF document
 * @param startIndex index of the page to render
 * @param endIndex   index that marks the caller's last page; when it equals
 *                   {@code startIndex} the document is closed
 * @param format     image format name
 * @return the encoded image bytes, or {@code null} if rendering failed
 */
private static byte[] pdfToImage(PDDocument document, int startIndex, int endIndex, String format) {
    final boolean lastRequestedPage = startIndex == endIndex;
    byte[] imageBytes = null;
    try {
        BufferedImage rendered = new PDFRenderer(document).renderImageWithDPI(startIndex, 100);
        imageBytes = imageToBytes(rendered, format);
    } catch (Exception e) {
        log.warn("Pdf Read Error:", e);
    } finally {
        // Close only after the caller's final page has been read.
        if (lastRequestedPage) {
            try {
                document.close();
            } catch (IOException e) {
                log.warn("IO Exception", e);
            }
        }
    }
    return imageBytes;
}

/**
 * Returns the number of pages in a document.
 *
 * @param document loaded PDF document
 * @return the value reported by {@code document.getNumberOfPages()}
 */
private static int pdfTotalPages(PDDocument document) {
    final int pageCount = document.getNumberOfPages();
    return pageCount;
}

/**
 * Loads a PDF document from a file.
 *
 * @param pdf PDF file
 * @return the loaded {@code PDDocument}
 * @throws RuntimeException if the file cannot be loaded; the underlying
 *                          {@link IOException} is attached as the cause
 */
private static PDDocument getFromFile(File pdf) {
    try {
        return PDDocument.load(pdf);
    } catch (IOException e) {
        // Keep the original exception as the cause so its stack trace is not lost.
        throw new RuntimeException(String.format("Load pdf %s, The File must be a pdf.", e.getMessage()), e);
    }
}

/**
 * Loads a PDF document from an in-memory byte array.
 *
 * @param pdf PDF content as a byte array
 * @return the loaded {@code PDDocument}
 * @throws RuntimeException if the bytes cannot be loaded; the underlying
 *                          {@link IOException} is attached as the cause
 */
private static PDDocument getFromByteArray(byte[] pdf) {
    try {
        return PDDocument.load(pdf);
    } catch (IOException e) {
        // The cause carries the stack trace, so printing it to stderr is unnecessary.
        throw new RuntimeException("Load pdf error: The File must be a pdf.", e);
    }
}

/**
 * Encodes a {@link BufferedImage} into a byte array.
 *
 * <p>Failures are logged rather than thrown: if no image writer exists for
 * {@code format}, or an {@link IOException} occurs, whatever was written to the
 * buffer so far (possibly nothing) is returned.
 *
 * @param bImage image to encode
 * @param format image format name, e.g. "gif" or "png"
 * @return the encoded bytes; empty when no writer handles {@code format}
 */
private static byte[] imageToBytes(BufferedImage bImage, String format) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try {
        // ImageIO.write signals "no appropriate writer" through its return value,
        // not an exception, so it has to be checked to avoid a silent empty result.
        boolean written = ImageIO.write(bImage, format, out);
        if (!written) {
            log.warn("No ImageIO writer found for format: " + format);
        }
    } catch (IOException e) {
        log.warn("IO Exception", e);
    }
    return out.toByteArray();
}