用iText讀取PDF內容,替換指定內容,輸出新PDF

本文介紹怎樣用iText讀取PDF內容以及內容的位置,並替換指定內容,輸出新的PDF文件。

主要用到的功能
1.讀取PDF內容,以及內容的位置
2.複製PDF
3.修改PDF,在PDF指定位置輸出內容

示例代碼使用的jar:

<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itextpdf</artifactId>
    <version>5.5.13</version>
</dependency>
<dependency>
    <groupId>com.google.guava</groupId>
    <artifactId>guava</artifactId>
    <version>27.0.1-jre</version>
</dependency>
/**
 * Bean that records one piece of text read from a PDF: the page it was
 * found on, its x/y coordinates and the text itself.
 *
 * <p>All properties are nullable wrapper types and start out as {@code null}.
 */
public class WordItem {

    /** Page number the text belongs to. */
    private Integer pageNum;

    /** X coordinate of the text. */
    private Float x;

    /** Y coordinate of the text. */
    private Float y;

    /** The text content. */
    private String content;

    // ---- page number ----

    /** @return the page number, or {@code null} when not set */
    public Integer getPageNum() {
        return this.pageNum;
    }

    /** @param pageNum the page number to store */
    public void setPageNum(Integer pageNum) {
        this.pageNum = pageNum;
    }

    // ---- coordinates ----

    /** @return the x coordinate, or {@code null} when not set */
    public Float getX() {
        return this.x;
    }

    /** @param x the x coordinate to store */
    public void setX(Float x) {
        this.x = x;
    }

    /** @return the y coordinate, or {@code null} when not set */
    public Float getY() {
        return this.y;
    }

    /** @param y the y coordinate to store */
    public void setY(Float y) {
        this.y = y;
    }

    // ---- content ----

    /** @return the text content, or {@code null} when not set */
    public String getContent() {
        return this.content;
    }

    /** @param content the text content to store */
    public void setContent(String content) {
        this.content = content;
    }
}
/**
 * A keyword to look for in the document together with the text to write
 * next to it and where to write it (direction and distance of the offset).
 *
 * <p>The inherited {@link WordItem} properties (page, x, y) are filled in
 * once the keyword has been located.
 */
public class ReplaceWordItem extends WordItem{

    /** Default offset direction: 2 = right. */
    private static final int DEFAULT_SITE = 2;

    /** Default offset amount. */
    private static final float DEFAULT_SIZE = 30;

    /** Keyword searched for in the document. */
    private String key;

    /** Text to write. */
    private String value;

    /** Offset direction: 1 = up, 2 = right, 3 = down, 4 = left. */
    private int site = DEFAULT_SITE;

    /** Offset amount. */
    private float size = DEFAULT_SIZE;

    /** Creates an empty item with the default direction and offset amount. */
    public ReplaceWordItem() {
        super();
    }

    /**
     * Creates an item using the default direction.
     *
     * @param key   keyword searched for in the document
     * @param value text to write
     * @param size  offset amount
     */
    public ReplaceWordItem(String key, String value, float size) {
        this(key, value, DEFAULT_SITE, size);
    }

    /**
     * Creates a fully specified item.
     *
     * @param key   keyword searched for in the document
     * @param value text to write
     * @param site  offset direction (1 = up, 2 = right, 3 = down, 4 = left)
     * @param size  offset amount
     */
    public ReplaceWordItem(String key, String value, int site, float size) {
        super();
        this.key = key;
        this.value = value;
        this.site = site;
        this.size = size;
    }

    /** @return the keyword searched for in the document */
    public String getKey() {
        return this.key;
    }

    /** @param key the keyword searched for in the document */
    public void setKey(String key) {
        this.key = key;
    }

    /** @return the text to write */
    public String getValue() {
        return this.value;
    }

    /** @param value the text to write */
    public void setValue(String value) {
        this.value = value;
    }

    /** @return the offset direction (1 = up, 2 = right, 3 = down, 4 = left) */
    public int getSite() {
        return this.site;
    }

    /** @param site the offset direction (1 = up, 2 = right, 3 = down, 4 = left) */
    public void setSite(int site) {
        this.site = site;
    }

    /** @return the offset amount */
    public float getSize() {
        return this.size;
    }

    /** @param size the offset amount */
    public void setSize(float size) {
        this.size = size;
    }
}
/**
 * Render listener used to parse the text of a PDF page.
 *
 * <p>Each call to {@link #renderText(TextRenderInfo)} delivers one chunk of
 * text. Text that looks like a single line may arrive as several chunks,
 * which would make keyword matching fail, so consecutive non-blank chunks
 * are concatenated into one {@link WordItem}. A blank chunk ends the
 * current item; the next non-blank chunk starts a new one.
 */
public class KeyWordPositionListener implements RenderListener{

    /** All words collected on the page, in the order they were rendered. */
    private List<WordItem> allItems = new ArrayList<WordItem>();

    /** Page number stamped onto every item this listener creates. */
    private Integer pageNumber;

    /** Carries the (trimmed) text of the previously rendered chunk. */
    private WordItem prevItem = new WordItem();

    @Override
    public void beginTextBlock() {
        // nothing to do
    }

    /**
     * Records one rendered chunk of text.
     *
     * <p>A new item (with this chunk's coordinates) is started when the
     * previous chunk was blank or nothing has been collected yet; otherwise
     * the chunk's text is appended to the most recent item. Blank chunks are
     * never stored themselves.
     *
     * @param renderInfo the text chunk reported by the parser
     */
    @Override
    public void renderText(TextRenderInfo renderInfo) {
        String content = renderInfo.getText().trim();

        Rectangle2D.Float textRectangle = renderInfo.getDescentLine().getBoundingRectange();
        System.out.println("content=" + content + " x="+textRectangle.getX() + " y="+textRectangle.getY());

        // Start a new word after a blank chunk, or when there is nothing to append to.
        boolean startsNewItem = Strings.isNullOrEmpty(prevItem.getContent()) || allItems.size() == 0;

        if (!content.isEmpty()) {
            if (startsNewItem) {
                WordItem fresh = new WordItem();
                fresh.setPageNum(pageNumber);
                fresh.setContent(content);
                fresh.setX((float) textRectangle.getX());
                fresh.setY((float) textRectangle.getY());
                allItems.add(fresh);
            } else {
                // Continuation of the previous word: keep its position, extend its text.
                WordItem last = allItems.get(allItems.size() - 1);
                last.setContent(last.getContent() + content);
            }
        }

        // Remember this chunk so the next call can tell whether it follows a blank.
        WordItem seen = new WordItem();
        seen.setContent(content);
        prevItem = seen;
    }

    @Override
    public void endTextBlock() {
        // nothing to do
    }

    @Override
    public void renderImage(ImageRenderInfo renderInfo) {
        // images are ignored
    }

    /** @return the list of collected words (the live internal list) */
    public List<WordItem> getAllItems() {
        return this.allItems;
    }

    /** @param allItems the list to collect words into */
    public void setAllItems(List<WordItem> allItems) {
        this.allItems = allItems;
    }

    /** @return the page number stamped onto collected items */
    public Integer getPageNumber() {
        return this.pageNumber;
    }

    /** @param pageNumber the page number to stamp onto collected items */
    public void setPageNumber(Integer pageNumber) {
        this.pageNumber = pageNumber;
    }
}
/**
 * Demo: reads the text of a PDF, looks for configured keywords and writes
 * replacement text next to each keyword that was found, producing a new PDF.
 */
public class SearchWord {

    /**
     * Font used for the stamped text. The {@code ,1} suffix is the index of
     * the face inside the TrueType collection. This is a Windows path, so
     * adjust it on other systems.
     */
    private static final String FONT_PATH = "C:/Windows/Fonts/simsun.ttc,1";

    /** Font size of the stamped text, in points. */
    private static final float FONT_SIZE = 10F;

    /** Extra horizontal gap added when writing to the right of a keyword. */
    private static final float KEYWORD_GAP = 30;

    /**
     * Reads the PDF, matches its content against the keywords and, for every
     * match, writes the configured text at the offset position (modifying
     * the PDF through a stamper).
     *
     * @param args optional: {@code args[0]} is the input PDF (default
     *             {@code in.pdf}), {@code args[1]} the output PDF (default
     *             {@code out.pdf})
     * @throws Exception if reading, parsing or writing the PDF fails
     */
    public static void main(String[] args) throws Exception {

        String path = (args != null && args.length > 0) ? args[0] : "in.pdf";
        String outPath = (args != null && args.length > 1) ? args[1] : "out.pdf";

        // Keywords, with the text to write and the offset to write it at.
        List<ReplaceWordItem> keyList = Lists.newArrayList(
                                    new ReplaceWordItem("郵編", "蘇州21510000", 2),
                                    new ReplaceWordItem("代理機構名稱", "蘇州XXX事務所", 80)
                                );

        PdfReader reader = new PdfReader(path);
        // try-with-resources guarantees the output stream is released even
        // when parsing or stamping throws; the reader is closed in finally.
        try (FileOutputStream out = new FileOutputStream(outPath)) {
            PdfStamper stamper = new PdfStamper(reader, out);

            // Keywords that were found, now carrying page and position.
            List<ReplaceWordItem> keyItemList = matchPage(reader, keyList);

            if (!keyItemList.isEmpty()) {
                // The font is the same for every match, so load it once, and
                // only when there is something to write.
                BaseFont bf = BaseFont.createFont(FONT_PATH, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);

                for (ReplaceWordItem keyItem : keyItemList) {
                    PdfContentByte overContent = stamper.getOverContent(keyItem.getPageNum());
                    overContent.beginText();
                    overContent.setFontAndSize(bf, FONT_SIZE);
                    overContent.setTextMatrix(targetX(keyItem), targetY(keyItem));
                    overContent.showText(keyItem.getValue());
                    overContent.endText();
                }
            }

            // Closing the stamper is what actually writes the new PDF.
            stamper.close();
        } finally {
            reader.close();
        }
    }

    /**
     * X coordinate at which the replacement text is written.
     *
     * <p>Right (site 2, and any unrecognised site value) keeps the original
     * behaviour of {@code x + size + 30}; left (site 4) moves by
     * {@code size} to the left; up/down leave x unchanged.
     */
    private static float targetX(ReplaceWordItem item) {
        switch (item.getSite()) {
            case 1:
            case 3:
                return item.getX();
            case 4:
                return item.getX() - item.getSize();
            default:
                return item.getX() + item.getSize() + KEYWORD_GAP;
        }
    }

    /**
     * Y coordinate at which the replacement text is written.
     *
     * <p>NOTE(review): assumes the page uses the default PDF coordinate
     * system (y grows upward, no page rotation), so "up" (site 1) adds
     * {@code size} and "down" (site 3) subtracts it.
     */
    private static float targetY(ReplaceWordItem item) {
        switch (item.getSite()) {
            case 1:
                return item.getY() + item.getSize();
            case 3:
                return item.getY() - item.getSize();
            default:
                return item.getY();
        }
    }

    /**
     * Finds the position of each keyword in the document.
     *
     * <p>The content of every page is collected, then each keyword is
     * compared (exact match) against the collected items. On the first match
     * the keyword item itself is updated with the page number and
     * coordinates of the matched text and added to the result. Keywords
     * without a match are left out.
     *
     * @param reader      the opened PDF
     * @param keywordList keywords to look for; matched entries are mutated
     * @return the matched keyword items, in the order of {@code keywordList}
     * @throws Exception if parsing a page fails
     */
    public static List<ReplaceWordItem> matchPage(PdfReader reader, List<ReplaceWordItem> keywordList) throws Exception {

        // All content in the document.
        List<WordItem> allItemList = new ArrayList<>();

        // One parser serves every page of the same reader.
        PdfReaderContentParser parse = new PdfReaderContentParser(reader);

        for (int page = 1; page <= reader.getNumberOfPages(); page++) {

            KeyWordPositionListener renderListener = new KeyWordPositionListener();
            renderListener.setPageNumber(page);

            parse.processContent(page, renderListener);

            Rectangle rectangle = reader.getPageSize(page);
            System.out.println(rectangle.getWidth() + " " + rectangle.getHeight() + " " + rectangle.getLeft() + " " + rectangle.getRight());

            // For reference: PageSize.A4 is 210mm x 297mm; iText works in
            // points (1pt is about 0.35mm), so A4 is 595 x 842.
            allItemList.addAll(renderListener.getAllItems());
        }

        for (WordItem wordItem : allItemList) {
            System.out.println("wordItem.getContent() " + wordItem.getContent());
        }

        List<ReplaceWordItem> keyItemList = new ArrayList<>();
        for (ReplaceWordItem key : keywordList) {
            for (WordItem pageItem : allItemList) {
                if (Objects.equal(key.getKey(), pageItem.getContent())) {

                    key.setPageNum(pageItem.getPageNum());
                    key.setX(pageItem.getX());
                    key.setY(pageItem.getY());

                    keyItemList.add(key);

                    // Only the first occurrence of a keyword is used.
                    break;
                }
            }
        }

        return keyItemList;
    }
}
相關文章
相關標籤/搜索