本文介紹怎樣用iText讀取PDF內容以及內容的位置,並替換指定內容,輸出新的PDF文件。
主要用到的功能:
1.讀取PDF內容,以及內容的位置
2.複製PDF
3.修改PDF,在PDF指定位置輸出內容
示例代碼使用的jar:
<dependency> <groupId>com.itextpdf</groupId> <artifactId>itextpdf</artifactId> <version>5.5.13</version> </dependency> <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> <version>27.0.1-jre</version> </dependency>
/**
 * Bean describing one piece of text found in a PDF: the text itself, the page it is on,
 * and the x/y coordinates recorded for it.
 */
public class WordItem {

    /** Text content of the item. */
    private String content;

    /** Number of the page the item is on. */
    private Integer pageNum;

    /** X coordinate of the item. */
    private Float x;

    /** Y coordinate of the item. */
    private Float y;

    /** @return the text content, or {@code null} if none has been set */
    public String getContent() {
        return content;
    }

    /** @param content the text content to store */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the page number, or {@code null} if none has been set */
    public Integer getPageNum() {
        return pageNum;
    }

    /** @param pageNum the page number to store */
    public void setPageNum(Integer pageNum) {
        this.pageNum = pageNum;
    }

    /** @return the x coordinate, or {@code null} if none has been set */
    public Float getX() {
        return x;
    }

    /** @param x the x coordinate to store */
    public void setX(Float x) {
        this.x = x;
    }

    /** @return the y coordinate, or {@code null} if none has been set */
    public Float getY() {
        return y;
    }

    /** @param y the y coordinate to store */
    public void setY(Float y) {
        this.y = y;
    }
}
public class ReplaceWordItem extends WordItem{ //須要替換的關鍵字,替換後的內容和位置 //文檔中搜索的關鍵字 private String key; //寫入的內容 private String value; //偏移的位置 1:上 2:右 3:下面 4:左邊 private int site = 2; //偏移的量 private float size = 30; public ReplaceWordItem(String key, String value, float size) { super(); this.key = key; this.value = value; this.size = size; } public ReplaceWordItem() { super(); } public ReplaceWordItem(String key, String value, int site, float size) { super(); this.key = key; this.value = value; this.site = site; this.size = size; } public String getKey() { return key; } public void setKey(String key) { this.key = key; } public String getValue() { return value; } public void setValue(String value) { this.value = value; } public int getSite() { return site; } public void setSite(int site) { this.site = site; } public float getSize() { return size; } public void setSize(float size) { this.size = size; } }
public class KeyWordPositionListener implements RenderListener{ //用來解析PDF內容的類 //頁面上全部的詞 private List<WordItem> allItems = new ArrayList<WordItem>(); /** * 第幾頁 */ private Integer pageNumber; private WordItem prevItem = new WordItem(); @Override public void beginTextBlock() { // TODO Auto-generated method stub } @Override public void renderText(TextRenderInfo renderInfo) { //讀取PDF時,有些肉眼看上去是一行的字,可能會被解析爲多個,致使找不到知足條件的關鍵字,這裏作了簡單的處理 //即若是一些詞是連續的,先後沒有空白字符串,即認爲是一個詞 String content = renderInfo.getText().trim(); Rectangle2D.Float textRectangle = renderInfo.getDescentLine().getBoundingRectange(); System.out.println("content=" + content + " x="+textRectangle.getX() + " y="+textRectangle.getY()); WordItem item = null; boolean newFlag = false; if(Strings.isNullOrEmpty(prevItem.getContent())) { item = new WordItem(); newFlag = true; }else { if(allItems.size() == 0) { item = new WordItem(); newFlag = true; }else { item = allItems.get(allItems.size()-1); } } //內容會斷開,如代理機構名稱 變成 代理機構 名稱 2個部分???????????? //關鍵字相關信息 if(!content.equals("")) { if(newFlag) { item.setPageNum(pageNumber); item.setContent(content); item.setX((float)textRectangle.getX()); item.setY((float)textRectangle.getY()); allItems.add(item); //先保存全部的項 }else { //以前有內容 item.setContent(item.getContent() + content); } } prevItem = new WordItem(); prevItem.setContent(content); } @Override public void endTextBlock() { // TODO Auto-generated method stub } @Override public void renderImage(ImageRenderInfo renderInfo) { // TODO Auto-generated method stub } public List<WordItem> getAllItems() { return allItems; } public void setAllItems(List<WordItem> allItems) { this.allItems = allItems; } public Integer getPageNumber() { return pageNumber; } public void setPageNumber(Integer pageNumber) { this.pageNumber = pageNumber; } }
public class SearchWord { /** * 從PDF中讀取內容 * 內容與關鍵字比對,若是知足條件,則在匹配內容的指定位置,增長鬚要顯示的內容(替換關鍵字,修改PDF) */ public static void main(String[] args) throws Exception { String path = "in.pdf"; String outPath = "out.pdf"; PdfReader reader = new PdfReader(path); PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(outPath)); //關鍵字,以及替換後的內容及位置 List<ReplaceWordItem> keyList = Lists.newArrayList( new ReplaceWordItem("郵編", "蘇州21510000", 2), new ReplaceWordItem("代理機構名稱", "蘇州XXX事務所", 80) ); //找到的位置,匹配到的關鍵字 List<ReplaceWordItem> keyItemList = matchPage(reader, keyList); //修改PDF for(int i=0; i<keyItemList.size(); i++) { ReplaceWordItem keyItem = keyItemList.get(i); PdfContentByte overContent = stamper.getOverContent(keyItem.getPageNum()); overContent.beginText(); //字體和大小 BaseFont bf = BaseFont.createFont("C:/Windows/Fonts/simsun.ttc,1", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED); overContent.setFontAndSize(bf, 10F); //位置 overContent.setTextMatrix(keyItem.getX() + keyItem.getSize() + 30, keyItem.getY()); //內容 overContent.showText(keyItem.getValue()); overContent.endText(); } stamper.close(); } /** * 知足關鍵字的位置 */ public static List<ReplaceWordItem> matchPage(PdfReader reader, List<ReplaceWordItem> keywordList) throws Exception { //文檔裏全部的內容 List<WordItem> allItemList = new ArrayList<>(); for(int page=1; page<=reader.getNumberOfPages(); page++){ KeyWordPositionListener renderListener = new KeyWordPositionListener(); renderListener.setPageNumber(page); PdfReaderContentParser parse = new PdfReaderContentParser(reader); parse.processContent(page, renderListener); Rectangle rectangle = reader.getPageSize(page); System.out.println(rectangle.getWidth() + " " + rectangle.getHeight() + " " + rectangle.getLeft() + " " + rectangle.getRight()); //PageSize.A4 210mm*297mm //Itext單位 Pt 1pt = 0.35mm //public static final Rectangle A4 = new RectangleReadOnly(595,842); allItemList.addAll(renderListener.getAllItems()); } for (WordItem wordItem : allItemList) { 
System.out.println("wordItem.getContent() " + wordItem.getContent()); } List<ReplaceWordItem> keyItemList = new ArrayList<>(); //那些知足關鍵字 for (ReplaceWordItem key : keywordList) { for (WordItem pageItem : allItemList) { if(Objects.equal(key.getKey(), pageItem.getContent())) { key.setPageNum(pageItem.getPageNum()); key.setX(pageItem.getX()); key.setY(pageItem.getY()); keyItemList.add(key); //找到第一個就結束 break; } } } return keyItemList; } }