itext替換pdf中的中文

在網上找了一段時間,發現有兩種 Java 的實現方式:

1,使用aspose.pdf,這篇文章寫得很清楚:https://blog.csdn.net/da_keng/article/details/65633498 。我稍微改了一下,使其能夠一次替換多個字符串:

/**
 * Demo entry point for the Aspose.PDF variant: builds the placeholder-to-value
 * table for the sample contract and hands it to {@code test}.
 */
public static void main(String[] args) {
        final String targetPath = "E:\\CA\\out.pdf";
        final String srcPath = "E:\\CA\\入職合同模版樣例.pdf";

        // Placeholder text as it appears in the template -> value written in its place.
        Map<String, String> replacements = new HashMap<String, String>();
        replacements.put("[$合同編號$]", "ZR-20181009-00000164");
        replacements.put("[$乙方姓名$]", "TroubleA");
        replacements.put("[$簽字日期$]", "2018/10/10 11:24:30");

        test(srcPath, targetPath, replacements);
    }

    /**
     * Replaces every occurrence of each key of {@code map} in the PDF at
     * {@code srcPath} with the mapped value, gives the replaced fragments a red
     * background, and saves the result to {@code targetPath}.
     *
     * @param srcPath    path of the PDF to read
     * @param targetPath path the modified PDF is saved to
     * @param map        text to search for -> replacement text
     */
    public static void test(String srcPath, String targetPath, Map<String, String> map){
        // ClassLoader resource names are '/'-separated and carry no leading separator;
        // the previous "\\license.xml" could only resolve on a Windows directory classpath.
        // try-with-resources also closes the stream, which used to leak.
        try (InputStream license = Main.class.getClassLoader().getResourceAsStream("license.xml")) {
            new License().setLicense(license);
        } catch (Exception e) {
            // Best effort, as before: a licensing failure is reported and processing continues.
            e.printStackTrace();
        }
        Document pdfDoc = new Document(srcPath);
        // The page collection does not depend on the entry being processed, so fetch it once.
        PageCollection pages = pdfDoc.getPages();
        for (Map.Entry<String, String> entry : map.entrySet()) {
            System.out.println("Key = " + entry.getKey() + ", Value = " + entry.getValue());

            TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber(entry.getKey());
            System.out.println("文檔總頁碼數:"+pages.size());
            pages.accept(textFragmentAbsorber);
            int i = 0;
            for (TextFragment textFragment :(Iterable<TextFragment>) textFragmentAbsorber.getTextFragments()) {
                textFragment.setText(entry.getValue());
                // Red background marks the replaced fragment.
                textFragment.getTextState().setBackgroundColor(com.aspose.pdf.Color.getRed());
                System.out.println(++i);
            }
        }
        pdfDoc.save(targetPath);
    }

2,像這個鏈接中的大佬一樣(https://blog.csdn.net/sishenkankan/article/details/53107195),使用具體的java代碼去尋找文字的x,y位置,然後畫一個白色矩形覆蓋,再重新寫上需要替換的文字。這種做法有如下兩個缺點:

a)當需要替換的文字不在同一個TextRenderInfo裏面時,無法找到比較長的需要替換的文字

b)當pdf包含多頁的時候,不方便具體地控制替換到哪一頁

所以我又百度了一個可以定位pdf中需要替換的文字位置的方法:https://blog.csdn.net/sdizoea/article/details/75105798 。像文中大佬一樣直接定位,解決了文字不在同一個塊中時查找的麻煩。下面貼一下全部的代碼,一共有6個類:

package com.sinosoft.lis.utils;

import com.itextpdf.text.BaseColor;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Font;
import com.itextpdf.text.log.Logger;
import com.itextpdf.text.log.LoggerFactory;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfContentByte;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;

import java.io.*;
import java.net.URLDecoder;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

/**
 * Replaces text inside rectangular regions of a PDF by painting a filled
 * rectangle over each region and writing the new text on top of it.
 *
 * <p>Two independent code paths exist:
 * <ul>
 *   <li>the instance workflow ({@code replaceText} followed by {@link #toPdf(String)}
 *       or {@link #toBytes()}), which draws on the over-content of page 1 only;</li>
 *   <li>{@link #manipulatePdf1(String, String, Map)}, which locates every occurrence
 *       of each key through {@code MatchItem.matchPage} and draws on the page where
 *       it was found.</li>
 * </ul>
 *
 * <p>After {@code toPdf}/{@code toBytes} the internal maps are set to {@code null},
 * so an instance serves a single conversion.
 */
public class PdfReplacer {
    private static final Logger logger = LoggerFactory.getLogger(PdfReplacer.class);

    private int fontSize;
    private Map<String, ReplaceRegion> replaceRegionMap = new HashMap<String, ReplaceRegion>();
    private Map<String, String> replaceTextMap = new HashMap<String, String>();
    private ByteArrayOutputStream output;
    private PdfReader reader;
    private PdfStamper stamper;
    private PdfContentByte canvas;
    private Font font;

    /**
     * Creates an instance that is not bound to a document; only
     * {@link #manipulatePdf1(String, String, Map)} is usable on it.
     */
    public PdfReplacer() {
    }

    /**
     * @param pdfBytes complete content of the PDF to modify
     * @throws DocumentException if the stamper or the default font cannot be created
     * @throws IOException       if the PDF cannot be read
     */
    public PdfReplacer(byte[] pdfBytes) throws DocumentException, IOException{
        init(pdfBytes);
    }

    /**
     * @param fileName path of the PDF to modify
     * @throws IOException       if the file cannot be opened or read
     * @throws DocumentException if the stamper or the default font cannot be created
     */
    public PdfReplacer(String fileName) throws IOException, DocumentException{
        init(readFully(fileName));
    }

    /**
     * Reads a whole file into memory. {@code available()} plus a single {@code read()}
     * is not guaranteed to return the full content, so the stream is drained in a loop;
     * try-with-resources also avoids calling {@code close()} on a stream that never opened.
     */
    private static byte[] readFully(String fileName) throws IOException {
        try (FileInputStream in = new FileInputStream(fileName)) {
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int read;
            while ((read = in.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            return buffer.toByteArray();
        }
    }

    /** Opens reader and stamper, takes the over-content of page 1 and sets the default font size 10. */
    private void init(byte[] pdfBytes) throws DocumentException, IOException{
        logger.info("初始化開始");
        reader = new PdfReader(pdfBytes);
        output = new ByteArrayOutputStream();
        stamper = new PdfStamper(reader, output);
        canvas = stamper.getOverContent(1);
        setFont(10);
        logger.info("初始化成功");
    }

    /** Releases reader and output buffer and drops the replacement tables. */
    private void close() throws DocumentException, IOException{
        if(reader != null){
            reader.close();
        }
        if(output != null){
            output.close();
        }

        output=null;
        replaceRegionMap=null;
        replaceTextMap=null;
    }

    /**
     * Registers an explicit region and the text to write into it. The text itself
     * is used as the region's alias.
     *
     * @param x    x coordinate of the region
     * @param y    y coordinate of the region
     * @param w    width of the region
     * @param h    height of the region
     * @param text text written into the region
     */
    public void replaceText(float x, float y, float w,float h, String text){
        ReplaceRegion region = new ReplaceRegion(text);
        region.setH(h);
        region.setW(w);
        region.setX(x);
        region.setY(y);
        addReplaceRegion(region);
        this.replaceText(text, text);
    }

    /**
     * Registers a replacement by name. When no region with that alias has been
     * added, the name is searched for in the document when the PDF is generated.
     *
     * @param name region alias, or the text to locate
     * @param text replacement text
     */
    public void replaceText(String name, String text){
        this.replaceTextMap.put(name, text);
    }

    /**
     * Paints every region and writes its replacement text, then closes the stamper
     * so that the result is flushed into the output buffer.
     *
     * @throws IOException       on write failure
     * @throws DocumentException on PDF generation failure
     */
    private void process() throws DocumentException, IOException{
        try{
            parseReplaceText();
            canvas.saveState();
            Set<Entry<String, ReplaceRegion>> entrys = replaceRegionMap.entrySet();
            for (Entry<String, ReplaceRegion> entry : entrys) {
                ReplaceRegion value = entry.getValue();
                canvas.setColorFill(BaseColor.RED);
                canvas.rectangle(value.getX(),value.getY(),value.getW(),value.getH());
            }
            canvas.fill();
            canvas.restoreState();
            // Write the replacement text on top of the painted rectangles.
            canvas.beginText();
            for (Entry<String, ReplaceRegion> entry : entrys) {
                ReplaceRegion value = entry.getValue();
                canvas.setFontAndSize(font.getBaseFont(), getFontSize());
                // +2 shifts the text relative to the background rectangle.
                canvas.setTextMatrix(value.getX(),value.getY()+2);
                canvas.showText(replaceTextMap.get(value.getAliasName()));
            }
            canvas.endText();
        }finally{
            if(stamper != null){
                stamper.close();
            }
        }
    }

    /**
     * Looks up the position of every replacement that has no explicit region.
     * An {@link IOException} during the lookup is logged and the affected
     * replacements are simply left without a region.
     */
    private void parseReplaceText() {
        PdfPositionParse parse = new PdfPositionParse(reader);
        boolean lookupNeeded = false;
        for (String name : this.replaceTextMap.keySet()) {
            if(this.replaceRegionMap.get(name) == null){
                parse.addFindText(name);
                lookupNeeded = true;
            }
        }
        if(!lookupNeeded){
            // parse() rejects an empty search list with a NullPointerException, which
            // used to abort documents whose regions were all given explicitly.
            return;
        }

        try {
            Map<String, ReplaceRegion> parseResult = parse.parse();
            for (Entry<String, ReplaceRegion> entry : parseResult.entrySet()) {
                if(entry.getValue() != null){
                    this.replaceRegionMap.put(entry.getKey(), entry.getValue());
                }
            }
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
        }
    }

    /**
     * Generates the modified PDF and writes it to a file.
     *
     * @param fileName path of the file to write
     * @throws DocumentException on PDF generation failure
     * @throws IOException       on write failure (logged, then rethrown)
     */
    public void toPdf(String fileName) throws DocumentException, IOException{
        FileOutputStream fileOutputStream = null;
        try{
            process();
            fileOutputStream = new FileOutputStream(fileName);
            fileOutputStream.write(output.toByteArray());
            fileOutputStream.flush();
        }catch(IOException e){
            logger.error(e.getMessage(), e);
            throw e;
        }finally{
            if(fileOutputStream != null){
                fileOutputStream.close();
            }
            close();
        }
        logger.info("文件生成成功");
    }

    /**
     * Generates the modified PDF and returns it as a byte array.
     *
     * @return content of the generated PDF
     * @throws DocumentException on PDF generation failure
     * @throws IOException       on write failure
     */
    public byte[] toBytes() throws DocumentException, IOException{
        try{
            process();
            logger.info("二進制數據生成成功");
            return output.toByteArray();
        }finally{
            close();
        }
    }

    /**
     * Adds a replacement region, keyed by its alias.
     *
     * @param replaceRegion region to register
     */
    public void addReplaceRegion(ReplaceRegion replaceRegion){
        this.replaceRegionMap.put(replaceRegion.getAliasName(), replaceRegion);
    }

    /**
     * Returns the region registered under an alias.
     *
     * @param aliasName alias of the region
     * @return the region, or {@code null} when none is registered under that alias
     */
    public ReplaceRegion getReplaceRegion(String aliasName){
        return this.replaceRegionMap.get(aliasName);
    }

    /** @return the font size used when writing replacement text */
    public int getFontSize() {
        return fontSize;
    }

    /**
     * Sets the font size; when it differs from the current size the font is
     * recreated as bold STSong-Light.
     *
     * @param fontSize new font size
     * @throws DocumentException if the font cannot be created
     * @throws IOException       if the font cannot be read
     */
    public void setFont(int fontSize) throws DocumentException, IOException{
        if(fontSize != this.fontSize){
            this.fontSize = fontSize;
            BaseFont bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.EMBEDDED);
            font = new Font(bf,this.fontSize,Font.BOLD);
        }
    }

    /**
     * Sets the font used for replacement text.
     *
     * @param font font to use
     * @throws NullPointerException if {@code font} is {@code null}
     */
    public void setFont(Font font){
        if(font == null){
            throw new NullPointerException("font is null");
        }
        this.font = font;
    }

    /**
     * Copies {@code src} to {@code dest}; for every occurrence of each key of
     * {@code replaceTextMap} found by {@code MatchItem.matchPage} a red 85x13
     * rectangle is painted at the match position and the mapped value is written
     * over it in 12pt.
     *
     * @param src            path of the source PDF
     * @param dest           path of the PDF to create
     * @param replaceTextMap text to locate -> replacement text
     * @throws Exception if reading, matching or writing fails
     */
    public void manipulatePdf1(String src, String dest, Map<String, String> replaceTextMap) throws Exception {
        PdfReader srcReader = new PdfReader(src);
        try {
            // The output stream and the reader are now released on failure as well;
            // previously an exception inside the loop leaked both.
            try (FileOutputStream out = new FileOutputStream(dest)) {
                PdfStamper destStamper = new PdfStamper(srcReader, out);
                // Loaded on the first match only, so a document without matches still
                // does not need the font resource (same as before).
                BaseFont bf = null;

                for (Entry<String, String> replacement : replaceTextMap.entrySet()) {
                    String key = replacement.getKey();

                    List<MatchItem> list = MatchItem.matchPage(src, key);
                    logger.info(list.toString());
                    for (MatchItem match : list) {
                        float x = match.getX();
                        float y = match.getY();
                        int pageNum = match.getPageNum();
                        logger.info("字段:" + key + " 定位至 第 " + pageNum + " 頁  x:" + x + "---y:" + y);
                        PdfContentByte pageCanvas = destStamper.getOverContent(pageNum);

                        if (bf == null) {
                            bf = loadReplacementFont();
                        }
                        pageCanvas.setFontAndSize(bf, 12);
                        pageCanvas.saveState();
                        pageCanvas.setColorFill(BaseColor.RED);
                        pageCanvas.rectangle(x, y + 1, 85, 13);
                        pageCanvas.fill();
                        pageCanvas.restoreState();
                        pageCanvas.beginText();
                        pageCanvas.setTextMatrix(x, y + 2);
                        pageCanvas.showText(replacement.getValue());
                        pageCanvas.endText();
                    }
                }

                destStamper.close();
            }
        } finally {
            srcReader.close();
        }
    }

    /**
     * Loads the bundled font used by {@link #manipulatePdf1(String, String, Map)}.
     * The resource path is decoded explicitly as UTF-8 instead of through the
     * deprecated platform-charset overload of {@code URLDecoder.decode}.
     */
    private static BaseFont loadReplacementFont() throws DocumentException, IOException {
        java.net.URL fontUrl = PdfReplacer.class.getResource("/fonts/adobesongstd-light.otf");
        if (fontUrl == null) {
            throw new FileNotFoundException("classpath resource /fonts/adobesongstd-light.otf not found");
        }
        return BaseFont.createFont(
                URLDecoder.decode(fontUrl.getFile(), "UTF-8"),
                BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
    }

    /**
     * Manual test entry point.
     */
    public static void main(String[] args) throws Exception {
        String SRC = "E:\\CA\\入職合同模版樣例.pdf";
        String DEST = "E:\\CA\\out.pdf";

        Map<String, String> replaceMap = new HashMap<>();
        replaceMap.put("[$合同編號$]", "ZR-20181009-00000164");
        replaceMap.put("[$乙方姓名$]", "TroubleA");
        replaceMap.put("[$簽字日期$]", "2018/10/10 11:24:30");
        replaceMap.put("[$終止日期$]", "2018/10/10 end");

        // The variable was declared as "file" but used as "outFile", which did not compile.
        File outFile = new File(DEST);
        File parentDir = outFile.getParentFile();
        if (parentDir != null && !parentDir.exists()) {
            parentDir.mkdirs();
        }
        new PdfReplacer().manipulatePdf1(SRC, DEST, replaceMap);
    }
}
package com.sinosoft.lis.utils;

/**
 * A rectangular area of a PDF page whose content is to be replaced,
 * identified by an alias name.
 */
public class ReplaceRegion {

    /** Name under which this region is looked up. */
    private String aliasName;

    /** Position of the region; unset ({@code null}) until assigned. */
    private Float x;
    private Float y;

    /** Size of the region, pre-set to 12 wide and 2 high. */
    private Float w = 12f;
    private Float h = 2f;

    private int pageSize;

    /**
     * @param aliasName alias identifying this region
     */
    public ReplaceRegion(String aliasName) {
        this.aliasName = aliasName;
    }

    /** @return the alias of this region */
    public String getAliasName() {
        return this.aliasName;
    }

    public void setAliasName(String aliasName) {
        this.aliasName = aliasName;
    }

    /** @return x coordinate, or {@code null} when not set */
    public Float getX() {
        return this.x;
    }

    public void setX(Float x) {
        this.x = x;
    }

    /** @return y coordinate, or {@code null} when not set */
    public Float getY() {
        return this.y;
    }

    public void setY(Float y) {
        this.y = y;
    }

    /** @return width of the region */
    public Float getW() {
        return this.w;
    }

    public void setW(Float w) {
        this.w = w;
    }

    /** @return height of the region */
    public Float getH() {
        return this.h;
    }

    public void setH(Float h) {
        this.h = h;
    }

    /** @return the stored page-size value (0 until set) */
    public int getPageSize() {
        return this.pageSize;
    }

    public void setPageSize(int pageSize) {
        this.pageSize = pageSize;
    }
}
package com.sinosoft.lis.utils;

import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;

import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * Locates the x/y position of given texts on one page of a PDF.
 *
 * <p>The page that is parsed defaults to page 6 (the value that used to be
 * hard-coded) and can be changed with {@link #setPageNumber(int)}.
 */
public class PdfPositionParse {

    /** Page parsed when none is selected; kept so that existing callers see no change. */
    private static final int DEFAULT_PAGE_NUMBER = 6;

    private PdfReader reader;
    /** Texts to search for. */
    private List<String> findText = new ArrayList<String>();
    private PdfReaderContentParser parser;
    /** Whether {@link #parse()} closes the reader; false when the reader was supplied by the caller. */
    private boolean needClose = true;
    private int pageNumber = DEFAULT_PAGE_NUMBER;

    /**
     * @param fileName path of the PDF to parse
     * @throws IOException if the file cannot be opened or read
     */
    public PdfPositionParse(String fileName) throws IOException{
        // The reader consumes the stream itself: sizing a buffer with available() and
        // issuing one read() is not guaranteed to deliver the whole file, and the old
        // finally block called close() on a null stream when opening failed.
        try (FileInputStream in = new FileInputStream(fileName)) {
            reader = new PdfReader(in);
            parser = new PdfReaderContentParser(reader);
        }
    }

    /**
     * @param bytes complete content of the PDF to parse
     * @throws IOException if the PDF cannot be read
     */
    public PdfPositionParse(byte[] bytes) throws IOException{
        init(bytes);
    }

    /**
     * Uses a reader owned by the caller; it is not closed when parsing finishes.
     *
     * @param reader reader of the PDF to parse
     */
    public PdfPositionParse(PdfReader reader){
        this.reader = reader;
        parser = new PdfReaderContentParser(reader);
        needClose = false;
    }

    /**
     * Adds a text to search for.
     *
     * @param text text whose position is wanted
     */
    public void addFindText(String text){
        this.findText.add(text);
    }

    /** @return the 1-based number of the page that {@link #parse()} examines */
    public int getPageNumber() {
        return pageNumber;
    }

    /**
     * Selects the page that {@link #parse()} examines.
     *
     * @param pageNumber 1-based page number
     * @throws IllegalArgumentException if {@code pageNumber} is smaller than 1
     */
    public void setPageNumber(int pageNumber) {
        if (pageNumber < 1) {
            throw new IllegalArgumentException("pageNumber must be >= 1: " + pageNumber);
        }
        this.pageNumber = pageNumber;
    }

    private void init(byte[] bytes) throws IOException {
        reader = new PdfReader(bytes);
        parser = new PdfReaderContentParser(reader);
    }

    /**
     * Parses the selected page and returns the region found for every search text.
     * The reader is closed afterwards unless it was supplied by the caller.
     *
     * @return search text -> region, as reported by the render listener
     * @throws IOException          if the page content cannot be processed
     * @throws NullPointerException if no search text has been added
     */
    public Map<String, ReplaceRegion> parse() throws IOException{
        try{
            if(this.findText.isEmpty()){
                throw new NullPointerException("沒有須要查找的文本");
            }
            PositionRenderListener listener = new PositionRenderListener(this.findText);
            parser.processContent(pageNumber, listener);
            return listener.getResult();
        }finally{
            if(reader != null && needClose){
                reader.close();
            }
        }
    }
}
package com.sinosoft.lis.utils;

import com.itextpdf.text.pdf.parser.ImageRenderInfo;
import com.itextpdf.text.pdf.parser.RenderListener;
import com.itextpdf.text.pdf.parser.TextRenderInfo;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * pdf渲染監聽,當找到渲染的文本時,獲得文本的座標x,y,w,h
 */
public class PositionRenderListener implements RenderListener{

    private List<String> findText;
    private float defaultH;		///出現沒法取到值的狀況,默認爲12
    private float fixHeight;	//可能出現沒法徹底覆蓋的狀況,提供修正的參數,默認爲2

    public PositionRenderListener(List<String> findText, float defaultH,float fixHeight) {
        this.findText = findText;
        this.defaultH = defaultH;
        this.fixHeight = fixHeight;
    }

    public PositionRenderListener(List<String> findText) {
        this.findText = findText;
        this.defaultH = 12;
        this.fixHeight = -1;
    }

    @Override
    public void beginTextBlock() {

    }

    @Override
    public void endTextBlock() {

    }

    @Override
    public void renderImage(ImageRenderInfo imageInfo) {
    }

    private Map<String, ReplaceRegion> result = new HashMap<String, ReplaceRegion>();

    @Override
    public void renderText(TextRenderInfo textInfo) {
    }

    public Map<String, ReplaceRegion> getResult() {
        for (String key : findText) {	//補充沒有找到的數據
            if(this.result.get(key) == null){
                this.result.put(key, null);
            }
        }
        return this.result;
    }
}

還有用來尋找替換文字x,y位置的兩個類:

package com.sinosoft.lis.utils;

import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;

import java.util.ArrayList;
import java.util.List;

import static com.sinosoft.lis.utils.KeyWordPositionListener.findKeywordItems;

/**
 * One text chunk of a PDF page together with its position; also used to
 * report where a keyword was found.
 */
public class MatchItem {
    private Integer pageNum;
    private Float x;
    private Float y;
    private Float pageWidth;
    private Float pageHeight;
    private String content;

    public MatchItem() {
    }

    /**
     * @param content text of the chunk
     */
    public MatchItem(String content) {
        this.content = content;
    }

    /** @return number of the page the chunk is on */
    public Integer getPageNum() {
        return pageNum;
    }

    public void setPageNum(Integer pageNum) {
        this.pageNum = pageNum;
    }

    /** @return x coordinate of the chunk */
    public Float getX() {
        return x;
    }

    public void setX(Float x) {
        this.x = x;
    }

    /** @return y coordinate of the chunk */
    public Float getY() {
        return y;
    }

    public void setY(Float y) {
        this.y = y;
    }

    /** @return width of the page the chunk is on */
    public Float getPageWidth() {
        return pageWidth;
    }

    public void setPageWidth(Float pageWidth) {
        this.pageWidth = pageWidth;
    }

    /** @return height of the page the chunk is on */
    public Float getPageHeight() {
        return pageHeight;
    }

    public void setPageHeight(Float pageHeight) {
        this.pageHeight = pageHeight;
    }

    /** @return text of the chunk */
    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    @Override
    public String toString() {
        return "MatchItem [pageNum=" + pageNum + ", x=" + x + ", y=" + y
                + ", pageWidth=" + pageWidth + ", pageHeight=" + pageHeight
                + ", content=" + content + "]";
    }

    /**
     * Searches every page of a PDF file for a keyword.
     *
     * @param fileName path of the PDF
     * @param keyword  text to look for
     * @return the matches of all pages, in page order
     * @throws Exception if the file cannot be read or parsed
     */
    public static List<MatchItem> matchPage(String fileName, String keyword) throws Exception {
        List<MatchItem> items = new ArrayList<MatchItem>();
        PdfReader reader = new PdfReader(fileName);
        try {
            int pageCount = reader.getNumberOfPages();
            for (int page = 1; page <= pageCount; page++) {
                items.addAll(matchPage(reader, page, keyword));
            }
        } finally {
            // The reader was never closed before, leaking one reader per call.
            reader.close();
        }
        return items;
    }

    /**
     * Searches a single page for a keyword.
     *
     * @param reader     reader of the PDF; it is left open
     * @param pageNumber 1-based number of the page to search
     * @param keyword    text to look for
     * @return the matches found on that page
     * @throws Exception if the page cannot be parsed
     */
    public static List<MatchItem> matchPage(PdfReader reader, Integer pageNumber, String keyword) throws Exception {
        KeyWordPositionListener renderListener = new KeyWordPositionListener();
        renderListener.setKeyword(keyword);
        PdfReaderContentParser parse = new PdfReaderContentParser(reader);
        Rectangle rectangle = reader.getPageSize(pageNumber);
        renderListener.setPageNumber(pageNumber);
        renderListener.setCurPageSize(rectangle);
        parse.processContent(pageNumber, renderListener);
        // findKeywordItems only ever puts MatchItem instances into the list it returns.
        @SuppressWarnings("unchecked")
        List<MatchItem> found = findKeywordItems(renderListener, keyword);
        return found;
    }

}
package com.sinosoft.lis.utils;

import com.itextpdf.awt.geom.Rectangle2D;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.parser.ImageRenderInfo;
import com.itextpdf.text.pdf.parser.RenderListener;
import com.itextpdf.text.pdf.parser.TextRenderInfo;
import com.sinosoft.lis.comm.PubFun;

import java.util.ArrayList;
import java.util.List;

/**
 * Created by Administrator on 2018/10/9.
 */
public class KeyWordPositionListener implements RenderListener {
    private List<MatchItem> matches = new ArrayList<MatchItem>();
    private List<MatchItem> allItems = new ArrayList<MatchItem>();
    private Rectangle curPageSize;

    /**
     * 匹配的關鍵字
     */
    private String keyword;
    /**
     * 匹配的當前頁
     */
    private Integer pageNumber;

    public void beginTextBlock() {
        //do nothing
    }

    public void renderText(TextRenderInfo renderInfo) {
        String content = renderInfo.getText();
        content = content.replace("<", "").replace("《", "").replace("(", "").replace("(", "").replace("\"", "").replace("'", "")
                .replace(">", "").replace("》", "").replace(")", "").replace(")", "").replace("、", "").replace(".", "")
                .replace(":", "").replace(":", "").replace(" ", "");
        Rectangle2D.Float textRectangle = renderInfo.getDescentLine().getBoundingRectange();
        MatchItem item = new MatchItem();
        item.setContent(content);
        item.setPageNum(pageNumber);
        item.setPageWidth(curPageSize.getWidth());
        item.setPageHeight(curPageSize.getHeight());
        item.setX((float)textRectangle.getX());
        item.setY((float)textRectangle.getY());
        if(!PubFun.isEmpty(content)){
            if(content.equalsIgnoreCase(keyword)) {
                matches.add(item);
            }
        }else{
            item.setContent("空字符串");
        }
        allItems.add(item);//先保存全部的項
    }

    public void endTextBlock() {
        //do nothing
    }

    public void renderImage(ImageRenderInfo renderInfo) {
        //do nothing
    }

    /**
     * 設置須要匹配的當前頁
     * @param pageNumber
     */
    public void setPageNumber(Integer pageNumber) {
        this.pageNumber = pageNumber;
    }

    /**
     * 設置須要匹配的關鍵字,忽略大小寫
     * @param keyword
     */
    public void setKeyword(String keyword) {
        this.keyword = keyword;
    }

    /**
     * 返回匹配的結果列表
     * @return
     */
    public List<MatchItem> getMatches() {
        return matches;
    }

    void setCurPageSize(Rectangle rect) {
        this.curPageSize = rect;
    }

    public List<MatchItem> getAllItems() {
        return allItems;
    }

    public void setAllItems(List<MatchItem> allItems) {
        this.allItems = allItems;
    }

    public static List findKeywordItems(KeyWordPositionListener renderListener,String keyword){
        //先判斷本頁中是否存在關鍵詞
        List<MatchItem> allItems = renderListener.getAllItems();//全部塊LIST
        StringBuffer sbtemp = new StringBuffer("");
        for(MatchItem item : allItems){//將一頁中全部的塊內容鏈接起來組成一個字符串。
            sbtemp.append(item.getContent());
        }
        if(sbtemp.toString().indexOf(keyword) == -1){//一頁組成的字符串沒有關鍵詞,直接return
            return renderListener.getMatches();
        }
        //第一種狀況:關鍵詞與塊內容徹底匹配的項
        List matches = renderListener.getMatches();
        //第二種狀況:多個塊內容拼成一個關鍵詞,則一個一個來匹配,組裝成一個關鍵詞
        sbtemp = new StringBuffer("");
        List tempItems = new ArrayList();
        for(MatchItem item : allItems){
            //1,關鍵詞中存在某塊 2,拼裝的連續的塊=關鍵詞  3,避開某個塊徹底匹配關鍵詞
            //關鍵詞 中國移動  而塊爲 中 ,國,移動
            //關鍵詞 中華人民  而塊爲中,華人民共和國  這種狀況解決不了,也不容許存在
            if(keyword.indexOf(item.getContent()) != -1 && !keyword.equals(item.getContent())){
                tempItems.add(item);
                sbtemp.append(item.getContent());
                if(keyword.indexOf(sbtemp.toString()) == -1){//若是暫存的字符串和關鍵詞 再也不匹配時
                    sbtemp = new StringBuffer(item.getContent());
                    tempItems.clear();
                    tempItems.add(item);
                }
                if(sbtemp.toString().equalsIgnoreCase(keyword)){//暫存的字符串正好匹配到關鍵詞時
                    MatchItem tmpitem = getRightItem(tempItems, keyword);
                    if(tmpitem != null){
                        matches.add(tmpitem);//獲得匹配的項
                    }
                    sbtemp = new StringBuffer("");//清空暫存的字符串
                    tempItems.clear();//清空暫存的LIST
                    continue;//繼續查找
                }
            }else{//若是找不到則清空
                sbtemp = new StringBuffer("");
                tempItems.clear();
            }
        }
        //第三種狀況:關鍵詞存在塊中
        for(MatchItem item : allItems){
            if(item.getContent().indexOf(keyword) != -1 && !keyword.equals(item.getContent())){
                matches.add(item);
            }
        }
        return matches;
    }

    public static MatchItem getRightItem(List<MatchItem> tempItems,String keyword){
        for(MatchItem item:tempItems){
            if(keyword.indexOf(item.getContent()) != -1 && !keyword.equals(item.getContent())){
                return item;
            }
        } return null;
    }

    public KeyWordPositionListener() {
    }
}

注:由於我的需求是替換文中的固定字段,而且字段長度由我這邊控制,都是4個字,所以我固定使用了一個寬85、高13的紅色矩形(即代碼中的 canvas.rectangle(x, y + 1, 85, 13))。在查找到文字的x,y位置以後,直接將該矩形覆蓋到pdf中,然後再在矩形上重新寫字。

相關文章
相關標籤/搜索