dom4j解析國際化（xml:lang）XML文件

時間 2019-11-13

標籤 dom4j dom 解析國際化 xml lang 文件欄目 Java開源简体版

原文原文鏈接

一、問題背景

因爲老項目需要進行國際化（翻譯成英文），其中一些xml內容也需要翻譯。但這時問題就來了：英文版是有了，中文版怎麼辦？

存兩個xml？例如abc.xml和abc_zh_CN.xml。這也是個方法，但這就需要修改讀寫xml的模塊，讓它像properties一樣可以支持按語言讀取。不想存兩份xml的另一個原因是：並不是xml中的所有內容都需要翻譯，這樣的方式無疑需要維護很多重複配置。

二、分析解決

xml本身就支持多語言，可以採用xml:lang屬性來完成。dom4j是不是也可以按xml:lang來解析？

a、首先查看了dom4j的api，發現有一個XMLFilter這樣的類，以這個爲突破口。

b、須要解析的xml樣本（/org/noahx/xmli18n/test.xml）

<?xml version="1.0" encoding="UTF-8"?>

<root>

    <test xml:lang="zh">
        <abc>你好0</abc>
        <bcd bye="再見0"/>
        <test>嵌套測試</test>
    </test>


    <test xml:lang="en">
        <abc>hello0</abc>
        <bcd bye="goodbye0"/>
        <test>嵌套測試2</test>
    </test>

    <test>
        <abc>你好1</abc>
        <abc xml:lang="zh">你好2</abc>
        <abc xml:lang="en">hello1</abc>
        <bcd bye="goodbye1" xml:lang="en"/>
        <bcd bye="再見1"/>
        <bcd bye="再見2" xml:lang="zh_CN"/>
        <test xml:lang="en">嵌套測試3
        </test>
        <test>嵌套測試4
        </test>
        <test>
            <abc xml:lang="en">hello2</abc>
            <bcd xml:lang="en" bye="goodbye2"/>
            <abc xml:lang="zh">你好3</abc>
            <abc xml:lang="zh_TW">你好4</abc>
            <abc xml:lang="zh_CN">你好5</abc>
        </test>
    </test>


</root>

一般來說，在相對大的節點上定義一個xml:lang屬性就可以了，就像上面的test節點。

c、開發LocaleXMLFilter過濾掉不符合的Locale（org.noahx.xmli18n.LocaleXMLFilter）

package org.noahx.xmli18n;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.XMLFilterImpl;

import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * SAX filter that removes every element whose {@code xml:lang} attribute does not match a
 * configured locale, together with the complete subtree below that element.
 *
 * <p>Elements without an {@code xml:lang} attribute are forwarded unchanged unless one of
 * their ancestors has been removed. The attribute value is split on {@code _} or {@code -}
 * into language, country and variant (for example {@code zh}, {@code zh_CN}, {@code zh-CN}).
 *
 * <p>An instance keeps per-parse state (the depth inside the subtree currently being
 * dropped); that state is reset at the start of every document.
 */
public class LocaleXMLFilter extends XMLFilterImpl {

    /**
     * Qualified name of the language attribute as written in documents.
     */
    private static final String XML_LANG_QNAME = "xml:lang";

    /**
     * Namespace URI that the {@code xml} prefix is bound to.
     */
    private static final String XML_NAMESPACE_URI = "http://www.w3.org/XML/1998/namespace";

    /**
     * Local name of the language attribute inside the XML namespace.
     */
    private static final String XML_LANG_LOCAL_NAME = "lang";

    /**
     * Splits a locale string into up to three segments separated by {@code _} or {@code -}:
     * group 1 = language, group 2 = country, group 3 = variant.
     */
    private static final Pattern LOCALE_PATTERN =
            Pattern.compile("(^[^_-]*)(?:[_-]([^_-]*)(?:[_-]([^_-]*))?)?");

    /**
     * Locale that element content must match in order to be kept.
     */
    private final Locale defaultLocale;

    /**
     * Number of currently open elements that are being dropped; {@code 0} means events are
     * forwarded. The element that triggered the drop counts as depth 1.
     */
    private int ignoreDepth;

    /**
     * Creates a filter that keeps only content matching the given locale.
     *
     * @param defaultLocale locale to keep; must not be {@code null}, because it is
     *                      dereferenced as soon as an element carrying {@code xml:lang}
     *                      is encountered
     */
    public LocaleXMLFilter(Locale defaultLocale) {
        this.defaultLocale = defaultLocale;
    }

    /**
     * Resets the drop state so that a filter instance can be reused, even when a previous
     * parse was aborted in the middle of a dropped subtree.
     *
     * @throws SAXException if the downstream handler fails
     */
    @Override
    public void startDocument() throws SAXException {
        ignoreDepth = 0;
        super.startDocument();
    }

    /**
     * Forwards the start of an element unless the element, or one of its ancestors, carries
     * an {@code xml:lang} value that does not match the configured locale.
     *
     * @param url       namespace URI of the element
     * @param localName local name of the element
     * @param qName     qualified name of the element
     * @param att       attributes of the element
     * @throws SAXException if the downstream handler fails
     */
    @Override
    public void startElement(String url, String localName,
                             String qName, Attributes att) throws SAXException {
        if (ignoreDepth > 0) {
            // An ancestor is already dropped: descendants are dropped unconditionally.
            ignoreDepth++;
            return;
        }

        String lang = findXmlLang(att);
        if (lang != null && notSameLocale(getLocaleFromLocaleString(lang))) {
            ignoreDepth = 1;
            return;
        }

        super.startElement(url, localName, qName, att);
    }

    /**
     * Forwards character data unless it belongs to a dropped subtree.
     *
     * @param data   character buffer
     * @param start  offset of the first character
     * @param length number of characters
     * @throws SAXException if the downstream handler fails
     */
    @Override
    public void characters(char[] data, int start, int length)
            throws SAXException {
        if (ignoreDepth == 0) {
            super.characters(data, start, length);
        }
    }

    /**
     * Forwards ignorable whitespace unless it belongs to a dropped subtree.
     *
     * @param data   character buffer
     * @param start  offset of the first character
     * @param length number of characters
     * @throws SAXException if the downstream handler fails
     */
    @Override
    public void ignorableWhitespace(char[] data, int start, int length)
            throws SAXException {
        if (ignoreDepth == 0) {
            super.ignorableWhitespace(data, start, length);
        }
    }

    /**
     * Forwards a processing instruction unless it is located inside a dropped subtree.
     *
     * @param target processing instruction target
     * @param data   processing instruction data
     * @throws SAXException if the downstream handler fails
     */
    @Override
    public void processingInstruction(String target, String data)
            throws SAXException {
        if (ignoreDepth == 0) {
            super.processingInstruction(target, data);
        }
    }

    /**
     * Forwards a skipped-entity notification unless it occurs inside a dropped subtree.
     *
     * @param name name of the skipped entity
     * @throws SAXException if the downstream handler fails
     */
    @Override
    public void skippedEntity(String name) throws SAXException {
        if (ignoreDepth == 0) {
            super.skippedEntity(name);
        }
    }

    /**
     * Forwards the end of an element unless the element was dropped, in which case the drop
     * depth is reduced instead.
     *
     * @param url       namespace URI of the element
     * @param localName local name of the element
     * @param qName     qualified name of the element
     * @throws SAXException if the downstream handler fails
     */
    @Override
    public void endElement(String url, String localName, String qName)
            throws SAXException {
        if (ignoreDepth > 0) {
            ignoreDepth--;
            return;
        }
        super.endElement(url, localName, qName);
    }

    /**
     * Reads the {@code xml:lang} value of an element.
     *
     * <p>The lookup by qualified name comes first (the original behaviour). Because
     * qualified attribute names are not guaranteed to be available from every SAX parser
     * configuration, the namespace URI / local name pair is used as a fallback.
     *
     * @param att attributes of the element
     * @return the attribute value, or {@code null} when the element has no such attribute
     */
    private static String findXmlLang(Attributes att) {
        String lang = att.getValue(XML_LANG_QNAME);
        if (lang == null) {
            lang = att.getValue(XML_NAMESPACE_URI, XML_LANG_LOCAL_NAME);
        }
        return lang;
    }

    /**
     * Decides whether an element locale conflicts with the configured locale.
     *
     * <p>The language must always be equal. The country is compared only when the element
     * specifies one, and the variant only when the element specifies both a country and a
     * variant.
     *
     * @param xmlLocale locale parsed from the element's {@code xml:lang} value
     * @return {@code true} when the element must be dropped
     */
    private boolean notSameLocale(Locale xmlLocale) {
        if (!xmlLocale.getLanguage().equals(defaultLocale.getLanguage())) {
            return true;
        }

        String country = xmlLocale.getCountry();
        if (country.length() == 0) {
            // Language-only tag such as "zh" matches every country of that language.
            return false;
        }
        if (!country.equals(defaultLocale.getCountry())) {
            return true;
        }

        String variant = xmlLocale.getVariant();
        return variant.length() != 0 && !variant.equals(defaultLocale.getVariant());
    }

    /**
     * Converts a string such as {@code zh_CN} into a {@link Locale}.
     *
     * @param s locale string using {@code _} or {@code -} as separator
     * @return the parsed locale, or {@code null} when {@code s} is {@code null}
     */
    private Locale getLocaleFromLocaleString(String s) {
        if (s == null) {
            return null;
        }

        Matcher matcher = LOCALE_PATTERN.matcher(s);
        // Every group of the pattern may match the empty string, so find() always succeeds;
        // it is called only to populate the groups.
        matcher.find();

        return new Locale(
                emptyIfNull(matcher.group(1)),
                emptyIfNull(matcher.group(2)),
                emptyIfNull(matcher.group(3)));
    }

    /**
     * Replaces a missing regex group by the empty string.
     *
     * @param value group value, possibly {@code null}
     * @return {@code value}, or {@code ""} when it is {@code null}
     */
    private static String emptyIfNull(String value) {
        return (value == null) ? "" : value;
    }

}

方式就是在startElement、characters、endElement時進行干預，從根本上過濾掉不符合的Locale內容。這樣就可以把對dom4j的影響降到最低。

d、主程序測試（org.noahx.xmli18n.TestLangXml）

package org.noahx.xmli18n;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

import java.util.List;
import java.util.Locale;

/**
 * Demo entry point: parses the sample document through a {@link LocaleXMLFilter} configured
 * for {@link Locale#SIMPLIFIED_CHINESE} and prints the {@code test} elements that remain.
 */
public class TestLangXml {

    /**
     * Classpath location of the sample document.
     */
    private static final String SAMPLE_RESOURCE = "org/noahx/xmli18n/test.xml";

    /**
     * Reads the sample document with the locale filter installed, prints every
     * {@code /root/test} element as XML and finally the number of such elements.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        SAXReader saxReader = new SAXReader();
        // The only dom4j-specific change: install the locale filter on the reader.
        saxReader.setXMLFilter(new LocaleXMLFilter(Locale.SIMPLIFIED_CHINESE));

        java.io.InputStream in = Thread.currentThread().getContextClassLoader()
                .getResourceAsStream(SAMPLE_RESOURCE);
        if (in == null) {
            // getResourceAsStream returns null instead of throwing when nothing is found.
            System.err.println("Resource not found on classpath: " + SAMPLE_RESOURCE);
            return;
        }

        try {
            Document document = saxReader.read(in);

            List<Node> nodes = document.selectNodes("//root/test");

            for (Node n : nodes) {
                System.out.println(n.asXML());
            }
            System.out.println(nodes.size());

        } catch (DocumentException e) {
            e.printStackTrace();
        } finally {
            try {
                in.close();
            } catch (java.io.IOException ignored) {
                // Nothing useful can be done if closing a read-only resource stream fails.
            }
        }

    }
}

從打印的xml內容中就可以看到，不符合的內容已經被過濾。我們對dom4j只是加入了saxReader.setXMLFilter(new LocaleXMLFilter(Locale.SIMPLIFIED_CHINESE));這一行。