Unable to parse the default media type registry

今天寫了一段獲取MIME類型的代碼,對比了org.apache.tika和net.sf.jmimemagic兩種方式。

JDK版本是1.8。

1.pom.xml:

<!-- Maven build descriptor for the TestWithMaven demo project that compares several MIME-type detection libraries. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<groupId>hui</groupId>
	<artifactId>TestWithMaven</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>jar</packaging>

	<name>TestWithMaven</name>
	<url>http://maven.apache.org</url>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
	</properties>

	<dependencies>
		<dependency>
			<groupId>joda-time</groupId>
			<artifactId>joda-time</artifactId>
			<version>2.9.2</version>
		</dependency>
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-log4j12</artifactId>
			<version>1.7.13</version>
		</dependency>
		<dependency>
			<groupId>org.apache.ibatis</groupId>
			<artifactId>ibatis-core</artifactId>
			<version>3.0</version>
		</dependency>
		<dependency>
			<groupId>org.mybatis</groupId>
			<artifactId>mybatis</artifactId>
			<version>3.4.0</version>
		</dependency>
		<dependency>
			<groupId>mysql</groupId>
			<artifactId>mysql-connector-java</artifactId>
			<version>5.1.38</version>
		</dependency>

		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.12</version>
		</dependency>
		<dependency>
			<groupId>org.hamcrest</groupId>
			<artifactId>hamcrest-core</artifactId>
			<version>1.3</version>
		</dependency>

		<!-- Provides ConfigurableMimeFileTypeMap, used by FileUtils.getMimeTypeBySpring -->
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-context-support</artifactId>
			<version>4.2.2.RELEASE</version>
		</dependency>
		<!-- Apache Tika core, used by FileUtils.getMimeByTika -->
		<dependency>
			<groupId>org.apache.tika</groupId>
			<artifactId>tika-core</artifactId>
			<version>1.13</version>
		</dependency>

		<!-- jMimeMagic, used by FileUtils.getMimeByJMimeMagic.
		     NOTE: as described in the accompanying write-up, declaring it without exclusions puts
		     xercesImpl on the classpath, after which Tika fails with
		     "Unable to parse the default media type registry". -->
		<dependency>
			<groupId>net.sf.jmimemagic</groupId>
			<artifactId>jmimemagic</artifactId>
			<version>0.1.4</version>
		</dependency>
		<dependency>
			<groupId>xml-apis</groupId>
			<artifactId>xmlParserAPIs</artifactId>
			<version>2.0.2</version>
		</dependency>




	</dependencies>
</project>

2.FileUtils.java:

package mime;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.FileNameMap;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.Locale;

import javax.activation.MimetypesFileTypeMap;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.springframework.mail.javamail.ConfigurableMimeFileTypeMap;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

import net.sf.jmimemagic.Magic;
import net.sf.jmimemagic.MagicException;
import net.sf.jmimemagic.MagicMatch;
import net.sf.jmimemagic.MagicMatchNotFoundException;
import net.sf.jmimemagic.MagicParseException;

/**
 * Side-by-side collection of strategies for determining the MIME type of a file:
 * the JavaBeans Activation {@code MimetypesFileTypeMap}, a hand-written extension table,
 * Spring's {@code ConfigurableMimeFileTypeMap}, the JDK {@code FileNameMap},
 * Apache Tika and jMimeMagic.
 *
 * <p>All methods are static and take the file path as a plain string.
 */
public class FileUtils {

	/** MIME type reported when nothing more specific is known. */
	private static final String DEFAULT_MIME_TYPE = "application/octet-stream";

	/**
	 * Resolves the MIME type with a {@link MimetypesFileTypeMap} that has been extended
	 * with a mapping for PDF files.
	 *
	 * @param path path of the file to inspect
	 * @return the content type reported by the type map
	 */
	public static String getMimeTypeByFileTypeMap(String path) {
		MimetypesFileTypeMap mimetypesFileTypeMap = new MimetypesFileTypeMap();
		// The default map has no entry for pdf (a pdf would come back as
		// application/octet-stream); it has no application/xml entry either.
		mimetypesFileTypeMap.addMimeTypes("application/pdf pdf");
		File f = new File(path);
		return mimetypesFileTypeMap.getContentType(f);
	}

	/**
	 * Resolves the MIME type from the file extension using a small built-in table and
	 * falls back to a default {@link MimetypesFileTypeMap} for unknown extensions.
	 *
	 * @param path path or file name to inspect
	 * @return the MIME type; {@code application/octet-stream} when the path contains no dot
	 */
	public static String getMimeTypeByFileTypeMap2(String path) {
		int idx = path.lastIndexOf('.');
		if (idx == -1) {
			return DEFAULT_MIME_TYPE;
		}
		// Locale.ROOT keeps the comparison locale-independent; with the default locale an
		// upper-case extension such as ".GIF" would not match under e.g. a Turkish locale.
		String fileExtension = path.substring(idx).toLowerCase(Locale.ROOT);
		switch (fileExtension) {
		case ".html":
			return "text/html";
		case ".css":
			return "text/css";
		case ".js":
			return "application/javascript";
		case ".gif":
			return "image/gif";
		case ".png":
			return "image/png";
		case ".txt":
			return "text/plain";
		case ".xml":
			return "application/xml";
		case ".json":
			return "application/json";
		default:
			return new MimetypesFileTypeMap().getContentType(path);
		}
	}

	/**
	 * Resolves the MIME type with Spring's {@link ConfigurableMimeFileTypeMap}.
	 *
	 * @param path path or file name to inspect
	 * @return the content type reported by the Spring type map
	 */
	public static String getMimeTypeBySpring(String path) {
		ConfigurableMimeFileTypeMap mimeMap = new ConfigurableMimeFileTypeMap();
		// This map has no application/xml entry.
		return mimeMap.getContentType(path);
	}

	/**
	 * Resolves the MIME type with the JDK's {@link FileNameMap}, passing it the
	 * URL-encoded form of the given path.
	 *
	 * @param fileUrl path or URL of the file to inspect
	 * @return the MIME type, {@code application/octet-stream} when the map knows none,
	 *         or an empty string if UTF-8 encoding is reported as unsupported
	 */
	public static String getMimeByFileNameMap(String fileUrl) {
		FileNameMap fileNameMap = URLConnection.getFileNameMap();
		try {
			String mimeType = fileNameMap
					.getContentTypeFor(URLEncoder.encode(fileUrl, "UTF-8"));
			return mimeType != null ? mimeType : DEFAULT_MIME_TYPE;
		} catch (UnsupportedEncodingException e) {
			e.printStackTrace();
			return "";
		}
	}

	/**
	 * Resolves the MIME type by parsing the file with Tika's {@link AutoDetectParser}
	 * and reading the content type from the resulting metadata. The names of all
	 * metadata entries are printed to standard output.
	 *
	 * @param fileUrl path of the file to parse
	 * @return the detected content type, or {@code null} if reading or parsing failed
	 */
	public static String getMimeByTika(String fileUrl) {
		String mimeType = null;

		ContentHandler contenthandler = new BodyContentHandler();

		Metadata metadata = new Metadata();
		metadata.add(Metadata.CONTENT_ENCODING, "utf-8");
		metadata.set(Metadata.RESOURCE_NAME_KEY, fileUrl);

		// Parser parser = new DefaultParser(); -- that one does not yield the MIME type
		Parser parser = new AutoDetectParser();

		ParseContext context = new ParseContext();
		context.set(Parser.class, parser);

		// try-with-resources: the stream was previously never closed, leaking a file handle
		// on every call.
		try (FileInputStream in = new FileInputStream(fileUrl)) {
			parser.parse(in, contenthandler, metadata, context);

			for (String name : metadata.names()) {
				System.out.println(name);
			}
			mimeType = metadata.get(Metadata.CONTENT_TYPE);
		} catch (IOException | TikaException | SAXException e) {
			e.printStackTrace();
		}
		return mimeType;
	}

	/**
	 * Resolves the MIME type with jMimeMagic.
	 *
	 * @param fileUrl path of the file to inspect
	 * @return the MIME type of the match, or an empty string if jMimeMagic threw
	 */
	public static String getMimeByJMimeMagic(String fileUrl) {
		try {
			MagicMatch match = Magic.getMagicMatch(new File(fileUrl), true);
			return match.getMimeType();
		} catch (MagicParseException | MagicMatchNotFoundException | MagicException e) {
			e.printStackTrace();
		}
		return "";
	}

}

3.MIMETest.java:

package mime;

/**
 * Console demo: runs every MIME-detection strategy offered by {@code FileUtils}
 * against one sample file and prints each result on its own line.
 */
public class MIMETest {
	public static void main(String[] args) {

		// File inside the src resources directory
		// String fileName = "funds.properties";
		String fileName = "createPerson.sql";
		// String path = getPath(fileName);

		// Absolute paths
		// String path = "E:/test/process.txt";
		// String path = "E:/test/02.jpg";
		// String path = "E:/Anheng/receiver-design.pdf";
		// String path = "E:/api/dom4j.chm";
		// String path = "E:/eclipse/ajax/pom.xml";
		// String path = "E:/test/person.json";
		// String path = "E:/test/file.java";
		// String path = "E:/test/static.ftl";
		// String path = "E:/test/rest.jerseySpring.war";
		// String path = "E:/test/upload/myeclipse.exe";
		String path = "E:/test/upload/myeclipse.ini";

		final String byFileTypeMap = FileUtils.getMimeTypeByFileTypeMap(path);
		System.out.println("getMimeTypeByFileTypeMap: Mime Type of " + path + " is " + byFileTypeMap);

		final String byFileTypeMap2 = FileUtils.getMimeTypeByFileTypeMap2(path);
		System.out.println("getMimeTypeByFileTypeMap2: Mime Type of " + path + " is " + byFileTypeMap2);

		final String bySpring = FileUtils.getMimeTypeBySpring(path);
		System.out.println("getMimeTypeBySpring: Mime Type of " + path + " is " + bySpring);

		final String byFileNameMap = FileUtils.getMimeByFileNameMap(path);
		System.out.println("getMimeByFileNameMap: Mime Type of " + path + " is " + byFileNameMap);

		/*
		 * Observations recorded by the author. Tika checks that the path is valid, and:
		 *  - .properties -> text/x-java-properties; the strategies above only give application/octet-stream
		 *  - .sql  -> text/x-sql; the strategies above only give application/octet-stream
		 *  - .json -> application/json; the strategies above give application/octet-stream
		 *             unless that type has been added explicitly
		 *  - .java -> text/x-java-source; the first two above give application/octet-stream,
		 *             the last two give text/plain
		 *  - .ftl  -> text/html; the strategies above give application/octet-stream
		 *  - .war  -> application/x-tika-java-web-archive; the strategies above give application/octet-stream
		 *  - .exe  -> application/x-dosexec; the strategies above give application/octet-stream
		 *  - .ini  -> text/x-ini; the strategies above give application/octet-stream
		 */

		final String byTika = FileUtils.getMimeByTika(path);
		System.out.println("getMimeByTika: Mime Type of " + path + " is " + byTika);

		final String byJMimeMagic = FileUtils.getMimeByJMimeMagic(path);
		System.out.println("getMimeByJMimeMagic: Mime Type of " + path + " is " + byJMimeMagic);

	}

	/**
	 * Builds the path of a file under {@code src/main/resources} below the current
	 * working directory ({@code user.dir}), using the platform file separator.
	 *
	 * @param fileName name of the resource file
	 * @return working directory + separator-delimited src/main/resources + file name
	 */
	private static String getPath(String fileName) {
		final String workingDir = System.getProperty("user.dir");
		final String sep = System.getProperty("file.separator");
		return new StringBuilder()
				.append(workingDir)
				.append(sep).append("src")
				.append(sep).append("main")
				.append(sep).append("resources")
				.append(sep)
				.append(fileName)
				.toString();
	}
}

本來只測試Tika,即不加入jmimemagic的依賴時,測試正常;後來加入jmimemagic依賴,報錯如下:

Exception in thread "main" java.lang.RuntimeException: Unable to parse the default media type registry
	at org.apache.tika.mime.MimeTypes.getDefaultMimeTypes(MimeTypes.java:580)
	at org.apache.tika.config.TikaConfig.getDefaultMimeTypes(TikaConfig.java:69)
	at org.apache.tika.config.TikaConfig.<init>(TikaConfig.java:218)
	at org.apache.tika.config.TikaConfig.getDefaultConfig(TikaConfig.java:341)
	at org.apache.tika.parser.AutoDetectParser.<init>(AutoDetectParser.java:51)
	at mime.FileUtils.getMimeByTika(FileUtils.java:103)
	at mime.MIMETest.main(MIMETest.java:48)
Caused by: org.apache.tika.mime.MimeTypeException: Invalid type configuration
	at org.apache.tika.mime.MimeTypesReader.read(MimeTypesReader.java:126)
	at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:64)
	at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:93)
	at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:170)
	at org.apache.tika.mime.MimeTypes.getDefaultMimeTypes(MimeTypes.java:577)
	... 6 more
Caused by: org.xml.sax.SAXNotRecognizedException: http://javax.xml.XMLConstants/feature/secure-processing
	at org.apache.xerces.parsers.AbstractSAXParser.setFeature(Unknown Source)
	at org.apache.xerces.jaxp.SAXParserImpl.setFeatures(Unknown Source)
	at org.apache.xerces.jaxp.SAXParserImpl.<init>(Unknown Source)
	at org.apache.xerces.jaxp.SAXParserFactoryImpl.newSAXParserImpl(Unknown Source)
	at org.apache.xerces.jaxp.SAXParserFactoryImpl.setFeature(Unknown Source)
	at org.apache.tika.mime.MimeTypesReader.read(MimeTypesReader.java:119)
	... 10 more

按照錯誤提示,在FileUtils.java:103即getMimeByTika方法下的Parser parser = new AutoDetectParser();處打斷點,對比加入net.sf.jmimemagic依賴前後的情況來查找異常原因,發現了下面一個現象:

加net.sf.jmimemagic前,javax.xml.parsers.SAXParserFactory的子類是com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl,該類在JDK自帶的jar包

rt.jar-->com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl.class下,如圖:

而加net.sf.jmimemagic後,javax.xml.parsers.SAXParserFactory的子類變成了

xercesImpl-2.2.4.0.jar-->org.apache.xerces.jaxp.SAXParserFactoryImpl.class,

如圖:

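該類在setFeature()時拋出了異常(即堆棧中的SAXNotRecognizedException:它不識別http://javax.xml.XMLConstants/feature/secure-processing這一特性)。也就是說,類路徑上同時存在兩個同名(SAXParserFactoryImpl)的實現類,導致衝突並報了異常。故我們將xercesImpl-2.2.4.0.jar排除掉即可,修改後的pom.xml如下所示: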

<!-- Maven build descriptor for the TestWithMaven demo project; this version excludes xercesImpl from jmimemagic. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<groupId>hui</groupId>
	<artifactId>TestWithMaven</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>jar</packaging>

	<name>TestWithMaven</name>
	<url>http://maven.apache.org</url>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
	</properties>

	<dependencies>
		<dependency>
			<groupId>joda-time</groupId>
			<artifactId>joda-time</artifactId>
			<version>2.9.2</version>
		</dependency>
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-log4j12</artifactId>
			<version>1.7.13</version>
		</dependency>
		<dependency>
			<groupId>org.apache.ibatis</groupId>
			<artifactId>ibatis-core</artifactId>
			<version>3.0</version>
		</dependency>
		<dependency>
			<groupId>org.mybatis</groupId>
			<artifactId>mybatis</artifactId>
			<version>3.4.0</version>
		</dependency>
		<dependency>
			<groupId>mysql</groupId>
			<artifactId>mysql-connector-java</artifactId>
			<version>5.1.38</version>
		</dependency>

		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.12</version>
		</dependency>
		<dependency>
			<groupId>org.hamcrest</groupId>
			<artifactId>hamcrest-core</artifactId>
			<version>1.3</version>
		</dependency>

		<!-- Provides ConfigurableMimeFileTypeMap, used by FileUtils.getMimeTypeBySpring -->
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-context-support</artifactId>
			<version>4.2.2.RELEASE</version>
		</dependency>
		<!-- Apache Tika core, used by FileUtils.getMimeByTika -->
		<dependency>
			<groupId>org.apache.tika</groupId>
			<artifactId>tika-core</artifactId>
			<version>1.13</version>
		</dependency>

		<!-- jMimeMagic, used by FileUtils.getMimeByJMimeMagic.
		     xercesImpl is excluded so that its org.apache.xerces.jaxp.SAXParserFactoryImpl no longer
		     replaces the JDK's built-in SAXParserFactory implementation; the Xerces one throws
		     SAXNotRecognizedException for the secure-processing feature when Tika loads its
		     media type registry. -->
		<dependency>
			<groupId>net.sf.jmimemagic</groupId>
			<artifactId>jmimemagic</artifactId>
			<version>0.1.4</version>
			<exclusions>
				<exclusion>
						<groupId>xerces</groupId>
						<artifactId>xercesImpl</artifactId>
				</exclusion>
			</exclusions>
		</dependency>
		<dependency>
			<groupId>xml-apis</groupId>
			<artifactId>xmlParserAPIs</artifactId>
			<version>2.0.2</version>
		</dependency>




	</dependencies>
</project>

至此,再運行,則各方法都不再拋異常。

相關文章
相關標籤/搜索