今天寫了一段獲取MIME類型的代碼,對比用org.apache.tika和net.sf.jmimemagic。
jdk版本是1.8。
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>hui</groupId> <artifactId>TestWithMaven</artifactId> <version>0.0.1-SNAPSHOT</version> <packaging>jar</packaging> <name>TestWithMaven</name> <url>http://maven.apache.org</url> <properties> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> </properties> <dependencies> <dependency> <groupId>joda-time</groupId> <artifactId>joda-time</artifactId> <version>2.9.2</version> </dependency> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-log4j12</artifactId> <version>1.7.13</version> </dependency> <dependency> <groupId>org.apache.ibatis</groupId> <artifactId>ibatis-core</artifactId> <version>3.0</version> </dependency> <dependency> <groupId>org.mybatis</groupId> <artifactId>mybatis</artifactId> <version>3.4.0</version> </dependency> <dependency> <groupId>mysql</groupId> <artifactId>mysql-connector-java</artifactId> <version>5.1.38</version> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.12</version> </dependency> <dependency> <groupId>org.hamcrest</groupId> <artifactId>hamcrest-core</artifactId> <version>1.3</version> </dependency> <dependency> <groupId>org.springframework</groupId> <artifactId>spring-context-support</artifactId> <version>4.2.2.RELEASE</version> </dependency> <dependency> <groupId>org.apache.tika</groupId> <artifactId>tika-core</artifactId> <version>1.13</version> </dependency> <dependency> <groupId>net.sf.jmimemagic</groupId> <artifactId>jmimemagic</artifactId> <version>0.1.4</version> </dependency> <dependency> <groupId>xml-apis</groupId> <artifactId>xmlParserAPIs</artifactId> <version>2.0.2</version> </dependency> </dependencies> </project>
package mime;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.FileNameMap;
import java.net.URLConnection;
import java.net.URLEncoder;

import javax.activation.MimetypesFileTypeMap;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.springframework.mail.javamail.ConfigurableMimeFileTypeMap;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

import net.sf.jmimemagic.Magic;
import net.sf.jmimemagic.MagicException;
import net.sf.jmimemagic.MagicMatch;
import net.sf.jmimemagic.MagicMatchNotFoundException;
import net.sf.jmimemagic.MagicParseException;

/**
 * Collection of alternative ways to look up the MIME type of a file, used to
 * compare the JDK/JAF, Spring, Apache Tika and jMimeMagic approaches.
 */
public class FileUtils {

    /**
     * Looks up the MIME type with {@link MimetypesFileTypeMap}.
     *
     * @param path path of the file to inspect
     * @return the content type reported by the type map
     */
    public static String getMimeTypeByFileTypeMap(String path) {
        MimetypesFileTypeMap mimetypesFileTypeMap = new MimetypesFileTypeMap();
        // Author's note: the default map has no pdf entry (a pdf would be reported as
        // application/octet-stream), nor application/xml, so pdf is registered by hand.
        mimetypesFileTypeMap.addMimeTypes("application/pdf pdf");
        File f = new File(path);
        return mimetypesFileTypeMap.getContentType(f);
    }

    /**
     * Looks up the MIME type from a small hard-coded extension table, falling
     * back to {@link MimetypesFileTypeMap} for any other extension.
     *
     * @param path path or file name to inspect
     * @return the MIME type; {@code application/octet-stream} when the path
     *         contains no '.' at all
     */
    public static String getMimeTypeByFileTypeMap2(String path) {
        int idx = path.lastIndexOf('.');
        if (idx == -1) {
            return "application/octet-stream";
        }
        // Locale.ROOT keeps the comparison locale-independent (e.g. ".GIF" must
        // become ".gif" even when the default locale is Turkish).
        String fileExtension = path.substring(idx).toLowerCase(java.util.Locale.ROOT);
        switch (fileExtension) {
            case ".html":
                return "text/html";
            case ".css":
                return "text/css";
            case ".js":
                return "application/javascript";
            case ".gif":
                return "image/gif";
            case ".png":
                return "image/png";
            case ".txt":
                return "text/plain";
            case ".xml":
                return "application/xml";
            case ".json":
                return "application/json";
            default:
                return new MimetypesFileTypeMap().getContentType(path);
        }
    }

    /**
     * Looks up the MIME type with Spring's {@link ConfigurableMimeFileTypeMap}.
     *
     * @param path path or file name to inspect
     * @return the content type reported by the Spring type map
     */
    public static String getMimeTypeBySpring(String path) {
        ConfigurableMimeFileTypeMap mimeMap = new ConfigurableMimeFileTypeMap();
        // Author's note: this map has no application/xml entry.
        return mimeMap.getContentType(path);
    }

    /**
     * Looks up the MIME type with the JDK's {@link FileNameMap} obtained from
     * {@link URLConnection#getFileNameMap()}.
     *
     * @param fileUrl path or URL of the file; it is URL-encoded before the lookup
     * @return the MIME type, {@code application/octet-stream} when the map has
     *         no entry, or an empty string if UTF-8 encoding is unsupported
     */
    public static String getMimeByFileNameMap(String fileUrl) {
        FileNameMap fileNameMap = URLConnection.getFileNameMap();
        try {
            String mimeType = fileNameMap
                    .getContentTypeFor(URLEncoder.encode(fileUrl, "UTF-8"));
            if (mimeType == null) {
                mimeType = "application/octet-stream";
            }
            return mimeType;
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Detects the MIME type by parsing the file with Apache Tika's
     * {@link AutoDetectParser}. The names of all metadata entries produced by
     * the parse are printed to standard output.
     *
     * @param fileUrl path of the file to parse
     * @return the value of the {@code Content-Type} metadata entry, or
     *         {@code null} if the file could not be read or parsed
     */
    public static String getMimeByTika(String fileUrl) {
        String mimeType = null;
        ContentHandler contenthandler = new BodyContentHandler();
        Metadata metadata = new Metadata();
        metadata.add(Metadata.CONTENT_ENCODING, "utf-8");
        metadata.set(Metadata.RESOURCE_NAME_KEY, fileUrl);
        // Author's note: with "new DefaultParser()" the MIME type could not be obtained.
        Parser parser = new AutoDetectParser();
        ParseContext context = new ParseContext();
        context.set(Parser.class, parser);
        // The stream is opened by this method, so it is closed here as well;
        // previously it was never closed.
        try (FileInputStream input = new FileInputStream(fileUrl)) {
            parser.parse(input, contenthandler, metadata, context);
            for (String name : metadata.names()) {
                System.out.println(name);
            }
            mimeType = metadata.get(Metadata.CONTENT_TYPE);
        } catch (IOException | TikaException | SAXException e) {
            e.printStackTrace();
        }
        return mimeType;
    }

    /**
     * Detects the MIME type with jMimeMagic.
     *
     * @param fileUrl path of the file to inspect
     * @return the MIME type of the match, or an empty string if jMimeMagic
     *         fails or finds no match
     */
    public static String getMimeByJMimeMagic(String fileUrl) {
        try {
            MagicMatch match = Magic.getMagicMatch(new File(fileUrl), true);
            return match.getMimeType();
        } catch (MagicParseException | MagicMatchNotFoundException
                | MagicException e) {
            e.printStackTrace();
        }
        return "";
    }
}
package mime;

/**
 * Console driver that feeds one sample path to every MIME lookup offered by
 * {@link FileUtils} and prints each result on its own line.
 */
public class MIMETest {

    public static void main(String[] args) {
        // File under the src resources directory (alternative: "funds.properties").
        // Only relevant when the getPath(...) variant below is enabled.
        String fileName = "createPerson.sql";
        // String path = getPath(fileName);

        // Other absolute sample paths tried by the author:
        //   E:/test/process.txt
        //   E:/test/02.jpg
        //   E:/Anheng/receiver-design.pdf
        //   E:/api/dom4j.chm
        //   E:/eclipse/ajax/pom.xml
        //   E:/test/person.json
        //   E:/test/file.java
        //   E:/test/static.ftl
        //   E:/test/rest.jerseySpring.war
        //   E:/test/upload/myeclipse.exe
        String path = "E:/test/upload/myeclipse.ini";

        report("getMimeTypeByFileTypeMap", path, FileUtils.getMimeTypeByFileTypeMap(path));
        report("getMimeTypeByFileTypeMap2", path, FileUtils.getMimeTypeByFileTypeMap2(path));
        report("getMimeTypeBySpring", path, FileUtils.getMimeTypeBySpring(path));
        report("getMimeByFileNameMap", path, FileUtils.getMimeByFileNameMap(path));

        /*
         * Author's observations about Tika compared with the four lookups above:
         * - Tika checks that the path is valid.
         * - .properties -> text/x-java-properties; the others only return application/octet-stream.
         * - .sql  -> text/x-sql; the others only return application/octet-stream.
         * - .json -> application/json; the others return application/octet-stream
         *            unless that type has been added.
         * - .java -> text/x-java-source; the first two return application/octet-stream,
         *            the last two return text/plain.
         * - .ftl  -> text/html; the others return application/octet-stream.
         * - .war  -> application/x-tika-java-web-archive; the others return application/octet-stream.
         * - .exe  -> application/x-dosexec; the others return application/octet-stream.
         * - .ini  -> text/x-ini; the others return application/octet-stream.
         */
        report("getMimeByTika", path, FileUtils.getMimeByTika(path));
        report("getMimeByJMimeMagic", path, FileUtils.getMimeByJMimeMagic(path));
    }

    /** Prints one result line in the form "label: Mime Type of path is mime". */
    private static void report(String label, String path, String mime) {
        System.out.println(label + ": Mime Type of " + path + " is " + mime);
    }

    /**
     * Builds the path of a file under src/main/resources of the current
     * working directory, using the platform file separator.
     */
    private static String getPath(String fileName) {
        String separator = System.getProperty("file.separator");
        StringBuilder location = new StringBuilder();
        location.append(System.getProperty("user.dir"));
        location.append(separator).append("src");
        location.append(separator).append("main");
        location.append(separator).append("resources");
        location.append(separator).append(fileName);
        return location.toString();
    }
}
本來只測試Tika,即不加入jmimemagic的依賴時,測試正常;後來加入jmimemagic依賴,報錯如下:
Exception in thread "main" java.lang.RuntimeException: Unable to parse the default media type registry at org.apache.tika.mime.MimeTypes.getDefaultMimeTypes(MimeTypes.java:580) at org.apache.tika.config.TikaConfig.getDefaultMimeTypes(TikaConfig.java:69) at org.apache.tika.config.TikaConfig.<init>(TikaConfig.java:218) at org.apache.tika.config.TikaConfig.getDefaultConfig(TikaConfig.java:341) at org.apache.tika.parser.AutoDetectParser.<init>(AutoDetectParser.java:51) at mime.FileUtils.getMimeByTika(FileUtils.java:103) at mime.MIMETest.main(MIMETest.java:48) Caused by: org.apache.tika.mime.MimeTypeException: Invalid type configuration at org.apache.tika.mime.MimeTypesReader.read(MimeTypesReader.java:126) at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:64) at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:93) at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:170) at org.apache.tika.mime.MimeTypes.getDefaultMimeTypes(MimeTypes.java:577) ... 6 more Caused by: org.xml.sax.SAXNotRecognizedException: http://javax.xml.XMLConstants/feature/secure-processing at org.apache.xerces.parsers.AbstractSAXParser.setFeature(Unknown Source) at org.apache.xerces.jaxp.SAXParserImpl.setFeatures(Unknown Source) at org.apache.xerces.jaxp.SAXParserImpl.<init>(Unknown Source) at org.apache.xerces.jaxp.SAXParserFactoryImpl.newSAXParserImpl(Unknown Source) at org.apache.xerces.jaxp.SAXParserFactoryImpl.setFeature(Unknown Source) at org.apache.tika.mime.MimeTypesReader.read(MimeTypesReader.java:119) ... 10 more
按照錯誤提示,在FileUtils.java:103即getMimeByTika方法下的Parser parser = new AutoDetectParser();處打斷點,在加net.sf.jmimemagic依賴前後對比異常原因,發現了下面一個現象:
加net.sf.jmimemagic前,javax.xml.parsers.SAXParserFactory的子類是com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl,該類在jdk自帶jar包
rt.jar-->com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl.class下,如圖:
而加net.sf.jmimemagic後,javax.xml.parsers.SAXParserFactory的子類變成了
xercesImpl-2.2.4.0.jar-->org.apache.xerces.jaxp.SAXParserFactoryImpl.class,
如圖:
該類在setFeature()時拋出了異常。即類路徑上同時存在兩個SAXParserFactory的實現類,而xercesImpl中的舊實現不識別secure-processing這個特性,導致報了異常。故我們將xercesImpl-2.2.4.0.jar排除掉即可,修改後的pom.xml如下所示:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>hui</groupId> <artifactId>TestWithMaven</artifactId> <version>0.0.1-SNAPSHOT</version> <packaging>jar</packaging> <name>TestWithMaven</name> <url>http://maven.apache.org</url> <properties> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> </properties> <dependencies> <dependency> <groupId>joda-time</groupId> <artifactId>joda-time</artifactId> <version>2.9.2</version> </dependency> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-log4j12</artifactId> <version>1.7.13</version> </dependency> <dependency> <groupId>org.apache.ibatis</groupId> <artifactId>ibatis-core</artifactId> <version>3.0</version> </dependency> <dependency> <groupId>org.mybatis</groupId> <artifactId>mybatis</artifactId> <version>3.4.0</version> </dependency> <dependency> <groupId>mysql</groupId> <artifactId>mysql-connector-java</artifactId> <version>5.1.38</version> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.12</version> </dependency> <dependency> <groupId>org.hamcrest</groupId> <artifactId>hamcrest-core</artifactId> <version>1.3</version> </dependency> <dependency> <groupId>org.springframework</groupId> <artifactId>spring-context-support</artifactId> <version>4.2.2.RELEASE</version> </dependency> <dependency> <groupId>org.apache.tika</groupId> <artifactId>tika-core</artifactId> <version>1.13</version> </dependency> <dependency> <groupId>net.sf.jmimemagic</groupId> <artifactId>jmimemagic</artifactId> <version>0.1.4</version> <exclusions> <exclusion> <groupId>xerces</groupId> <artifactId>xercesImpl</artifactId> </exclusion> </exclusions> </dependency> <dependency> <groupId>xml-apis</groupId> <artifactId>xmlParserAPIs</artifactId> <version>2.0.2</version> </dependency> 
</dependencies> </project>
至此,再運行,則各方法都不再拋異常。