import org.apache.poi.hwpf.extractor.WordExtractor;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.stream.Stream;

/**
 * Batch-extracts the text of every Word document in a source directory and
 * writes it to a same-named {@code .txt} file in a target directory.
 *
 * <p>Text extraction is delegated to Apache POI's {@link WordExtractor}.
 */
public class Doc2Text {

    /** Directory scanned by the no-argument constructor. */
    private static final String DEFAULT_SOURCE_DIR = "/home/weiyinfu/Desktop/佛經";

    /** Directory the no-argument constructor writes the text files to. */
    private static final String DEFAULT_TARGET_DIR = "/home/weiyinfu/Desktop/fojing";

    /**
     * Converts every document in the default source directory into the
     * default target directory.
     *
     * @throws IOException if the target directory cannot be created or the
     *                     source directory cannot be listed
     */
    Doc2Text() throws IOException {
        convertAll(Paths.get(DEFAULT_SOURCE_DIR), Paths.get(DEFAULT_TARGET_DIR));
    }

    /**
     * Converts every regular file directly inside {@code sourceDir} to a text
     * file inside {@code targetDir}. A file that fails with an
     * {@link IOException} is reported on standard error and skipped; the
     * remaining files are still processed.
     *
     * @param sourceDir directory containing the Word documents
     * @param targetDir directory receiving the {@code .txt} files; created
     *                  (including missing parents) when it does not exist
     * @throws IOException if the target directory cannot be created or the
     *                     source directory cannot be listed
     */
    static void convertAll(Path sourceDir, Path targetDir) throws IOException {
        if (!Files.exists(targetDir)) {
            Files.createDirectories(targetDir);
        }
        // Files.list holds an open directory handle until the stream is closed.
        try (Stream<Path> entries = Files.list(sourceDir)) {
            entries
                    .filter(Files::isRegularFile) // sub-directories cannot be opened as documents
                    .forEach(docPath -> convertOne(docPath, targetDir));
        }
    }

    /**
     * Extracts the text of one document and writes it to
     * {@code <targetDir>/<original file name>.txt}.
     *
     * @param docPath   the Word document to read
     * @param targetDir directory receiving the text file
     */
    private static void convertOne(Path docPath, Path targetDir) {
        Path targetFile = targetDir.resolve(docPath.getFileName() + ".txt");
        try {
            String content;
            try (InputStream in = Files.newInputStream(docPath);
                 WordExtractor extractor = new WordExtractor(in)) {
                content = extractor.getText();
            }
            // Default options are CREATE + TRUNCATE_EXISTING + WRITE, so a
            // previous, longer output file leaves no stale trailing content.
            try (var out = Files.newBufferedWriter(targetFile)) {
                out.write(content);
            }
        } catch (IOException e) {
            System.err.println("Failed to convert " + docPath);
            e.printStackTrace();
        }
    }

    /**
     * Runs the conversion with the default directories.
     *
     * @param args ignored
     * @throws IOException if the target directory cannot be created or the
     *                     source directory cannot be listed
     */
    public static void main(String[] args) throws IOException {
        new Doc2Text();
    }
}
此 Java 程式需要 Apache POI,Maven 依賴如下:
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.0</version>
</dependency>

<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.0</version>
</dependency>

<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>4.1.0</version>
</dependency>