Startseite > Java > Java-Tutorial > Hauptteil

Java-Methode zum Konvertieren von Word/PDF/TXT in HTML

PHPz
Veröffentlicht: 2023-04-26 10:37:07
nach vorne
1020 Aufrufe

一:Java实现将word转换为html

   1:引入依赖

 1 <dependency>
 2   <groupId>fr.opensagres.xdocreport</groupId>
 3   <artifactId>fr.opensagres.xdocreport.document</artifactId>
 4   <version>1.0.5</version>
 5 </dependency>
 6 <dependency> 
 7   <groupId>fr.opensagres.xdocreport</groupId> 
 8   <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId> 
 9   <version>1.0.5</version> 
10 </dependency>
11   <dependency>
12   <groupId>org.apache.poi</groupId>
13   <artifactId>poi</artifactId>
14   <version>3.12</version>
15 </dependency>
16 <dependency>
17   <groupId>org.apache.poi</groupId>
18   <artifactId>poi-scratchpad</artifactId>
19   <version>3.12</version>
20 </dependency>
Nach dem Login kopieren

  2:代码demo

  1 package com.svse.controller;
  2 
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.core.IURIResolver;
import org.apache.poi.xwpf.converter.core.IXWPFConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
 24 /**
 25  * word 转换成html
 26  */
 27 public class TestWordToHtml {
 28 
 29     public static  final String STORAGEPATH="C://works//files//";
 30     public static  final String IP="192.168.30.222";
 31     public static  final String PORT="8010";
 32     public static void main(String[] args) throws IOException, TransformerException, ParserConfigurationException {
 33         TestWordToHtml wt=new TestWordToHtml();
 34         //wt.Word2003ToHtml("甲骨文考证.doc");
 35         wt.Word2007ToHtml("甲骨文考证.docx");
 36 
 37     }
 38       
 39      /**
 40      * 2003版本word转换成html
 41      * @throws IOException
 42      * @throws TransformerException
 43      * @throws ParserConfigurationException
 44      */
 45     public void Word2003ToHtml(String fileName) throws IOException, TransformerException, ParserConfigurationException {
 46        
 47         final String imagepath = STORAGEPATH+"fileImage/";//解析时候如果doc文件中有图片  图片会保存在此路径
 48         final String strRanString=getRandomNum();
 49         String filepath =STORAGEPATH;
 50         String htmlName =fileName.substring(0, fileName.indexOf("."))+ "2003.html";
 51         final String file = filepath + fileName;
 52         InputStream input = new FileInputStream(new File(file));
 53         HWPFDocument wordDocument = new HWPFDocument(input);
 54         WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
 55         //设置图片存放的位置
 56         wordToHtmlConverter.setPicturesManager(new PicturesManager() {
 57             public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
 58                 File imgPath = new File(imagepath);
 59                 if(!imgPath.exists()){//图片目录不存在则创建
 60                     imgPath.mkdirs();
 61                 }
 62                 
 63                 File file = new File(imagepath +strRanString+suggestedName);
 64                 try {
 65                     OutputStream os = new FileOutputStream(file);
 66                     os.write(content);
 67                     os.close();
 68                 } catch (FileNotFoundException e) {
 69                     e.printStackTrace();
 70                 } catch (IOException e) {
 71                     e.printStackTrace();
 72                 }
 73                 
 74                 return  "http://"+IP+":"+PORT+"//uploadFile/fileImage/"+strRanString+suggestedName;
 75                // return imagepath +strRanString+suggestedName;
 76             }
 77         });
 78         
 79         //解析word文档
 80         wordToHtmlConverter.processDocument(wordDocument);
 81         Document htmlDocument = wordToHtmlConverter.getDocument();
 82         
 83         File htmlFile = new File(filepath +strRanString+htmlName);
 84         OutputStream outStream = new FileOutputStream(htmlFile);
 85         
 86 
 87         DOMSource domSource = new DOMSource(htmlDocument);
 88         StreamResult streamResult = new StreamResult(outStream);
 89 
 90         TransformerFactory factory = TransformerFactory.newInstance();
 91         Transformer serializer = factory.newTransformer();
 92         serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
 93         serializer.setOutputProperty(OutputKeys.INDENT, "yes");
 94         serializer.setOutputProperty(OutputKeys.METHOD, "html");
 95         
 96         serializer.transform(domSource, streamResult);
 97         outStream.close();
 98         
 99         System.out.println("生成html文件路径:"+ "http://"+IP+":"+PORT+"//uploadFile/"+strRanString+htmlName);
100     }
101 
102     /**
103      * 2007版本word转换成html
104      * @throws IOException
105      */
106     public void Word2007ToHtml(String fileName) throws IOException {
107         
108        final String strRanString=getRandomNum();
109         
110         String filepath = STORAGEPATH+strRanString;
111         String htmlName =fileName.substring(0, fileName.indexOf("."))+ "2007.html";
112         File f = new File(STORAGEPATH+fileName);  
113         if (!f.exists()) {  
114             System.out.println("Sorry File does not Exists!");  
115         } else {  
116             if (f.getName().endsWith(".docx") || f.getName().endsWith(".DOCX")) {  
117                 try {
118                     // 1) 加载word文档生成 XWPFDocument对象  
119                     InputStream in = new FileInputStream(f);  
120                     XWPFDocument document = new XWPFDocument(in);  
121       
122                     // 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录)  
123                     File imageFolderFile = new File(filepath);  
124                     XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile));  
125                     options.setExtractor(new FileImageExtractor(imageFolderFile));  
126                     options.URIResolver(new IURIResolver() {
127                         public String resolve(String uri) {
128                             //http://192.168.30.222:8010//uploadFile/....
129                             return "http://"+IP+":"+PORT+"//uploadFile/"+strRanString +"/"+ uri;
130                         }
131                     });
132                     
133                     options.setIgnoreStylesIfUnused(false);  
134                     options.setFragment(true);  
135                       
136                     // 3) 将 XWPFDocument转换成XHTML  
137                     OutputStream out = new FileOutputStream(new File(filepath + htmlName));  
138                     IXWPFConverter<XHTMLOptions> converter = XHTMLConverter.getInstance();
139                     converter.convert(document,out, options);
140                     //XHTMLConverter.getInstance().convert(document, out, options);  
141                     System.out.println("html路径:"+"http://"+IP+":"+PORT+"//uploadFile/"+strRanString+htmlName);
142                 } catch (Exception e) {
143                     e.printStackTrace();
144                 }
145             
146             } else {  
147                 System.out.println("Enter only MS Office 2007+ files");  
148             }  
149         }  
150     }  
151 
152      /**
153      *功能说明:生成时间戳
154      *创建人:zsq
155      *创建时间:2019年12月7日 下午2:37:09
156      *
157      */
158      public static String getRandomNum(){
159          Date dt = new Date();
160          SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");  
161          String str=sdf.format(dt);
162          return str;
163      }
164      
165    }
Nach dem Login kopieren

二:Java实现将Pdf转换为html

  1: 引入依赖

 1 <dependency>
 2             <groupId>net.sf.cssbox</groupId>
 3             <artifactId>pdf2dom</artifactId>
 4             <version>1.7</version>
 5         </dependency> 
 6         <dependency>
 7             <groupId>org.apache.pdfbox</groupId>
 8             <artifactId>pdfbox</artifactId>
 9             <version>2.0.12</version>
10         </dependency>
11         <dependency>
12             <groupId>org.apache.pdfbox</groupId>
13             <artifactId>pdfbox-tools</artifactId>
14             <version>2.0.12</version>
15  </dependency>
16
Nach dem Login kopieren

2:代码Demo

 1 public class PdfToHtml {
 2 
 3   /*
 4     pdf转换html
 5      */
 6     public void pdfToHtmlTest(String inPdfPath,String outputHtmlPath)  {
 7        // String outputPath = "C:\\works\\files\\ZSQ保密知识测试题库.html";
 8     9        //try() 写在()里面会自动关闭流
10         try{
11             BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(outputHtmlPath)),"utf-8"));
12             //加载PDF文档
13             //PDDocument document = PDDocument.load(bytes);
14             PDDocument document = PDDocument.load(new File(inPdfPath));
15             PDFDomTree pdfDomTree = new PDFDomTree();
16             pdfDomTree.writeText(document,out);
17         } catch (Exception e) {
18             e.printStackTrace();
19         }
20     }
21 
22     public static void main(String[] args) throws IOException {
23         PdfToHtml ph=new PdfToHtml();
24         String pdfPath="C:\\works\\files\\武研中心行政考勤制度.pdf";
25         String outputPath="C:\\works\\files\\武研中心行政考勤制度.html";
26         ph.pdfToHtmlTest(pdfPath,outputPath);
27   }
28 
29 }
Nach dem Login kopieren

三:Java实现将TXT转换为html

 1  /*
 2      * txt文档转html
 3        filePath:txt原文件路径
 4        htmlPosition:转化后生成的html路径
 5     */
 6     public static void txtToHtml(String filePath, String htmlPosition) {
 7         try {
 8             //String encoding = "GBK";
 9             File file = new File(filePath);
10             if (file.isFile() && file.exists()) { // 判断文件是否存在
11                 InputStreamReader read = new InputStreamReader(new FileInputStream(file), "GBK");
12                 // 考虑到编码格式
13                 BufferedReader bufferedReader = new BufferedReader(read);
14                 // 写文件
15                 FileOutputStream fos = new FileOutputStream(new File(htmlPosition));
16                 OutputStreamWriter osw = new OutputStreamWriter(fos, "GBK");
17                 BufferedWriter bw = new BufferedWriter(osw);
18                 String lineTxt = null;
19                 while ((lineTxt = bufferedReader.readLine()) != null) {
20                     bw.write("&nbsp&nbsp&nbsp"+lineTxt + "</br>");
21                 }
22                 bw.close();
23                 osw.close();
24                 fos.close();
25                 read.close();
26             } else {
27                 System.out.println("找不到指定的文件");
28             }
29         } catch (Exception e) {
30             System.out.println("读取文件内容出错");
31             e.printStackTrace();
32         }
33     }
Nach dem Login kopieren

Das Obige ist der detaillierte Inhalt von „Java-Methode zum Konvertieren von Word/PDF/TXT in HTML“. Weitere Informationen finden Sie in anderen verwandten Artikeln auf der chinesischen PHP-Website!

Verwandte Etiketten:
Quelle:yisu.com
Erklärung dieser Website
Der Inhalt dieses Artikels wird freiwillig von Internetnutzern beigesteuert und das Urheberrecht liegt beim ursprünglichen Autor. Diese Website übernimmt keine entsprechende rechtliche Verantwortung. Wenn Sie Inhalte finden, bei denen der Verdacht eines Plagiats oder einer Rechtsverletzung besteht, wenden Sie sich bitte an admin@php.cn
Beliebte Tutorials
Mehr>
Neueste Downloads
Mehr>
Web-Effekte
Quellcode der Website
Website-Materialien
Frontend-Vorlage