> Java > Java 튜토리얼 > Word/Pdf/TXT를 HTML로 변환하는 Java 방법

Word/Pdf/TXT를 HTML로 변환하는 Java 방법

PHPz
게시일: 2023-04-26 10:37:07
앞으로
1297명이 탐색했습니다.

一:Java实现将word转换为html

   1:引入依赖

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

<dependency>
  <groupId>fr.opensagres.xdocreport</groupId>
  <artifactId>fr.opensagres.xdocreport.document</artifactId>
  <version>1.0.5</version>
</dependency>
<dependency>
  <groupId>fr.opensagres.xdocreport</groupId>
  <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
  <version>1.0.5</version>
</dependency>
<dependency>
  <groupId>org.apache.poi</groupId>
  <artifactId>poi</artifactId>
  <version>3.12</version>
</dependency>
<dependency>
  <groupId>org.apache.poi</groupId>
  <artifactId>poi-scratchpad</artifactId>
  <version>3.12</version>
</dependency>

로그인 후 복사

  2:代码demo

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

  1 package com.svse.controller;

  

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.SimpleDateFormat;
import java.util.Date;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.core.IURIResolver;
import org.apache.poi.xwpf.converter.core.IXWPFConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

 24 /**

 25  * word 转换成html

 26  */

 27 public class TestWordToHtml {

 28 

 29     public static  final String STORAGEPATH="C://works//files//";

 30     public static  final String IP="192.168.30.222";

 31     public static  final String PORT="8010";

 32     public static void main(String[] args) throws IOException, TransformerException, ParserConfigurationException {

 33         TestWordToHtml wt=new TestWordToHtml();

 34         //wt.Word2003ToHtml("甲骨文考证.doc");

 35         wt.Word2007ToHtml("甲骨文考证.docx");

 36 

 37     }

 38       

 39      /**

 40      * 2003版本word转换成html

 41      * @throws IOException

 42      * @throws TransformerException

 43      * @throws ParserConfigurationException

 44      */

 45     public void Word2003ToHtml(String fileName) throws IOException, TransformerException, ParserConfigurationException {

 46        

 47         final String imagepath = STORAGEPATH+"fileImage/";//解析时候如果doc文件中有图片  图片会保存在此路径

 48         final String strRanString=getRandomNum();

 49         String filepath =STORAGEPATH;

 50         String htmlName =fileName.substring(0, fileName.indexOf("."))+ "2003.html";

 51         final String file = filepath + fileName;

 52         InputStream input = new FileInputStream(new File(file));

 53         HWPFDocument wordDocument = new HWPFDocument(input);

 54         WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

 55         //设置图片存放的位置

 56         wordToHtmlConverter.setPicturesManager(new PicturesManager() {

 57             public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {

 58                 File imgPath = new File(imagepath);

 59                 if(!imgPath.exists()){//图片目录不存在则创建

 60                     imgPath.mkdirs();

 61                 }

 62                 

 63                 File file = new File(imagepath +strRanString+suggestedName);

 64                 try {

 65                     OutputStream os = new FileOutputStream(file);

 66                     os.write(content);

 67                     os.close();

 68                 } catch (FileNotFoundException e) {

 69                     e.printStackTrace();

 70                 } catch (IOException e) {

 71                     e.printStackTrace();

 72                 }

 73                 

 74                 return  "http://"+IP+":"+PORT+"//uploadFile/fileImage/"+strRanString+suggestedName;

 75                // return imagepath +strRanString+suggestedName;

 76             }

 77         });

 78         

 79         //解析word文档

 80         wordToHtmlConverter.processDocument(wordDocument);

 81         Document htmlDocument = wordToHtmlConverter.getDocument();

 82         

 83         File htmlFile = new File(filepath +strRanString+htmlName);

 84         OutputStream outStream = new FileOutputStream(htmlFile);

 85         

 86 

 87         DOMSource domSource = new DOMSource(htmlDocument);

 88         StreamResult streamResult = new StreamResult(outStream);

 89 

 90         TransformerFactory factory = TransformerFactory.newInstance();

 91         Transformer serializer = factory.newTransformer();

 92         serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");

 93         serializer.setOutputProperty(OutputKeys.INDENT, "yes");

 94         serializer.setOutputProperty(OutputKeys.METHOD, "html");

 95         

 96         serializer.transform(domSource, streamResult);

 97         outStream.close();

 98         

 99         System.out.println("生成html文件路径:""http://"+IP+":"+PORT+"//uploadFile/"+strRanString+htmlName);

100     }

101 

102     /**

103      * 2007版本word转换成html

104      * @throws IOException

105      */

106     public void Word2007ToHtml(String fileName) throws IOException {

107         

108        final String strRanString=getRandomNum();

109         

110         String filepath = STORAGEPATH+strRanString;

111         String htmlName =fileName.substring(0, fileName.indexOf("."))+ "2007.html";

112         File f = new File(STORAGEPATH+fileName);  

113         if (!f.exists()) {  

114             System.out.println("Sorry File does not Exists!");  

115         } else {  

116             if (f.getName().endsWith(".docx") || f.getName().endsWith(".DOCX")) {  

117                 try {

118                     // 1) 加载word文档生成 XWPFDocument对象  

119                     InputStream in = new FileInputStream(f);  

120                     XWPFDocument document = new XWPFDocument(in);  

121       

122                     // 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录)  

123                     File imageFolderFile = new File(filepath);  

124                     XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile));  

125                     options.setExtractor(new FileImageExtractor(imageFolderFile));  

126                     options.URIResolver(new IURIResolver() {

127                         public String resolve(String uri) {

128                             //http://192.168.30.222:8010//uploadFile/....

129                             return "http://"+IP+":"+PORT+"//uploadFile/"+strRanString +"/"+ uri;

130                         }

131                     });

132                     

133                     options.setIgnoreStylesIfUnused(false);  

134                     options.setFragment(true);  

135                       

136                     // 3) 将 XWPFDocument转换成XHTML  

137                     OutputStream out = new FileOutputStream(new File(filepath + htmlName));  

138                     IXWPFConverter<XHTMLOptions> converter = XHTMLConverter.getInstance();

139                     converter.convert(document,out, options);

140                     //XHTMLConverter.getInstance().convert(document, out, options);  

141                     System.out.println("html路径:"+"http://"+IP+":"+PORT+"//uploadFile/"+strRanString+htmlName);

142                 } catch (Exception e) {

143                     e.printStackTrace();

144                 }

145             

146             } else {  

147                 System.out.println("Enter only MS Office 2007+ files");  

148             }  

149         }  

150     }  

151 

152      /**

153      *功能说明:生成时间戳

154      *创建人:zsq

155      *创建时间:2019年12月7日 下午2:37:09

156      *

157      */

158      public static String getRandomNum(){

159          Date dt = new Date();

160          SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");  

161          String str=sdf.format(dt);

162          return str;

163      }

164      

165    }

로그인 후 복사

二:Java实现将Pdf转换为html

  1: 引入依赖

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

<dependency>
  <groupId>net.sf.cssbox</groupId>
  <artifactId>pdf2dom</artifactId>
  <version>1.7</version>
</dependency>
<dependency>
  <groupId>org.apache.pdfbox</groupId>
  <artifactId>pdfbox</artifactId>
  <version>2.0.12</version>
</dependency>
<dependency>
  <groupId>org.apache.pdfbox</groupId>
  <artifactId>pdfbox-tools</artifactId>
  <version>2.0.12</version>
</dependency>

16

로그인 후 복사

2:代码Demo

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

 public class PdfToHtml {

 

 3   /*

 4     pdf转换html

 5      */

 6     public void pdfToHtmlTest(String inPdfPath,String outputHtmlPath)  {

 7        // String outputPath = "C:\\works\\files\\ZSQ保密知识测试题库.html";

 8     9        //try() 写在()里面会自动关闭流

10         try{

11             BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(outputHtmlPath)),"utf-8"));

12             //加载PDF文档

13             //PDDocument document = PDDocument.load(bytes);

14             PDDocument document = PDDocument.load(new File(inPdfPath));

15             PDFDomTree pdfDomTree = new PDFDomTree();

16             pdfDomTree.writeText(document,out);

17         } catch (Exception e) {

18             e.printStackTrace();

19         }

20     }

21 

22     public static void main(String[] args) throws IOException {

23         PdfToHtml ph=new PdfToHtml();

24         String pdfPath="C:\\works\\files\\武研中心行政考勤制度.pdf";

25         String outputPath="C:\\works\\files\\武研中心行政考勤制度.html";

26         ph.pdfToHtmlTest(pdfPath,outputPath);

27   }

28 

29 }

로그인 후 복사

三:Java实现将TXT转换为html

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

 1  /*

 2      * txt文档转html

 3        filePath:txt原文件路径

 4        htmlPosition:转化后生成的html路径

 5     */

 6     public static void txtToHtml(String filePath, String htmlPosition) {

 7         try {

 8             //String encoding = "GBK";

 9             File file = new File(filePath);

10             if (file.isFile() && file.exists()) { // 判断文件是否存在

11                 InputStreamReader read = new InputStreamReader(new FileInputStream(file), "GBK");

12                 // 考虑到编码格式

13                 BufferedReader bufferedReader = new BufferedReader(read);

14                 // 写文件

15                 FileOutputStream fos = new FileOutputStream(new File(htmlPosition));

16                 OutputStreamWriter osw = new OutputStreamWriter(fos, "GBK");

17                 BufferedWriter bw = new BufferedWriter(osw);

18                 String lineTxt = null;

19                 while ((lineTxt = bufferedReader.readLine()) != null) {

20                     bw.write("&nbsp&nbsp&nbsp"+lineTxt + "</br>");

21                 }

22                 bw.close();

23                 osw.close();

24                 fos.close();

25                 read.close();

26             } else {

27                 System.out.println("找不到指定的文件");

28             }

29         } catch (Exception e) {

30             System.out.println("读取文件内容出错");

31             e.printStackTrace();

32         }

33     }

로그인 후 복사

위 내용은 Word/Pdf/TXT를 HTML로 변환하는 Java 방법의 상세 내용입니다. 자세한 내용은 PHP 중국어 웹사이트의 기타 관련 기사를 참조하세요!

관련 라벨:
본 웹사이트의 성명
본 글의 내용은 네티즌들의 자발적인 기여로 작성되었으며, 저작권은 원저작자에게 있습니다. 본 사이트는 이에 상응하는 법적 책임을 지지 않습니다. 표절이나 침해가 의심되는 콘텐츠를 발견한 경우 admin@php.cn으로 문의하세요.
인기 튜토리얼
더>
최신 다운로드
더>
웹 효과
웹사이트 소스 코드
웹사이트 자료
프론트엔드 템플릿