1 package edu.jiangxin.apktoolbox.word;
2
3 import org.apache.logging.log4j.LogManager;
4 import org.apache.logging.log4j.Logger;
5 import org.apache.poi.hpsf.SummaryInformation;
6 import org.apache.poi.hwpf.HWPFDocument;
7 import org.apache.poi.xwpf.usermodel.XWPFDocument;
8
9 import java.io.File;
10 import java.io.FileInputStream;
11 import java.io.IOException;
12
13 public class WordUtils {
14
15 private static final Logger LOGGER = LogManager.getLogger(WordUtils.class.getSimpleName());
16
17 private WordUtils() {
18
19 }
20
21
22
23
24
25
26
27
28
29
30 public static int getPageCount(File file) {
31 if (file == null || !file.exists() || !file.isFile()) {
32 LOGGER.warn("Invalid file: {}", file);
33 return 0;
34 }
35
36 String name = file.getName().toLowerCase();
37 if (name.endsWith(".doc")) {
38 return getDocPageCount(file);
39 } else if (name.endsWith(".docx")) {
40 return getDocxPageCount(file);
41 } else {
42 LOGGER.warn("Unsupported file type: {}", file.getPath());
43 return 0;
44 }
45 }
46
47 private static int getDocPageCount(File file) {
48 try (FileInputStream fis = new FileInputStream(file);
49 HWPFDocument document = new HWPFDocument(fis)) {
50 SummaryInformation si = document.getSummaryInformation();
51 int pageCount = (si != null) ? si.getPageCount() : 0;
52 if (pageCount <= 0) {
53 LOGGER.info("Page count <= 0 for .doc file: {}, returning 0", file.getPath());
54 return 0;
55 }
56 LOGGER.info("Processing .doc file: {}, page count: {}", file.getPath(), pageCount);
57 return pageCount;
58 } catch (IOException e) {
59 LOGGER.error("Error reading .doc file: {}, message: {}", file.getPath(), e.getMessage());
60 return 0;
61 }
62 }
63
64 private static int getDocxPageCount(File file) {
65 try (FileInputStream fis = new FileInputStream(file);
66 XWPFDocument document = new XWPFDocument(fis)) {
67
68 int pageCount = 0;
69 if (document.getProperties() != null
70 && document.getProperties().getExtendedProperties() != null
71 && document.getProperties().getExtendedProperties().getUnderlyingProperties() != null) {
72 pageCount = document.getProperties().getExtendedProperties().getUnderlyingProperties().getPages();
73 }
74
75 if (pageCount <= 0) {
76 String revision = null;
77 if (document.getProperties() != null
78 && document.getProperties().getCoreProperties() != null) {
79 revision = document.getProperties().getCoreProperties().getRevision();
80 }
81 if (revision != null && !revision.isEmpty()) {
82 try {
83 pageCount = Integer.parseInt(revision);
84 } catch (NumberFormatException e) {
85 LOGGER.warn("Cannot parse revision as page count for .docx file: {}", file.getPath());
86 pageCount = 0;
87 }
88 }
89 }
90 if (pageCount <= 0) {
91 LOGGER.info("Page count <= 0 for .docx file: {}, returning 0", file.getPath());
92 return 0;
93 }
94 LOGGER.info("Processing .docx file: {}, page count: {}", file.getPath(), pageCount);
95 return pageCount;
96 } catch (IOException e) {
97 LOGGER.error("Error reading .docx file: {}, message: {}", file.getPath(), e.getMessage());
98 return 0;
99 }
100 }
101 }