1 package edu.jiangxin.apktoolbox.pdf;
2
3 import com.itextpdf.io.image.ImageDataFactory;
4 import com.itextpdf.kernel.geom.PageSize;
5 import com.itextpdf.kernel.pdf.PdfDocument;
6 import com.itextpdf.kernel.pdf.PdfReader;
7 import com.itextpdf.kernel.pdf.PdfWriter;
8 import com.itextpdf.layout.Document;
9 import com.itextpdf.layout.element.AreaBreak;
10 import com.itextpdf.layout.element.Image;
11 import com.itextpdf.layout.properties.HorizontalAlignment;
12 import org.apache.commons.io.IOUtils;
13 import org.apache.logging.log4j.LogManager;
14 import org.apache.logging.log4j.Logger;
15 import org.apache.pdfbox.Loader;
16 import org.apache.pdfbox.pdmodel.PDDocument;
17 import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
18 import org.apache.pdfbox.pdmodel.PDPage;
19 import org.apache.pdfbox.pdmodel.PDPageTree;
20 import org.apache.pdfbox.text.PDFTextStripper;
21
22 import javax.imageio.ImageIO;
23 import java.awt.image.BufferedImage;
24 import java.io.File;
25 import java.io.FileOutputStream;
26 import java.io.IOException;
27 import java.util.Set;
28
29 public class PdfUtils {
30 private static final Logger LOGGER = LogManager.getLogger(PdfUtils.class.getSimpleName());
31 public static boolean isScannedPdf(File file, int threshold) {
32 int length = 0;
33
34 try (PDDocument document = Loader.loadPDF(file)) {
35 boolean isEncrypted = document.isEncrypted();
36 if (isEncrypted) {
37 document.setAllSecurityToBeRemoved(true);
38 }
39
40 PDFTextStripper stripper = new PDFTextStripper();
41 String text = stripper.getText(document).trim();
42 length = text.length();
43 } catch (IOException e) {
44 LOGGER.error("Error reading PDF file: {}", e.getMessage());
45 return false;
46 }
47 LOGGER.info("Processing file: {}, text size: {}", file.getPath(), length);
48 return length < threshold;
49 }
50
51 public static boolean isEncryptedPdf(File file) {
52 boolean isEncrypted;
53
54 try (PDDocument document = Loader.loadPDF(file)) {
55 isEncrypted = document.isEncrypted();
56 } catch (IOException e) {
57 LOGGER.error("Error reading PDF file: {}", e.getMessage());
58 return false;
59 }
60 LOGGER.info("Processing file: {}, is encrypted: {}", file.getPath(), isEncrypted);
61 return isEncrypted;
62 }
63
64 public static boolean isNonOutlinePdf(File file) {
65 boolean hasOutline = false;
66
67 try (PDDocument document = Loader.loadPDF(file)) {
68 boolean isEncrypted = document.isEncrypted();
69 if (isEncrypted) {
70 document.setAllSecurityToBeRemoved(true);
71 }
72
73 if (document.getDocumentCatalog() != null && document.getDocumentCatalog().getDocumentOutline() != null) {
74 hasOutline = true;
75 }
76 } catch (IOException e) {
77 LOGGER.error("Error reading PDF file: {}", e.getMessage());
78 return false;
79 }
80 LOGGER.info("Processing file: {}, has outline: {}", file.getPath(), hasOutline);
81 return !hasOutline;
82 }
83
84 public static boolean hasAnnotations(File file) {
85 boolean hasAnnotations = false;
86
87 try (PDDocument document = Loader.loadPDF(file)) {
88 boolean isEncrypted = document.isEncrypted();
89 if (isEncrypted) {
90 document.setAllSecurityToBeRemoved(true);
91 }
92 PDDocumentCatalog catalog = document.getDocumentCatalog();
93 if (catalog == null) {
94 return false;
95 }
96 PDPageTree pages = document.getDocumentCatalog().getPages();
97 if (pages == null || pages.getCount() == 0) {
98 return false;
99 }
100
101 for (PDPage page : pages) {
102 if (page.getAnnotations() != null && !page.getAnnotations().isEmpty()) {
103 int pageNumber = page.getCOSObject().getInt("PageNumber", 0);
104 String subType = page.getAnnotations().get(0).getSubtype();
105 LOGGER.info("Found annotations on page: {}, subType: {}", pageNumber, subType);
106 if (!subType.equals("Link")) {
107 hasAnnotations = true;
108 break;
109 }
110 }
111 }
112 } catch (IOException e) {
113 LOGGER.error("Error reading PDF file: {}", e.getMessage());
114 return hasAnnotations;
115 }
116 LOGGER.info("Processing file: {}, has annotations: {}", file.getPath(), hasAnnotations);
117 return hasAnnotations;
118 }
119
120 public static void removePassword(File encryptedFile, File targetDir) {
121 try (PDDocument document = Loader.loadPDF(encryptedFile)) {
122 boolean isEncrypted = document.isEncrypted();
123 if (isEncrypted) {
124 document.setAllSecurityToBeRemoved(true);
125 }
126 String targetFilePath = targetDir.getAbsolutePath() + File.separator + encryptedFile.getName();
127 document.save(targetFilePath);
128 LOGGER.info("Remove password success: {}", targetFilePath);
129 } catch (IOException e) {
130 LOGGER.error("Error processing PDF file: {}", e.getMessage());
131 }
132 }
133
134 public static void removePasswordWithIText(File encryptedFile, File targetDir) {
135 String targetFilePath = targetDir.getAbsolutePath() + File.separator + encryptedFile.getName();
136 PdfReader reader = null;
137 PdfDocument pdfDoc = null;
138 PdfWriter writer = null;
139 try {
140 reader = new PdfReader(encryptedFile);
141 reader.setUnethicalReading(true);
142 writer = new PdfWriter(targetFilePath);
143 pdfDoc = new PdfDocument(reader, writer);
144 } catch (IOException e) {
145 LOGGER.error("Error processing PDF file: {}", e.getMessage());
146 } finally {
147 IOUtils.closeQuietly(writer);
148 IOUtils.closeQuietly(pdfDoc);
149 IOUtils.closeQuietly(reader);
150 }
151 }
152
153 public static int getPageCount(File file) {
154 int pageCount = 0;
155
156 try (PDDocument document = Loader.loadPDF(file)) {
157 boolean isEncrypted = document.isEncrypted();
158 if (isEncrypted) {
159 document.setAllSecurityToBeRemoved(true);
160 }
161 pageCount = document.getNumberOfPages();
162 } catch (IOException e) {
163 LOGGER.error("Error reading PDF file: {}", e.getMessage());
164 return 0;
165 }
166 LOGGER.info("Processing file: {}, page count: {}", file.getPath(), pageCount);
167 return pageCount;
168 }
169
170 public static void imagesToPdf(Set<File> images, File targetFile) {
171 PdfDocument pdfDoc = null;
172 PdfWriter writer = null;
173 Document doc = null;
174 try {
175 writer = new PdfWriter(new FileOutputStream(targetFile));
176 pdfDoc = new PdfDocument(writer);
177 doc = new Document(pdfDoc);
178
179 for (File img : images) {
180 BufferedImage bufferedImage = ImageIO.read(img);
181 float width = bufferedImage.getWidth();
182 float height = bufferedImage.getHeight();
183
184 PageSize pageSize = new PageSize(width, height);
185 pdfDoc.addNewPage(pageSize);
186
187 Image image = new Image(ImageDataFactory.create(img.getAbsolutePath()));
188 image.setFixedPosition(pdfDoc.getNumberOfPages(), 0, 0, width);
189
190 doc.setMargins(0, 0, 0, 0);
191 doc.add(image);
192 }
193 } catch (IOException e) {
194 LOGGER.error("Error processing PDF file: {}", e.getMessage());
195 } finally {
196 IOUtils.closeQuietly(doc);
197 IOUtils.closeQuietly(pdfDoc);
198 IOUtils.closeQuietly(writer);
199 }
200 }
201 }