1 package edu.jiangxin.apktoolbox.file.core;
2
3 import java.io.File;
4 import java.io.FileInputStream;
5 import java.io.IOException;
6 import java.nio.charset.Charset;
7 import java.util.concurrent.CountDownLatch;
8
9 import org.apache.commons.lang3.StringUtils;
10 import org.apache.logging.log4j.LogManager;
11 import org.apache.logging.log4j.Logger;
12 import org.mozilla.universalchardet.UniversalDetector;
13
14 import info.monitorenter.cpdetector.io.ASCIIDetector;
15 import info.monitorenter.cpdetector.io.CodepageDetectorProxy;
16 import info.monitorenter.cpdetector.io.JChardetFacade;
17 import info.monitorenter.cpdetector.io.ParsingDetector;
18 import info.monitorenter.cpdetector.io.UnicodeDetector;
19
20
21
22
23
24
25 public class EncoderDetector {
26 private static final Logger logger = LogManager.getLogger(EncoderDetector.class.getSimpleName());
27 private static String[] detectorCharsets;
28 private static CountDownLatch countDownLatch;
29
30
31
32
33
34
35
36 public static String judgeFile(String fileName) {
37 File file = new File(fileName);
38 if (!file.exists()) {
39 logger.error("Can't find the file: " + fileName);
40 return null;
41 }
42
43 detectorCharsets = new String[2];
44 countDownLatch = new CountDownLatch(2);
45
46 Thread cpDetectorThread = new Thread(new Runnable() {
47 @Override
48 public void run() {
49 CodepageDetectorProxy cpDetectorProxy = CodepageDetectorProxy.getInstance();
50
51
52 cpDetectorProxy.add(new ParsingDetector(false));
53 cpDetectorProxy.add(JChardetFacade.getInstance());
54 cpDetectorProxy.add(ASCIIDetector.getInstance());
55 cpDetectorProxy.add(UnicodeDetector.getInstance());
56 Charset charset = null;
57 try {
58
59 charset = cpDetectorProxy.detectCodepage(file.toURI().toURL());
60 } catch (IOException e) {
61 logger.error("cpDetector failed", e);
62 detectorCharsets[0] = null;
63 }
64 if (charset != null) {
65 detectorCharsets[0] = charset.name();
66 } else {
67 detectorCharsets[0] = null;
68 }
69 countDownLatch.countDown();
70 }
71 });
72
73 Thread universalDetectorThread = new Thread(new Runnable() {
74 @Override
75 public void run() {
76 UniversalDetector universalDetector = new UniversalDetector(null);
77 byte[] buf = new byte[4096];
78 FileInputStream fis = null;
79 try {
80 fis = new FileInputStream(file);
81 int nread;
82 while ((nread = fis.read(buf)) > 0 && !universalDetector.isDone()) {
83 universalDetector.handleData(buf, 0, nread);
84 }
85 universalDetector.dataEnd();
86 detectorCharsets[1] = universalDetector.getDetectedCharset();
87 } catch (IOException e) {
88 logger.error("universalDetector failed", e);
89 detectorCharsets[1] = null;
90 } finally {
91 if (fis != null) {
92 try {
93 fis.close();
94 } catch (IOException e) {
95 logger.error("close fis failed", e);
96 }
97 }
98 }
99 countDownLatch.countDown();
100 }
101 });
102 cpDetectorThread.start();
103 universalDetectorThread.start();
104 try {
105 countDownLatch.await();
106 } catch (InterruptedException e) {
107 logger.error("await InterruptedException");
108 Thread.currentThread().interrupt();
109 }
110 return electBestCharset();
111 }
112
113 private static String electBestCharset() {
114 StringBuilder sb = new StringBuilder();
115 sb.append("cpDetector: ").append(detectorCharsets[0]).append(", universalDetector: ")
116 .append(detectorCharsets[1]);
117 if (StringUtils.isEmpty(detectorCharsets[0]) && StringUtils.isEmpty(detectorCharsets[1])) {
118 logger.warn(sb.toString());
119 return null;
120 } else if (StringUtils.isEmpty(detectorCharsets[0]) && StringUtils.isNotEmpty(detectorCharsets[1])) {
121 logger.info(sb.toString());
122 return detectorCharsets[1];
123 } else if (StringUtils.isNotEmpty(detectorCharsets[0]) && StringUtils.isEmpty(detectorCharsets[1])) {
124 logger.info(sb.toString());
125 return detectorCharsets[0];
126 } else if (detectorCharsets[0].equals(detectorCharsets[1])) {
127 logger.info(sb.toString());
128 return detectorCharsets[1];
129 } else {
130 logger.warn(sb.toString());
131 return detectorCharsets[1];
132 }
133
134 }
135 }