Skip to content

Commit 561633a

Browse files
Make EncodingTester usable in testing parsed state
This change updates EncodingTester so that it tests the result in cases where the expected character encoding cannot be determined by checking only the first 1024 bytes of the input stream. Without this change, EncodingTester is only useful for testing the output of the meta prescan. This change also allows EncodingTester to be given a directory name rather than a list of files (or a pathname with a shell wildcard). When given a directory name, it recurses through the directory looking for *.dat files and then runs the tests from those files. Without that change, we can’t easily run EncodingTester from AntRun in Maven — because we can’t use shell wildcards in the “arg” value for the Ant “java” task, and any list of files we otherwise construct within Maven ends up being put into the java arg value as a single string (a single argument) — including the spaces between filenames.
1 parent cd53ca7 commit 561633a

File tree

1 file changed

+47
-8
lines changed

1 file changed

+47
-8
lines changed

test-src/nu/validator/htmlparser/test/EncodingTester.java

+47-8
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright (c) 2007 Henri Sivonen
3-
* Copyright (c) 2008 Mozilla Foundation
3+
* Copyright (c) 2008-2020 Mozilla Foundation
44
*
55
* Permission is hereby granted, free of charge, to any person obtaining a
66
* copy of this software and associated documentation files (the "Software"),
@@ -23,6 +23,7 @@
2323

2424
package nu.validator.htmlparser.test;
2525

26+
import java.io.File;
2627
import java.io.FileInputStream;
2728
import java.io.IOException;
2829
import java.io.InputStream;
@@ -36,6 +37,8 @@
3637

3738
public class EncodingTester {
3839

40+
protected static int SNIFFING_LIMIT = 16384;
41+
3942
private final InputStream aggregateStream;
4043

4144
private final StringBuilder builder = new StringBuilder();
@@ -47,6 +50,13 @@ public EncodingTester(InputStream aggregateStream) {
4750
this.aggregateStream = aggregateStream;
4851
}
4952

53+
/**
 * Creates a tester that has no aggregate stream of its own.
 *
 * An instance built this way is only suitable as the receiver of
 * recurseDirectory, which constructs a separate stream-backed tester
 * for every test file it runs.
 */
public EncodingTester() {
    this.aggregateStream = null;
}
59+
5060
private void runTests() throws IOException, SAXException {
5161
while (runTest()) {
5262
// spin
@@ -59,7 +69,7 @@ private boolean runTest() throws IOException, SAXException {
5969
}
6070
UntilHashInputStream stream = new UntilHashInputStream(aggregateStream);
6171
HtmlInputStreamReader reader = new HtmlInputStreamReader(stream, null,
62-
null, null, Heuristics.NONE);
72+
null, null, Heuristics.NONE, SNIFFING_LIMIT);
6373
Charset charset = reader.getCharset();
6474
stream.close();
6575
if (skipLabel()) {
@@ -107,16 +117,45 @@ private boolean skipLabel() throws IOException {
107117
}
108118
}
109119

120+
private void recurseDirectory(File directory) throws Throwable {
121+
if ("scripted".equals(directory.getName())) {
122+
return;
123+
}
124+
if (directory.canRead()) {
125+
File[] files = directory.listFiles();
126+
for (File file : files) {
127+
if (file.isDirectory()) {
128+
recurseDirectory(file);
129+
} else {
130+
if (!file.getName().endsWith(".dat")) {
131+
continue;
132+
}
133+
EncodingTester tester = new EncodingTester(
134+
new FileInputStream(file.getPath().toString()));
135+
tester.runTests();
136+
}
137+
}
138+
}
139+
}
140+
110141
/**
111142
* @param args
112-
* @throws SAXException
113-
* @throws IOException
143+
* @throws Throwable
114144
*/
115-
public static void main(String[] args) throws IOException, SAXException {
145+
public static void main(String[] args) throws Throwable {
116146
for (int i = 0; i < args.length; i++) {
117-
EncodingTester tester = new EncodingTester(new FileInputStream(
118-
args[i]));
119-
tester.runTests();
147+
File file = new File(args[i]);
148+
if (file.isDirectory()) {
149+
EncodingTester tester = new EncodingTester();
150+
tester.recurseDirectory(file);
151+
} else {
152+
if (!file.getName().endsWith(".dat")) {
153+
return;
154+
}
155+
EncodingTester tester = new EncodingTester(
156+
new FileInputStream(file.getPath().toString()));
157+
tester.runTests();
158+
}
120159
}
121160
}
122161

0 commit comments

Comments
 (0)