Skip to content

Commit cd53ca7

Browse files
Make HtmlInputStreamReader sniffing limit settable
This change makes the sniffing limit in HtmlInputStreamReader settable. Without this change, the HtmlInputStreamReader sniffing limit is hardcoded to 1024 bytes — and in the context of testing, that has the effect of limiting HtmlInputStreamReader to only being useful for testing the expected output of the meta prescan. With this change, HtmlInputStreamReader can also be used for testing the results in cases where the expected character encoding is not limited to what can be determined by checking only the first 1024 bytes of the input stream.
1 parent 3f48926 commit cd53ca7

File tree

1 file changed

+25
-12
lines changed

1 file changed

+25
-12
lines changed

src/nu/validator/htmlparser/io/HtmlInputStreamReader.java

+25-12
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright (c) 2007 Henri Sivonen
3-
* Copyright (c) 2013 Mozilla Foundation
3+
* Copyright (c) 2013-2020 Mozilla Foundation
44
*
55
* Permission is hereby granted, free of charge, to any person obtaining a
66
* copy of this software and associated documentation files (the "Software"),
@@ -61,7 +61,7 @@
6161
public final class HtmlInputStreamReader extends Reader implements
6262
ByteReadable, Locator, Locator2 {
6363

64-
private static final int SNIFFING_LIMIT = 1024;
64+
private int sniffingLimit = 1024;
6565

6666
private final InputStream inputStream;
6767

@@ -87,11 +87,9 @@ public final class HtmlInputStreamReader extends Reader implements
8787

8888
private boolean charsetBoundaryPassed = false;
8989

90-
private final byte[] byteArray = new byte[4096]; // Length must be >=
90+
private byte[] byteArray = new byte[4096]; // Length must be >= sniffingLimit
9191

92-
// SNIFFING_LIMIT
93-
94-
private final ByteBuffer byteBuffer = ByteBuffer.wrap(byteArray);
92+
private ByteBuffer byteBuffer = ByteBuffer.wrap(byteArray);
9593

9694
private boolean needToNotifyTokenizer = false;
9795

@@ -112,18 +110,27 @@ public final class HtmlInputStreamReader extends Reader implements
112110
/**
113111
* @param inputStream
114112
* @param errorHandler
115-
* @param locator
113+
* @param tokenizer
114+
* @param driver
115+
* @param heuristics
116+
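* @param sniffingLimit the maximum number of bytes to sniff, or -1 to keep the default of 1024; any other value also resizes the internal byte buffer to that length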
116117
* @throws IOException
117118
* @throws SAXException
118119
*/
119120
public HtmlInputStreamReader(InputStream inputStream,
120121
ErrorHandler errorHandler, Tokenizer tokenizer, Driver driver,
121-
Heuristics heuristics) throws SAXException, IOException {
122+
Heuristics heuristics, int sniffingLimit)
123+
throws SAXException, IOException {
122124
this.inputStream = inputStream;
123125
this.errorHandler = errorHandler;
124126
this.tokenizer = tokenizer;
125127
this.driver = driver;
126128
this.sniffing = true;
129+
if (sniffingLimit != -1) {
130+
this.sniffingLimit = sniffingLimit;
131+
this.byteArray = new byte[sniffingLimit];
132+
this.byteBuffer = ByteBuffer.wrap(byteArray);
133+
}
127134
Encoding encoding = (new BomSniffer(this)).sniff();
128135
if (encoding == null) {
129136
position = 0;
@@ -178,6 +185,12 @@ public HtmlInputStreamReader(InputStream inputStream,
178185
initDecoder();
179186
}
180187

188+
public HtmlInputStreamReader(InputStream inputStream,
189+
ErrorHandler errorHandler, Tokenizer tokenizer, Driver driver,
190+
Heuristics heuristics) throws SAXException, IOException {
191+
this(inputStream, errorHandler, tokenizer, driver, heuristics, -1);
192+
}
193+
181194
/**
182195
*
183196
*/
@@ -237,7 +250,7 @@ public HtmlInputStreamReader(InputStream inputStream,
237250
if (charsetBoundaryPassed) {
238251
readLen = byteArray.length - oldLimit;
239252
} else {
240-
readLen = SNIFFING_LIMIT - oldLimit;
253+
readLen = sniffingLimit - oldLimit;
241254
}
242255
int num = inputStream.read(byteArray, oldLimit, readLen);
243256
if (num == -1) {
@@ -261,7 +274,7 @@ public HtmlInputStreamReader(InputStream inputStream,
261274
} else if (cr == CoderResult.UNDERFLOW) {
262275
int remaining = byteBuffer.remaining();
263276
if (!charsetBoundaryPassed) {
264-
if (bytesRead + remaining >= SNIFFING_LIMIT) {
277+
if (bytesRead + remaining >= sniffingLimit) {
265278
needToNotifyTokenizer = true;
266279
charsetBoundaryPassed = true;
267280
}
@@ -389,12 +402,12 @@ public int readByte() throws IOException {
389402
throw new IllegalStateException(
390403
"readByte() called when not in the sniffing state.");
391404
}
392-
if (position == SNIFFING_LIMIT) {
405+
if (position == sniffingLimit) {
393406
return -1;
394407
} else if (position < limit) {
395408
return byteArray[position++] & 0xFF;
396409
} else {
397-
int num = inputStream.read(byteArray, limit, SNIFFING_LIMIT - limit);
410+
int num = inputStream.read(byteArray, limit, sniffingLimit - limit);
398411
if (num == -1) {
399412
return -1;
400413
} else {

0 commit comments

Comments
 (0)