1
1
/*
2
2
* Copyright (c) 2007 Henri Sivonen
3
- * Copyright (c) 2013 Mozilla Foundation
3
+ * Copyright (c) 2013-2020 Mozilla Foundation
4
4
*
5
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
6
* copy of this software and associated documentation files (the "Software"),
61
61
public final class HtmlInputStreamReader extends Reader implements
62
62
ByteReadable , Locator , Locator2 {
63
63
64
- private static final int SNIFFING_LIMIT = 1024 ;
64
+ private int sniffingLimit = 1024 ;
65
65
66
66
private final InputStream inputStream ;
67
67
@@ -87,11 +87,9 @@ public final class HtmlInputStreamReader extends Reader implements
87
87
88
88
private boolean charsetBoundaryPassed = false ;
89
89
90
- private final byte [] byteArray = new byte [4096 ]; // Length must be >=
90
+ private byte [] byteArray = new byte [4096 ]; // Length must be >= sniffingLimit
91
91
92
- // SNIFFING_LIMIT
93
-
94
- private final ByteBuffer byteBuffer = ByteBuffer .wrap (byteArray );
92
+ private ByteBuffer byteBuffer = ByteBuffer .wrap (byteArray );
95
93
96
94
private boolean needToNotifyTokenizer = false ;
97
95
@@ -112,18 +110,27 @@ public final class HtmlInputStreamReader extends Reader implements
112
110
/**
113
111
* @param inputStream
114
112
* @param errorHandler
115
- * @param locator
113
+ * @param tokenizer
114
+ * @param driver
115
+ * @param heuristics
116
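+ * @param sniffingLimit number of bytes to read while sniffing the character encoding; also used as the length of the internal byte buffer. Pass -1 to keep the default limit (1024) and the default buffer.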
116
117
* @throws IOException
117
118
* @throws SAXException
118
119
*/
119
120
public HtmlInputStreamReader (InputStream inputStream ,
120
121
ErrorHandler errorHandler , Tokenizer tokenizer , Driver driver ,
121
- Heuristics heuristics ) throws SAXException , IOException {
122
+ Heuristics heuristics , int sniffingLimit )
123
+ throws SAXException , IOException {
122
124
this .inputStream = inputStream ;
123
125
this .errorHandler = errorHandler ;
124
126
this .tokenizer = tokenizer ;
125
127
this .driver = driver ;
126
128
this .sniffing = true ;
129
+ if (sniffingLimit != -1 ) {
130
+ this .sniffingLimit = sniffingLimit ;
131
+ this .byteArray = new byte [sniffingLimit ];
132
+ this .byteBuffer = ByteBuffer .wrap (byteArray );
133
+ }
127
134
Encoding encoding = (new BomSniffer (this )).sniff ();
128
135
if (encoding == null ) {
129
136
position = 0 ;
@@ -178,6 +185,12 @@ public HtmlInputStreamReader(InputStream inputStream,
178
185
initDecoder ();
179
186
}
180
187
188
+ public HtmlInputStreamReader (InputStream inputStream ,
189
+ ErrorHandler errorHandler , Tokenizer tokenizer , Driver driver ,
190
+ Heuristics heuristics ) throws SAXException , IOException {
191
+ this (inputStream , errorHandler , tokenizer , driver , heuristics , -1 );
192
+ }
193
+
181
194
/**
182
195
*
183
196
*/
@@ -237,7 +250,7 @@ public HtmlInputStreamReader(InputStream inputStream,
237
250
if (charsetBoundaryPassed ) {
238
251
readLen = byteArray .length - oldLimit ;
239
252
} else {
240
- readLen = SNIFFING_LIMIT - oldLimit ;
253
+ readLen = sniffingLimit - oldLimit ;
241
254
}
242
255
int num = inputStream .read (byteArray , oldLimit , readLen );
243
256
if (num == -1 ) {
@@ -261,7 +274,7 @@ public HtmlInputStreamReader(InputStream inputStream,
261
274
} else if (cr == CoderResult .UNDERFLOW ) {
262
275
int remaining = byteBuffer .remaining ();
263
276
if (!charsetBoundaryPassed ) {
264
- if (bytesRead + remaining >= SNIFFING_LIMIT ) {
277
+ if (bytesRead + remaining >= sniffingLimit ) {
265
278
needToNotifyTokenizer = true ;
266
279
charsetBoundaryPassed = true ;
267
280
}
@@ -389,12 +402,12 @@ public int readByte() throws IOException {
389
402
throw new IllegalStateException (
390
403
"readByte() called when not in the sniffing state." );
391
404
}
392
- if (position == SNIFFING_LIMIT ) {
405
+ if (position == sniffingLimit ) {
393
406
return -1 ;
394
407
} else if (position < limit ) {
395
408
return byteArray [position ++] & 0xFF ;
396
409
} else {
397
- int num = inputStream .read (byteArray , limit , SNIFFING_LIMIT - limit );
410
+ int num = inputStream .read (byteArray , limit , sniffingLimit - limit );
398
411
if (num == -1 ) {
399
412
return -1 ;
400
413
} else {
0 commit comments