Skip to content
This repository was archived by the owner on Feb 14, 2024. It is now read-only.

Commit 65b1b81

Browse files
author
tomstrummer
committed
GMOD-91 Added built-in DTD catalog for HTML/XHTML doctypes.
git-svn-id: http://svn.codehaus.org/gmod/httpbuilder/trunk@757 f908d50e-8e36-0410-ac0a-93754bd5008d
1 parent 62dde80 commit 65b1b81

17 files changed

+10816
-24
lines changed

Diff for: pom.xml

+35-16
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,27 @@
5353
<groupId>org.codehaus.groovy</groupId>
5454
<artifactId>groovy</artifactId>
5555
<version>[1.5,)</version>
56+
<exclusions>
57+
<exclusion>
58+
<groupId>org.apache.ant</groupId>
59+
<artifactId>ant</artifactId>
60+
</exclusion>
61+
<exclusion>
62+
<groupId>jline</groupId>
63+
<artifactId>jline</artifactId>
64+
</exclusion>
65+
</exclusions>
5666
</dependency>
5767
<dependency>
5868
<groupId>net.sourceforge.nekohtml</groupId>
5969
<artifactId>nekohtml</artifactId>
6070
<version>1.9.9</version>
6171
</dependency>
72+
<dependency>
73+
<groupId>xml-resolver</groupId>
74+
<artifactId>xml-resolver</artifactId>
75+
<version>1.2</version>
76+
</dependency>
6277
<!-- Not yet used for testing:
6378
<dependency>
6479
<groupId>org.simpleframework</groupId>
@@ -134,21 +149,25 @@
134149
</executions>
135150
</plugin>
136151
<plugin>
137-
<groupId>org.apache.maven.plugins</groupId>
138-
<artifactId>maven-surefire-plugin</artifactId>
139-
<configuration>
140-
<systemProperties>
141-
<property>
142-
<name>twitter.user</name>
143-
<value>${twitter.user}</value>
144-
</property>
145-
<property>
146-
<name>twitter.passwd</name>
147-
<value>${twitter.passwd}</value>
148-
</property>
149-
</systemProperties>
150-
</configuration>
151-
</plugin>
152+
<groupId>org.apache.maven.plugins</groupId>
153+
<artifactId>maven-surefire-plugin</artifactId>
154+
<configuration>
155+
<systemProperties>
156+
<property>
157+
<name>xml.catalog.ignoreMissing</name>
158+
<value>false</value>
159+
</property>
160+
<property>
161+
<name>twitter.user</name>
162+
<value>${twitter.user}</value>
163+
</property>
164+
<property>
165+
<name>twitter.passwd</name>
166+
<value>${twitter.passwd}</value>
167+
</property>
168+
</systemProperties>
169+
</configuration>
170+
</plugin>
152171
<plugin>
153172
<groupId>org.codehaus.mojo</groupId>
154173
<artifactId>cobertura-maven-plugin</artifactId>
@@ -346,4 +365,4 @@
346365
</developer>
347366
</developers>
348367

349-
</project>
368+
</project>

Diff for: src/main/java/groovyx/net/http/ParserRegistry.java

+11-4
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,12 @@
4848
import org.apache.http.HttpResponse;
4949
import org.apache.http.NameValuePair;
5050
import org.apache.http.client.utils.URLEncodedUtils;
51+
import org.apache.xml.resolver.tools.CatalogResolver;
5152
import org.codehaus.groovy.runtime.DefaultGroovyMethods;
5253
import org.codehaus.groovy.runtime.MethodClosure;
53-
import org.cyberneko.html.parsers.SAXParser;
54+
import org.xml.sax.EntityResolver;
5455
import org.xml.sax.SAXException;
56+
import org.xml.sax.XMLReader;
5557

5658

5759
/**
@@ -85,6 +87,8 @@ public class ParserRegistry {
8587

8688
protected final Log log = LogFactory.getLog( getClass() );
8789

90+
EntityResolver catalogResolver = new CatalogResolver();
91+
8892
/**
8993
* Helper method to get the charset from the response. This should be done
9094
* when manually parsing any text response to ensure it is decoded using the
@@ -168,8 +172,9 @@ public Map<String,String> parseForm( HttpResponse resp ) throws IOException {
168172
* @throws SAXException
169173
*/
170174
public GPathResult parseHTML( HttpResponse resp ) throws IOException, SAXException {
171-
return new XmlSlurper( new org.cyberneko.html.parsers.SAXParser() )
172-
.parse( parseText( resp ) );
175+
XMLReader p = new org.cyberneko.html.parsers.SAXParser();
176+
p.setEntityResolver( this.catalogResolver );
177+
return new XmlSlurper( p ).parse( parseText( resp ) );
173178
}
174179

175180
/**
@@ -183,7 +188,9 @@ public GPathResult parseHTML( HttpResponse resp ) throws IOException, SAXExcepti
183188
* @throws ParserConfigurationException
184189
*/
185190
public GPathResult parseXML( HttpResponse resp ) throws IOException, SAXException, ParserConfigurationException {
186-
return new XmlSlurper().parse( parseText( resp ) );
191+
XmlSlurper xml = new XmlSlurper();
192+
xml.setEntityResolver( this.catalogResolver );
193+
return xml.parse( parseText( resp ) );
187194
}
188195

189196
/**

Diff for: src/main/resources/CatalogManager.properties

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# With relative-catalogs=false, the catalog paths listed below are made absolute with respect to the location of this properties file
2+
relative-catalogs false
3+
# Catalog list
4+
catalogs catalog/html.xml
5+
#verbosity 100

Diff for: src/main/resources/catalog/frameset.dtd

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
<!--
2+
This is the HTML 4.01 Frameset DTD, which should be
3+
used for documents with frames. This DTD is identical
4+
to the HTML 4.01 Transitional DTD except for the
5+
content model of the "HTML" element: in frameset
6+
documents, the "FRAMESET" element replaces the "BODY"
7+
element.
8+
9+
Draft: $Date: 1999/12/24 23:37:45 $
10+
11+
Authors:
12+
Dave Raggett <dsr@w3.org>
13+
Arnaud Le Hors <lehors@w3.org>
14+
Ian Jacobs <ij@w3.org>
15+
16+
Further information about HTML 4.01 is available at:
17+
18+
http://www.w3.org/TR/1999/REC-html401-19991224.
19+
-->
20+
<!ENTITY % HTML.Version "-//W3C//DTD HTML 4.01 Frameset//EN"
21+
-- Typical usage:
22+
23+
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN"
24+
"http://www.w3.org/TR/html4/frameset.dtd">
25+
<html>
26+
<head>
27+
...
28+
</head>
29+
<frameset>
30+
...
31+
</frameset>
32+
</html>
33+
-->
34+
35+
<!ENTITY % HTML.Frameset "INCLUDE">
36+
<!ENTITY % HTML4.dtd PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
37+
%HTML4.dtd;

Diff for: src/main/resources/catalog/html.xml

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<?xml version="1.0"?>
2+
<!DOCTYPE catalog PUBLIC "-//OASIS//DTD Entity Resolution XML Catalog V1.0//EN"
3+
"http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd">
4+
5+
<catalog xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog" prefer="public">
6+
7+
<!-- XHTML 1.0 -->
8+
<public publicId="-//W3C//DTD XHTML 1.0 Strict//EN"
9+
uri="xhtml1-strict.dtd" />
10+
<public publicId="-//W3C//DTD XHTML 1.0 Transitional//EN"
11+
uri="xhtml1-transitional.dtd" />
12+
<public publicId="-//W3C//DTD XHTML 1.0 Frameset//EN"
13+
uri="xhtml1-frameset.dtd" />
14+
<rewriteSystem systemIdStartString="http://www.w3.org/TR/xhtml1/DTD/"
15+
rewritePrefix="./"/>
16+
17+
<!-- XHTML 1.1 -->
18+
<public publicId='-//W3C//DTD XHTML 1.1//EN' uri='xhtml11-flat.dtd' />
19+
20+
<!-- HTML 4 -->
21+
<public publicId='-//W3C//DTD HTML 4.01//EN' uri='strict.dtd' />
22+
<public publicId='-//W3C//DTD HTML 4.01 Transitional//EN' uri='loose.dtd' />
23+
<public publicId='-//W3C//DTD HTML 4.01 Frameset//EN' uri='frameset.dtd' />
24+
25+
</catalog>

0 commit comments

Comments
 (0)