Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ICU-22888 Enhance XML Processor Configuration for Tools Directory in ICU4J Package #3243

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.XMLConstants;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
Expand Down Expand Up @@ -142,7 +143,16 @@ public static void main(String... args) {
private static Map<String, ReportEntry> parseReport(File reportXmlFile) {
try {
Map<String, ReportEntry> entries = new TreeMap<String, ReportEntry>();
DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
// Securely configure DocumentBuilderFactory
DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
docFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
docFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
docFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
docFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
docFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
docFactory.setNamespaceAware(true);

DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
docBuilder.setEntityResolver(new EntityResolver() {
// Ignores JaCoCo report DTD
public InputSource resolveEntity(String publicId, String systemId) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;

Expand Down Expand Up @@ -405,133 +406,148 @@ private boolean isXmlLang (String lang){

private void createRB(String xmlfileName) {

String urls = filenameToURL(xmlfileName);
DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
dfactory.setNamespaceAware(true);
Document doc = null;
try {
String urls = filenameToURL(xmlfileName);
DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();

if (xliff10) {
dfactory.setValidating(true);
resources = OLD_RESOURCES;
} else {
try {
SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
Schema schema = schemaFactory.newSchema();

dfactory.setSchema(schema);
} catch (SAXException e) {
System.err.println("Can't create the schema...");
System.exit(-1);
} catch (UnsupportedOperationException e) {
System.err.println("ERROR:\tOne of the schema operations is not supported with this JVM.");
System.err.println("\tIf you are using GNU Java, you should try using the latest Sun JVM.");
System.err.println("\n*Here is the stack trace:");
e.printStackTrace();
System.exit(-1);
}
// Set secure processing features to avoid XXE attacks
Copy link
Contributor

@mihnita mihnita Nov 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please wrap only this new configuration section in a try - catch block?

It is more consistent with what the rest of the method does (there are try blocks starting at lines 433 and 470).
And we don't end up with nested try blocks, and everything indented.

Something like this:

Before:

        DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
        dfactory.setNamespaceAware(true);
        Document doc = null;

After:

        DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();

        try {
            // Set secure processing features to avoid XXE attacks
            dfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
            dfactory.setNamespaceAware(true);

            // Disable access to external DTDs and entities to mitigate XXE attacks
            dfactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
            dfactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
            dfactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
            dfactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        } catch (ParserConfigurationException e) {
            System.err.println("ERROR: Parser configuration error: " + e.getMessage());
            System.exit(-1);
        }        

        Document doc = null;

Thank you,
Mihai

dfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
dfactory.setNamespaceAware(true);

resources = NEW_RESOURCES;
}
// Disable access to external DTDs and entities to mitigate XXE attacks
dfactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
dfactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
dfactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
dfactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

ErrorHandler nullHandler = new ErrorHandler() {
@Override
public void warning(SAXParseException e) throws SAXException {
Document doc = null;

}
@Override
public void error(SAXParseException e) throws SAXException {
System.err.println("The XLIFF document is invalid, please check it first: ");
System.err.println("Line "+e.getLineNumber()+", Column "+e.getColumnNumber());
System.err.println("Error: " + e.getMessage());
System.exit(-1);
}
@Override
public void fatalError(SAXParseException e) throws SAXException {
throw e;
}
};
if (xliff10) {
dfactory.setValidating(true);
resources = OLD_RESOURCES;
} else {
try {
SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
Schema schema = schemaFactory.newSchema();

try {
DocumentBuilder docBuilder = dfactory.newDocumentBuilder();
docBuilder.setErrorHandler(nullHandler);
doc = docBuilder.parse(new InputSource(urls));
dfactory.setSchema(schema);
} catch (SAXException e) {
System.err.println("Can't create the schema...");
System.exit(-1);
} catch (UnsupportedOperationException e) {
System.err.println("ERROR:\tOne of the schema operations is not supported with this JVM.");
System.err.println("\tIf you are using GNU Java, you should try using the latest Sun JVM.");
System.err.println("\n*Here is the stack trace:");
e.printStackTrace();
System.exit(-1);
}

NodeList nlist = doc.getElementsByTagName(FILES);
if(nlist.getLength()>1){
throw new RuntimeException("Multiple <file> elements in the XLIFF file not supported.");
resources = NEW_RESOURCES;
}

// get the value of source-language attribute
String sourceLang = getLanguageName(doc, SOURCELANGUAGE);
// get the value of target-language attribute
String targetLang = getLanguageName(doc, TARGETLANGUAGE);

// get the list of <source> elements
NodeList sourceList = doc.getElementsByTagName(SOURCE);
// get the list of target elements
NodeList targetList = doc.getElementsByTagName(TARGET);

// check if the xliff file has source elements in multiple languages
// the source-language value should be the same as xml:lang values
// of all the source elements.
String xmlSrcLang = checkLangAttribute(sourceList, sourceLang);

// check if the xliff file has target elements in multiple languages
// the target-language value should be the same as xml:lang values
// of all the target elements.
String xmlTargetLang = checkLangAttribute(targetList, targetLang);

// Create the Resource linked list which will hold the
// source and target bundles after parsing
Resource[] set = new Resource[2];
set[0] = new ResourceTable();
set[1] = new ResourceTable();
ErrorHandler nullHandler = new ErrorHandler() {
@Override
public void warning(SAXParseException e) throws SAXException {

// lenient extraction of source language
if(makeSourceRoot == true){
set[0].name = ROOT;
}else if(sourceLang!=null){
set[0].name = sourceLang.replace('-','_');
}else{
if(xmlSrcLang != null){
set[0].name = xmlSrcLang.replace('-','_');
}else{
System.err.println("ERROR: Could not figure out the source language of the file. Please check the XLIFF file.");
}
@Override
public void error(SAXParseException e) throws SAXException {
System.err.println("The XLIFF document is invalid, please check it first: ");
System.err.println("Line "+e.getLineNumber()+", Column "+e.getColumnNumber());
System.err.println("Error: " + e.getMessage());
System.exit(-1);
}
}
@Override
public void fatalError(SAXParseException e) throws SAXException {
throw e;
}
};

// lenient extraction of the target language
if(targetLang!=null){
set[1].name = targetLang.replace('-','_');
}else{
if(xmlTargetLang!=null){
set[1].name = xmlTargetLang.replace('-','_');
try {
DocumentBuilder docBuilder = dfactory.newDocumentBuilder();
docBuilder.setErrorHandler(nullHandler);
doc = docBuilder.parse(new InputSource(urls));

NodeList nlist = doc.getElementsByTagName(FILES);
if(nlist.getLength()>1){
throw new RuntimeException("Multiple <file> elements in the XLIFF file not supported.");
}

// get the value of source-language attribute
String sourceLang = getLanguageName(doc, SOURCELANGUAGE);
// get the value of target-language attribute
String targetLang = getLanguageName(doc, TARGETLANGUAGE);

// get the list of <source> elements
NodeList sourceList = doc.getElementsByTagName(SOURCE);
// get the list of target elements
NodeList targetList = doc.getElementsByTagName(TARGET);

// check if the xliff file has source elements in multiple languages
// the source-language value should be the same as xml:lang values
// of all the source elements.
String xmlSrcLang = checkLangAttribute(sourceList, sourceLang);

// check if the xliff file has target elements in multiple languages
// the target-language value should be the same as xml:lang values
// of all the target elements.
String xmlTargetLang = checkLangAttribute(targetList, targetLang);

// Create the Resource linked list which will hold the
// source and target bundles after parsing
Resource[] set = new Resource[2];
set[0] = new ResourceTable();
set[1] = new ResourceTable();

// lenient extraction of source language
if(makeSourceRoot == true){
set[0].name = ROOT;
}else if(sourceLang!=null){
set[0].name = sourceLang.replace('-','_');
}else{
System.err.println("WARNING: Could not figure out the target language of the file. Producing source bundle only.");
if(xmlSrcLang != null){
set[0].name = xmlSrcLang.replace('-','_');
}else{
System.err.println("ERROR: Could not figure out the source language of the file. Please check the XLIFF file.");
System.exit(-1);
}
}
}

// lenient extraction of the target language
if(targetLang!=null){
set[1].name = targetLang.replace('-','_');
}else{
if(xmlTargetLang!=null){
set[1].name = xmlTargetLang.replace('-','_');
}else{
System.err.println("WARNING: Could not figure out the target language of the file. Producing source bundle only.");
}
}

// check if any <alt-trans> elements are present
NodeList altTrans = doc.getElementsByTagName(ALTTRANS);
if(altTrans.getLength()>0){
System.err.println("WARNING: <alt-trans> elements in found. Ignoring all <alt-trans> elements.");
}

// get all the group elements
NodeList list = doc.getElementsByTagName(GROUPS);
// check if any <alt-trans> elements are present
NodeList altTrans = doc.getElementsByTagName(ALTTRANS);
if(altTrans.getLength()>0){
System.err.println("WARNING: <alt-trans> elements in found. Ignoring all <alt-trans> elements.");
}

// process the first group element. The first group element is
// the base table that must be parsed recursively
parseTable(list.item(0), set);
// get all the group elements
NodeList list = doc.getElementsByTagName(GROUPS);

// write out the bundle
writeResource(set, xmlfileName);
}
catch (Throwable se) {
System.err.println("ERROR: " + se.toString());
System.exit(1);
// process the first group element. The first group element is
// the base table that must be parsed recursively
parseTable(list.item(0), set);

// write out the bundle
writeResource(set, xmlfileName);
}
catch (Throwable se) {
System.err.println("ERROR: " + se.toString());
System.exit(1);
}
} catch (ParserConfigurationException e) {
System.err.println("ERROR: Parser configuration error: " + e.getMessage());
System.exit(-1);
}
}

Expand Down