diff --git a/.github/workflows/master-build.yml b/.github/workflows/master-build.yml index 76d98db4eb..00cf135e4c 100644 --- a/.github/workflows/master-build.yml +++ b/.github/workflows/master-build.yml @@ -22,13 +22,9 @@ on: branches: [master] # Java Version Strategy: -# - BUILD: Requires Java 17+ (JUnit 6 dependency) -# - RUNTIME: Supports Java 11+ (javac.version=11 produces Java 11 bytecode) -# -# The 'build' job verifies bytecode compilation for both Java 11 and 17 targets. -# The 'runtime-java11' job verifies the built artifacts actually run on Java 11. -# The 'tests' job runs on JDK 17 (required by JUnit 6) with the default -# javac.version=11 bytecode target for backward compatibility. +# - BUILD and RUNTIME: Java 17 only +# - The 'build' job compiles with javac.version=17. +# - The 'tests' job runs on JDK 17. jobs: javadoc: @@ -85,16 +81,9 @@ jobs: if: ${{ env.UNKNOWN_LICENSES != '0 Unknown Licenses' }} run: exit 1 - # Build verification with Java bytecode target matrix - # Verifies bytecode compatibility for both Java 11 and Java 17 targets + # Build verification (Java 17 only) build: - strategy: - fail-fast: false - matrix: - javac-version: ['11', '17'] - os: [ubuntu-latest] - runs-on: ${{ matrix.os }} - name: build (javac.version=${{ matrix.javac-version }}) + runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 - name: Set up JDK 17 @@ -109,16 +98,13 @@ jobs: key: ${{ runner.os }}-ivy-${{ hashFiles('ivy/ivy.xml', 'src/plugin/**/ivy.xml') }} restore-keys: | ${{ runner.os }}-ivy- - - name: Build with javac.version=${{ matrix.javac-version }} - run: ant clean runtime -Djavac.version=${{ matrix.javac-version }} -buildfile build.xml + - name: Build with Java 17 + run: ant clean runtime -Djavac.version=17 -buildfile build.xml - name: Verify bytecode version run: | - # Extract and verify the bytecode version of compiled classes - # Java 11 = major version 55, Java 17 = major version 61 - EXPECTED_VERSION=${{ matrix.javac-version == '11' && '55' || '61' }} 
- echo "Expected major version: $EXPECTED_VERSION (Java ${{ matrix.javac-version }})" - - # Find a real class file (exclude package-info.class which may have different version) + # Java 17 = major version 61 + EXPECTED_VERSION=61 + echo "Expected major version: $EXPECTED_VERSION (Java 17)" cd build/classes CLASS_FILE=$(find . -name "*.class" ! -name "package-info.class" | head -1) if [ -n "$CLASS_FILE" ]; then @@ -135,44 +121,7 @@ jobs: exit 1 fi - # Verify runtime compatibility on Java 11 - # This ensures the built artifacts can actually run on Java 11 - runtime-java11: - needs: build - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v5 - - name: Set up JDK 17 for building - uses: actions/setup-java@v5 - with: - java-version: '17' - distribution: 'temurin' - - name: Cache Ivy dependencies - uses: actions/cache@v4 - with: - path: ~/.ivy2/cache - key: ${{ runner.os }}-ivy-${{ hashFiles('ivy/ivy.xml', 'src/plugin/**/ivy.xml') }} - restore-keys: | - ${{ runner.os }}-ivy- - - name: Build with Java 11 target - run: ant clean runtime -Djavac.version=11 -buildfile build.xml - - name: Set up JDK 11 for runtime verification - uses: actions/setup-java@v5 - with: - java-version: '11' - distribution: 'temurin' - - name: Verify runtime on Java 11 - run: | - echo "Verifying Nutch can run on Java 11..." 
- java -version - cd runtime/local - # Actually load Java classes by running showproperties - # This invokes org.apache.nutch.tools.ShowProperties and verifies the JAR loads - bin/nutch showproperties | head -20 - echo "Java 11 runtime verification complete" - - # Tests run on JDK 17 (required by JUnit 6) with default javac.version=11 - # Java 11 runtime compatibility is verified by the runtime-java11 job + # Tests run on JDK 17 tests: strategy: fail-fast: false @@ -212,16 +161,19 @@ jobs: - '.github/workflows/*' # run if the build configuration or both 'core' and 'plugins' files were changed - name: test all + id: build_all if: ${{ steps.filter.outputs.buildconf == 'true' || ( steps.filter.outputs.core == 'true' && steps.filter.outputs.plugins == 'true' ) }} - run: ant clean test -buildfile build.xml + run: set -o pipefail; ant clean test -buildfile build.xml | tee build.log # run only if 'core' files were changed - name: test core + id: build_core if: ${{ steps.filter.outputs.core == 'true' && steps.filter.outputs.plugins == 'false' && steps.filter.outputs.buildconf == 'false' }} - run: ant clean test-core -buildfile build.xml + run: set -o pipefail; ant clean test-core -buildfile build.xml | tee build.log # run only if 'plugins' files were changed - name: test plugins + id: build_plugins if: ${{ steps.filter.outputs.plugins == 'true' && steps.filter.outputs.core == 'false' && steps.filter.outputs.buildconf == 'false' }} - run: ant clean test-plugins -buildfile build.xml + run: set -o pipefail; ant clean test-plugins -buildfile build.xml | tee build.log # run indexer integration tests when indexer plugin files change (Docker required, ubuntu-latest only) - name: test indexer integration if: ${{ steps.filter.outputs.indexer_plugins == 'true' && matrix.os == 'ubuntu-latest' }} @@ -237,6 +189,32 @@ jobs: else echo "has_results=false" >> $GITHUB_OUTPUT fi + # check for deprecation warnings in build output + # Scenario 1: Approved deprecations are allowlisted and do not fail the build. 
+ # Scenario 2: Any other deprecation in the Nutch codebase fails the build. + - name: Check for deprecation warnings + if: always() + run: | + if [ ! -f build.log ]; then + echo "⚠️ build.log not found, skipping deprecation check." + exit 0 + fi + deprecations=$(grep -iE "warning: \[deprecation\]" build.log || true) + if [ -z "$deprecations" ]; then + echo "✅ No Java deprecation warnings found." + exit 0 + fi + # Allowlist: deprecated Nutch classes (e.g. SpellCheckedMetadata, future release), test mocks implementing deprecated Hadoop API, + # and classes using commons-cli Option.Builder.build() / HelpFormatter (deprecated in newer commons-cli; we align with Hadoop 3.4.2 / 1.4). + APPROVED_PATTERN="SpellCheckedMetadata\.java|CrawlDBTestUtil\.java|CrawlDbUpdateUtil\.java|WebGraph\.java|ScoreUpdater\.java|NodeReader\.java|NodeDumper\.java|LinkRank\.java|LinkDumper\.java|ResolveUrls\.java|NutchServer\.java|CrawlCompletionStats\.java|FileDumper\.java|CommonCrawlDataDumper\.java|MimeTypeIndexingFilter\.java" + unapproved=$(echo "$deprecations" | grep -v -E "$APPROVED_PATTERN" || true) + if [ -n "$unapproved" ]; then + echo "❌ Unapproved Java deprecation warnings detected! Failing the build." + echo "$unapproved" + exit 1 + fi + echo "✅ Deprecation warnings only in approved deprecated code." 
+ exit 0 - name: Upload Test Report uses: actions/upload-artifact@v4 if: always() && matrix.os == 'ubuntu-latest' && steps.check_tests.outputs.has_results == 'true' @@ -246,6 +224,7 @@ jobs: ./build/test/TEST-*.xml ./build/**/test/TEST-*.xml retention-days: 1 + overwrite: true - name: Upload Coverage Data uses: actions/upload-artifact@v4 if: always() && matrix.os == 'ubuntu-latest' diff --git a/README.md b/README.md index fa68816042..ab4db798dd 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ Apache Nutch README [![master pull request ci](https://github.com/apache/nutch/actions/workflows/master-build.yml/badge.svg)](https://github.com/apache/nutch/actions/workflows/master-build.yml) [![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=apache_nutch&metric=alert_status)](https://sonarcloud.io/summary/new_code?id=apache_nutch) +[![Smoke Tests](https://ci-builds.apache.org/job/Nutch/job/Nutch-Smoke-Test-Single-Node-Hadoop-Cluster/badge/icon?style=plastic&subject=Smoke%20Tests)](https://ci-builds.apache.org/job/Nutch/job/Nutch-Smoke-Test-Single-Node-Hadoop-Cluster/) diff --git a/build.xml b/build.xml index 2dffdb7699..11e4a09965 100644 --- a/build.xml +++ b/build.xml @@ -96,7 +96,12 @@ - + @@ -141,15 +146,9 @@ + destdir="${build.classes}"> @@ -470,15 +469,9 @@ + destdir="${test.build.classes}"> diff --git a/default.properties b/default.properties index e0fde46d84..6e0e4dd125 100644 --- a/default.properties +++ b/default.properties @@ -62,11 +62,8 @@ javac.optimize=on javac.deprecation=on # Java bytecode target version for compiled classes. -# Set to 11 for backward-compatible runtime (works on Java 11+). -# Note: Building and running tests requires Java 17+ (JUnit 6 requirement), -# but the compiled artifacts will run on Java 11+. -# Override with: ant -Djavac.version=17 to target Java 17 bytecode. 
-javac.version=11 +# Project requires Java 17+ for build and runtime +javac.version=17 runtime.dir=./runtime runtime.deploy=${runtime.dir}/deploy diff --git a/ivy/ivy.xml b/ivy/ivy.xml index b6b8f67a9d..906f67d35e 100644 --- a/ivy/ivy.xml +++ b/ivy/ivy.xml @@ -55,6 +55,9 @@ + + + diff --git a/sonar-project.properties b/sonar-project.properties index c8d7c80249..897c5caedd 100644 --- a/sonar-project.properties +++ b/sonar-project.properties @@ -24,10 +24,11 @@ sonar.links.scm=https://github.com/apache/nutch sonar.links.issue=https://issues.apache.org/jira/projects/NUTCH/issues sonar.links.ci=https://github.com/apache/nutch/actions -sonar.sources=src/java,src/plugin +sonar.sources=src/java,src/plugin,src/bin,docker,conf sonar.tests=src/test,src/plugin sonar.test.inclusions=**/src/test/**/*.java,**/Test*.java,**/*IT.java -sonar.exclusions=**/build.xml,**/build-ivy.xml,**/build-plugin.xml,**/ivy.xml,**/plugin.xml +sonar.exclusions=**/build.xml,**/build-ivy.xml,**/build-plugin.xml,**/ivy.xml,\ + **/sample/**,**/data/**,src/testresources/**,src/java/overview.html sonar.source.encoding=UTF-8 sonar.java.source=17 diff --git a/src/java/org/apache/nutch/crawl/CrawlDbReader.java b/src/java/org/apache/nutch/crawl/CrawlDbReader.java index 03cf0fbd39..a0152a639f 100644 --- a/src/java/org/apache/nutch/crawl/CrawlDbReader.java +++ b/src/java/org/apache/nutch/crawl/CrawlDbReader.java @@ -16,47 +16,27 @@ */ package org.apache.nutch.crawl; -import java.io.Closeable; -import java.io.DataOutputStream; -import java.io.File; -import java.io.IOException; -import java.lang.invoke.MethodHandles; -import java.net.MalformedURLException; -import java.net.URL; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Random; -import java.util.TreeMap; -import java.util.regex.Matcher; -import 
java.util.regex.Pattern; - +import com.fasterxml.jackson.core.JsonGenerationException; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.json.JsonWriteFeature; +import com.fasterxml.jackson.core.util.MinimalPrettyPrinter; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectWriter; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.module.SimpleModule; +import com.tdunning.math.stats.MergingDigest; +import com.tdunning.math.stats.TDigest; import org.apache.commons.jexl3.JexlScript; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.MapFile; -import org.apache.hadoop.io.SequenceFile; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.*; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; @@ -67,26 +47,22 @@ import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.ToolRunner; -import org.apache.nutch.util.AbstractChecker; -import 
org.apache.nutch.util.JexlUtil; -import org.apache.nutch.util.NutchConfiguration; -import org.apache.nutch.util.NutchJob; -import org.apache.nutch.util.SegmentReaderUtil; -import org.apache.nutch.util.StringUtil; -import org.apache.nutch.util.TimingUtil; +import org.apache.nutch.util.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.core.JsonGenerationException; -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.core.util.MinimalPrettyPrinter; -import com.fasterxml.jackson.databind.JsonSerializer; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.ObjectWriter; -import com.fasterxml.jackson.databind.SerializerProvider; -import com.fasterxml.jackson.databind.module.SimpleModule; -import com.tdunning.math.stats.MergingDigest; -import com.tdunning.math.stats.TDigest; +import java.io.Closeable; +import java.io.DataOutputStream; +import java.io.File; +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.ByteBuffer; +import java.util.*; +import java.util.Map.Entry; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * Read utility for the CrawlDB. 
@@ -263,7 +239,7 @@ protected static class LineRecordWriter public LineRecordWriter(DataOutputStream out) { this.out = out; jsonMapper.getFactory() - .configure(JsonGenerator.Feature.ESCAPE_NON_ASCII, true); + .configure(JsonWriteFeature.ESCAPE_NON_ASCII.mappedFeature(), true); SimpleModule module = new SimpleModule(); module.addSerializer(Writable.class, new WritableSerializer()); jsonMapper.registerModule(module); diff --git a/src/java/org/apache/nutch/crawl/Generator.java b/src/java/org/apache/nutch/crawl/Generator.java index 102ce39b94..e0945feb3d 100644 --- a/src/java/org/apache/nutch/crawl/Generator.java +++ b/src/java/org/apache/nutch/crawl/Generator.java @@ -797,50 +797,6 @@ public Path[] generate(Path dbDir, Path segments, int numLists, long topN, false, 1, null); } - /** - * This is an old signature used for compatibility - does not specify whether - * or not to normalise and set the number of segments to 1 - * - * @param dbDir - * Crawl database directory - * @param segments - * Segments directory - * @param numLists - * Number of fetch lists (partitions) per segment or number of - * fetcher map tasks. (One fetch list partition is fetched in one - * fetcher map task.) - * @param topN - * Number of top URLs to be selected - * @param curTime - * Current time in milliseconds - * @param filter - * whether to apply filtering operation - * @param force - * if true, and the target lockfile exists, consider it valid. If - * false and the target file exists, throw an IOException. - * @deprecated since 1.19 use - * {@link #generate(Path, Path, int, long, long, boolean, boolean, boolean, int, String, String)} - * or - * {@link #generate(Path, Path, int, long, long, boolean, boolean, boolean, int, String)} - * in the instance that no hostdb is available - * @throws IOException - * if an I/O exception occurs. 
- * @see LockUtil#createLockFile(Configuration, Path, boolean) - * @throws InterruptedException - * if a thread is waiting, sleeping, or otherwise occupied, and the - * thread is interrupted, either before or during the activity. - * @throws ClassNotFoundException - * if runtime class(es) are not available - * @return Path to generated segment or null if no entries were selected - **/ - @Deprecated - public Path[] generate(Path dbDir, Path segments, int numLists, long topN, - long curTime, boolean filter, boolean force) - throws IOException, InterruptedException, ClassNotFoundException { - return generate(dbDir, segments, numLists, topN, curTime, filter, true, - force, 1, null); - } - /** * This signature should be used in the instance that no hostdb is available. * Generate fetchlists in one or more segments. Whether to filter URLs or not diff --git a/src/java/org/apache/nutch/indexer/IndexWriter.java b/src/java/org/apache/nutch/indexer/IndexWriter.java index 43d4a48c7e..7ffea27e9e 100644 --- a/src/java/org/apache/nutch/indexer/IndexWriter.java +++ b/src/java/org/apache/nutch/indexer/IndexWriter.java @@ -30,15 +30,6 @@ public interface IndexWriter extends Pluggable, Configurable { */ final static String X_POINT_ID = IndexWriter.class.getName(); - /** - * @param conf Nutch configuration - * @param name target name of the {@link IndexWriter} to be opened - * @throws IOException Some exception thrown by some writer. - * @deprecated use {@link #open(IndexWriterParams)}} instead. - */ - @Deprecated - public void open(Configuration conf, String name) throws IOException; - /** * Initializes the internal variables from a given index writer configuration. 
* diff --git a/src/java/org/apache/nutch/indexer/IndexWriters.java b/src/java/org/apache/nutch/indexer/IndexWriters.java index f8ae8ee866..3db075ff41 100644 --- a/src/java/org/apache/nutch/indexer/IndexWriters.java +++ b/src/java/org/apache/nutch/indexer/IndexWriters.java @@ -211,7 +211,6 @@ private Collection getIndexWriters(NutchDocument doc) { public void open(Configuration conf, String name) throws IOException { for (Map.Entry entry : this.indexWriters .entrySet()) { - entry.getValue().getIndexWriter().open(conf, name); entry.getValue().getIndexWriter() .open(entry.getValue().getIndexWriterConfig().getParams()); } diff --git a/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java b/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java index 0547f4f43f..fb0a8c7886 100644 --- a/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java +++ b/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java @@ -27,8 +27,13 @@ * A decorator to Metadata that adds spellchecking capabilities to property * names. Currently used spelling vocabulary contains just the HTTP headers from * {@link HttpHeaders} class. Other names are case insensitive. - * + * + * @deprecated Spell-checking HTTP header names is no longer recommended. Use + * {@link CaseInsensitiveMetadata} instead for case-insensitive + * header handling (see NUTCH-3002). This class may be removed in a + * future release. */ +@Deprecated public class SpellCheckedMetadata extends CaseInsensitiveMetadata { /** @@ -101,7 +106,7 @@ private static String normalize(final String str) { *
  • CoNtEntType gives Content-Type
  • *
  • ConTnTtYpe gives Content-Type
  • * - * If no matching with a well-known metadata name is found, then the original + * If no well-known metadata name match is found, then the original * name is returned. * * @param name diff --git a/src/java/org/apache/nutch/net/protocols/ProtocolException.java b/src/java/org/apache/nutch/net/protocols/ProtocolException.java deleted file mode 100644 index 97d1f7fe59..0000000000 --- a/src/java/org/apache/nutch/net/protocols/ProtocolException.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nutch.net.protocols; - -import java.io.Serializable; - -/** - * Base exception for all protocol handlers - * - * @deprecated Use {@link org.apache.nutch.protocol.ProtocolException} instead. 
- */ -@Deprecated -@SuppressWarnings("serial") -public class ProtocolException extends Exception implements Serializable { - - public ProtocolException() { - super(); - } - - public ProtocolException(String message) { - super(message); - } - - public ProtocolException(String message, Throwable cause) { - super(message, cause); - } - - public ProtocolException(Throwable cause) { - super(cause); - } - -} diff --git a/src/java/org/apache/nutch/plugin/Plugin.java b/src/java/org/apache/nutch/plugin/Plugin.java index 3a0fb2e915..4554d7036d 100644 --- a/src/java/org/apache/nutch/plugin/Plugin.java +++ b/src/java/org/apache/nutch/plugin/Plugin.java @@ -88,9 +88,4 @@ public PluginDescriptor getDescriptor() { private void setDescriptor(PluginDescriptor descriptor) { fDescriptor = descriptor; } - - @Override - protected void finalize() throws Throwable { - shutDown(); - } } diff --git a/src/java/org/apache/nutch/plugin/PluginRepository.java b/src/java/org/apache/nutch/plugin/PluginRepository.java index bec0625214..81f3a356f2 100644 --- a/src/java/org/apache/nutch/plugin/PluginRepository.java +++ b/src/java/org/apache/nutch/plugin/PluginRepository.java @@ -18,6 +18,7 @@ import java.lang.invoke.MethodHandles; import java.lang.reflect.Array; +import java.lang.ref.Cleaner; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; @@ -70,6 +71,8 @@ public class PluginRepository implements URLStreamHandlerFactory { protected static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + private static final Cleaner CLEANER = Cleaner.create(); + /** * @param conf a populated {@link Configuration} * @throws RuntimeException if a fatal runtime error is encountered @@ -98,13 +101,22 @@ public PluginRepository(Configuration conf) throws RuntimeException { try { installExtensions(this.fRegisteredPlugins); } catch (PluginRuntimeException e) { - LOG.error("Could not install extensions.", 
e.toString()); + LOG.error("Could not install extensions: {}", e.toString()); throw new RuntimeException(e.getMessage()); } registerURLStreamHandlerFactory(); displayStatus(); + + // Register cleanup action with Cleaner. NOTE(review): the lambda calls an instance method and so captures 'this'; per the java.lang.ref.Cleaner docs the registered object then never becomes phantom reachable and this action never runs - move the cleanup state into a static holder that does not reference 'this'. + CLEANER.register(this, () -> { + try { + shutDownActivatedPlugins(); + } catch (PluginRuntimeException e) { + LOG.error("Error during cleanup of activated plugins", e); + } + }); } /** @@ -313,19 +325,6 @@ public Plugin getPluginInstance(PluginDescriptor pDescriptor) } } - /** - * Attempts to shut down all activated plugins. - * @deprecated - * @see JEP 421: Deprecate Finalization for Removal - * @see java.lang.Object#finalize() - * @deprecated - */ - @Override - @Deprecated - public void finalize() throws Throwable { - shutDownActivatedPlugins(); - } - /** * Shuts down all plugins * diff --git a/src/java/org/apache/nutch/protocol/ProtocolStatus.java b/src/java/org/apache/nutch/protocol/ProtocolStatus.java index 1659fda403..b2bb049fdd 100644 --- a/src/java/org/apache/nutch/protocol/ProtocolStatus.java +++ b/src/java/org/apache/nutch/protocol/ProtocolStatus.java @@ -63,16 +63,10 @@ public class ProtocolStatus implements Writable { public static final int NOTFETCHING = 20; /** Unchanged since the last fetch. */ public static final int NOTMODIFIED = 21; - /** - * Request was refused by protocol plugins, because it would block. The - * expected number of milliseconds to wait before retry may be provided in - * args. - */ - @Deprecated - public static final int WOULDBLOCK = 22; - /** Thread was blocked http.max.delays times during fetching. */ - @Deprecated - public static final int BLOCKED = 23; + // Status code 22: would block (legacy, use literal for compatibility). + private static final int WOULDBLOCK_CODE = 22; + // Status code 23: blocked (legacy, use literal for compatibility). + private static final int BLOCKED_CODE = 23; // Useful static instances for status codes that don't usually require any // additional arguments. 
@@ -92,9 +86,9 @@ public class ProtocolStatus implements Writable { public static final ProtocolStatus STATUS_NOTMODIFIED = new ProtocolStatus( NOTMODIFIED); public static final ProtocolStatus STATUS_WOULDBLOCK = new ProtocolStatus( - WOULDBLOCK); + WOULDBLOCK_CODE); public static final ProtocolStatus STATUS_BLOCKED = new ProtocolStatus( - BLOCKED); + BLOCKED_CODE); private int code; private long lastModified; @@ -116,8 +110,8 @@ public class ProtocolStatus implements Writable { codeToName.put(Integer.valueOf(REDIR_EXCEEDED), "redir_exceeded"); codeToName.put(Integer.valueOf(NOTFETCHING), "notfetching"); codeToName.put(Integer.valueOf(NOTMODIFIED), "notmodified"); - codeToName.put(Integer.valueOf(WOULDBLOCK), "wouldblock"); - codeToName.put(Integer.valueOf(BLOCKED), "blocked"); + codeToName.put(Integer.valueOf(WOULDBLOCK_CODE), "wouldblock"); + codeToName.put(Integer.valueOf(BLOCKED_CODE), "blocked"); } public ProtocolStatus() { @@ -221,7 +215,7 @@ public boolean isSuccess() { public boolean isTransientFailure() { return code == ACCESS_DENIED || code == EXCEPTION || code == REDIR_EXCEEDED - || code == RETRY || code == TEMP_MOVED || code == WOULDBLOCK + || code == RETRY || code == TEMP_MOVED || code == WOULDBLOCK_CODE || code == PROTO_NOT_FOUND; } diff --git a/src/java/org/apache/nutch/protocol/RobotRulesParser.java b/src/java/org/apache/nutch/protocol/RobotRulesParser.java index c5e267ef62..f23b0e6c46 100644 --- a/src/java/org/apache/nutch/protocol/RobotRulesParser.java +++ b/src/java/org/apache/nutch/protocol/RobotRulesParser.java @@ -198,27 +198,6 @@ public boolean isAllowListed(URL url) { return match; } - /** - * Parses the robots content using the {@link SimpleRobotRulesParser} from - * crawler-commons - * - * @param url - * The robots.txt URL - * @param content - * Contents of the robots file in a byte array - * @param contentType - * The content type of the robots file - * @param robotName - * A string containing all the robots agent names used by parser for 
- * matching - * @return BaseRobotRules object - */ - @Deprecated - public BaseRobotRules parseRules(String url, byte[] content, - String contentType, String robotName) { - return robotParser.parseContent(url, content, contentType, robotName); - } - /** * Parses the robots content using the {@link SimpleRobotRulesParser} from * crawler-commons diff --git a/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java b/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java index 52211dcdeb..898495f577 100644 --- a/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java +++ b/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java @@ -27,10 +27,9 @@ import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.lang3.time.StopWatch; import org.slf4j.Logger; @@ -417,24 +416,26 @@ public static void main(String[] args) throws Exception { public int run(String[] args) throws Exception { Options options = new Options(); - OptionBuilder.withArgName("help"); - OptionBuilder.withDescription("show this help message"); - Option helpOpts = OptionBuilder.create("help"); + Option helpOpts = Option.builder("help") + .argName("help") + .desc("show this help message") + .build(); options.addOption(helpOpts); - OptionBuilder.withArgName("webgraphdb"); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("the web graph database to use"); - Option webGraphDbOpts = OptionBuilder.create("webgraphdb"); + Option webGraphDbOpts = Option.builder("webgraphdb") + .argName("webgraphdb") + .hasArg() + .desc("the web graph database to use") + .build(); options.addOption(webGraphDbOpts); - CommandLineParser parser = new GnuParser(); + CommandLineParser parser = new 
DefaultParser(); try { CommandLine line = parser.parse(options, args); if (line.hasOption("help") || !line.hasOption("webgraphdb")) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("LinkDumper", options); + HelpFormatter formatter = HelpFormatter.builder().get(); + formatter.printHelp("LinkDumper", "", options, "", false); return -1; } diff --git a/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java b/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java index 3cafb66749..1fd8e1ac76 100644 --- a/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java +++ b/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java @@ -30,10 +30,9 @@ import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.lang3.time.StopWatch; import org.slf4j.Logger; @@ -723,24 +722,26 @@ public static void main(String[] args) throws Exception { public int run(String[] args) throws Exception { Options options = new Options(); - OptionBuilder.withArgName("help"); - OptionBuilder.withDescription("show this help message"); - Option helpOpts = OptionBuilder.create("help"); + Option helpOpts = Option.builder("help") + .argName("help") + .desc("show this help message") + .build(); options.addOption(helpOpts); - OptionBuilder.withArgName("webgraphdb"); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("the web graph db to use"); - Option webgraphOpts = OptionBuilder.create("webgraphdb"); + Option webgraphOpts = Option.builder("webgraphdb") + .argName("webgraphdb") + .hasArg() + .desc("the web graph db to use") + .build(); options.addOption(webgraphOpts); - CommandLineParser parser = new GnuParser(); + CommandLineParser parser = new DefaultParser(); try { 
CommandLine line = parser.parse(options, args); if (line.hasOption("help") || !line.hasOption("webgraphdb")) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("LinkRank", options); + HelpFormatter formatter = HelpFormatter.builder().get(); + formatter.printHelp("LinkRank", "", options, "", false); return -1; } diff --git a/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java b/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java index a8a8e7fa12..29bc4bb800 100644 --- a/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java +++ b/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java @@ -22,10 +22,9 @@ import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.lang3.time.StopWatch; import org.slf4j.Logger; @@ -373,67 +372,77 @@ public static void main(String[] args) throws Exception { public int run(String[] args) throws Exception { Options options = new Options(); - OptionBuilder.withArgName("help"); - OptionBuilder.withDescription("show this help message"); - Option helpOpts = OptionBuilder.create("help"); + Option helpOpts = Option.builder("help") + .argName("help") + .desc("show this help message") + .build(); options.addOption(helpOpts); - OptionBuilder.withArgName("webgraphdb"); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("the web graph database to use"); - Option webGraphDbOpts = OptionBuilder.create("webgraphdb"); + Option webGraphDbOpts = Option.builder("webgraphdb") + .argName("webgraphdb") + .hasArg() + .desc("the web graph database to use") + .build(); options.addOption(webGraphDbOpts); - OptionBuilder.withArgName("inlinks"); - OptionBuilder.withDescription("show highest inlinks"); - 
Option inlinkOpts = OptionBuilder.create("inlinks"); + Option inlinkOpts = Option.builder("inlinks") + .argName("inlinks") + .desc("show highest inlinks") + .build(); options.addOption(inlinkOpts); - OptionBuilder.withArgName("outlinks"); - OptionBuilder.withDescription("show highest outlinks"); - Option outlinkOpts = OptionBuilder.create("outlinks"); + Option outlinkOpts = Option.builder("outlinks") + .argName("outlinks") + .desc("show highest outlinks") + .build(); options.addOption(outlinkOpts); - OptionBuilder.withArgName("scores"); - OptionBuilder.withDescription("show highest scores"); - Option scoreOpts = OptionBuilder.create("scores"); + Option scoreOpts = Option.builder("scores") + .argName("scores") + .desc("show highest scores") + .build(); options.addOption(scoreOpts); - OptionBuilder.withArgName("topn"); - OptionBuilder.hasOptionalArg(); - OptionBuilder.withDescription("show topN scores"); - Option topNOpts = OptionBuilder.create("topn"); + Option topNOpts = Option.builder("topn") + .argName("topn") + .optionalArg(true) + .desc("show topN scores") + .build(); options.addOption(topNOpts); - OptionBuilder.withArgName("output"); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("the output directory to use"); - Option outputOpts = OptionBuilder.create("output"); + Option outputOpts = Option.builder("output") + .argName("output") + .hasArg() + .desc("the output directory to use") + .build(); options.addOption(outputOpts); - OptionBuilder.withArgName("asEff"); - OptionBuilder - .withDescription("Solr ExternalFileField compatible output format"); - Option effOpts = OptionBuilder.create("asEff"); + Option effOpts = Option.builder("asEff") + .argName("asEff") + .desc("Solr ExternalFileField compatible output format") + .build(); options.addOption(effOpts); - OptionBuilder.hasArgs(2); - OptionBuilder.withDescription("group "); - Option groupOpts = OptionBuilder.create("group"); + Option groupOpts = Option.builder("group") + .hasArgs() + .numberOfArgs(2) 
+ .desc("group ") + .build(); options.addOption(groupOpts); - OptionBuilder.withArgName("asSequenceFile"); - OptionBuilder.withDescription("whether to output as a sequencefile"); - Option sequenceFileOpts = OptionBuilder.create("asSequenceFile"); + Option sequenceFileOpts = Option.builder("asSequenceFile") + .argName("asSequenceFile") + .desc("whether to output as a sequencefile") + .build(); options.addOption(sequenceFileOpts); - CommandLineParser parser = new GnuParser(); + CommandLineParser parser = new DefaultParser(); try { CommandLine line = parser.parse(options, args); if (line.hasOption("help") || !line.hasOption("webgraphdb")) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("NodeDumper", options); + HelpFormatter formatter = HelpFormatter.builder().get(); + formatter.printHelp("NodeDumper", "", options, "", false); return -1; } diff --git a/src/java/org/apache/nutch/scoring/webgraph/NodeReader.java b/src/java/org/apache/nutch/scoring/webgraph/NodeReader.java index d6fd9d05b7..855b7d878a 100644 --- a/src/java/org/apache/nutch/scoring/webgraph/NodeReader.java +++ b/src/java/org/apache/nutch/scoring/webgraph/NodeReader.java @@ -20,10 +20,9 @@ import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; @@ -91,32 +90,35 @@ public void dumpUrl(Path webGraphDb, String url) throws IOException { public static void main(String[] args) throws Exception { Options options = new Options(); - OptionBuilder.withArgName("help"); - OptionBuilder.withDescription("show this help message"); - Option helpOpts = OptionBuilder.create("help"); + Option helpOpts = Option.builder("help") + 
.argName("help") + .desc("show this help message") + .build(); options.addOption(helpOpts); - OptionBuilder.withArgName("webgraphdb"); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("the webgraphdb to use"); - Option webGraphOpts = OptionBuilder.create("webgraphdb"); + Option webGraphOpts = Option.builder("webgraphdb") + .argName("webgraphdb") + .hasArg() + .desc("the webgraphdb to use") + .build(); options.addOption(webGraphOpts); - OptionBuilder.withArgName("url"); - OptionBuilder.hasOptionalArg(); - OptionBuilder.withDescription("the url to dump"); - Option urlOpts = OptionBuilder.create("url"); + Option urlOpts = Option.builder("url") + .argName("url") + .optionalArg(true) + .desc("the url to dump") + .build(); options.addOption(urlOpts); - CommandLineParser parser = new GnuParser(); + CommandLineParser parser = new DefaultParser(); try { // command line must take a webgraphdb and a url CommandLine line = parser.parse(options, args); if (line.hasOption("help") || !line.hasOption("webgraphdb") || !line.hasOption("url")) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("WebGraphReader", options); + HelpFormatter formatter = HelpFormatter.builder().get(); + formatter.printHelp("WebGraphReader", "", options, "", false); return; } diff --git a/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java b/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java index a595d4bf3b..89d61055a9 100644 --- a/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java +++ b/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java @@ -23,10 +23,9 @@ import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import 
org.apache.commons.lang3.time.StopWatch; import org.slf4j.Logger; @@ -229,31 +228,34 @@ public static void main(String[] args) throws Exception { public int run(String[] args) throws Exception { Options options = new Options(); - OptionBuilder.withArgName("help"); - OptionBuilder.withDescription("show this help message"); - Option helpOpts = OptionBuilder.create("help"); + Option helpOpts = Option.builder("help") + .argName("help") + .desc("show this help message") + .build(); options.addOption(helpOpts); - OptionBuilder.withArgName("crawldb"); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("the crawldb to use"); - Option crawlDbOpts = OptionBuilder.create("crawldb"); + Option crawlDbOpts = Option.builder("crawldb") + .argName("crawldb") + .hasArg() + .desc("the crawldb to use") + .build(); options.addOption(crawlDbOpts); - OptionBuilder.withArgName("webgraphdb"); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("the webgraphdb to use"); - Option webGraphOpts = OptionBuilder.create("webgraphdb"); + Option webGraphOpts = Option.builder("webgraphdb") + .argName("webgraphdb") + .hasArg() + .desc("the webgraphdb to use") + .build(); options.addOption(webGraphOpts); - CommandLineParser parser = new GnuParser(); + CommandLineParser parser = new DefaultParser(); try { CommandLine line = parser.parse(options, args); if (line.hasOption("help") || !line.hasOption("webgraphdb") || !line.hasOption("crawldb")) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("ScoreUpdater", options); + HelpFormatter formatter = HelpFormatter.builder().get(); + formatter.printHelp("ScoreUpdater", "", options, "", false); return -1; } diff --git a/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java b/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java index fee0921d0a..49333fec59 100644 --- a/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java +++ b/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java @@ -29,10 +29,9 @@ import 
org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.lang3.time.StopWatch; import org.slf4j.Logger; @@ -751,19 +750,21 @@ public int run(String[] args) throws Exception { "whether to use URLFilters on the URL's in the segment"); // argument options - @SuppressWarnings("static-access") - Option graphOpt = OptionBuilder - .withArgName("webgraphdb") + Option graphOpt = Option.builder("webgraphdb") + .argName("webgraphdb") .hasArg() - .withDescription( - "the web graph database to create (if none exists) or use if one does") - .create("webgraphdb"); - @SuppressWarnings("static-access") - Option segOpt = OptionBuilder.withArgName("segment").hasArgs() - .withDescription("the segment(s) to use").create("segment"); - @SuppressWarnings("static-access") - Option segDirOpt = OptionBuilder.withArgName("segmentDir").hasArgs() - .withDescription("the segment directory to use").create("segmentDir"); + .desc("the web graph database to create (if none exists) or use if one does") + .build(); + Option segOpt = Option.builder("segment") + .argName("segment") + .hasArgs() + .desc("the segment(s) to use") + .build(); + Option segDirOpt = Option.builder("segmentDir") + .argName("segmentDir") + .hasArgs() + .desc("the segment directory to use") + .build(); // create the options Options options = new Options(); @@ -774,13 +775,13 @@ public int run(String[] args) throws Exception { options.addOption(segOpt); options.addOption(segDirOpt); - CommandLineParser parser = new GnuParser(); + CommandLineParser parser = new DefaultParser(); try { CommandLine line = parser.parse(options, args); if (line.hasOption("help") || !line.hasOption("webgraphdb") || (!line.hasOption("segment") && 
!line.hasOption("segmentDir"))) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("WebGraph", options, true); + HelpFormatter formatter = HelpFormatter.builder().get(); + formatter.printHelp("WebGraph", "", options, "", true); return -1; } diff --git a/src/java/org/apache/nutch/service/NutchServer.java b/src/java/org/apache/nutch/service/NutchServer.java index 9468670317..28fc5f9db5 100644 --- a/src/java/org/apache/nutch/service/NutchServer.java +++ b/src/java/org/apache/nutch/service/NutchServer.java @@ -25,13 +25,13 @@ import com.fasterxml.jackson.jaxrs.json.JacksonJaxbJsonProvider; +import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; -import org.apache.commons.cli.PosixParser; -import org.apache.commons.cli.CommandLine; import org.apache.cxf.binding.BindingFactoryManager; import org.apache.cxf.jaxrs.JAXRSBindingFactory; import org.apache.cxf.jaxrs.JAXRSServerFactoryBean; @@ -172,13 +172,13 @@ public long getStarted(){ return started; } - public static void main(String[] args) throws ParseException { - CommandLineParser parser = new PosixParser(); + public static void main(String[] args) throws ParseException, java.io.IOException { + CommandLineParser parser = new DefaultParser(); Options options = createOptions(); CommandLine commandLine = parser.parse(options, args); if (commandLine.hasOption(CMD_HELP)) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("NutchServer", options, true); + HelpFormatter formatter = HelpFormatter.builder().get(); + formatter.printHelp("NutchServer", "", options, "", true); return; } @@ -196,18 +196,24 @@ public static void main(String[] args) throws ParseException { private static Options 
createOptions() { Options options = new Options(); - OptionBuilder.withDescription("Show this help"); - options.addOption(OptionBuilder.create(CMD_HELP)); - - OptionBuilder.withArgName("port"); - OptionBuilder.hasOptionalArg(); - OptionBuilder.withDescription("The port to run the Nutch Server. Default port 8081"); - options.addOption(OptionBuilder.create(CMD_PORT)); - - OptionBuilder.withArgName("host"); - OptionBuilder.hasOptionalArg(); - OptionBuilder.withDescription("The host to bind the Nutch Server to. Default is localhost."); - options.addOption(OptionBuilder.create(CMD_HOST)); + Option helpOpt = Option.builder(CMD_HELP) + .desc("Show this help") + .build(); + options.addOption(helpOpt); + + Option portOpt = Option.builder(CMD_PORT) + .argName("port") + .optionalArg(true) + .desc("The port to run the Nutch Server. Default port 8081") + .build(); + options.addOption(portOpt); + + Option hostOpt = Option.builder(CMD_HOST) + .argName("host") + .optionalArg(true) + .desc("The host to bind the Nutch Server to. 
Default is localhost.") + .build(); + options.addOption(hostOpt); return options; } diff --git a/src/java/org/apache/nutch/service/impl/NutchServerPoolExecutor.java b/src/java/org/apache/nutch/service/impl/NutchServerPoolExecutor.java index 1d1e8175be..fde4c699af 100644 --- a/src/java/org/apache/nutch/service/impl/NutchServerPoolExecutor.java +++ b/src/java/org/apache/nutch/service/impl/NutchServerPoolExecutor.java @@ -18,13 +18,13 @@ import java.util.Collection; import java.util.List; +import java.util.Objects; import java.util.Queue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import org.apache.commons.collections4.CollectionUtils; -import org.apache.commons.lang3.StringUtils; import org.apache.nutch.service.model.response.JobInfo; import com.google.common.collect.Lists; @@ -76,7 +76,7 @@ private void addStatusToHistory(JobWorker worker) { public JobWorker findWorker(String jobId) { synchronized (runningWorkers) { for (JobWorker worker : runningWorkers) { - if (StringUtils.equals(worker.getInfo().getId(), jobId)) { + if (Objects.equals(worker.getInfo().getId(), jobId)) { return worker; } } @@ -120,7 +120,7 @@ private Collection getJobsInfo(Collection workers) { public JobInfo getInfo(String jobId) { for (JobInfo jobInfo : getAllJobs()) { - if (StringUtils.equals(jobId, jobInfo.getId())) { + if (Objects.equals(jobId, jobInfo.getId())) { return jobInfo; } } diff --git a/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java b/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java index d5d5035e89..50491371cb 100644 --- a/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java +++ b/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java @@ -39,10 +39,9 @@ import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.DefaultParser; import 
org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; @@ -573,64 +572,73 @@ public static String reverseUrl(String urlString) { public int run(String[] args) throws Exception { Option helpOpt = new Option("h", "help", false, "show this help message."); // argument options - @SuppressWarnings("static-access") - Option outputOpt = OptionBuilder.withArgName("outputDir").hasArg() - .withDescription( - "output directory (which will be created) to host the CBOR data.") - .create("outputDir"); + Option outputOpt = Option.builder("outputDir") + .argName("outputDir") + .hasArg() + .desc("output directory (which will be created) to host the CBOR data.") + .build(); // WARC format Option warcOpt = new Option("warc", "export to a WARC file"); - @SuppressWarnings("static-access") - Option segOpt = OptionBuilder.withArgName("segment").hasArgs() - .withDescription("the segment or directory containing segments to use").create("segment"); + Option segOpt = Option.builder("segment") + .argName("segment") + .hasArgs() + .desc("the segment or directory containing segments to use") + .build(); // create mimetype and gzip options - @SuppressWarnings("static-access") - Option mimeOpt = OptionBuilder.isRequired(false).withArgName("mimetype") - .hasArgs().withDescription( - "an optional list of mimetypes to dump, excluding all others. 
Defaults to all.") - .create("mimetype"); - @SuppressWarnings("static-access") - Option gzipOpt = OptionBuilder.withArgName("gzip").hasArg(false) - .withDescription( - "an optional flag indicating whether to additionally gzip the data.") - .create("gzip"); - @SuppressWarnings("static-access") - Option keyPrefixOpt = OptionBuilder.withArgName("keyPrefix").hasArg(true) - .withDescription("an optional prefix for key in the output format.") - .create("keyPrefix"); - @SuppressWarnings("static-access") - Option simpleDateFormatOpt = OptionBuilder.withArgName("SimpleDateFormat") - .hasArg(false).withDescription( - "an optional format for timestamp in GMT epoch milliseconds.") - .create("SimpleDateFormat"); - @SuppressWarnings("static-access") - Option epochFilenameOpt = OptionBuilder.withArgName("epochFilename") + Option mimeOpt = Option.builder("mimetype") + .required(false) + .argName("mimetype") + .hasArgs() + .desc("an optional list of mimetypes to dump, excluding all others. Defaults to all.") + .build(); + Option gzipOpt = Option.builder("gzip") + .argName("gzip") .hasArg(false) - .withDescription("an optional format for output filename.") - .create("epochFilename"); - @SuppressWarnings("static-access") - Option jsonArrayOpt = OptionBuilder.withArgName("jsonArray").hasArg(false) - .withDescription("an optional format for JSON output.") - .create("jsonArray"); - @SuppressWarnings("static-access") - Option reverseKeyOpt = OptionBuilder.withArgName("reverseKey").hasArg(false) - .withDescription("an optional format for key value in JSON output.") - .create("reverseKey"); - @SuppressWarnings("static-access") - Option extensionOpt = OptionBuilder.withArgName("extension").hasArg(true) - .withDescription("an optional file extension for output documents.") - .create("extension"); - @SuppressWarnings("static-access") - Option sizeOpt = OptionBuilder.withArgName("warcSize").hasArg(true) - .withType(Number.class) - .withDescription("an optional file size in bytes for the WARC 
file(s)") - .create("warcSize"); - @SuppressWarnings("static-access") - Option linkDbOpt = OptionBuilder.withArgName("linkdb").hasArg(true) - .withDescription("an optional linkdb parameter to include inlinks in dump files") - .isRequired(false) - .create("linkdb"); + .desc("an optional flag indicating whether to additionally gzip the data.") + .build(); + Option keyPrefixOpt = Option.builder("keyPrefix") + .argName("keyPrefix") + .hasArg(true) + .desc("an optional prefix for key in the output format.") + .build(); + Option simpleDateFormatOpt = Option.builder("SimpleDateFormat") + .argName("SimpleDateFormat") + .hasArg(false) + .desc("an optional format for timestamp in GMT epoch milliseconds.") + .build(); + Option epochFilenameOpt = Option.builder("epochFilename") + .argName("epochFilename") + .hasArg(false) + .desc("an optional format for output filename.") + .build(); + Option jsonArrayOpt = Option.builder("jsonArray") + .argName("jsonArray") + .hasArg(false) + .desc("an optional format for JSON output.") + .build(); + Option reverseKeyOpt = Option.builder("reverseKey") + .argName("reverseKey") + .hasArg(false) + .desc("an optional format for key value in JSON output.") + .build(); + Option extensionOpt = Option.builder("extension") + .argName("extension") + .hasArg(true) + .desc("an optional file extension for output documents.") + .build(); + Option sizeOpt = Option.builder("warcSize") + .argName("warcSize") + .hasArg(true) + .type(Number.class) + .desc("an optional file size in bytes for the WARC file(s)") + .build(); + Option linkDbOpt = Option.builder("linkdb") + .argName("linkdb") + .hasArg(true) + .desc("an optional linkdb parameter to include inlinks in dump files") + .required(false) + .build(); // create the options Options options = new Options(); @@ -652,14 +660,13 @@ public int run(String[] args) throws Exception { options.addOption(sizeOpt); options.addOption(linkDbOpt); - CommandLineParser parser = new GnuParser(); + CommandLineParser parser = 
new DefaultParser(); try { CommandLine line = parser.parse(options, args); if (line.hasOption("help") || !line.hasOption("outputDir") || (!line .hasOption("segment"))) { - HelpFormatter formatter = new HelpFormatter(); - formatter - .printHelp(CommonCrawlDataDumper.class.getName(), options, true); + HelpFormatter formatter = HelpFormatter.builder().get(); + formatter.printHelp(CommonCrawlDataDumper.class.getName(), "", options, "", true); return 0; } diff --git a/src/java/org/apache/nutch/tools/CommonCrawlFormatFactory.java b/src/java/org/apache/nutch/tools/CommonCrawlFormatFactory.java index e468532617..78239a8542 100644 --- a/src/java/org/apache/nutch/tools/CommonCrawlFormatFactory.java +++ b/src/java/org/apache/nutch/tools/CommonCrawlFormatFactory.java @@ -19,8 +19,6 @@ import java.io.IOException; import org.apache.hadoop.conf.Configuration; -import org.apache.nutch.metadata.Metadata; -import org.apache.nutch.protocol.Content; /** * Factory class that creates new {@link org.apache.nutch.tools.CommonCrawlFormat CommonCrawlFormat} objects (a.k.a. formatter) that map crawled files to CommonCrawl format. @@ -28,36 +26,6 @@ */ public class CommonCrawlFormatFactory { - /** - * Returns a new instance of a {@link org.apache.nutch.tools.CommonCrawlFormat CommonCrawlFormat} object specifying the type of formatter. - * @param formatType the type of formatter to be created. - * @param url the url. - * @param content the content. - * @param metadata the metadata. - * @param nutchConf the configuration. - * @param config the CommonCrawl output configuration. - * @return the new {@link org.apache.nutch.tools.CommonCrawlFormat CommonCrawlFormat} object. - * @throws IOException If any I/O error occurs. 
- * @deprecated - */ - public static CommonCrawlFormat getCommonCrawlFormat(String formatType, String url, Content content, Metadata metadata, Configuration nutchConf, CommonCrawlConfig config) throws IOException { - if (formatType == null) { - return null; - } - - if (formatType.equalsIgnoreCase("jackson")) { - return new CommonCrawlFormatJackson(url, content, metadata, nutchConf, config); - } - else if (formatType.equalsIgnoreCase("jettinson")) { - return new CommonCrawlFormatJettinson(url, content, metadata, nutchConf, config); - } - else if (formatType.equalsIgnoreCase("simple")) { - return new CommonCrawlFormatSimple(url, content, metadata, nutchConf, config); - } - - return null; - } - // The format should not depend on variable attributes, essentially this // should be one for the full job public static CommonCrawlFormat getCommonCrawlFormat(String formatType, Configuration nutchConf, CommonCrawlConfig config) throws IOException { diff --git a/src/java/org/apache/nutch/tools/FileDumper.java b/src/java/org/apache/nutch/tools/FileDumper.java index 79672d2a05..b50dd5164f 100644 --- a/src/java/org/apache/nutch/tools/FileDumper.java +++ b/src/java/org/apache/nutch/tools/FileDumper.java @@ -27,10 +27,9 @@ import com.google.common.base.Strings; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.io.IOUtils; import org.apache.commons.io.FilenameUtils; @@ -308,41 +307,33 @@ public static void main(String[] args) throws Exception { // boolean options Option helpOpt = new Option("h", "help", false, "show this help message"); // argument options - @SuppressWarnings("static-access") - Option outputOpt = OptionBuilder - .withArgName("outputDir") + Option 
outputOpt = Option.builder("outputDir") + .argName("outputDir") .hasArg() - .withDescription( - "output directory (which will be created) to host the raw data") - .create("outputDir"); - @SuppressWarnings("static-access") - Option segOpt = OptionBuilder.withArgName("segment").hasArgs() - .withDescription("the segment(s) to use").create("segment"); - @SuppressWarnings("static-access") - Option mimeOpt = OptionBuilder - .withArgName("mimetype") + .desc("output directory (which will be created) to host the raw data") + .build(); + Option segOpt = Option.builder("segment") + .argName("segment") .hasArgs() - .withDescription( - "an optional list of mimetypes to dump, excluding all others. Defaults to all.") - .create("mimetype"); - @SuppressWarnings("static-access") - Option mimeStat = OptionBuilder - .withArgName("mimeStats") - .withDescription( - "only display mimetype stats for the segment(s) instead of dumping file.") - .create("mimeStats"); - @SuppressWarnings("static-access") - Option dirStructureOpt = OptionBuilder - .withArgName("flatdir") - .withDescription( - "optionally specify that the output directory should only contain files.") - .create("flatdir"); - @SuppressWarnings("static-access") - Option reverseURLOutput = OptionBuilder - .withArgName("reverseUrlDirs") - .withDescription( - "optionally specify to use reverse URL folders for output structure.") - .create("reverseUrlDirs"); + .desc("the segment(s) to use") + .build(); + Option mimeOpt = Option.builder("mimetype") + .argName("mimetype") + .hasArgs() + .desc("an optional list of mimetypes to dump, excluding all others. 
Defaults to all.") + .build(); + Option mimeStat = Option.builder("mimeStats") + .argName("mimeStats") + .desc("only display mimetype stats for the segment(s) instead of dumping file.") + .build(); + Option dirStructureOpt = Option.builder("flatdir") + .argName("flatdir") + .desc("optionally specify that the output directory should only contain files.") + .build(); + Option reverseURLOutput = Option.builder("reverseUrlDirs") + .argName("reverseUrlDirs") + .desc("optionally specify to use reverse URL folders for output structure.") + .build(); // create the options Options options = new Options(); @@ -354,13 +345,13 @@ public static void main(String[] args) throws Exception { options.addOption(dirStructureOpt); options.addOption(reverseURLOutput); - CommandLineParser parser = new GnuParser(); + CommandLineParser parser = new DefaultParser(); try { CommandLine line = parser.parse(options, args); if (line.hasOption("help") || !line.hasOption("outputDir") || (!line.hasOption("segment"))) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("FileDumper", options, true); + HelpFormatter formatter = HelpFormatter.builder().get(); + formatter.printHelp("FileDumper", "", options, "", true); return; } diff --git a/src/java/org/apache/nutch/tools/ResolveUrls.java b/src/java/org/apache/nutch/tools/ResolveUrls.java index 5bf95b9f30..4c45b8733e 100644 --- a/src/java/org/apache/nutch/tools/ResolveUrls.java +++ b/src/java/org/apache/nutch/tools/ResolveUrls.java @@ -29,10 +29,9 @@ import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -163,33 +162,36 @@ public ResolveUrls(String urlsFile, int numThreads) { * @param 
args the input arguments for this tool. Running * with 'help' will print parameter options. */ - public static void main(String[] args) { + public static void main(String[] args) throws java.io.IOException { Options options = new Options(); - OptionBuilder.withArgName("help"); - OptionBuilder.withDescription("show this help message"); - Option helpOpts = OptionBuilder.create("help"); + Option helpOpts = Option.builder("help") + .argName("help") + .desc("show this help message") + .build(); options.addOption(helpOpts); - OptionBuilder.withArgName("urls"); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("the urls file to check"); - Option urlOpts = OptionBuilder.create("urls"); + Option urlOpts = Option.builder("urls") + .argName("urls") + .hasArg() + .desc("the urls file to check") + .build(); options.addOption(urlOpts); - OptionBuilder.withArgName("numThreads"); - OptionBuilder.hasArgs(); - OptionBuilder.withDescription("the number of threads to use"); - Option numThreadOpts = OptionBuilder.create("numThreads"); + Option numThreadOpts = Option.builder("numThreads") + .argName("numThreads") + .hasArgs() + .desc("the number of threads to use") + .build(); options.addOption(numThreadOpts); - CommandLineParser parser = new GnuParser(); + CommandLineParser parser = new DefaultParser(); try { // parse out common line arguments CommandLine line = parser.parse(options, args); if (line.hasOption("help") || !line.hasOption("urls")) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("ResolveUrls", options); + HelpFormatter formatter = HelpFormatter.builder().get(); + formatter.printHelp("ResolveUrls", "", options, "", false); return; } diff --git a/src/java/org/apache/nutch/util/CrawlCompletionStats.java b/src/java/org/apache/nutch/util/CrawlCompletionStats.java index d806b92954..46d1505b6c 100644 --- a/src/java/org/apache/nutch/util/CrawlCompletionStats.java +++ b/src/java/org/apache/nutch/util/CrawlCompletionStats.java @@ -24,11 +24,10 @@ 
import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.MissingOptionException; import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.lang3.time.StopWatch; import org.apache.hadoop.conf.Configuration; @@ -68,33 +67,29 @@ public class CrawlCompletionStats extends Configured implements Tool { @Override public int run(String[] args) throws Exception { Option helpOpt = new Option("h", "help", false, "Show this message"); - @SuppressWarnings("static-access") - Option inDirs = OptionBuilder - .withArgName("inputDirs") - .isRequired() - .withDescription("Comma separated list of crawldb directories (e.g., \"./crawl1/crawldb,./crawl2/crawldb\")") + Option inDirs = Option.builder("inputDirs") + .argName("inputDirs") + .required() + .desc("Comma separated list of crawldb directories (e.g., \"./crawl1/crawldb,./crawl2/crawldb\")") .hasArgs() - .create("inputDirs"); - @SuppressWarnings("static-access") - Option outDir = OptionBuilder - .withArgName("outputDir") - .isRequired() - .withDescription("Output directory where results should be dumped") + .build(); + Option outDir = Option.builder("outputDir") + .argName("outputDir") + .required() + .desc("Output directory where results should be dumped") .hasArgs() - .create("outputDir"); - @SuppressWarnings("static-access") - Option modeOpt = OptionBuilder - .withArgName("mode") - .isRequired() - .withDescription("Set statistics gathering mode (by 'host' or by 'domain')") + .build(); + Option modeOpt = Option.builder("mode") + .argName("mode") + .required() + .desc("Set statistics gathering mode (by 'host' or by 'domain')") .hasArgs() - .create("mode"); - @SuppressWarnings("static-access") - Option numReducers = OptionBuilder - 
.withArgName("numReducers") - .withDescription("Optional number of reduce jobs to use. Defaults to 1") + .build(); + Option numReducers = Option.builder("numReducers") + .argName("numReducers") + .desc("Optional number of reduce jobs to use. Defaults to 1") .hasArgs() - .create("numReducers"); + .build(); Options options = new Options(); options.addOption(helpOpt); @@ -103,20 +98,20 @@ public int run(String[] args) throws Exception { options.addOption(modeOpt); options.addOption(numReducers); - CommandLineParser parser = new GnuParser(); + CommandLineParser parser = new DefaultParser(); CommandLine cli; try { cli = parser.parse(options, args); } catch (MissingOptionException e) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("CrawlCompletionStats", options, true); + HelpFormatter formatter = HelpFormatter.builder().get(); + formatter.printHelp("CrawlCompletionStats", "", options, "", true); return 1; } if (cli.hasOption("help")) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("CrawlCompletionStats", options, true); + HelpFormatter formatter = HelpFormatter.builder().get(); + formatter.printHelp("CrawlCompletionStats", "", options, "", true); return 1; } diff --git a/src/java/org/apache/nutch/util/NutchJob.java b/src/java/org/apache/nutch/util/NutchJob.java index 25b8945504..8a34a599c6 100644 --- a/src/java/org/apache/nutch/util/NutchJob.java +++ b/src/java/org/apache/nutch/util/NutchJob.java @@ -23,7 +23,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.Job; -import org.apache.nutch.plugin.PluginRepository; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,25 +34,10 @@ public class NutchJob extends Job { private static final String JOB_FAILURE_LOG_FORMAT = "%s job did not succeed, job id: %s, job status: %s, reason: %s"; - /** - * @deprecated, use instead {@link #getInstance(Configuration)} or - * {@link Job#getInstance(Configuration, 
String)}. - * - * @param conf - * configuration for the job - * @param jobName - * name of the job - * @throws IOException - * see {@link Job#Job(Configuration, String)} - */ - @Deprecated - public NutchJob(Configuration conf, String jobName) throws IOException { - super(conf, jobName); - if (conf != null) { - // initialize plugins early to register URL stream handlers to support - // custom protocol implementations - PluginRepository.get(conf); - } + /** Default constructor for subclass or reflection use. */ + @SuppressWarnings("deprecation") + public NutchJob() throws IOException { + super(new Configuration()); } /** diff --git a/src/plugin/index-geoip/src/java/org/apache/nutch/indexer/geoip/GeoIPDocumentCreator.java b/src/plugin/index-geoip/src/java/org/apache/nutch/indexer/geoip/GeoIPDocumentCreator.java index e7da55f95a..dac1818182 100644 --- a/src/plugin/index-geoip/src/java/org/apache/nutch/indexer/geoip/GeoIPDocumentCreator.java +++ b/src/plugin/index-geoip/src/java/org/apache/nutch/indexer/geoip/GeoIPDocumentCreator.java @@ -44,6 +44,7 @@ import com.maxmind.geoip2.record.Postal; import com.maxmind.geoip2.record.RepresentedCountry; import com.maxmind.geoip2.record.Subdivision; +import com.maxmind.geoip2.record.Anonymizer; import com.maxmind.geoip2.record.Traits; /** @@ -114,8 +115,8 @@ public static NutchDocument createDocFromAnonymousIpDb(String serverIp, Optional opt = reader.tryAnonymousIp(InetAddress.getByName(serverIp)); if (opt.isPresent()) { AnonymousIpResponse response = opt.get(); - addIfNotDuplicate(doc, "ip", response.getIpAddress()); - addIfNotNull(doc, ANONYMOUS_NETWORK_ADDRESS, response.getNetwork().toString()); + addIfNotDuplicate(doc, "ip", response.ipAddress()); + addIfNotNull(doc, ANONYMOUS_NETWORK_ADDRESS, response.network().toString()); addIfNotNull(doc, "isAnonymous", response.isAnonymous()); addIfNotNull(doc, "isAnonymousVpn", response.isAnonymousVpn()); addIfNotNull(doc, "isHostingProxy", response.isHostingProvider()); @@ -143,10 
+144,10 @@ public static NutchDocument createDocFromAsnDb(String serverIp, Optional opt = reader.tryAsn(InetAddress.getByName(serverIp)); if (opt.isPresent()) { AsnResponse response = opt.get(); - addIfNotDuplicate(doc, "ip", response.getIpAddress()); - addIfNotNull(doc, ASN_NETWORK_ADDRESS, response.getNetwork().toString()); - addIfNotNull(doc, "autonomousSystemNumber", response.getAutonomousSystemNumber()); - addIfNotNull(doc, "autonomousSystemOrganization", response.getAutonomousSystemOrganization()); + addIfNotDuplicate(doc, "ip", response.ipAddress()); + addIfNotNull(doc, ASN_NETWORK_ADDRESS, response.network().toString()); + addIfNotNull(doc, "autonomousSystemNumber", response.autonomousSystemNumber()); + addIfNotNull(doc, "autonomousSystemOrganization", response.autonomousSystemOrganization()); } else { LOG.debug("'{}' IP address not found in ASN DB.", serverIp); } @@ -176,75 +177,69 @@ public static NutchDocument createDocFromCityDb(String serverIp, } private static NutchDocument processCityDocument(NutchDocument doc, CityResponse response) { - City city = response.getCity(); - addIfNotNull(doc, "cityName", city.getName()); - addIfNotNull(doc, "cityConfidence", city.getConfidence()); - addIfNotNull(doc, "cityGeoNameId", city.getGeoNameId()); - - Continent continent = response.getContinent(); - addIfNotNull(doc, "continentCode", continent.getCode()); - addIfNotNull(doc, "continentGeoNameId", continent.getGeoNameId()); - addIfNotNull(doc, "continentName", continent.getName()); - - Country country = response.getRegisteredCountry(); - addIfNotNull(doc, "countryIsoCode", country.getIsoCode()); - addIfNotNull(doc, "countryName", country.getName()); - addIfNotNull(doc, "countryConfidence", country.getConfidence()); - addIfNotNull(doc, "countryGeoNameId", country.getGeoNameId()); + City city = response.city(); + addIfNotNull(doc, "cityName", city.name()); + addIfNotNull(doc, "cityConfidence", city.confidence()); + addIfNotNull(doc, "cityGeoNameId", 
city.geonameId()); + + Continent continent = response.continent(); + addIfNotNull(doc, "continentCode", continent.code()); + addIfNotNull(doc, "continentGeoNameId", continent.geonameId()); + addIfNotNull(doc, "continentName", continent.name()); + + Country country = response.registeredCountry(); + addIfNotNull(doc, "countryIsoCode", country.isoCode()); + addIfNotNull(doc, "countryName", country.name()); + addIfNotNull(doc, "countryConfidence", country.confidence()); + addIfNotNull(doc, "countryGeoNameId", country.geonameId()); addIfNotNull(doc, "countryInEuropeanUnion", country.isInEuropeanUnion()); - Location location = response.getLocation(); - if (location.getLatitude() != null && location.getLongitude() != null) { - addIfNotNull(doc, "latLon", location.getLatitude() + "," + location.getLongitude()); + Location location = response.location(); + if (location.latitude() != null && location.longitude() != null) { + addIfNotNull(doc, "latLon", location.latitude() + "," + location.longitude()); } - addIfNotNull(doc, "accuracyRadius", location.getAccuracyRadius()); - addIfNotNull(doc, "timeZone", location.getTimeZone()); - addIfNotNull(doc, "populationDensity", location.getPopulationDensity()); - - Postal postal = response.getPostal(); - addIfNotNull(doc, "postalCode", postal.getCode()); - addIfNotNull(doc, "postalConfidence", postal.getConfidence()); - - RepresentedCountry rCountry = response.getRepresentedCountry(); - addIfNotNull(doc, "countryType", rCountry.getType()); - - Subdivision mostSubdivision = response.getMostSpecificSubdivision(); - addIfNotNull(doc, "mostSpecificSubDivName", mostSubdivision.getName()); - addIfNotNull(doc, "mostSpecificSubDivIsoCode", mostSubdivision.getIsoCode()); - addIfNotNull(doc, "mostSpecificSubDivConfidence", mostSubdivision.getConfidence()); - addIfNotNull(doc, "mostSpecificSubDivGeoNameId", mostSubdivision.getGeoNameId()); - - Subdivision leastSubdivision = response.getLeastSpecificSubdivision(); - addIfNotNull(doc, 
"leastSpecificSubDivName", leastSubdivision.getName()); - addIfNotNull(doc, "leastSpecificSubDivIsoCode", leastSubdivision.getIsoCode()); - addIfNotNull(doc, "leastSpecificSubDivConfidence", leastSubdivision.getConfidence()); - addIfNotNull(doc, "leastSpecificSubDivGeoNameId", leastSubdivision.getGeoNameId()); - - Traits traits = response.getTraits(); - addIfNotNull(doc, "autonomousSystemNumber", traits.getAutonomousSystemNumber()); - addIfNotNull(doc, "autonomousSystemOrganization", traits.getAutonomousSystemOrganization()); - if (traits.getConnectionType() != null) { - addIfNotNull(doc, "connectionType", traits.getConnectionType().toString()); + addIfNotNull(doc, "accuracyRadius", location.accuracyRadius()); + addIfNotNull(doc, "timeZone", location.timeZone()); + addIfNotNull(doc, "populationDensity", location.populationDensity()); + + Postal postal = response.postal(); + addIfNotNull(doc, "postalCode", postal.code()); + addIfNotNull(doc, "postalConfidence", postal.confidence()); + + RepresentedCountry rCountry = response.representedCountry(); + addIfNotNull(doc, "countryType", rCountry.type()); + + Subdivision mostSubdivision = response.mostSpecificSubdivision(); + addIfNotNull(doc, "mostSpecificSubDivName", mostSubdivision.name()); + addIfNotNull(doc, "mostSpecificSubDivIsoCode", mostSubdivision.isoCode()); + addIfNotNull(doc, "mostSpecificSubDivConfidence", mostSubdivision.confidence()); + addIfNotNull(doc, "mostSpecificSubDivGeoNameId", mostSubdivision.geonameId()); + + Subdivision leastSubdivision = response.leastSpecificSubdivision(); + addIfNotNull(doc, "leastSpecificSubDivName", leastSubdivision.name()); + addIfNotNull(doc, "leastSpecificSubDivIsoCode", leastSubdivision.isoCode()); + addIfNotNull(doc, "leastSpecificSubDivConfidence", leastSubdivision.confidence()); + addIfNotNull(doc, "leastSpecificSubDivGeoNameId", leastSubdivision.geonameId()); + + Traits traits = response.traits(); + addIfNotNull(doc, "autonomousSystemNumber", 
traits.autonomousSystemNumber()); + addIfNotNull(doc, "autonomousSystemOrganization", traits.autonomousSystemOrganization()); + if (traits.connectionType() != null) { + addIfNotNull(doc, "connectionType", traits.connectionType().toString()); } - addIfNotNull(doc, "domain", traits.getDomain()); - addIfNotNull(doc, "isp", traits.getIsp()); - addIfNotNull(doc, "mobileCountryCode", traits.getMobileCountryCode()); - addIfNotNull(doc, "mobileNetworkCode", traits.getMobileNetworkCode()); - if (traits.getNetwork() != null) { - addIfNotNull(doc, CITY_NETWORK_ADDRESS, traits.getNetwork().toString()); + addIfNotNull(doc, "domain", traits.domain()); + addIfNotNull(doc, "isp", traits.isp()); + addIfNotNull(doc, "mobileCountryCode", traits.mobileCountryCode()); + addIfNotNull(doc, "mobileNetworkCode", traits.mobileNetworkCode()); + if (traits.network() != null) { + addIfNotNull(doc, CITY_NETWORK_ADDRESS, traits.network().toString()); } - addIfNotNull(doc, "organization", traits.getOrganization()); - addIfNotNull(doc, "staticIpScore", traits.getStaticIpScore()); - addIfNotNull(doc, "userCount", traits.getUserCount()); - addIfNotNull(doc, "userType", traits.getUserType()); - addIfNotNull(doc, "isAnonymous", traits.isAnonymous()); - addIfNotNull(doc, "isAnonymousVpn", traits.isAnonymousVpn()); + addIfNotNull(doc, "organization", traits.organization()); + addIfNotNull(doc, "staticIpScore", traits.staticIpScore()); + addIfNotNull(doc, "userCount", traits.userCount()); + addIfNotNull(doc, "userType", traits.userType()); addIfNotNull(doc, "isAnycast", traits.isAnycast()); - addIfNotNull(doc, "isHostingProvider", traits.isHostingProvider()); addIfNotNull(doc, "isLegitimateProxy", traits.isLegitimateProxy()); - addIfNotNull(doc, "isPublicProxy", traits.isPublicProxy()); - addIfNotNull(doc, "isResidentialProxy", traits.isResidentialProxy()); - addIfNotNull(doc, "isTorExitNode", traits.isTorExitNode()); return doc; } @@ -264,12 +259,12 @@ public static NutchDocument 
createDocFromConnectionDb(String serverIp, .getByName(serverIp)); if (opt.isPresent()) { ConnectionTypeResponse response = opt.get(); - addIfNotDuplicate(doc, "ip", response.getIpAddress()); - if (response.getConnectionType() != null) { - addIfNotNull(doc, "connectionType", response.getConnectionType().toString()); + addIfNotDuplicate(doc, "ip", response.ipAddress()); + if (response.connectionType() != null) { + addIfNotNull(doc, "connectionType", response.connectionType().toString()); } - if (response.getNetwork() != null) { - addIfNotNull(doc, CONNECTION_NETWORK_ADDRESS, response.getNetwork().toString()); + if (response.network() != null) { + addIfNotNull(doc, CONNECTION_NETWORK_ADDRESS, response.network().toString()); } } else { LOG.debug("'{}' IP address not found in Connection DB.", serverIp); @@ -295,33 +290,27 @@ public static NutchDocument createDocFromCountryDb(String serverIp, CountryResponse response = opt.get(); addIfNotDuplicate(doc, "ip", serverIp); - Continent continent = response.getContinent(); - addIfNotDuplicate(doc, "continentCode", continent.getCode()); - addIfNotDuplicate(doc, "continentGeoNameId", continent.getGeoNameId()); - addIfNotDuplicate(doc, "continentName", continent.getName()); + Continent continent = response.continent(); + addIfNotDuplicate(doc, "continentCode", continent.code()); + addIfNotDuplicate(doc, "continentGeoNameId", continent.geonameId()); + addIfNotDuplicate(doc, "continentName", continent.name()); - Country country = response.getRegisteredCountry(); - addIfNotDuplicate(doc, "countryIsoCode", country.getIsoCode()); - addIfNotDuplicate(doc, "countryName", country.getName()); - addIfNotDuplicate(doc, "countryConfidence", country.getConfidence()); - addIfNotDuplicate(doc, "countryGeoNameId", country.getGeoNameId()); + Country country = response.registeredCountry(); + addIfNotDuplicate(doc, "countryIsoCode", country.isoCode()); + addIfNotDuplicate(doc, "countryName", country.name()); + addIfNotDuplicate(doc, 
"countryConfidence", country.confidence()); + addIfNotDuplicate(doc, "countryGeoNameId", country.geonameId()); addIfNotDuplicate(doc, "countryInEuropeanUnion", country.isInEuropeanUnion()); - RepresentedCountry rCountry = response.getRepresentedCountry(); - addIfNotDuplicate(doc, "countryType", rCountry.getType()); + RepresentedCountry rCountry = response.representedCountry(); + addIfNotDuplicate(doc, "countryType", rCountry.type()); - Traits traits = response.getTraits(); - if (traits.getNetwork() != null) { - addIfNotNull(doc, COUNTRY_NETWORK_ADDRESS, traits.getNetwork().toString()); + Traits traits = response.traits(); + if (traits.network() != null) { + addIfNotNull(doc, COUNTRY_NETWORK_ADDRESS, traits.network().toString()); } - addIfNotDuplicate(doc, "isAnonymous", traits.isAnonymous()); - addIfNotDuplicate(doc, "isAnonymousVpn", traits.isAnonymousVpn()); addIfNotDuplicate(doc, "isAnycast", traits.isAnycast()); - addIfNotDuplicate(doc, "isHostingProvider", traits.isHostingProvider()); addIfNotDuplicate(doc, "isLegitimateProxy", traits.isLegitimateProxy()); - addIfNotDuplicate(doc, "isPublicProxy", traits.isPublicProxy()); - addIfNotDuplicate(doc, "isResidentialProxy", traits.isResidentialProxy()); - addIfNotDuplicate(doc, "isTorExitNode", traits.isTorExitNode()); } else { LOG.debug("'{}' IP address not found in Country DB.", serverIp); } @@ -343,9 +332,9 @@ public static NutchDocument createDocFromDomainDb(String serverIp, Optional opt = reader.tryDomain(InetAddress.getByName(serverIp)); if (opt.isPresent()) { DomainResponse response = opt.get(); - addIfNotDuplicate(doc, "ip", response.getIpAddress()); - addIfNotNull(doc, "domain", response.getDomain()); - addIfNotNull(doc, DOMAIN_NETWORK_ADDRESS, response.getNetwork().toString()); + addIfNotDuplicate(doc, "ip", response.ipAddress()); + addIfNotNull(doc, "domain", response.domain()); + addIfNotNull(doc, DOMAIN_NETWORK_ADDRESS, response.network().toString()); } else { LOG.debug("'{}' IP address not found in 
Domain DB.", serverIp); } @@ -369,75 +358,76 @@ public static NutchDocument createDocFromInsightsService(String serverIp, } private static NutchDocument processInsightsDocument(NutchDocument doc, InsightsResponse response) { - City city = response.getCity(); - addIfNotNull(doc, "cityName", city.getName()); - addIfNotNull(doc, "cityConfidence", city.getConfidence()); - addIfNotNull(doc, "cityGeoNameId", city.getGeoNameId()); - - Continent continent = response.getContinent(); - addIfNotNull(doc, "continentCode", continent.getCode()); - addIfNotNull(doc, "continentGeoNameId", continent.getGeoNameId()); - addIfNotNull(doc, "continentName", continent.getName()); - - Country country = response.getRegisteredCountry(); - addIfNotNull(doc, "countryIsoCode", country.getIsoCode()); - addIfNotNull(doc, "countryName", country.getName()); - addIfNotNull(doc, "countryConfidence", country.getConfidence()); - addIfNotNull(doc, "countryGeoNameId", country.getGeoNameId()); + City city = response.city(); + addIfNotNull(doc, "cityName", city.name()); + addIfNotNull(doc, "cityConfidence", city.confidence()); + addIfNotNull(doc, "cityGeoNameId", city.geonameId()); + + Continent continent = response.continent(); + addIfNotNull(doc, "continentCode", continent.code()); + addIfNotNull(doc, "continentGeoNameId", continent.geonameId()); + addIfNotNull(doc, "continentName", continent.name()); + + Country country = response.registeredCountry(); + addIfNotNull(doc, "countryIsoCode", country.isoCode()); + addIfNotNull(doc, "countryName", country.name()); + addIfNotNull(doc, "countryConfidence", country.confidence()); + addIfNotNull(doc, "countryGeoNameId", country.geonameId()); addIfNotNull(doc, "countryInEuropeanUnion", country.isInEuropeanUnion()); - Location location = response.getLocation(); - if (location.getLatitude() != null && location.getLongitude() != null) { - addIfNotNull(doc, "latLon", location.getLatitude() + "," + location.getLongitude()); + Location location = response.location(); 
+ if (location.latitude() != null && location.longitude() != null) { + addIfNotNull(doc, "latLon", location.latitude() + "," + location.longitude()); } - addIfNotNull(doc, "accuracyRadius", location.getAccuracyRadius()); - addIfNotNull(doc, "timeZone", location.getTimeZone()); - addIfNotNull(doc, "populationDensity", location.getPopulationDensity()); - - Postal postal = response.getPostal(); - addIfNotNull(doc, "postalCode", postal.getCode()); - addIfNotNull(doc, "postalConfidence", postal.getConfidence()); - - RepresentedCountry rCountry = response.getRepresentedCountry(); - addIfNotNull(doc, "countryType", rCountry.getType()); - - Subdivision mostSubdivision = response.getMostSpecificSubdivision(); - addIfNotNull(doc, "mostSpecificSubDivName", mostSubdivision.getName()); - addIfNotNull(doc, "mostSpecificSubDivIsoCode", mostSubdivision.getIsoCode()); - addIfNotNull(doc, "mostSpecificSubDivConfidence", mostSubdivision.getConfidence()); - addIfNotNull(doc, "mostSpecificSubDivGeoNameId", mostSubdivision.getGeoNameId()); - - Subdivision leastSubdivision = response.getLeastSpecificSubdivision(); - addIfNotNull(doc, "leastSpecificSubDivName", leastSubdivision.getName()); - addIfNotNull(doc, "leastSpecificSubDivIsoCode", leastSubdivision.getIsoCode()); - addIfNotNull(doc, "leastSpecificSubDivConfidence", leastSubdivision.getConfidence()); - addIfNotNull(doc, "leastSpecificSubDivGeoNameId", leastSubdivision.getGeoNameId()); - - Traits traits = response.getTraits(); - addIfNotNull(doc, "autonomousSystemNumber", traits.getAutonomousSystemNumber()); - addIfNotNull(doc, "autonomousSystemOrganization", traits.getAutonomousSystemOrganization()); - if (traits.getConnectionType() != null) { - addIfNotNull(doc, "connectionType", traits.getConnectionType().toString()); + addIfNotNull(doc, "accuracyRadius", location.accuracyRadius()); + addIfNotNull(doc, "timeZone", location.timeZone()); + addIfNotNull(doc, "populationDensity", location.populationDensity()); + + Postal postal = 
response.postal(); + addIfNotNull(doc, "postalCode", postal.code()); + addIfNotNull(doc, "postalConfidence", postal.confidence()); + + RepresentedCountry rCountry = response.representedCountry(); + addIfNotNull(doc, "countryType", rCountry.type()); + + Subdivision mostSubdivision = response.mostSpecificSubdivision(); + addIfNotNull(doc, "mostSpecificSubDivName", mostSubdivision.name()); + addIfNotNull(doc, "mostSpecificSubDivIsoCode", mostSubdivision.isoCode()); + addIfNotNull(doc, "mostSpecificSubDivConfidence", mostSubdivision.confidence()); + addIfNotNull(doc, "mostSpecificSubDivGeoNameId", mostSubdivision.geonameId()); + + Subdivision leastSubdivision = response.leastSpecificSubdivision(); + addIfNotNull(doc, "leastSpecificSubDivName", leastSubdivision.name()); + addIfNotNull(doc, "leastSpecificSubDivIsoCode", leastSubdivision.isoCode()); + addIfNotNull(doc, "leastSpecificSubDivConfidence", leastSubdivision.confidence()); + addIfNotNull(doc, "leastSpecificSubDivGeoNameId", leastSubdivision.geonameId()); + + Traits traits = response.traits(); + addIfNotNull(doc, "autonomousSystemNumber", traits.autonomousSystemNumber()); + addIfNotNull(doc, "autonomousSystemOrganization", traits.autonomousSystemOrganization()); + if (traits.connectionType() != null) { + addIfNotNull(doc, "connectionType", traits.connectionType().toString()); } - addIfNotNull(doc, "domain", traits.getDomain()); - addIfNotNull(doc, "isp", traits.getIsp()); - addIfNotNull(doc, "mobileCountryCode", traits.getMobileCountryCode()); - addIfNotNull(doc, "mobileNetworkCode", traits.getMobileNetworkCode()); - if (traits.getNetwork() != null) { - addIfNotNull(doc, INSIGHTS_NETWORK_ADDRESS, traits.getNetwork().toString()); + addIfNotNull(doc, "domain", traits.domain()); + addIfNotNull(doc, "isp", traits.isp()); + addIfNotNull(doc, "mobileCountryCode", traits.mobileCountryCode()); + addIfNotNull(doc, "mobileNetworkCode", traits.mobileNetworkCode()); + if (traits.network() != null) { + addIfNotNull(doc, 
INSIGHTS_NETWORK_ADDRESS, traits.network().toString()); } - addIfNotNull(doc, "organization", traits.getOrganization()); - addIfNotNull(doc, "staticIpScore", traits.getStaticIpScore()); - addIfNotNull(doc, "userCount", traits.getUserCount()); - addIfNotNull(doc, "userType", traits.getUserType()); - addIfNotNull(doc, "isAnonymous", traits.isAnonymous()); - addIfNotNull(doc, "isAnonymousVpn", traits.isAnonymousVpn()); + addIfNotNull(doc, "organization", traits.organization()); + addIfNotNull(doc, "staticIpScore", traits.staticIpScore()); + addIfNotNull(doc, "userCount", traits.userCount()); + addIfNotNull(doc, "userType", traits.userType()); addIfNotNull(doc, "isAnycast", traits.isAnycast()); - addIfNotNull(doc, "isHostingProvider", traits.isHostingProvider()); addIfNotNull(doc, "isLegitimateProxy", traits.isLegitimateProxy()); - addIfNotNull(doc, "isPublicProxy", traits.isPublicProxy()); - addIfNotNull(doc, "isResidentialProxy", traits.isResidentialProxy()); - addIfNotNull(doc, "isTorExitNode", traits.isTorExitNode()); + Anonymizer anonymizer = response.anonymizer(); + addIfNotNull(doc, "isAnonymous", anonymizer.isAnonymous()); + addIfNotNull(doc, "isAnonymousVpn", anonymizer.isAnonymousVpn()); + addIfNotNull(doc, "isHostingProvider", anonymizer.isHostingProvider()); + addIfNotNull(doc, "isPublicProxy", anonymizer.isPublicProxy()); + addIfNotNull(doc, "isResidentialProxy", anonymizer.isResidentialProxy()); + addIfNotNull(doc, "isTorExitNode", anonymizer.isTorExitNode()); return doc; } @@ -456,11 +446,11 @@ public static NutchDocument createDocFromIspDb(String serverIp, Optional opt = reader.tryIsp(InetAddress.getByName(serverIp)); if (opt.isPresent()) { IspResponse response = opt.get(); - addIfNotDuplicate(doc, "ip", response.getIpAddress()); - addIfNotNull(doc, "autonSystemNum", response.getAutonomousSystemNumber()); - addIfNotNull(doc, "autonSystemOrg", response.getAutonomousSystemOrganization()); - addIfNotNull(doc, "isp", response.getIsp()); - addIfNotNull(doc, 
"org", response.getOrganization()); + addIfNotDuplicate(doc, "ip", response.ipAddress()); + addIfNotNull(doc, "autonSystemNum", response.autonomousSystemNumber()); + addIfNotNull(doc, "autonSystemOrg", response.autonomousSystemOrganization()); + addIfNotNull(doc, "isp", response.isp()); + addIfNotNull(doc, "org", response.organization()); } else { LOG.debug("'{}' IP address not found in ISP DB.", serverIp); } diff --git a/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchIndexWriter.java b/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchIndexWriter.java index ecff85743a..501b95b546 100644 --- a/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchIndexWriter.java +++ b/src/plugin/indexer-cloudsearch/src/java/org/apache/nutch/indexwriter/cloudsearch/CloudSearchIndexWriter.java @@ -87,15 +87,8 @@ public class CloudSearchIndexWriter implements IndexWriter { private String endpoint; private String regionName; - @Override - public void open(Configuration conf, String name) throws IOException { - //Implementation not required - } - @Override public void open(IndexWriterParams parameters) throws IOException { - // LOG.debug("CloudSearchIndexWriter.open() name={} ", name); - endpoint = parameters.get(CloudSearchConstants.ENDPOINT); dumpBatchFilesToTemp = parameters .getBoolean(CloudSearchConstants.BATCH_DUMP, false); @@ -114,8 +107,7 @@ public void open(IndexWriterParams parameters) throws IOException { buffer = new StringBuffer(MAX_SIZE_BATCH_BYTES).append('['); if (dumpBatchFilesToTemp) { - // only dumping to local file - // no more config required + // only dumping to local file no more config required return; } diff --git a/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java b/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java index a0ea4a738e..30d09d7502 100644 --- 
a/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java +++ b/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java @@ -191,11 +191,6 @@ protected int find(String value, int start) { private Path csvLocalOutFile; - @Override - public void open(Configuration conf, String name) throws IOException { - - } - /** * Initializes the internal variables from a given index writer configuration. * diff --git a/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/DummyIndexWriter.java b/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/DummyIndexWriter.java index b24aa632d4..5fa7f10073 100644 --- a/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/DummyIndexWriter.java +++ b/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/DummyIndexWriter.java @@ -49,11 +49,6 @@ public class DummyIndexWriter implements IndexWriter { private boolean delete = false; private String path; - @Override - public void open(Configuration conf, String name) throws IOException { - //Implementation not required - } - /** * Initializes the internal variables from a given index writer configuration. * diff --git a/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java b/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java index 84978dbc22..0c8450e579 100644 --- a/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java +++ b/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java @@ -101,11 +101,6 @@ public class ElasticIndexWriter implements IndexWriter { private Configuration config; - @Override - public void open(Configuration conf, String name) throws IOException { - // Implementation not required - } - /** * Initializes the internal variables from a given index writer configuration. 
* diff --git a/src/plugin/indexer-kafka/src/java/org/apache/nutch/indexwriter/kafka/KafkaIndexWriter.java b/src/plugin/indexer-kafka/src/java/org/apache/nutch/indexwriter/kafka/KafkaIndexWriter.java index 2fcf6de872..31514b01f7 100644 --- a/src/plugin/indexer-kafka/src/java/org/apache/nutch/indexwriter/kafka/KafkaIndexWriter.java +++ b/src/plugin/indexer-kafka/src/java/org/apache/nutch/indexwriter/kafka/KafkaIndexWriter.java @@ -65,11 +65,6 @@ public class KafkaIndexWriter implements IndexWriter { private JsonNode json = null; private List> inputDocs = null; - - @Override - public void open(Configuration job, String name) throws IOException { - //Implementation not required - } @Override public void open(IndexWriterParams params) throws IOException { diff --git a/src/plugin/indexer-opensearch-1x/src/java/org/apache/nutch/indexwriter/opensearch1x/OpenSearch1xIndexWriter.java b/src/plugin/indexer-opensearch-1x/src/java/org/apache/nutch/indexwriter/opensearch1x/OpenSearch1xIndexWriter.java index a51004ebc3..ea1b98f12f 100644 --- a/src/plugin/indexer-opensearch-1x/src/java/org/apache/nutch/indexwriter/opensearch1x/OpenSearch1xIndexWriter.java +++ b/src/plugin/indexer-opensearch-1x/src/java/org/apache/nutch/indexwriter/opensearch1x/OpenSearch1xIndexWriter.java @@ -109,12 +109,6 @@ public class OpenSearch1xIndexWriter implements IndexWriter { private Configuration config; - - @Override - public void open(Configuration conf, String name) throws IOException { - // Implementation not required - } - /** * Initializes the internal variables from a given index writer * configuration. 
diff --git a/src/plugin/indexer-rabbit/src/java/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriter.java b/src/plugin/indexer-rabbit/src/java/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriter.java index 37acf12a15..02d280813c 100644 --- a/src/plugin/indexer-rabbit/src/java/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriter.java +++ b/src/plugin/indexer-rabbit/src/java/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriter.java @@ -78,11 +78,6 @@ public void setConf(Configuration conf) { config = conf; } - @Override - public void open(Configuration conf, String name) throws IOException { - //Implementation not required - } - /** * Initializes the internal variables from a given index writer configuration. * diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java index bd2a518246..09ec93acbd 100644 --- a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java +++ b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java @@ -75,11 +75,6 @@ public class SolrIndexWriter implements IndexWriter { private String authHeaderName; private String authHeaderValue; - @Override - public void open(Configuration conf, String name) { - // Implementation not required - } - /** * Initializes the internal variables from a given index writer configuration. 
* diff --git a/src/plugin/lib-http/src/test/org/apache/nutch/protocol/http/api/TestRobotRulesParser.java b/src/plugin/lib-http/src/test/org/apache/nutch/protocol/http/api/TestRobotRulesParser.java index 20049d2fc6..dc35dc7b48 100644 --- a/src/plugin/lib-http/src/test/org/apache/nutch/protocol/http/api/TestRobotRulesParser.java +++ b/src/plugin/lib-http/src/test/org/apache/nutch/protocol/http/api/TestRobotRulesParser.java @@ -154,53 +154,4 @@ public void testCrawlDelay() { assertTrue((rules.getCrawlDelay() == Long.MIN_VALUE), "testing crawl delay for agent " + UNKNOWN_AGENT + " : "); } - - /** - * Test that the robots rules are interpreted correctly by the robots rules - * parser. - */ - @Deprecated - @Test - public void testRobotsAgentDeprecatedAPIMethod() { - rules = parser.parseRules("testRobotsAgent", ROBOTS_STRING.getBytes(), - CONTENT_TYPE, SINGLE_AGENT1); - testRulesOnPaths(SINGLE_AGENT1, TEST_PATHS, RESULTS_AGENT1); - - rules = parser.parseRules("testRobotsAgent", ROBOTS_STRING.getBytes(), - CONTENT_TYPE, SINGLE_AGENT2); - testRulesOnPaths(SINGLE_AGENT2, TEST_PATHS, RESULTS_AGENT2); - - rules = parser.parseRules("testRobotsAgent", ROBOTS_STRING.getBytes(), - CONTENT_TYPE, MULTIPLE_AGENTS); - testRulesOnPaths(MULTIPLE_AGENTS, TEST_PATHS, RESULTS_AGENT1_AND_AGENT2); - } - - /** - * Test that the crawl delay is extracted from the robots file for respective - * agent. If its not specified for a given agent, default value must be - * returned. - */ - @Deprecated - @Test - public void testCrawlDelayDeprecatedAPIMethod() { - // for SINGLE_AGENT1, the crawl delay of 10 seconds, i.e. 10000 msec must be - // returned by the parser - rules = parser.parseRules("testCrawlDelay", ROBOTS_STRING.getBytes(), - CONTENT_TYPE, SINGLE_AGENT1); - assertTrue((rules.getCrawlDelay() == 10000), - "testing crawl delay for agent " + SINGLE_AGENT1 + " : "); - - // for SINGLE_AGENT2, the crawl delay of 20 seconds, i.e. 
20000 msec must be - // returned by the parser - rules = parser.parseRules("testCrawlDelay", ROBOTS_STRING.getBytes(), - CONTENT_TYPE, SINGLE_AGENT2); - assertTrue((rules.getCrawlDelay() == 20000), - "testing crawl delay for agent " + SINGLE_AGENT2 + " : "); - - // for UNKNOWN_AGENT, the default crawl delay must be returned. - rules = parser.parseRules("testCrawlDelay", ROBOTS_STRING.getBytes(), - CONTENT_TYPE, UNKNOWN_AGENT); - assertTrue((rules.getCrawlDelay() == Long.MIN_VALUE), - "testing crawl delay for agent " + UNKNOWN_AGENT + " : "); - } } diff --git a/src/plugin/mimetype-filter/src/java/org/apache/nutch/indexer/filter/MimeTypeIndexingFilter.java b/src/plugin/mimetype-filter/src/java/org/apache/nutch/indexer/filter/MimeTypeIndexingFilter.java index 9fb0b43fdd..b34fa050ae 100644 --- a/src/plugin/mimetype-filter/src/java/org/apache/nutch/indexer/filter/MimeTypeIndexingFilter.java +++ b/src/plugin/mimetype-filter/src/java/org/apache/nutch/indexer/filter/MimeTypeIndexingFilter.java @@ -16,17 +16,15 @@ */ package org.apache.nutch.indexer.filter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; import org.apache.commons.cli.UnrecognizedOptionException; +import org.apache.commons.cli.HelpFormatter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.StringUtils; @@ -67,7 +65,6 @@ /** * An {@link org.apache.nutch.indexer.IndexingFilter} that allows filtering * of documents based on the MIME Type detected by Tika - * */ public class 
MimeTypeIndexingFilter implements IndexingFilter { @@ -139,9 +136,8 @@ public void setConf(Configuration conf) { if (file != null) { if (file.isEmpty()) { - LOG.warn(String - .format("Missing %s property, ALL mimetypes will be allowed", - MIMEFILTER_REGEX_FILE)); + LOG.warn("Missing {} property, ALL mimetypes will be allowed", + MIMEFILTER_REGEX_FILE); } else { Reader reader = conf.getConfResourceAsReader(file); @@ -196,38 +192,34 @@ public Configuration getConf() { * Main method for invoking this tool * @param args run with no arguments to print help * @throws IOException if there is a fatal I/O error processing the input args - * @throws IndexingException if there is a fatal error whils indexing + * @throws IndexingException if there is a fatal error whilst indexing */ public static void main(String[] args) throws IOException, IndexingException { - Option helpOpt = new Option("h", "help", false, "show this help message"); - @SuppressWarnings("static-access") - Option rulesOpt = OptionBuilder.withArgName("file").hasArg() - .withDescription( - "Rules file to be used in the tests relative to the conf directory") - .isRequired().create("rules"); + Option helpOpt = new Option("h", "help", false, "Show this help message."); + Option rulesOpt = Option.builder("rules").hasArg().argName("file") + .desc("Rules file to be used in the tests relative to the conf directory.") + .required().build(); Options options = new Options(); options.addOption(helpOpt).addOption(rulesOpt); - CommandLineParser parser = new GnuParser(); - HelpFormatter formatter = new HelpFormatter(); + CommandLineParser parser = new DefaultParser(); + HelpFormatter formatter = HelpFormatter.builder().get(); String rulesFile; try { CommandLine line = parser.parse(options, args); if (line.hasOption("help") || !line.hasOption("rules")) { - formatter - .printHelp("org.apache.nutch.indexer.filter.MimeTypeIndexingFilter", - options, true); + 
formatter.printHelp("org.apache.nutch.indexer.filter.MimeTypeIndexingFilter", + "", options, "", true); return; } rulesFile = line.getOptionValue("rules"); } catch (UnrecognizedOptionException e) { - formatter - .printHelp("org.apache.nutch.indexer.filter.MimeTypeIndexingFilter", - options, true); + formatter.printHelp("org.apache.nutch.indexer.filter.MimeTypeIndexingFilter", + "", options, "", true); return; } catch (Exception e) { LOG.error(StringUtils.stringifyException(e)); diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java index 8cf58f75e7..abf46e49f9 100644 --- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java +++ b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java @@ -187,8 +187,10 @@ public ProtocolOutput getProtocolOutput(Text url, CrawlDatum datum) { } } - @Override - protected void finalize() { + /** + * Disconnects the FTP client. Should be called explicitly when done with this instance. 
+ */ + public void disconnect() { try { if (this.client != null && this.client.isConnected()) { this.client.logout(); diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HttpResponse.java b/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HttpResponse.java index 3afcbb25cf..02300f5e63 100644 --- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HttpResponse.java +++ b/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HttpResponse.java @@ -41,7 +41,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.metadata.Metadata; -import org.apache.nutch.metadata.SpellCheckedMetadata; +import org.apache.nutch.metadata.CaseInsensitiveMetadata; import org.apache.nutch.net.protocols.HttpDateFormat; import org.apache.nutch.net.protocols.Response; import org.apache.nutch.protocol.ProtocolException; @@ -62,7 +62,7 @@ public class HttpResponse implements Response { private String base; private byte[] content; private int code; - private Metadata headers = new SpellCheckedMetadata(); + private Metadata headers = new CaseInsensitiveMetadata(); // used for storing the http headers verbatim private StringBuffer httpHeaders; diff --git a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java index 377b784c57..93c353b137 100644 --- a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java +++ b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java @@ -40,7 +40,7 @@ import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.metadata.HttpHeaders; import org.apache.nutch.metadata.Metadata; -import org.apache.nutch.metadata.SpellCheckedMetadata; +import org.apache.nutch.metadata.CaseInsensitiveMetadata; import 
org.apache.nutch.net.protocols.HttpDateFormat; import org.apache.nutch.net.protocols.Response; import org.apache.nutch.protocol.ProtocolException; @@ -56,7 +56,7 @@ public class HttpResponse implements Response { private URL url; private byte[] content; private int code; - private Metadata headers = new SpellCheckedMetadata(); + private Metadata headers = new CaseInsensitiveMetadata(); // used for storing the http headers verbatim private StringBuffer httpHeaders; diff --git a/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestResponse.java b/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestResponse.java index 1126e8e84a..abe4f2076f 100644 --- a/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestResponse.java +++ b/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestResponse.java @@ -116,8 +116,8 @@ public void testGetHeader() throws Exception { "Testing non-standard HTTP header \"MyCustomHeader\": only exact matching"); headerTest(200, "MyCustomHeader", value, "MyCustomHeader"); /* - * The following case-insensitive or approximate look-ups are not supported - * for non-standard headers by SpellCheckedMetadata: + * Case-insensitive look-up for non-standard headers is supported by + * CaseInsensitiveMetadata; typo-tolerant look-up is not. 
*/ // testHeader(200, "MyCustomHeader", value, "mycustomheader"); // testHeader(200, "mycustomheader", value, "MyCustomHeader"); diff --git a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java index 87ee0bb8ac..a72babed22 100644 --- a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java +++ b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java @@ -32,7 +32,7 @@ import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.metadata.Metadata; -import org.apache.nutch.metadata.SpellCheckedMetadata; +import org.apache.nutch.metadata.CaseInsensitiveMetadata; import org.apache.nutch.net.protocols.HttpDateFormat; import org.apache.nutch.net.protocols.Response; import org.apache.nutch.protocol.http.api.HttpBase; @@ -48,7 +48,7 @@ public class HttpResponse implements Response { private URL url; private byte[] content; private int code; - private Metadata headers = new SpellCheckedMetadata(); + private Metadata headers = new CaseInsensitiveMetadata(); /** * Fetches the given url and prepares HTTP response. 
diff --git a/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java index 9081f14b81..79676d5629 100644 --- a/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java +++ b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java @@ -42,7 +42,7 @@ import org.apache.hadoop.io.Text; import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.metadata.Metadata; -import org.apache.nutch.metadata.SpellCheckedMetadata; +import org.apache.nutch.metadata.CaseInsensitiveMetadata; import org.apache.nutch.net.protocols.HttpDateFormat; import org.apache.nutch.net.protocols.Response; import org.apache.nutch.protocol.ProtocolException; @@ -62,7 +62,7 @@ public class HttpResponse implements Response { private String base; private byte[] content; private int code; - private Metadata headers = new SpellCheckedMetadata(); + private Metadata headers = new CaseInsensitiveMetadata(); private InteractiveSeleniumHandler[] handlers; // used for storing the http headers verbatim private StringBuffer httpHeaders; diff --git a/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestResponse.java b/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestResponse.java index f0944ff4c5..cc91f042f0 100644 --- a/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestResponse.java +++ b/src/plugin/protocol-okhttp/src/test/org/apache/nutch/protocol/okhttp/TestResponse.java @@ -110,15 +110,15 @@ public void testGetHeader() throws Exception { headerTest(301, "Location", value, "location"); headerTest(301, "location", value, "Location"); headerTest(301, "LOCATION", value, "Location"); - // only with SpellCheckedMetadata: - // headerTest(301, "Loction", value, 
"Location"); + // Typo-tolerant lookup (e.g. "Loction" -> "Location") is not supported + // with CaseInsensitiveMetadata. LOG.info( - "Testing non-standard HTTP header \"MyCustomHeader\": only exact matching"); + "Testing non-standard HTTP header \"MyCustomHeader\": case-insensitive matching"); headerTest(200, "MyCustomHeader", value, "MyCustomHeader"); /* - * The following case-insensitive or approximate look-ups are not supported - * for non-standard headers by SpellCheckedMetadata: + * Case-insensitive look-up for non-standard headers is supported by + * CaseInsensitiveMetadata; typo-tolerant look-up is not. */ // testHeader(200, "MyCustomHeader", value, "mycustomheader"); // testHeader(200, "mycustomheader", value, "MyCustomHeader"); @@ -141,7 +141,7 @@ public void testMetadataBenchmark() throws MalformedURLException, ProtocolExcept headerTest(301, "Location", value, "location"); headerTest(301, "location", value, "Location"); headerTest(301, "LOCATION", value, "Location"); - // only with SpellCheckedMetadata: + // Typo-tolerant lookup not supported with CaseInsensitiveMetadata // headerTest(301, "Loction", value, "Location"); } long elapsed = System.currentTimeMillis() - start; diff --git a/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java b/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java index ddfcf72417..235330538c 100644 --- a/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java +++ b/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java @@ -41,7 +41,7 @@ import org.apache.hadoop.io.Text; import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.metadata.Metadata; -import org.apache.nutch.metadata.SpellCheckedMetadata; +import org.apache.nutch.metadata.CaseInsensitiveMetadata; import org.apache.nutch.net.protocols.HttpDateFormat; import org.apache.nutch.net.protocols.Response; import 
org.apache.nutch.protocol.ProtocolException; @@ -58,7 +58,7 @@ public class HttpResponse implements Response { private String base; private byte[] content; private int code; - private Metadata headers = new SpellCheckedMetadata(); + private Metadata headers = new CaseInsensitiveMetadata(); // used for storing the http headers verbatim private StringBuffer httpHeaders; diff --git a/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java b/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java index 581b528f31..9a06573d8a 100644 --- a/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java +++ b/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java @@ -263,12 +263,14 @@ public boolean getJobSetupCleanupNeeded() { @Override @Deprecated + // Implements deprecated Hadoop DistributedCache API; required by JobContext. public Path[] getLocalCacheArchives() throws IOException { return null; } @Override @Deprecated + // Implements deprecated Hadoop DistributedCache API; required by JobContext. public Path[] getLocalCacheFiles() throws IOException { return null; } @@ -350,6 +352,7 @@ public RawComparator getSortComparator() { @Override @Deprecated + // Implements deprecated Hadoop DistributedCache API; required by JobContext. public boolean getSymlink() { return false; } diff --git a/src/test/org/apache/nutch/crawl/CrawlDbUpdateUtil.java b/src/test/org/apache/nutch/crawl/CrawlDbUpdateUtil.java index 1beab362be..0cf5464c9d 100644 --- a/src/test/org/apache/nutch/crawl/CrawlDbUpdateUtil.java +++ b/src/test/org/apache/nutch/crawl/CrawlDbUpdateUtil.java @@ -67,7 +67,7 @@ protected CrawlDbUpdateUtil(CrawlDbReducer red, Reducer.Context { - + private DummyContext() { reducer.super(); } @@ -230,12 +230,14 @@ public boolean getJobSetupCleanupNeeded() { @Override @Deprecated + // Implements deprecated Hadoop DistributedCache API; required by JobContext. 
public Path[] getLocalCacheArchives() throws IOException { return null; } @Override @Deprecated + // Implements deprecated Hadoop DistributedCache API; required by JobContext. public Path[] getLocalCacheFiles() throws IOException { return null; } @@ -317,6 +319,7 @@ public RawComparator getSortComparator() { @Override @Deprecated + // Implements deprecated Hadoop DistributedCache API; required by JobContext. public boolean getSymlink() { return false; } diff --git a/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java b/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java index b33d514477..db5d70ab31 100644 --- a/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java +++ b/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java @@ -31,10 +31,12 @@ /** * JUnit based tests of class * {@link org.apache.nutch.metadata.SpellCheckedMetadata}. - * + * Kept for backward compatibility until SpellCheckedMetadata is removed. + * * @author Chris Mattmann * @author Jérôme Charron */ +@SuppressWarnings("deprecation") public class TestSpellCheckedMetadata { private static final int NUM_ITERATIONS = 10000; diff --git a/src/test/org/apache/nutch/protocol/TestContent.java b/src/test/org/apache/nutch/protocol/TestContent.java index fa5c7d17da..36509945eb 100644 --- a/src/test/org/apache/nutch/protocol/TestContent.java +++ b/src/test/org/apache/nutch/protocol/TestContent.java @@ -17,7 +17,7 @@ package org.apache.nutch.protocol; import org.apache.nutch.metadata.Metadata; -import org.apache.nutch.metadata.SpellCheckedMetadata; +import org.apache.nutch.metadata.CaseInsensitiveMetadata; import org.apache.hadoop.conf.Configuration; import org.apache.nutch.util.NutchConfiguration; import org.apache.nutch.util.WritableTestUtils; @@ -42,7 +42,7 @@ public void testContent() throws Exception { String url = "http://www.foo.com/"; - SpellCheckedMetadata metaData = new SpellCheckedMetadata(); + Metadata metaData = new CaseInsensitiveMetadata(); 
metaData.add("Host", "www.foo.com"); metaData.add("Content-Type", "text/html"); @@ -52,7 +52,6 @@ public void testContent() throws Exception { WritableTestUtils.testWritable(r); assertEquals("text/html", r.getMetadata().get("Content-Type")); assertEquals("text/html", r.getMetadata().get("content-type")); - assertEquals("text/html", r.getMetadata().get("CONTENTYPE")); } /** Unit tests for getContentType(String, String, byte[]) method. */ diff --git a/src/test/org/apache/nutch/util/ReducerContextWrapper.java b/src/test/org/apache/nutch/util/ReducerContextWrapper.java index 196116c4cb..9ea4646a11 100644 --- a/src/test/org/apache/nutch/util/ReducerContextWrapper.java +++ b/src/test/org/apache/nutch/util/ReducerContextWrapper.java @@ -266,12 +266,14 @@ public boolean getJobSetupCleanupNeeded() { } @Override + @SuppressWarnings("deprecation") public Path[] getLocalCacheArchives() throws IOException { // Auto-generated return null; } @Override + @SuppressWarnings("deprecation") public Path[] getLocalCacheFiles() throws IOException { // Auto-generated return null; @@ -372,6 +374,7 @@ public RawComparator getSortComparator() { } @Override + @SuppressWarnings("deprecation") public boolean getSymlink() { // Auto-generated return false;