diff --git a/README.md b/README.md index e6777b45..54aa59ce 100644 --- a/README.md +++ b/README.md @@ -284,6 +284,14 @@ vhost = / # Used as default for declare / delete / list for n in $(seq 1 30); do echo $n; rabbitmqadmin -c rmq.conf -N default -U rmq -p $RMQPW publish exchange=testexchange routing_key=testqueue payload="Message: ${n}" --vhost=/; done ``` +## Switching the Storage System +The Dataone Indexer can be configured to use different storage systems by setting the environmental +variable `DATAONE_INDEXER_OBJECT_MANAGER_CLASS_NAME`. +By default, this variable is not set, and the indexer uses +`org.dataone.cn.indexer.object.hashstore.HashStoreObjManager`, which enables support for Hashstore. +To use the legacy storage system instead, set the variable to +`org.dataone.cn.indexer.object.legacystore.LegacyStoreObjManager`. + ## History This is a refactored version of the original DataONE [d1_cn_index_processor](https://github.com/DataONEorg/d1_cn_index_processor) that runs diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 195bf48f..e7521711 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,5 +1,27 @@ # dataone-indexer Release Notes +> [!CAUTION] +> **If upgrading from Helm chart v1.2.0 or earlier, note the section entitled: +> `Caution - ENSURE THAT THE RABBITMQ QUEUE IS EMPTY,` [in the release notes for helm chart v1.3.0 +> below!](#helm-chart-version-130)**, + +## dataone-indexer version 3.1.5 & helm chart version 1.3.2 + +### Release date: 2025-06-26 + +### dataone-indexer version 3.1.5 + +This is a patch release with the following minor fixes and upgrades + +- Dataone-indexer can handle legacy Metacat object repository ([Issue #222](https://github.com/DataONEorg/dataone-indexer/issues/222)) +- Remove some extra log statements (for version conflict retries) that are confusing to users ([Issue #243](https://github.com/DataONEorg/dataone-indexer/issues/243)) +- Indexer performance improvement: Decrease the re-try waiting time for a version conflict error ([Issue #245](https://github.com/DataONEorg/dataone-indexer/issues/245)) +- Remove unnecessary dependency on PostrgeSQL jar ([Issue #247](https://github.com/DataONEorg/dataone-indexer/issues/247)) + +### helm chart version 1.3.2 +- Bump indexer App version to 3.1.5 + + ## dataone-indexer version 3.1.4 & helm chart version 1.3.1 ### Release date: 2025-05-20 diff --git a/helm/Chart.yaml b/helm/Chart.yaml index 62aa8ec1..d24abe8b 100644 --- a/helm/Chart.yaml +++ b/helm/Chart.yaml @@ -21,13 +21,13 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: "1.3.1" +version: "1.3.2" # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "3.1.4" +appVersion: "3.1.5" # Chart dependencies dependencies: diff --git a/helm/config/dataone-indexer.properties b/helm/config/dataone-indexer.properties index c4514edf..a572435e 100644 --- a/helm/config/dataone-indexer.properties +++ b/helm/config/dataone-indexer.properties @@ -38,7 +38,7 @@ dataone.mn.registration.serviceType.url={{ .Values.idxworker.d1_serviceType_url index.resourcemap.waitingComponent.time={{ default 800 .Values.idxworker.resourcemapWaitMs }} index.resourcemap.waitingComponent.max.attempts={{ default 25 .Values.idxworker.resourcemapMaxTries }} -index.solr.versionConflict.waiting.time={{ default 1000 .Values.idxworker.solrVerConflictWaitMs }} +index.solr.versionConflict.waiting.time={{ default 10 .Values.idxworker.solrVerConflictWaitMs }} index.solr.versionConflict.max.attempts={{ default 25000 .Values.idxworker.solrVerConflictMaxTries }} # Storage properties diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml index 9303fc66..b212fd30 100644 --- a/helm/templates/deployment.yaml +++ b/helm/templates/deployment.yaml @@ -117,7 +117,7 @@ spec: - name: IDX_JAVA_MEM value: {{ .Values.idxworker.javaMem | quote }} {{- end }} - - name: DATAONE_AUTH_TOKEN + - name: DATAONE_INDEXER_AUTH_TOKEN valueFrom: secretKeyRef: name: {{ .Release.Name }}-indexer-token diff --git a/helm/values.yaml b/helm/values.yaml index e31cdaf3..dfd1c698 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -167,7 +167,7 @@ idxworker: ## @param idxworker.solrVerConflictWaitMs wait time (mS) before indexer grabs a newer version ## of solr doc after a version conflict ## - solrVerConflictWaitMs: 1000 + solrVerConflictWaitMs: 10 ## @param idxworker.solrVerConflictMaxTries Number of tries to get a newer version of solr doc ## after a version conflict diff --git a/pom.xml b/pom.xml index d982ced3..56c8dbc5 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 org.dataone dataone-index-worker - 3.1.4 + 3.1.5 jar dataone-index-worker http://maven.apache.org @@ -306,6 +306,12 @@ org.dataone hashstore 1.1.0 + + + org.postgresql + postgresql + + uk.org.webcompere diff --git a/src/main/java/org/dataone/cn/indexer/IndexWorker.java b/src/main/java/org/dataone/cn/indexer/IndexWorker.java index 163245ab..1e412813 100644 --- a/src/main/java/org/dataone/cn/indexer/IndexWorker.java +++ b/src/main/java/org/dataone/cn/indexer/IndexWorker.java @@ -2,6 +2,7 @@ import java.io.File; import java.io.IOException; +import java.lang.reflect.InvocationTargetException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -18,14 +19,13 @@ import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPathExpressionException; -import com.rabbitmq.client.ShutdownSignalException; import org.apache.commons.codec.EncoderException; import org.apache.commons.configuration.ConfigurationException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.solr.client.solrj.SolrServerException; import org.dataone.cn.indexer.annotation.OntologyModelService; -import org.dataone.cn.indexer.object.ObjectManager; +import org.dataone.cn.indexer.object.ObjectManagerFactory; import org.dataone.configuration.Settings; import org.dataone.exceptions.MarshallingException; import org.dataone.indexer.queue.IndexQueueMessageParser; @@ -59,7 +59,7 @@ public class IndexWorker { //The create is the index task type for the action when a new object was created. So the solr index will be generated. //delete is the index task type for the action when an object was deleted. So the solr index will be deleted //sysmeta is the index task type for the action when the system metadata of an existing object was updated. - public final static String CREATE_INDEXT_TYPE = "create"; + public final static String CREATE_INDEX_TYPE = "create"; public final static String DELETE_INDEX_TYPE = "delete"; public final static String SYSMETA_CHANGE_TYPE = "sysmeta"; //this handle for resource map only @@ -219,7 +219,10 @@ public static void loadAdditionalPropertyFile(String propertyFile) { * @throws TimeoutException * @throws ServiceFailure */ - public IndexWorker() throws IOException, TimeoutException, ServiceFailure { + public IndexWorker() + throws IOException, TimeoutException, ServiceFailure, ClassNotFoundException, + InvocationTargetException, NoSuchMethodException, InstantiationException, + IllegalAccessException { this(true); } @@ -231,7 +234,9 @@ public IndexWorker() throws IOException, TimeoutException, ServiceFailure { * @throws TimeoutException * @throws ServiceFailure */ - public IndexWorker(Boolean initialize) throws IOException, TimeoutException { + public IndexWorker(Boolean initialize) + throws IOException, TimeoutException, ClassNotFoundException, InvocationTargetException, + NoSuchMethodException, InstantiationException, IllegalAccessException { String value = System.getenv("KUBERNETES_SERVICE_HOST"); // Java doc says: the string value of the variable, or null if the variable is not defined // in the system environment @@ -243,7 +248,7 @@ public IndexWorker(Boolean initialize) throws IOException, TimeoutException { initExecutorService();//initialize the executor first initIndexQueue(); initIndexParsers(); - ObjectManager.getInstance(); + ObjectManagerFactory.getObjectManager(); OntologyModelService.getInstance(); } } @@ -385,9 +390,11 @@ public void run() { indexObject(parser, multipleThread); } } catch (InvalidRequest e) { - logger.error( - "cannot index the task for identifier " + parser.getIdentifier().getValue() - + " since " + e.getMessage()); + String error = "Cannot index the task for the object since " + e.getMessage(); + if (parser.getIdentifier() != null) { + error = error + " with the identifier " + parser.getIdentifier().getValue(); + } + logger.error(error); boolean requeue = false; rabbitMQchannel.basicReject(envelope.getDeliveryTag(), requeue); } @@ -449,21 +456,22 @@ private void indexObject(IndexQueueMessageParser parser, boolean multipleThread) Identifier pid = parser.getIdentifier(); String indexType = parser.getIndexType(); int priority = parser.getPriority(); + String docId = parser.getDocId();// It can be null. try { long threadId = Thread.currentThread().getId(); logger.info("IndexWorker.consumer.indexObject by multiple thread? " + multipleThread + ", with the thread id " + threadId + " - Received the index task from the index queue with the identifier: " + pid.getValue() + " , the index type: " + indexType - + ", the priority: " + priority); + + ", the priority: " + priority + ", the docId(can be null): " + docId); switch (indexType) { - case CREATE_INDEXT_TYPE -> { + case CREATE_INDEX_TYPE -> { boolean sysmetaOnly = false; - solrIndex.update(pid, sysmetaOnly); + solrIndex.update(pid, sysmetaOnly, docId); } case SYSMETA_CHANGE_TYPE -> { boolean sysmetaOnly = true; - solrIndex.update(pid, sysmetaOnly); + solrIndex.update(pid, sysmetaOnly, docId); } case DELETE_INDEX_TYPE -> solrIndex.remove(pid); default -> throw new InvalidRequest( @@ -482,7 +490,8 @@ private void indexObject(IndexQueueMessageParser parser, boolean multipleThread) ServiceFailure | XPathExpressionException | UnsupportedType | SAXException | ParserConfigurationException | SolrServerException | MarshallingException | EncoderException | InterruptedException | IOException | InstantiationException | - IllegalAccessException e) { + IllegalAccessException | ClassNotFoundException | InvocationTargetException | + NoSuchMethodException e) { logger.error("Cannot index the task for identifier " + pid.getValue() + " since " + e.getMessage(), e); } diff --git a/src/main/java/org/dataone/cn/indexer/SolrIndex.java b/src/main/java/org/dataone/cn/indexer/SolrIndex.java index 2848d237..b2a5075b 100644 --- a/src/main/java/org/dataone/cn/indexer/SolrIndex.java +++ b/src/main/java/org/dataone/cn/indexer/SolrIndex.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.io.InputStream; +import java.lang.reflect.InvocationTargetException; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -21,7 +22,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.solr.client.solrj.SolrServerException; -import org.dataone.cn.indexer.object.ObjectManager; +import org.dataone.cn.indexer.object.ObjectManagerFactory; +import org.dataone.cn.indexer.object.legacystore.LegacyStoreObjManager; import org.dataone.cn.indexer.parser.BaseXPathDocumentSubprocessor; import org.dataone.cn.indexer.parser.IDocumentDeleteSubprocessor; import org.dataone.cn.indexer.parser.IDocumentSubprocessor; @@ -54,9 +56,9 @@ public class SolrIndex { public static final String ID = "id"; private static final String VERSION_CONFLICT = "version conflict"; private static final int VERSION_CONFLICT_MAX_ATTEMPTS = Settings.getConfiguration().getInt( - "index.solr.versionConflict.max.attempts", 25); + "index.solr.versionConflict.max.attempts", 25000); private static final int VERSION_CONFLICT_WAITING = Settings.getConfiguration().getInt( - "index.solr.versionConflict.waiting.time", 500); //milliseconds + "index.solr.versionConflict.waiting.time", 10); //milliseconds private static final List resourceMapFormatIdList = Settings.getConfiguration().getList( "index.resourcemap.namespace"); private static List subprocessors = null; @@ -138,20 +140,28 @@ public void setDeleteSubprocessors( * Generate the index for the given information * @param id the id which will be indexed * @param isSysmetaChangeOnly if this is a change on the system metadata only + * @param docId the docId (file name) of the object. This is only for LegacyObjManager * @return a map of solr doc with ids * @throws IOException * @throws XPathExpressionException - * @throws SolrServerException * @throws EncoderException + * @throws SolrServerException + * @throws ClassNotFoundException + * @throws InvocationTargetException + * @throws NoSuchMethodException + * @throws InstantiationException + * @throws IllegalAccessException */ - private Map process(String id, boolean isSysmetaChangeOnly) - throws IOException, XPathExpressionException, EncoderException, - SolrServerException { + private Map process(String id, boolean isSysmetaChangeOnly, String docId) + throws IOException, XPathExpressionException, EncoderException, SolrServerException, + ClassNotFoundException, InvocationTargetException, NoSuchMethodException, + InstantiationException, IllegalAccessException { log.debug("SolrIndex.process - trying to generate the solr doc object for the pid "+id); long start = System.currentTimeMillis(); Map docs = new HashMap<>(); // Load the System Metadata document - try (InputStream systemMetadataStream = ObjectManager.getInstance().getSystemMetadataStream(id)){ + try (InputStream systemMetadataStream = + ObjectManagerFactory.getObjectManager().getSystemMetadataStream(id)){ docs = systemMetadataProcessor.processDocument(id, docs, systemMetadataStream); } catch (Exception e) { log.error(e.getMessage(), e); @@ -179,7 +189,15 @@ private Map process(String id, boolean isSysmetaChangeOnly) + " for the id " + id); //if the objectPath is null, we should skip the other processes if (!skipOtherProcessor) { - log.debug("SolrIndex.process - Start to use subprocessor list to process " + id); + // The default object id is the identifier of the object (the hashstore case) + String objectID = id; + if (ObjectManagerFactory.getObjectManager() instanceof LegacyStoreObjManager) { + // In the LegacyStoreObjManager class, dataone-indexer uses the docid (which + // always is the file name) to get the object + objectID = docId; + } + log.debug("Start to use subprocessor list to process " + id); + log.debug("The object id for " + id + " is " + objectID); // Determine if subprocessors are available for this ID if (subprocessors != null) { // for each subprocessor loaded from the spring config @@ -188,7 +206,8 @@ private Map process(String id, boolean isSysmetaChangeOnly) if (subprocessor.canProcess(formatId)) { // if so, then extract the additional information from the // document. - try (InputStream dataStream = ObjectManager.getInstance().getObject(id)) { + try (InputStream dataStream = + ObjectManagerFactory.getObjectManager().getObject(objectID)) { // docObject = the resource map document or science // metadata document. // note that resource map processing touches all objects @@ -316,20 +335,26 @@ private void checkParams(Identifier pid) throws InvalidRequest { * Insert the indexes for a document. * @param pid the id of this document * @param isSysmetaChangeOnly if this change is only for systemmetadata + * @param docId the docId (file name) of the object. This is only for LegacyObjManager * @throws IOException * @throws InvalidRequest * @throws XPathExpressionException * @throws SolrServerException * @throws EncoderException + * @throws ClassNotFoundException + * @throws InvocationTargetException + * @throws NoSuchMethodException + * @throws InstantiationException + * @throws IllegalAccessException */ - private void insert(Identifier pid, boolean isSysmetaChangeOnly) - throws IOException, InvalidRequest, - XPathExpressionException, SolrServerException, - EncoderException { + private void insert(Identifier pid, boolean isSysmetaChangeOnly, String docId) + throws IOException, InvalidRequest, XPathExpressionException, SolrServerException, + EncoderException, ClassNotFoundException, InvocationTargetException, NoSuchMethodException, + InstantiationException, IllegalAccessException { checkParams(pid); log.debug("SolrIndex.insert - trying to insert the solrDoc for object "+pid.getValue()); long start = System.currentTimeMillis(); - Map docs = process(pid.getValue(), isSysmetaChangeOnly); + Map docs = process(pid.getValue(), isSysmetaChangeOnly, docId); long end = System.currentTimeMillis(); log.info("SolrIndex.insert - the subprocessor processing time of " + pid.getValue() + " is " + (end-start) + " milliseconds."); @@ -375,34 +400,38 @@ private void insertToIndex(SolrDoc doc) throws SolrServerException, IOException * the index for the doc. * @param pid the identifier of object which will be indexed * @param isSysmetaChangeOnly the flag indicating if the change is system metadata only - * @throws NotFound - * @throws ServiceFailure - * @throws NotImplemented - * @throws NotAuthorized - * @throws InvalidToken - * @throws EncoderException - * @throws MarshallingException - * @throws SolrServerException - * @throws ParserConfigurationException - * @throws SAXException - * @throws UnsupportedType - * @throws XPathExpressionException - * @throws InterruptedException - * @throws IOException + * @param docId the docId (file name) of the object. This is only for LegacyObjManager + * @throws InvalidToken + * @throws NotAuthorized + * @throws NotImplemented + * @throws ServiceFailure + * @throws NotFound + * @throws XPathExpressionException + * @throws UnsupportedType + * @throws SAXException + * @throws ParserConfigurationException + * @throws SolrServerException + * @throws MarshallingException + * @throws EncoderException + * @throws InterruptedException + * @throws IOException * @throws InvalidRequest - * @throws IllegalAccessException - * @throws InstantiationException + * @throws InstantiationException + * @throws IllegalAccessException + * @throws ClassNotFoundException + * @throws InvocationTargetException + * @throws NoSuchMethodException */ - public void update(Identifier pid, boolean isSysmetaChangeOnly) - throws InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, NotFound, - XPathExpressionException, UnsupportedType, SAXException, - ParserConfigurationException, SolrServerException, MarshallingException, - EncoderException, InterruptedException, IOException, InvalidRequest, - InstantiationException, IllegalAccessException { + public void update(Identifier pid, boolean isSysmetaChangeOnly, String docId) + throws InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, NotFound, + XPathExpressionException, UnsupportedType, SAXException, ParserConfigurationException, + SolrServerException, MarshallingException, EncoderException, InterruptedException, + IOException, InvalidRequest, InstantiationException, IllegalAccessException, + ClassNotFoundException, InvocationTargetException, NoSuchMethodException { log.debug("SolrIndex.update - trying to update(insert or remove) solr index of object " + pid.getValue()); try { - insert(pid, isSysmetaChangeOnly); + insert(pid, isSysmetaChangeOnly, docId); } catch (SolrServerException e) { if (e.getMessage().contains(VERSION_CONFLICT) && VERSION_CONFLICT_MAX_ATTEMPTS > 0) { log.info("SolrIndex.update - Indexer grabbed an older version (version conflict) " @@ -412,14 +441,17 @@ public void update(Identifier pid, boolean isSysmetaChangeOnly) for (int i=0; i= (VERSION_CONFLICT_MAX_ATTEMPTS - 1)) { log.error("SolrIndex.update - Indexer grabbed an older version of " + "a solr doc when it processed object " + pid.getValue() diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java index 4a0f1b7b..7f133fd9 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java @@ -1,6 +1,5 @@ package org.dataone.cn.indexer.object; -import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -8,9 +7,9 @@ import java.security.NoSuchAlgorithmException; import org.apache.commons.io.FileUtils; -import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.dataone.client.D1Node; import org.dataone.client.auth.AuthTokenSession; import org.dataone.client.exception.ClientSideException; import org.dataone.client.rest.HttpMultipartRestClient; @@ -20,7 +19,6 @@ import org.dataone.client.v2.impl.MultipartMNode; import org.dataone.configuration.Settings; import org.dataone.exceptions.MarshallingException; -import org.dataone.indexer.storage.Storage; import org.dataone.service.exceptions.InvalidToken; import org.dataone.service.exceptions.NotAuthorized; import org.dataone.service.exceptions.NotFound; @@ -29,7 +27,6 @@ import org.dataone.service.types.v1.Identifier; import org.dataone.service.types.v1.Session; import org.dataone.service.types.v2.SystemMetadata; -import org.dataone.service.util.TypeMarshaller; /** @@ -37,42 +34,18 @@ * @author tao * */ -public class ObjectManager { - private static ObjectManager manager = null; - private static String nodeBaseURL = Settings.getConfiguration().getString("dataone.mn.baseURL"); - private static String DataONEauthToken = null; +public abstract class ObjectManager { + // environmental variables' names + private static final String NODE_BASE_URL_ENV_NAME = "DATAONE_INDEXER_NODE_BASE_URL"; + private static final String TOKEN_ENV_NAME = "DATAONE_INDEXER_AUTH_TOKEN"; + + protected static String nodeBaseURL; + private static String dataONEauthToken = null; private static Log logger = LogFactory.getLog(ObjectManager.class); - private static Storage storage = null; - private static final String TOKEN_VARIABLE_NAME = "DATAONE_AUTH_TOKEN"; private static final String TOKEN_FILE_PATH_PROP_NAME = "dataone.nodeToken.file"; - private static MultipartD1Node d1Node = null; - private static Session session = null; - - static { - try { - refreshD1Node(); - } catch (ServiceFailure e) { - logger.warn("Metacat cannot initialize the d1Node since " + e.getMessage()); - } - storage = Storage.getInstance(); - manager = new ObjectManager(); - } - - - /** - * Private constructor - */ - private ObjectManager() { - } - - /** - * Get an ObjectManager instance through the singleton pattern. - * @return the instance of ObjectManager - */ - public static ObjectManager getInstance() { - return manager; - } + protected static MultipartD1Node d1Node = null; + protected static Session session = null; /** * Get the system metadata for the given id @@ -87,40 +60,9 @@ public static ObjectManager getInstance() { * @throws IOException * @throws NoSuchAlgorithmException */ - public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAuthorized, - NotImplemented, ServiceFailure, NotFound, - NoSuchAlgorithmException, IOException, MarshallingException { - long start = System.currentTimeMillis(); - //try to get the system metadata from the storage system first - InputStream sysmetaInputStream = null; - try { - sysmetaInputStream = storage.retrieveSystemMetadata(id); - long end = System.currentTimeMillis(); - logger.info("Finish getting the system metadata via the file system for the pid " + id - + " and it took " + (end - start) + "milliseconds"); - } catch (FileNotFoundException exception ) { - if (d1Node != null) { - // Metacat can't find the system metadata from the storage system. - // So try to get it from the dataone api - SystemMetadata sysmeta = null; - Identifier identifier = new Identifier(); - identifier.setValue(id); - sysmeta = d1Node.getSystemMetadata(session, identifier); - logger.debug("Finish getting the system metadata via the DataONE API call for the pid " - + id); - if (sysmeta != null) { - ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream(); - TypeMarshaller.marshalTypeToOutputStream(sysmeta, systemMetadataOutputStream); - sysmetaInputStream = - new ByteArrayInputStream(systemMetadataOutputStream.toByteArray()); - } - long end = System.currentTimeMillis(); - logger.info("Finish getting the system metadata via DataONE API for the pid " + id - + " and it took " + (end - start) + "milliseconds"); - } - } - return sysmetaInputStream; - } + public abstract InputStream getSystemMetadataStream(String id) + throws InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, NotFound, + NoSuchAlgorithmException, IOException, MarshallingException; /** * Get the system metadata object for the given identifier @@ -137,30 +79,11 @@ public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAu * @throws MarshallingException * @throws NoSuchAlgorithmException */ - public org.dataone.service.types.v1.SystemMetadata getSystemMetadata(String id) + public abstract org.dataone.service.types.v1.SystemMetadata getSystemMetadata(String id) throws InvalidToken, NotAuthorized, NoSuchAlgorithmException, NotImplemented, ServiceFailure, NotFound, InstantiationException, IllegalAccessException, - IOException, MarshallingException { - org.dataone.service.types.v1.SystemMetadata sysmeta = null; - try (InputStream input = getSystemMetadataStream(id)) { - if (input != null) { - try { - SystemMetadata sysmeta2 = TypeMarshaller - .unmarshalTypeFromStream(SystemMetadata.class, input); - sysmeta = sysmeta2; - } catch (Exception e) { - try (InputStream input2 = getSystemMetadataStream(id)) { - if (input2 != null) { - sysmeta = TypeMarshaller.unmarshalTypeFromStream( - org.dataone.service.types.v1.SystemMetadata.class, input2); - } - } - } - } - } - return sysmeta; - } + IOException, MarshallingException; /** * Get the input stream of the content of the given pid @@ -170,11 +93,11 @@ public org.dataone.service.types.v1.SystemMetadata getSystemMetadata(String id) * @throws FileNotFoundException * @throws NoSuchAlgorithmException * @throws IOException + * @throws NotFound */ - public InputStream getObject(String pid) throws IllegalArgumentException, FileNotFoundException, - NoSuchAlgorithmException, IOException { - return storage.retrieveObject(pid); - } + public abstract InputStream getObject(String pid) + throws IllegalArgumentException, FileNotFoundException, NoSuchAlgorithmException, + IOException, NotFound; /** * Set the d1 node for this object manager. @@ -189,41 +112,60 @@ public static void setD1Node(MultipartD1Node node) { * In case the token expired, the method will retrieve the token and create a new d1 node * @throws ServiceFailure */ - private static void refreshD1Node() throws ServiceFailure { - //get the token - DataONEauthToken = System.getenv(TOKEN_VARIABLE_NAME); - if (DataONEauthToken == null || DataONEauthToken.trim().equals("")) { - //can't get the token from the env variable. So try to get it from a file specified in the property + public static void refreshD1Node() throws ServiceFailure { + nodeBaseURL = System.getenv(NODE_BASE_URL_ENV_NAME); + logger.debug("The node base url from env variable is " + nodeBaseURL); + if (nodeBaseURL == null || nodeBaseURL.isBlank()) { + nodeBaseURL = Settings.getConfiguration().getString("dataone.mn.baseURL"); + logger.debug("The node base url from the properties file is " + nodeBaseURL); + } + //get the token + dataONEauthToken = System.getenv(TOKEN_ENV_NAME); + if (dataONEauthToken == null || dataONEauthToken.isBlank()) { + //can't get the token from the env variable. So try to get it from a file specified + // in the property String tokenFilePath = Settings.getConfiguration().getString(TOKEN_FILE_PATH_PROP_NAME); if (tokenFilePath != null && !tokenFilePath.trim().equals("")) { - logger.info("ObjectManager.refreshD1Node - We can't get the token from the env variable so try to get the auth token from the file " + tokenFilePath); + logger.info( + "Can NOT get the token from the env variable so try to get the auth token " + + "from the file " + tokenFilePath); try { - DataONEauthToken = FileUtils.readFileToString(new File(tokenFilePath), "UTF-8"); + dataONEauthToken = FileUtils.readFileToString(new File(tokenFilePath), "UTF-8"); } catch (IOException e) { - DataONEauthToken = null; - logger.warn("ObjectManager.refreshD1Node - can NOT get the authen token from the file " + tokenFilePath + " since " + e.getMessage()); + dataONEauthToken = null; + logger.warn("Can NOT get the auth token from the file " + tokenFilePath + + " since " + e.getMessage()); } - if (DataONEauthToken != null && !DataONEauthToken.trim().equals("")) { - logger.info("ObjectManager.refreshD1Node - Got the auth token from the file "+ tokenFilePath); + if (dataONEauthToken != null && !dataONEauthToken.isBlank()) { + logger.info("Got the auth token from the file "+ tokenFilePath); } } } else { - logger.info("ObjectManager.refreshD1Node - Got the auth token from an env. variable"); + logger.info("Got the auth token from an env. variable"); } - - if (DataONEauthToken == null || DataONEauthToken.trim().equals("")) { - logger.warn("ObjectManager.refreshD1Node ------ Could NOT get an auth token from either an env. variable or the properties file. So it will act as the public user."); + if (dataONEauthToken == null || dataONEauthToken.isBlank()) { + String message = + "Could NOT get an auth token from either an env. variable or the properties file" + + ".So it will act as the public user."; + String className = ObjectManagerFactory.getObjManagerClassNameFromEnv(); + if (className != null && className.equals("org.dataone.cn.indexer.object.legacystore" + + ".LegacyStoreObjManager")) { + logger.error(message); + } else { + logger.warn(message); + } } - session = createSession(DataONEauthToken); - logger.info("ObjectManager.refreshD1Node ------ going to create the d1node with url " + nodeBaseURL); + session = createSession(dataONEauthToken); + logger.info("Going to create the d1node with url " + nodeBaseURL); try { d1Node = getMultipartD1Node(session, nodeBaseURL); } catch (IOException | ClientSideException e) { - logger.error("ObjectManager.refreshD1Node - couldn't create the d1node for the url " + nodeBaseURL + " since " + e.getMessage()); + logger.error("Couldn't create the d1node for the url " + nodeBaseURL + " since " + + e.getMessage()); throw new ServiceFailure("0000", e.getMessage()); } } - + /** * Get a DataONE authenticated session *

@@ -238,12 +180,29 @@ private static Session createSession(String authToken) { logger.info("ObjectManager.createSession - Creating the public session"); session = new Session(); } else { - logger.info("ObjectManger.createSession - Creating authentication session from token: " + authToken.substring(0, 5) + "..."); + logger.info("Creating authentication session from token: " + authToken.substring(0, 5) + + "..."); session = new AuthTokenSession(authToken); } return session; } - + + /** + * Only for testing + * @return + */ + protected static String getDataONEauthToken() { + return dataONEauthToken; + } + + /** + * Only for testing + * @return + */ + protected static D1Node getD1Node() { + return d1Node; + } + /** * Get a DataONE MultipartCNode object, which will be used to communication with a CN * @@ -253,7 +212,8 @@ private static Session createSession(String authToken) { * @throws ClientSideException * @throws IOException */ - private static MultipartD1Node getMultipartD1Node(Session session, String serviceUrl) throws IOException, ClientSideException { + private static MultipartD1Node getMultipartD1Node(Session session, String serviceUrl) + throws IOException, ClientSideException { MultipartRestClient mrc = null; MultipartD1Node d1Node = null; // First create a default HTTP client @@ -262,10 +222,10 @@ private static MultipartD1Node getMultipartD1Node(Session session, String servic Boolean isCN = isCN(serviceUrl); // Now create a DataONE object that uses the rest client if (isCN) { - logger.info("ObjectManager.getMultipartD1Node - creating cn MultipartMNode from the url " + serviceUrl); + logger.info("Creating cn MultipartMNode from the url " + serviceUrl); d1Node = new MultipartCNode(mrc, serviceUrl, session); } else { - logger.info("ObjectManager.getMultipartD1Node - creating mn MultipartMNode from the url " + serviceUrl); + logger.info("Creating mn MultipartMNode from the url " + serviceUrl); d1Node = new MultipartMNode(mrc, serviceUrl, session); } return d1Node; @@ -276,7 +236,7 @@ private static MultipartD1Node getMultipartD1Node(Session session, String servic * @param nodeStr either a DataONE node serviceURL (e.g. https://knb.ecoinformatics.org/knb/d1/mn) * or a DataONE node identifier (e.g. urn:node:CN) */ - private static Boolean isCN(String nodeStr) { + protected static Boolean isCN(String nodeStr) { Boolean isCN = false; // match node urn, e.g. "https://cn.dataone.org/cn" if (nodeStr.matches("^\\s*urn:node:.*")) { @@ -300,4 +260,18 @@ private static Boolean isCN(String nodeStr) { return isCN; } + protected static SystemMetadata getSystemMetadataByAPI(String id) + throws ServiceFailure, InvalidToken, NotImplemented, NotAuthorized, NotFound { + if (d1Node != null) { + // Metacat can't find the system metadata from the storage system. + // So try to get it from the dataone api + Identifier identifier = new Identifier(); + identifier.setValue(id); + return d1Node.getSystemMetadata(session, identifier); + } else { + throw new ServiceFailure("0000", "The d1Node is null and Indexer cannot get the " + + "systemmetadata by a API call."); + } + } + } diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java new file mode 100644 index 00000000..85a3af35 --- /dev/null +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java @@ -0,0 +1,72 @@ +package org.dataone.cn.indexer.object; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import java.lang.reflect.InvocationTargetException; + +/** + * Class to create a concrete ObjectManager Object + * @author Tao + */ +public class ObjectManagerFactory { + // environmental variables' names + private static final String OBJECT_MANAGER_CLASS_NAME_ENV_NAME = + "DATAONE_INDEXER_OBJECT_MANAGER_CLASS_NAME"; + + private static volatile ObjectManager manager = null; + private static final String DEFAULT_ClASS_NAME = "org.dataone.cn.indexer.object.hashstore" + + ".HashStoreObjManager"; + private static Log logger = LogFactory.getLog(ObjectManagerFactory.class); + + + /** + * Create a Concrete ObjectManager object by the single pattern. + * First, Indexer will check if the env variable of DATAONE_INDEXER_OBJECT_MANAGER_CLASSNAME + * is defined. If it is defined, indexer will use it; otherwise it uses the default one - + * org.dataone.cn.indexer.object.hashstore.HashStoreObjManager + * @return an ObjectManager object + * @throws ClassNotFoundException + * @throws NoSuchMethodException + * @throws InvocationTargetException + * @throws InstantiationException + * @throws IllegalAccessException + */ + public static ObjectManager getObjectManager() + throws ClassNotFoundException, NoSuchMethodException, InvocationTargetException, + InstantiationException, IllegalAccessException { + String classNameFromEnv = getObjManagerClassNameFromEnv(); + String className = DEFAULT_ClASS_NAME; + if (classNameFromEnv != null && !classNameFromEnv.isBlank()) { + logger.debug("The ObjectManager class name form env variable " + + OBJECT_MANAGER_CLASS_NAME_ENV_NAME + " is " + classNameFromEnv); + className = classNameFromEnv; + } + if (manager == null) { + synchronized (ObjectManagerFactory.class) { + if (manager == null) { + logger.info("The final ObjectManager class name from env variable is " + + classNameFromEnv); + Class managerClass = Class.forName(className); + manager = (ObjectManager) managerClass.getDeclaredConstructor().newInstance(); + } + } + } + return manager; + } + + /** + * Get the object manager class name from the env variable. + * @return the class name. It can be null if the env variable isn't set. + */ + protected static String getObjManagerClassNameFromEnv() { + return System.getenv(OBJECT_MANAGER_CLASS_NAME_ENV_NAME); + } + + /** + * This method is for testing only + */ + protected static void resetManagerNull() { + manager = null; + } +} diff --git a/src/main/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManager.java b/src/main/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManager.java new file mode 100644 index 00000000..ba650913 --- /dev/null +++ b/src/main/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManager.java @@ -0,0 +1,111 @@ +package org.dataone.cn.indexer.object.hashstore; + +import org.apache.commons.io.output.ByteArrayOutputStream; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dataone.cn.indexer.object.ObjectManager; +import org.dataone.exceptions.MarshallingException; +import org.dataone.indexer.storage.Storage; +import org.dataone.service.exceptions.InvalidToken; +import org.dataone.service.exceptions.NotAuthorized; +import org.dataone.service.exceptions.NotFound; +import org.dataone.service.exceptions.NotImplemented; +import org.dataone.service.exceptions.ServiceFailure; +import org.dataone.service.types.v2.SystemMetadata; +import org.dataone.service.util.TypeMarshaller; + +import java.io.ByteArrayInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.security.NoSuchAlgorithmException; + +/** + * Implementation of ObjectManager based on a hash store + * @author Tao + */ +public class HashStoreObjManager extends ObjectManager { + private static Storage storage = null; + private static Log logger = LogFactory.getLog(ObjectManager.class); + static { + try { + refreshD1Node(); + } catch (ServiceFailure e) { + logger.warn("Metacat cannot initialize the d1Node since " + e.getMessage()); + } + storage = Storage.getInstance(); + } + + /** + * Constructor + */ + public HashStoreObjManager() { + + } + + @Override + public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAuthorized, + NotImplemented, ServiceFailure, NotFound, NoSuchAlgorithmException, IOException, + MarshallingException { + long start = System.currentTimeMillis(); + //try to get the system metadata from the storage system first + InputStream sysmetaInputStream = null; + try { + sysmetaInputStream = storage.retrieveSystemMetadata(id); + long end = System.currentTimeMillis(); + logger.info("Finish getting the system metadata via the file system for the pid " + id + + " and it took " + (end - start) + "milliseconds"); + } catch (FileNotFoundException exception ) { + if (d1Node != null) { + // Metacat can't find the system metadata from the storage system. + // So try to get it from the dataone api + SystemMetadata sysmeta = getSystemMetadataByAPI(id); + logger.debug("Finish getting the system metadata via the DataONE API call for the" + + " pid " + id); + if (sysmeta != null) { + ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream(); + TypeMarshaller.marshalTypeToOutputStream(sysmeta, systemMetadataOutputStream); + sysmetaInputStream = + new ByteArrayInputStream(systemMetadataOutputStream.toByteArray()); + } + long end = System.currentTimeMillis(); + logger.info("Finish getting the system metadata via DataONE API for the pid " + id + + " and it took " + (end - start) + "milliseconds"); + } + } + return sysmetaInputStream; + } + + @Override + public org.dataone.service.types.v1.SystemMetadata getSystemMetadata(String id) + throws InvalidToken, NotAuthorized, NoSuchAlgorithmException, + NotImplemented, ServiceFailure, NotFound, + InstantiationException, IllegalAccessException, + IOException, MarshallingException { + org.dataone.service.types.v1.SystemMetadata sysmeta = null; + try (InputStream input = getSystemMetadataStream(id)) { + if (input != null) { + try { + SystemMetadata sysmeta2 = TypeMarshaller + .unmarshalTypeFromStream(SystemMetadata.class, input); + sysmeta = sysmeta2; + } catch (Exception e) { + try (InputStream input2 = getSystemMetadataStream(id)) { + if (input2 != null) { + sysmeta = TypeMarshaller.unmarshalTypeFromStream( + org.dataone.service.types.v1.SystemMetadata.class, input2); + } + } + } + } + } + return sysmeta; + } + + @Override + public InputStream getObject(String pid) + throws IllegalArgumentException, NoSuchAlgorithmException, IOException { + return storage.retrieveObject(pid); + } + +} diff --git a/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java b/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java new file mode 100644 index 00000000..4201348e --- /dev/null +++ b/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java @@ -0,0 +1,227 @@ +package org.dataone.cn.indexer.object.legacystore; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.FileSystems; +import java.nio.file.Files; + +import org.apache.commons.io.output.ByteArrayOutputStream; +import org.apache.log4j.Logger; +import org.dataone.client.v2.formats.ObjectFormatCache; +import org.dataone.cn.indexer.object.ObjectManager; +import org.dataone.configuration.Settings; +import org.dataone.exceptions.MarshallingException; +import org.dataone.service.exceptions.InvalidToken; +import org.dataone.service.exceptions.NotAuthorized; +import org.dataone.service.exceptions.NotFound; +import org.dataone.service.exceptions.NotImplemented; +import org.dataone.service.exceptions.ServiceFailure; +import org.dataone.service.types.v2.ObjectFormat; +import org.dataone.service.types.v2.SystemMetadata; +import org.dataone.service.util.TypeMarshaller; + +/** + * The class to get objects and system metadata from Metacat legacy store + * @author Tao + */ +public class LegacyStoreObjManager extends ObjectManager { + // environmental variables' names + private static final String DATA_ROOT_DIR_ENV_NAME = "DATAONE_INDEXER_METACAT_DATA_ROOT_DIR"; + private static final String DOCUMENT_ROOT_DIR_ENV_NAME = + "DATAONE_INDEXER_METACAT_DOCUMENT_ROOT_DIR"; + + private static final String DATA_ROOT_DIR_PROPERTY_NAME = "index.data.root.directory"; + private static final String DOCUMENT_ROOT_DIR_PROPERTY_NAME = "index.document.root.directory"; + private static String dataRootDir; + private static String documentRootDir; + private static Logger logger = Logger.getLogger(LegacyStoreObjManager.class); + + private static boolean ifDataAndDocRootSame = false; + + /** + * Constructor + * Read the Metacat legacy data and document directories from the environmental variables and + * the properties file. The values in the environmental variables overwrite the properties ones. + * @throws ServiceFailure + */ + public LegacyStoreObjManager() throws ServiceFailure { + refreshD1Node(); + dataRootDir = System.getenv(DATA_ROOT_DIR_ENV_NAME); + logger.debug("The data root dir from env " + DATA_ROOT_DIR_ENV_NAME + " is " + dataRootDir); + if (dataRootDir == null || dataRootDir.isBlank()) { + dataRootDir = Settings.getConfiguration().getString(DATA_ROOT_DIR_PROPERTY_NAME); + logger.debug("The data root dir from the properties is " + dataRootDir); + } + if (dataRootDir == null || dataRootDir.isBlank()) { + throw new ServiceFailure("0000", + "The data root directory specified by the env " + "variable " + + DATA_ROOT_DIR_ENV_NAME + " or the property " + + DATA_ROOT_DIR_PROPERTY_NAME + + " in the properties file is null/blank"); + } + documentRootDir = System.getenv(DOCUMENT_ROOT_DIR_ENV_NAME); + logger.debug("The document root dir from env " + DOCUMENT_ROOT_DIR_ENV_NAME + " is " + + documentRootDir); + if (documentRootDir == null || documentRootDir.isBlank()) { + documentRootDir = + Settings.getConfiguration().getString(DOCUMENT_ROOT_DIR_PROPERTY_NAME); + logger.debug("The document root dir from the properties is " + documentRootDir); + } + if (documentRootDir == null || documentRootDir.isBlank()) { + throw new ServiceFailure("0000", + "The document root directory specified by the env variable " + + DOCUMENT_ROOT_DIR_ENV_NAME + " or the property " + + DOCUMENT_ROOT_DIR_PROPERTY_NAME + + " in the properties file is blank."); + } + if (!Files.exists(FileSystems.getDefault().getPath(dataRootDir))) { + throw new ServiceFailure("0000", "The data root directory " + dataRootDir + + " specified in the env variable or the properties file doesn't exist"); + } + if (!Files.exists(FileSystems.getDefault().getPath(documentRootDir))) { + throw new ServiceFailure("0000", "The document root directory " + documentRootDir + + " specified in the env variable or the properties file doesn't exist"); + } + if (!dataRootDir.endsWith("/")) { + dataRootDir = dataRootDir + "/"; + } + if (!documentRootDir.endsWith("/")) { + documentRootDir = documentRootDir + "/"; + } + + if (documentRootDir.equals(dataRootDir)) { + ifDataAndDocRootSame = true; + } + logger.info( + "The root document directory is " + documentRootDir + " and the root data directory is " + + dataRootDir + " Are they same?" + ifDataAndDocRootSame); + + } + + + @Override + public InputStream getObject(String pid) + throws IllegalArgumentException, IOException, NotFound { + File object = new File(documentRootDir + pid); + if (!object.exists()) { + object = new File(dataRootDir + pid); + if (object.exists()) { + return new FileInputStream(object); + } else { + throw new FileNotFoundException( + "Neither " + documentRootDir + " nor " + dataRootDir + " have the docid " + + pid); + } + } else { + return new FileInputStream(object); + } + + } + + /** + * Get the absolute file path for a given relative path. If the relativePath is null or blank, + * null will be returned + * @param relativePath + * @param objectFormat + * @return the absolute file path + * @throws NotFound + */ + private String getFilePath(String relativePath, String objectFormat) throws NotFound { + String absolutePath = null; + if (relativePath != null && !relativePath.isBlank()) { + if (ifDataAndDocRootSame) { + absolutePath = documentRootDir + relativePath; + } else if (objectFormat != null && !objectFormat.isBlank()) { + ObjectFormat format =ObjectFormatCache.getInstance().getFormat(objectFormat); + if (format.getFormatType().equals("METADATA")) { + absolutePath = documentRootDir + relativePath; + } else { + absolutePath = dataRootDir + relativePath; + } + } + } + logger.debug("The absolute file path for the relative file path " + + relativePath + " is " + absolutePath); + return absolutePath; + } + + @Override + public org.dataone.service.types.v1.SystemMetadata getSystemMetadata(String id) + throws InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, NotFound { + SystemMetadata sysmeta = null; + long start = System.currentTimeMillis(); + try { + for (int i=0; i<5; i++) { + try { + sysmeta = getSystemMetadataByAPI(id); + break; + } catch (ServiceFailure ee) { + logger.warn("The DataONE api call doesn't get the system metadata since " + + ee.getMessage() + ". This is " + i + + " try and Indexer will try again."); + try { + Thread.sleep(300); + } catch (InterruptedException ie) { + logger.info("The sleep of the thread was interrupted."); + } + } + } + logger.debug("Finish getting the system metadata via the DataONE API call for the pid " + + id); + } catch (NotAuthorized e) { + logger.info( + "Failed to get the system metadata via the DataONE API call for the pid " + + id + " since it is not authorized. We will refresh the token and try again"); + refreshD1Node(); + sysmeta = getSystemMetadataByAPI(id); + } + long end = System.currentTimeMillis(); + logger.info( + "Finish getting the system metadata via DataONE API for the pid " + id + " and it took " + + (end - start) + "milliseconds"); + + return sysmeta; + } + + @Override + public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAuthorized, + NotImplemented, ServiceFailure, NotFound, IOException, MarshallingException { + long start = System.currentTimeMillis(); + //try to get the system metadata from the storage system first + InputStream sysmetaInputStream = null; + SystemMetadata sysmeta = (SystemMetadata) getSystemMetadata(id); + logger.debug("Finish getting the system metadata via the DataONE API call for the" + + " pid " + id); + if (sysmeta != null) { + ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream(); + TypeMarshaller.marshalTypeToOutputStream(sysmeta, systemMetadataOutputStream); + sysmetaInputStream = + new ByteArrayInputStream(systemMetadataOutputStream.toByteArray()); + } + long end = System.currentTimeMillis(); + logger.info("Finish getting the system metadata via DataONE API for the pid " + id + + " and it took " + (end - start) + "milliseconds"); + return sysmetaInputStream; + } + + /** + * For testing only + * @return the data root dir + */ + protected String getDataRootDir() { + return dataRootDir; + } + + /** + * For testing only + * @return the document root dir + */ + protected String getDocumentRootDir() { + return documentRootDir; + } + +} diff --git a/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java b/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java index bbe4b59d..5b8892d7 100644 --- a/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java +++ b/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java @@ -6,6 +6,7 @@ import java.io.InputStream; import java.io.StringReader; import java.io.UnsupportedEncodingException; +import java.lang.reflect.InvocationTargetException; import java.net.URISyntaxException; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; @@ -19,7 +20,7 @@ import org.apache.commons.io.input.ReaderInputStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.dataone.cn.indexer.object.ObjectManager; +import org.dataone.cn.indexer.object.ObjectManagerFactory; import org.dataone.cn.indexer.parser.utility.SeriesIdResolver; import org.dataone.cn.indexer.solrhttp.SolrDoc; import org.dataone.cn.indexer.solrhttp.SolrElementField; @@ -232,14 +233,14 @@ private boolean isHeadVersion(Identifier pid, Identifier sid) { } private SolrDoc _mergeMappedReference(ResourceEntry resourceEntry, SolrDoc mergeDocument) - throws InvalidToken, NotAuthorized, NotImplemented, - NoSuchAlgorithmException, ServiceFailure, NotFound, InstantiationException, - IllegalAccessException, IOException, MarshallingException { + throws InvalidToken, NotAuthorized, NotImplemented, NoSuchAlgorithmException, ServiceFailure, + NotFound, InstantiationException, IllegalAccessException, IOException, MarshallingException, + ClassNotFoundException, InvocationTargetException, NoSuchMethodException { Identifier identifier = new Identifier(); identifier.setValue(mergeDocument.getIdentifier()); try { - SystemMetadata sysMeta = (SystemMetadata) ObjectManager.getInstance() + SystemMetadata sysMeta = (SystemMetadata) ObjectManagerFactory.getObjectManager() .getSystemMetadata(identifier.getValue()); if (sysMeta.getSeriesId() != null && sysMeta.getSeriesId().getValue() != null && !sysMeta.getSeriesId().getValue().trim().equals("")) { diff --git a/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java b/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java index a105731f..c5e60b30 100644 --- a/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java +++ b/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java @@ -1,11 +1,12 @@ package org.dataone.cn.indexer.resourcemap; import java.io.IOException; +import java.lang.reflect.InvocationTargetException; import java.security.NoSuchAlgorithmException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.dataone.cn.indexer.object.ObjectManager; +import org.dataone.cn.indexer.object.ObjectManagerFactory; import org.dataone.cn.indexer.solrhttp.SolrDoc; import org.dataone.exceptions.MarshallingException; import org.dataone.service.exceptions.InvalidToken; @@ -27,7 +28,7 @@ public IndexVisibilityDelegateImpl() { public boolean isDocumentVisible(Identifier pid) { boolean visible = false; try { - SystemMetadata systemMetadata = ObjectManager.getInstance() + SystemMetadata systemMetadata = ObjectManagerFactory.getObjectManager() .getSystemMetadata(pid.getValue()); // TODO: Is pid Identifier a SID? if (systemMetadata == null) { @@ -36,28 +37,12 @@ public boolean isDocumentVisible(Identifier pid) { if (SolrDoc.visibleInIndex(systemMetadata)) { visible = true; } - } catch (NullPointerException npe) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " + npe.getMessage()); - } catch (InvalidToken e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (NotAuthorized e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " + e.getMessage()); - } catch (NotImplemented e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (ServiceFailure e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (NotFound e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (InstantiationException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (IllegalAccessException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (IOException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (MarshallingException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (NoSuchAlgorithmException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); + } catch (NullPointerException | InvalidToken | NotAuthorized | NotImplemented | + ServiceFailure | NotFound | InstantiationException | IllegalAccessException | + IOException | MarshallingException | NoSuchAlgorithmException | + ClassNotFoundException | InvocationTargetException | NoSuchMethodException e) { + logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " + + e.getMessage()); } return visible; } @@ -65,35 +50,20 @@ public boolean isDocumentVisible(Identifier pid) { public boolean documentExists(Identifier pid) { boolean exists = false; try { - SystemMetadata systemMetadata = ObjectManager.getInstance().getSystemMetadata(pid.getValue()); + SystemMetadata systemMetadata = + ObjectManagerFactory.getObjectManager().getSystemMetadata(pid.getValue()); if (systemMetadata != null) { exists = true; } else { // TODO: Is pid Identifier a SID? return true; } - } catch (NullPointerException npe) { - logger.warn("Could not get visible value for pid: " + pid.getValue()); - } catch (InvalidToken e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (NotAuthorized e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (NotImplemented e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (ServiceFailure e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (NotFound e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (InstantiationException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (IllegalAccessException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (IOException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (MarshallingException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (NoSuchAlgorithmException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); + } catch (NullPointerException | InvalidToken | NotAuthorized | NotImplemented | + ServiceFailure | NotFound | InstantiationException | IllegalAccessException | + IOException | MarshallingException | NoSuchAlgorithmException | + ClassNotFoundException | InvocationTargetException | NoSuchMethodException e) { + logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " + + e.getMessage()); } return exists; } diff --git a/src/main/java/org/dataone/cn/indexer/solrhttp/HTTPService.java b/src/main/java/org/dataone/cn/indexer/solrhttp/HTTPService.java index 55fb59a1..4ed63cea 100644 --- a/src/main/java/org/dataone/cn/indexer/solrhttp/HTTPService.java +++ b/src/main/java/org/dataone/cn/indexer/solrhttp/HTTPService.java @@ -125,7 +125,6 @@ public void sendUpdate(String uri, SolrElementAdd data, String encoding, String ByteArrayOutputStream baosResponse = new ByteArrayOutputStream(); org.apache.commons.io.IOUtils.copy(inputStreamResponse, baosResponse); String error = new String(baosResponse.toByteArray()); - log.error(error); post.abort(); throw new SolrServerException("unable to update solr, non 200 response code." + error); } diff --git a/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java b/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java index ea28e279..0aaf2f99 100644 --- a/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java +++ b/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java @@ -21,11 +21,13 @@ public class IndexQueueMessageParser { private final static String HEADER_ID = "id"; //The header name in the message to store the index type private final static String HEADER_INDEX_TYPE = "index_type"; - + //The header name in the message to store the docid of the object + private final static String HEADER_DOCID = "doc_id"; private Identifier identifier = null; private String indexType = null; private int priority = 1; - + private String docId = null; + private static Log logger = LogFactory.getLog(IndexQueueMessageParser.class); /** @@ -46,11 +48,13 @@ public void parse(AMQP.BasicProperties properties, byte[] body) throws InvalidRe } Object pidObj = headers.get(HEADER_ID); if (pidObj == null) { - throw new InvalidRequest("0000", "The identifier cannot be null in the index queue message."); + throw new InvalidRequest( + "0000", "The identifier cannot be null in the index queue message."); } String pid = ((LongString)pidObj).toString(); - if (pid == null || pid.trim().equals("")) { - throw new InvalidRequest("0000", "The identifier cannot be null or blank in the index queue message."); + if (pid == null || pid.isBlank()) { + throw new InvalidRequest( + "0000", "The identifier cannot be null or blank in the index queue message."); } logger.debug("IndexQueueMessageParser.parse - the identifier in the message is " + pid); identifier = new Identifier(); @@ -58,21 +62,35 @@ public void parse(AMQP.BasicProperties properties, byte[] body) throws InvalidRe Object typeObj = headers.get(HEADER_INDEX_TYPE); if (typeObj == null) { - throw new InvalidRequest("0000", "The index type cannot be null in the index queue message."); + throw new InvalidRequest( + "0000", "The index type cannot be null in the index queue message for " + pid); } indexType = ((LongString)typeObj).toString(); - if (indexType == null || indexType.trim().equals("")) { - throw new InvalidRequest("0000", "The index type cannot be null or blank in the index queue message."); + if (indexType == null || indexType.isBlank()) { + throw new InvalidRequest( + "0000", + "The index type cannot be null or blank in the index queue message for " + pid); + } + logger.debug("The index type in the message is " + indexType + " for " + pid); + Object docIdObject = headers.get(HEADER_DOCID); + if (docIdObject != null) { + docId = ((LongString)docIdObject).toString(); } - logger.debug("IndexQueueMessageParser.parse - the index type in the message is " + indexType); + logger.debug( + "The docId of the object which will be indexed in the message is " + docId + + " for " + pid); try { priority = properties.getPriority(); } catch (NullPointerException e) { - logger.info("IndexQueueMessageParser.parse - the priority is not set in the message and we will set it one."); - priority =1; + logger.info( + "IndexQueueMessageParser.parse - the priority is not set in the message and we " + + "will set it to 1."); + priority = 1; } - logger.debug("IndexQueueMessageParser.parse - the priority in the message is " + priority); + logger.debug( + "IndexQueueMessageParser.parse - the priority in the message is " + priority + " for " + + pid); } /** @@ -99,4 +117,14 @@ public int getPriority() { return priority; } + /** + * Get the docId of the object, which will be indexed, + * after calling the parse method to parse the index queue message. + * @return the docId of the object. DocId is an iternal id of Metacat, which is a file name in + * the system. It can be null or blank, which means we don't have the object in the system. + */ + public String getDocId() { + return docId; + } + } diff --git a/src/main/resources/org/dataone/configuration/index-processor.properties b/src/main/resources/org/dataone/configuration/index-processor.properties index 88aec825..de37d2be 100644 --- a/src/main/resources/org/dataone/configuration/index-processor.properties +++ b/src/main/resources/org/dataone/configuration/index-processor.properties @@ -28,7 +28,7 @@ index.resourcemap.waitingComponent.time=600 #The number of the attempts that the resource map processor tries to wait for the solr doc readiness of its components index.resourcemap.waitingComponent.max.attempts=15 #The time (millisecond) that indexer will wait to grab a newer version of solr doc when a version conflict happened -index.solr.versionConflict.waiting.time=1000 +index.solr.versionConflict.waiting.time=10 #The number of the attempts that indexer tries to grab a newer version of solr doc when a version conflict happened index.solr.versionConflict.max.attempts=25000 diff --git a/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java b/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java index 80cdeb0f..8a181c2f 100644 --- a/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java +++ b/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java @@ -94,7 +94,8 @@ protected void indexObjectToSolr(String identifier, Resource objectFile) throws } Identifier pid = new Identifier(); pid.setValue(identifier); - solrIndexService.update(pid, isSysmetaChangeOnly); + //null is the value for docId + solrIndexService.update(pid, isSysmetaChangeOnly, null); } /** diff --git a/src/test/java/org/dataone/cn/indexer/IndexWorkerIT.java b/src/test/java/org/dataone/cn/indexer/IndexWorkerIT.java index 9a92a6d7..3f964a24 100644 --- a/src/test/java/org/dataone/cn/indexer/IndexWorkerIT.java +++ b/src/test/java/org/dataone/cn/indexer/IndexWorkerIT.java @@ -36,8 +36,7 @@ public class IndexWorkerIT { * @throws InterruptedException */ @Test - public void testRestoreRabbitMQConnectionAndChannel() - throws ServiceFailure, IOException, TimeoutException, InterruptedException { + public void testRestoreRabbitMQConnectionAndChannel() throws Exception { IndexWorker worker = new IndexWorker(); worker.start(); Connection connection = worker.getRabbitMQconnection(); diff --git a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerFactoryTest.java b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerFactoryTest.java new file mode 100644 index 00000000..24db829f --- /dev/null +++ b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerFactoryTest.java @@ -0,0 +1,64 @@ +package org.dataone.cn.indexer.object; + +import org.dataone.cn.indexer.object.hashstore.HashStoreObjManager; +import org.dataone.cn.indexer.object.legacystore.LegacyStoreObjManager; +import org.junit.Rule; +import org.junit.Test; +import uk.org.webcompere.systemstubs.rules.EnvironmentVariablesRule; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +/** + * A Junit test class for ObjectManagerFactory + * @author Tao + */ +public class ObjectManagerFactoryTest { + + private static final String envName = "DATAONE_INDEXER_OBJECT_MANAGER_CLASS_NAME"; + @Rule + public EnvironmentVariablesRule environmentVariablesClassName = + new EnvironmentVariablesRule(envName, null); + /** + * Test to create a HashStoreObjManager instance + * @throws Exception + */ + @Test + public void testHashStoreObjManager() throws Exception { + environmentVariablesClassName.set(envName, null); + ObjectManagerFactory.resetManagerNull(); + ObjectManager manager = ObjectManagerFactory.getObjectManager(); + assertTrue(manager instanceof HashStoreObjManager); + } + + /** + * Test to create a LegacyStroeObjectManager instance + * @throws Exception + */ + @Test + public void testLegacyStoreObjManager() throws Exception { + environmentVariablesClassName.set( + envName, "org.dataone.cn.indexer.object.legacystore.LegacyStoreObjManager"); + ObjectManagerFactory.resetManagerNull(); + ObjectManager manager = ObjectManagerFactory.getObjectManager(); + assertTrue(manager instanceof LegacyStoreObjManager); + environmentVariablesClassName.set(envName, null); + } + + /** + * Test the failure with a wrong class name + * @throws Exception + */ + @Test + public void testWrongClassName() throws Exception { + environmentVariablesClassName.set(envName, "foo.foo1.className"); + ObjectManagerFactory.resetManagerNull(); + try { + ObjectManager manager = ObjectManagerFactory.getObjectManager(); + fail("Test shouldn't get here since the class doesn't exist with the given name."); + } catch (Exception e) { + assertTrue( e instanceof ClassNotFoundException); + } + environmentVariablesClassName.set(envName, null); + } +} diff --git a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java index 120286b9..615ae2bc 100644 --- a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java +++ b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java @@ -1,88 +1,77 @@ package org.dataone.cn.indexer.object; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; - -import java.io.InputStream; -import java.io.OutputStream; -import java.security.MessageDigest; - -import javax.xml.bind.DatatypeConverter; - - -import org.dataone.indexer.storage.Storage; -import org.dataone.service.types.v1.Identifier; +import org.dataone.configuration.Settings; import org.dataone.service.types.v2.SystemMetadata; -import org.dataone.service.util.TypeMarshaller; import org.junit.Before; +import org.junit.Rule; import org.junit.Test; +import uk.org.webcompere.systemstubs.rules.EnvironmentVariablesRule; /** - * A junit test class for the ObjecManager class. + * A junit test class for the ObjectManager class. * @author tao * */ public class ObjectManagerTest { + public static final String NODE_BASE_URL_ENV_NAME = "DATAONE_INDEXER_NODE_BASE_URL"; + private static final String TOKEN_ENV_NAME = "DATAONE_INDEXER_AUTH_TOKEN"; - private String identifier; + @Rule + public EnvironmentVariablesRule environmentVariables = + new EnvironmentVariablesRule(TOKEN_ENV_NAME, null); @Before public void setUp() throws Exception { - identifier = "ObjectManagerTest-" + System.currentTimeMillis(); - File objectFile = new File("src/test/resources/org/dataone/cn/index/resources/d1_testdocs/" - + "fgdc/nasa_d_FEDGPS1293.xml"); - try (InputStream object = new FileInputStream(objectFile)) { - Storage.getInstance().storeObject(object, identifier); - } - File sysmetaFile = new File("src/test/resources/org/dataone/cn/index/resources/" - + "d1_testdocs/fgdc/nasa_d_FEDGPS1293Sysmeta.xml"); - try (InputStream sysmetaStream = new FileInputStream(sysmetaFile)) { - SystemMetadata sysmeta = TypeMarshaller - .unmarshalTypeFromStream(SystemMetadata.class, sysmetaStream); - Identifier pid = new Identifier(); - pid.setValue(identifier); - sysmeta.setIdentifier(pid); - try (ByteArrayOutputStream output = new ByteArrayOutputStream()) { - TypeMarshaller.marshalTypeToOutputStream(sysmeta, output); - try (ByteArrayInputStream input = new ByteArrayInputStream(output.toByteArray())) { - Storage.getInstance().storeMetadata(input, identifier); - } - } - } + String propertyFilePath = + "./src/main/resources/org/dataone/configuration/index-processor.properties"; + Settings.augmentConfiguration(propertyFilePath); } - /** - * Test the getObject and getSystemMetadata method + * Test the isCN method * @throws Exception */ @Test - public void testGetObjectAndSystemMetadata() throws Exception { - try (InputStream input = ObjectManager.getInstance().getObject(identifier)) { - assertNotNull(input); - try (OutputStream os = new ByteArrayOutputStream()) { - MessageDigest md5 = MessageDigest.getInstance("MD5"); - // Calculate hex digests - byte[] buffer = new byte[8192]; - int bytesRead; - while ((bytesRead = input.read(buffer)) != -1) { - os.write(buffer, 0, bytesRead); - md5.update(buffer, 0, bytesRead); - } - String md5Digest = DatatypeConverter.printHexBinary(md5.digest()).toLowerCase(); - assertEquals("1755a557c13be7af44d676bb09274b0e", md5Digest); - } - } - org.dataone.service.types.v1.SystemMetadata sysmeta = ObjectManager.getInstance() - .getSystemMetadata(identifier); - assertEquals(identifier, sysmeta.getIdentifier().getValue()); - assertEquals("1755a557c13be7af44d676bb09274b0e", sysmeta.getChecksum().getValue()); - assertEquals(14828, sysmeta.getSize().intValue()); + public void testIsCN() throws Exception { + String url = "https://knb.ecoinformatics.org/knb/d1/mn"; + assertFalse(ObjectManager.isCN(url)); + url = "https://cn-orc-1.dataone.org/cn"; + assertTrue(ObjectManager.isCN(url)); } + /** + * Test the refreshD1Node method based the settings from properties + */ + @Test + public void testRefreshD1NodeFromProperties() throws Exception { + ObjectManager.refreshD1Node(); + assertEquals("https://valley.duckdns.org/metacat/d1/mn/v2", + ObjectManager.getD1Node().getNodeBaseServiceUrl()); + } + + /** + * Test the getSystemMetadataByAPI method base the env values. + * @throws Exception + */ + @Test + public void testGetSystemMetadataByAPI() throws Exception { + String url = "https://knb.ecoinformatics.org/knb/d1/mn"; + String token = "fake_token"; + environmentVariables.set(NODE_BASE_URL_ENV_NAME, url); + environmentVariables.set(TOKEN_ENV_NAME, token); + ObjectManager.refreshD1Node(); + assertEquals(url + "/v2", ObjectManager.getD1Node().getNodeBaseServiceUrl()); + assertEquals(token, ObjectManager.getDataONEauthToken()); + String id = "doi:10.5063/F1N0150S"; + SystemMetadata sys = ObjectManager.getSystemMetadataByAPI(id); + assertNotNull(sys); + assertEquals(id, sys.getIdentifier().getValue()); + environmentVariables.set(NODE_BASE_URL_ENV_NAME, null); + environmentVariables.set(TOKEN_ENV_NAME, null); + } } diff --git a/src/test/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManagerTest.java b/src/test/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManagerTest.java new file mode 100644 index 00000000..3779f95c --- /dev/null +++ b/src/test/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManagerTest.java @@ -0,0 +1,118 @@ +package org.dataone.cn.indexer.object.hashstore; + +import org.dataone.cn.indexer.object.ObjectManager; +import org.dataone.cn.indexer.object.ObjectManagerFactory; +import org.dataone.indexer.storage.Storage; +import org.dataone.service.exceptions.NotFound; +import org.dataone.service.exceptions.ServiceFailure; +import org.dataone.service.types.v1.Identifier; +import org.dataone.service.types.v2.SystemMetadata; +import org.dataone.service.util.TypeMarshaller; +import org.junit.Before; +import org.junit.Test; + +import javax.xml.bind.DatatypeConverter; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.io.OutputStream; +import java.security.MessageDigest; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +/** + * The test class for HashStoreObjManager + * @author Tao + */ +public class HashStoreObjManagerTest { + private String identifier; + + @Before + public void setUp() throws Exception { + identifier = "ObjectManagerTest-" + System.currentTimeMillis(); + File objectFile = new File("src/test/resources/org/dataone/cn/index/resources/d1_testdocs/" + + "fgdc/nasa_d_FEDGPS1293.xml"); + try (InputStream object = new FileInputStream(objectFile)) { + Storage.getInstance().storeObject(object, identifier); + } + File sysmetaFile = new File("src/test/resources/org/dataone/cn/index/resources/" + + "d1_testdocs/fgdc/nasa_d_FEDGPS1293Sysmeta.xml"); + try (InputStream sysmetaStream = new FileInputStream(sysmetaFile)) { + SystemMetadata sysmeta = TypeMarshaller + .unmarshalTypeFromStream(SystemMetadata.class, sysmetaStream); + Identifier pid = new Identifier(); + pid.setValue(identifier); + sysmeta.setIdentifier(pid); + try (ByteArrayOutputStream output = new ByteArrayOutputStream()) { + TypeMarshaller.marshalTypeToOutputStream(sysmeta, output); + try (ByteArrayInputStream input = new ByteArrayInputStream(output.toByteArray())) { + Storage.getInstance().storeMetadata(input, identifier); + } + } + } + } + + /** + * Test the getObject and getSystemMetadata method + * @throws Exception + */ + @Test + public void testGetObjectAndSystemMetadata() throws Exception { + ObjectManager manager = ObjectManagerFactory.getObjectManager(); + assertTrue(manager instanceof HashStoreObjManager); + try (InputStream input = manager.getObject(identifier)) { + assertNotNull(input); + try (OutputStream os = new ByteArrayOutputStream()) { + MessageDigest md5 = MessageDigest.getInstance("MD5"); + // Calculate hex digests + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = input.read(buffer)) != -1) { + os.write(buffer, 0, bytesRead); + md5.update(buffer, 0, bytesRead); + } + String md5Digest = DatatypeConverter.printHexBinary(md5.digest()).toLowerCase(); + assertEquals("1755a557c13be7af44d676bb09274b0e", md5Digest); + } + } + org.dataone.service.types.v1.SystemMetadata sysmeta = manager + .getSystemMetadata(identifier); + assertEquals(identifier, sysmeta.getIdentifier().getValue()); + assertEquals("1755a557c13be7af44d676bb09274b0e", sysmeta.getChecksum().getValue()); + assertEquals(14828, sysmeta.getSize().intValue()); + } + + /** + * Test the failed scenarios for the methods of getSystemMetadata and getObject. + * @throws Exception + */ + @Test + public void testFailuresInGetSystemMetadataAndGetObject() throws Exception { + String id = "foo-fake-id-123"; + ObjectManager manager = ObjectManagerFactory.getObjectManager(); + try { + InputStream stream = manager.getSystemMetadataStream(id); + fail("Test can't get here since the id doesn't exist"); + } catch (Exception e) { + assertTrue(e instanceof ServiceFailure || e instanceof NotFound); + } + try { + manager.getSystemMetadata(id); + fail("Test can't get here since the id doesn't exist"); + } catch (Exception e) { + assertTrue(e instanceof ServiceFailure || e instanceof NotFound); + } + try { + manager.getObject(id); + fail("Test can't get here since the id doesn't exist"); + } catch (Exception e) { + assertTrue(e instanceof FileNotFoundException); + } + } +} diff --git a/src/test/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManagerTest.java b/src/test/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManagerTest.java new file mode 100644 index 00000000..5a67d503 --- /dev/null +++ b/src/test/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManagerTest.java @@ -0,0 +1,127 @@ +package org.dataone.cn.indexer.object.legacystore; + +import org.apache.commons.io.IOUtils; +import org.dataone.cn.indexer.object.ObjectManager; +import org.dataone.cn.indexer.object.ObjectManagerTest; +import org.dataone.configuration.Settings; +import org.dataone.service.exceptions.NotFound; +import org.dataone.service.exceptions.ServiceFailure; +import org.dataone.service.types.v1.SystemMetadata; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import uk.org.webcompere.systemstubs.rules.EnvironmentVariablesRule; + +import java.io.FileNotFoundException; +import java.io.InputStream; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +/** + * A junit test class for LegacyStoreObjManager + * @author Tao + */ +public class LegacyStoreObjManagerTest { + private static final String DATA_ROOT_DIR_ENV_NAME = "DATAONE_INDEXER_METACAT_DATA_ROOT_DIR"; + private static final String DOCUMENT_ROOT_DIR_ENV_NAME = + "DATAONE_INDEXER_METACAT_DOCUMENT_ROOT_DIR"; + + private static final String DATA_ROOT_DIR_PROPERTY_NAME = "index.data.root.directory"; + private static final String DOCUMENT_ROOT_DIR_PROPERTY_NAME = "index.document.root.directory"; + + @Rule + public EnvironmentVariablesRule environmentVariables = + new EnvironmentVariablesRule(DATA_ROOT_DIR_ENV_NAME, null); + + @Before + public void setUp() throws Exception { + String propertyFilePath = + "./src/main/resources/org/dataone/configuration/index-processor.properties"; + Settings.augmentConfiguration(propertyFilePath); + } + + /** + * Test the constructor from the properties + */ + @Test + public void testConstructorFromProperties() throws Exception { + String dataDir = "/var/metacat/data/"; + String documentDir = "/var/metacat/documents/"; + LegacyStoreObjManager manager = new LegacyStoreObjManager(); + assertEquals(documentDir, manager.getDocumentRootDir()); + assertEquals(dataDir, manager.getDataRootDir()); + Settings.getConfiguration().setProperty(DATA_ROOT_DIR_PROPERTY_NAME, null); + try { + manager = new LegacyStoreObjManager(); + fail("Test shouldn't get here since the previous statement should throw an exception"); + } catch (Exception e) { + assertTrue( e instanceof ServiceFailure); + } + Settings.getConfiguration().setProperty(DATA_ROOT_DIR_PROPERTY_NAME, dataDir); + manager = new LegacyStoreObjManager(); + assertEquals(documentDir, manager.getDocumentRootDir()); + assertEquals(dataDir, manager.getDataRootDir()); + Settings.getConfiguration().setProperty(DOCUMENT_ROOT_DIR_PROPERTY_NAME, null); + try { + manager = new LegacyStoreObjManager(); + fail("Test shouldn't get here since the previous statement should throw an exception"); + } catch (Exception e) { + assertTrue( e instanceof ServiceFailure); + } + } + + /** + * Test the getObject method + * @throws Exception + */ + @Test + public void testGetObject() throws Exception { + String dataDir = "src/test/resources/org/dataone/configuration/"; + String documentDir = "src/test/resources/"; + environmentVariables.set(DATA_ROOT_DIR_ENV_NAME, dataDir); + environmentVariables.set(DOCUMENT_ROOT_DIR_ENV_NAME, documentDir); + LegacyStoreObjManager manager = new LegacyStoreObjManager(); + assertEquals(documentDir, manager.getDocumentRootDir()); + assertEquals(dataDir, manager.getDataRootDir()); + InputStream inputData = manager.getObject("config.xml"); + assertNotNull(inputData); + InputStream inputDocument = manager.getObject("commons-logging.properties"); + assertNotNull(inputDocument); + try { + InputStream input = manager.getObject("foo"); + fail("Test shouldn't get here since the foo file doesn't exist"); + } catch (Exception e) { + assertTrue(e instanceof FileNotFoundException); + } + } + + /** + * Test the getSystemMetacat and getSystemMetadataStream methods + * @throws Exception + */ + @Test + public void testGetSystemMetadata() throws Exception { + String id = "doi:10.18739/A21J9795R"; + String url = "https://cn.dataone.org/cn"; + environmentVariables.set(ObjectManagerTest.NODE_BASE_URL_ENV_NAME, url); + ObjectManager.refreshD1Node(); + LegacyStoreObjManager manager = new LegacyStoreObjManager(); + SystemMetadata systemMetadata = manager.getSystemMetadata(id); + assertEquals(id, systemMetadata.getIdentifier().getValue()); + InputStream inputStream = manager.getSystemMetadataStream(id); + String sysStr = IOUtils.toString(inputStream, "UTF-8"); + assertTrue(sysStr.contains("checksum")); + assertTrue(sysStr.contains("rightsHolder")); + assertTrue(sysStr.contains("authoritativeMemberNode")); + try { + systemMetadata = manager.getSystemMetadata("fake-id-foo231"); + fail("Test should get here since the object doesn't exist"); + } catch (Exception e) { + assertTrue(e instanceof NotFound); + } + + } +} diff --git a/src/test/java/org/dataone/indexer/queue/IndexQueueMessageParserTest.java b/src/test/java/org/dataone/indexer/queue/IndexQueueMessageParserTest.java index 201a55f7..2776bbe3 100644 --- a/src/test/java/org/dataone/indexer/queue/IndexQueueMessageParserTest.java +++ b/src/test/java/org/dataone/indexer/queue/IndexQueueMessageParserTest.java @@ -1,7 +1,7 @@ package org.dataone.indexer.queue; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertNull; import static org.junit.Assert.fail; import java.util.HashMap; @@ -24,6 +24,7 @@ public class IndexQueueMessageParserTest { private final static String HEADER_ID = "id"; //The header name in the message to store the index type private final static String HEADER_INDEX_TYPE = "index_type"; + private final static String HEADER_DOCID = "doc_id"; /** * Test the invalid messages @@ -34,7 +35,8 @@ public void testInvalidRequest() throws Exception { LongString id = null; LongString index_type = LongStringHelper.asLongString("create"); int priority = 1; - AMQP.BasicProperties properties = generateProperties(id, index_type, priority); + LongString docId = LongStringHelper.asLongString("foo.1.1"); + AMQP.BasicProperties properties = generateProperties(id, index_type, priority, null); byte[] body = null; IndexQueueMessageParser parser = new IndexQueueMessageParser(); try { @@ -47,7 +49,7 @@ public void testInvalidRequest() throws Exception { id = LongStringHelper.asLongString(" "); index_type = LongStringHelper.asLongString("create"); priority = 1; - properties = generateProperties(id, index_type, priority); + properties = generateProperties(id, index_type, priority, docId); try { parser.parse(properties, body); fail("Since the idenitifer is null, we shouldn't get here"); @@ -58,7 +60,7 @@ public void testInvalidRequest() throws Exception { id = LongStringHelper.asLongString("foo"); index_type = null; priority = 1; - properties = generateProperties(id, index_type, priority); + properties = generateProperties(id, index_type, priority, docId); try { parser.parse(properties, body); fail("Since the index type is null, we shouldn't get here"); @@ -69,7 +71,7 @@ public void testInvalidRequest() throws Exception { id = LongStringHelper.asLongString("foo"); index_type = LongStringHelper.asLongString(""); priority = 1; - properties = generateProperties(id, index_type, priority); + properties = generateProperties(id, index_type, priority, null); try { parser.parse(properties, body); fail("Since the index type is null, we shouldn't get here"); @@ -86,66 +88,107 @@ public void testInvalidRequest() throws Exception { public void testParse() throws Exception { String id = "doi:10.5063/F1HX1B4Q"; String indexType = "create"; + String docId = "foo.1.1"; int priority = 1; LongString longId = LongStringHelper.asLongString(id); LongString longIndexType = LongStringHelper.asLongString(indexType); - AMQP.BasicProperties properties = generateProperties(longId, longIndexType, priority); + LongString longDocId = LongStringHelper.asLongString(docId); + AMQP.BasicProperties properties = generateProperties(longId, longIndexType, priority, + longDocId); byte[] body = null; IndexQueueMessageParser parser = new IndexQueueMessageParser(); parser.parse(properties, body); assertEquals(id, parser.getIdentifier().getValue()); assertEquals(indexType, parser.getIndexType()); assertEquals(priority, parser.getPriority()); + assertEquals(docId, parser.getDocId()); id = "urn:uuid:45298965-f867-440c-841f-91d3abd729b7"; indexType = "delete"; priority = 2; + docId = "foo.2.1"; longId = LongStringHelper.asLongString(id); longIndexType = LongStringHelper.asLongString(indexType); - properties = generateProperties(longId, longIndexType, priority); + longDocId = LongStringHelper.asLongString(docId); + properties = generateProperties(longId, longIndexType, priority, longDocId); parser = new IndexQueueMessageParser(); parser.parse(properties, body); assertEquals(id, parser.getIdentifier().getValue()); assertEquals(indexType, parser.getIndexType()); assertEquals(priority, parser.getPriority()); + assertEquals(docId, parser.getDocId()); + + id = "urn:uuid:45298965-f867-440c-841f-000000"; + indexType = "create"; + priority = 1; + longId = LongStringHelper.asLongString(id); + longIndexType = LongStringHelper.asLongString(indexType); + properties = generateProperties(longId, longIndexType, priority, null); + parser = new IndexQueueMessageParser(); + parser.parse(properties, body); + assertEquals(id, parser.getIdentifier().getValue()); + assertEquals(indexType, parser.getIndexType()); + assertEquals(priority, parser.getPriority()); + assertNull(parser.getDocId()); + + id = "urn:uuid:45298965-f867-440c-841f-000000"; + indexType = "create"; + priority = 1; + docId = ""; + longId = LongStringHelper.asLongString(id); + longIndexType = LongStringHelper.asLongString(indexType); + longDocId = LongStringHelper.asLongString(docId); + properties = generateProperties(longId, longIndexType, priority, longDocId); + parser = new IndexQueueMessageParser(); + parser.parse(properties, body); + assertEquals(id, parser.getIdentifier().getValue()); + assertEquals(indexType, parser.getIndexType()); + assertEquals(priority, parser.getPriority()); + assertEquals(docId, parser.getDocId()); id = "test-foo"; indexType = "sysmeta"; priority = 10; + docId = "foo.3.1"; longId = LongStringHelper.asLongString(id); longIndexType = LongStringHelper.asLongString(indexType); - properties = generateProperties(longId, longIndexType, priority); + longDocId = LongStringHelper.asLongString(docId); + properties = generateProperties(longId, longIndexType, priority, longDocId); parser = new IndexQueueMessageParser(); parser.parse(properties, body); assertEquals(id, parser.getIdentifier().getValue()); assertEquals(indexType, parser.getIndexType()); assertEquals(priority, parser.getPriority()); + assertEquals(docId, parser.getDocId()); id = "test-foo2"; indexType = "sysmeta2"; priority = 10; longId = LongStringHelper.asLongString(id); longIndexType = LongStringHelper.asLongString(indexType); - properties = generateProperties(longId, longIndexType, priority); + properties = generateProperties(longId, longIndexType, priority, null); parser = new IndexQueueMessageParser(); parser.parse(properties, body); assertEquals(id, parser.getIdentifier().getValue()); assertEquals(indexType, parser.getIndexType()); assertEquals(priority, parser.getPriority()); + assertNull(parser.getDocId()); } - + /** * Generate the BasicProperties for the given values * @param id - * @param index_type + * @param indexType * @param priority - * @param filePath + * @param docId * @return */ - private AMQP.BasicProperties generateProperties(LongString id, LongString index_type, int priority) { + private AMQP.BasicProperties generateProperties(LongString id, LongString indexType, + int priority, LongString docId) { Map headers = new HashMap(); headers.put(HEADER_ID, id); - headers.put(HEADER_INDEX_TYPE, index_type); + headers.put(HEADER_INDEX_TYPE, indexType); + headers.put(HEADER_DOCID, docId); AMQP.BasicProperties basicProperties = new AMQP.BasicProperties.Builder() .contentType("text/plain") .deliveryMode(2) // set this message to persistent diff --git a/src/test/python/index_task_sender.py b/src/test/python/index_task_sender.py new file mode 100644 index 00000000..d5423f08 --- /dev/null +++ b/src/test/python/index_task_sender.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +import pika +import sys +arguments = sys.argv[1:] +if len(arguments) == 2: + print("Arguments:", arguments) + id = arguments[0] + doc_id = arguments[1] +else: + print("Usage: python3 index_task_sender.py pid docId") + sys.exit() + +connection = pika.BlockingConnection( + pika.ConnectionParameters(host='localhost')) +channel = connection.channel() + +channel.queue_declare(queue='index', durable=True, arguments={'x-max-priority': 10}) +channel.queue_bind(exchange='dataone-index', + queue='index', + routing_key='index') +headers={'index_type': 'create', 'id': id, 'doc_id': doc_id} +properties = pika.BasicProperties(headers=headers) +message = '' +channel.basic_publish( + exchange='dataone-index', + routing_key='index', + body=message, + properties=properties + ) +print(f" [x] Sent {message}") +connection.close() \ No newline at end of file diff --git a/src/test/resources/org/dataone/configuration/index-processor-2.properties b/src/test/resources/org/dataone/configuration/index-processor-2.properties index 320eaa54..913432fc 100644 --- a/src/test/resources/org/dataone/configuration/index-processor-2.properties +++ b/src/test/resources/org/dataone/configuration/index-processor-2.properties @@ -19,9 +19,9 @@ index.resourcemap.waitingComponent.time=600 #The number of the attempts that the resource map processor tries to wait for the solr doc readiness of its components index.resourcemap.waitingComponent.max.attempts=15 #The time (millisecond) that indexer will wait to grab a newer version of solr doc when a version conflict happened -index.solr.versionConflict.waiting.time=500 +index.solr.versionConflict.waiting.time=10 #The number of the attempts that indexer tries to grab a newer version of solr doc when a version conflict happened -index.solr.versionConflict.max.attempts=25 +index.solr.versionConflict.max.attempts=25000 #You may specify the exact number of threads the indexer will use. #If you keep it blank, Metacat will use the default one - the system processors number minus one. If calculation result is 0, 1 will be used as the default value. #If the one you specify exceeds the default number or is less than 1, the default one will be used as well. diff --git a/src/test/resources/org/dataone/configuration/index-processor.properties b/src/test/resources/org/dataone/configuration/index-processor.properties index 256dd8d8..734a045a 100644 --- a/src/test/resources/org/dataone/configuration/index-processor.properties +++ b/src/test/resources/org/dataone/configuration/index-processor.properties @@ -19,9 +19,9 @@ index.resourcemap.waitingComponent.time=600 #The number of the attempts that the resource map processor tries to wait for the solr doc readiness of its components index.resourcemap.waitingComponent.max.attempts=15 #The time (millisecond) that indexer will wait to grab a newer version of solr doc when a version conflict happened -index.solr.versionConflict.waiting.time=500 +index.solr.versionConflict.waiting.time=10 #The number of the attempts that indexer tries to grab a newer version of solr doc when a version conflict happened -index.solr.versionConflict.max.attempts=25 +index.solr.versionConflict.max.attempts=25000 #You may specify the exact number of threads the indexer will use. #If you keep it blank, Metacat will use the default one - the system processors number minus one. If calculation result is 0, 1 will be used as the default value. #If the one you specify exceeds the default number or is less than 1, the default one will be used as well.