From 1cfa50d58d266b7a7f2e18288b5ecff30bd32baa Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Fri, 9 May 2025 16:09:25 -0700 Subject: [PATCH 01/36] Add a new field - objectPath and format the class. --- .../queue/IndexQueueMessageParser.java | 50 +++++++++++++++---- 1 file changed, 39 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java b/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java index ea28e279..9099212c 100644 --- a/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java +++ b/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java @@ -21,11 +21,13 @@ public class IndexQueueMessageParser { private final static String HEADER_ID = "id"; //The header name in the message to store the index type private final static String HEADER_INDEX_TYPE = "index_type"; - + //The header name in the message to store the path of the object + private final static String HEADER_PATH = "path"; private Identifier identifier = null; private String indexType = null; private int priority = 1; - + private String objectPath; + private static Log logger = LogFactory.getLog(IndexQueueMessageParser.class); /** @@ -46,11 +48,13 @@ public void parse(AMQP.BasicProperties properties, byte[] body) throws InvalidRe } Object pidObj = headers.get(HEADER_ID); if (pidObj == null) { - throw new InvalidRequest("0000", "The identifier cannot be null in the index queue message."); + throw new InvalidRequest( + "0000", "The identifier cannot be null in the index queue message."); } String pid = ((LongString)pidObj).toString(); - if (pid == null || pid.trim().equals("")) { - throw new InvalidRequest("0000", "The identifier cannot be null or blank in the index queue message."); + if (pid == null || pid.isBlank()) { + throw new InvalidRequest( + "0000", "The identifier cannot be null or blank in the index queue message."); } logger.debug("IndexQueueMessageParser.parse - the identifier in the message is " 
+ pid); identifier = new Identifier(); @@ -58,21 +62,35 @@ public void parse(AMQP.BasicProperties properties, byte[] body) throws InvalidRe Object typeObj = headers.get(HEADER_INDEX_TYPE); if (typeObj == null) { - throw new InvalidRequest("0000", "The index type cannot be null in the index queue message."); + throw new InvalidRequest( + "0000", "The index type cannot be null in the index queue message for " + pid); } indexType = ((LongString)typeObj).toString(); - if (indexType == null || indexType.trim().equals("")) { - throw new InvalidRequest("0000", "The index type cannot be null or blank in the index queue message."); + if (indexType == null || indexType.isBlank()) { + throw new InvalidRequest( + "0000", + "The index type cannot be null or blank in the index queue message for " + pid); + } + logger.debug("The index type in the message is " + indexType + " for " + pid); + Object pathObject = headers.get(HEADER_PATH); + if (pathObject != null) { + objectPath = ((LongString)pathObject).toString(); } - logger.debug("IndexQueueMessageParser.parse - the index type in the message is " + indexType); + logger.debug( + "The file path of the object which will be indexed in the message is " + objectPath + + " for " + pid); try { priority = properties.getPriority(); } catch (NullPointerException e) { - logger.info("IndexQueueMessageParser.parse - the priority is not set in the message and we will set it one."); + logger.info( + "IndexQueueMessageParser.parse - the priority is not set in the message and we " + + "will set it one."); priority =1; } - logger.debug("IndexQueueMessageParser.parse - the priority in the message is " + priority); + logger.debug( + "IndexQueueMessageParser.parse - the priority in the message is " + priority + " for " + + pid); } /** @@ -99,4 +117,14 @@ public int getPriority() { return priority; } + /** + * Get the file path of the object, which will be indexed, + * after calling the parse method to parse the index queue message. 
+ * @return the file path of the object. It can be null or blank, which + * means we don't have the object in the system. + */ + public String getObjectPath() { + return objectPath; + } + } From b5e16257732acd3b8f9a585d3158a3f4d60cd5ae Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Tue, 13 May 2025 11:06:00 -0700 Subject: [PATCH 02/36] Parse docid rather than file path. --- .../queue/IndexQueueMessageParser.java | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java b/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java index 9099212c..7ef6c171 100644 --- a/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java +++ b/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java @@ -21,12 +21,12 @@ public class IndexQueueMessageParser { private final static String HEADER_ID = "id"; //The header name in the message to store the index type private final static String HEADER_INDEX_TYPE = "index_type"; - //The header name in the message to store the path of the object - private final static String HEADER_PATH = "path"; + //The header name in the message to store the docid of the object + private final static String HEADER_DOCID = "doc_id"; private Identifier identifier = null; private String indexType = null; private int priority = 1; - private String objectPath; + private String docId = null; private static Log logger = LogFactory.getLog(IndexQueueMessageParser.class); @@ -72,12 +72,12 @@ public void parse(AMQP.BasicProperties properties, byte[] body) throws InvalidRe "The index type cannot be null or blank in the index queue message for " + pid); } logger.debug("The index type in the message is " + indexType + " for " + pid); - Object pathObject = headers.get(HEADER_PATH); - if (pathObject != null) { - objectPath = ((LongString)pathObject).toString(); + Object docIdObject = headers.get(HEADER_DOCID); + if (docIdObject != null) { + 
docId = ((LongString)docIdObject).toString(); } logger.debug( - "The file path of the object which will be indexed in the message is " + objectPath + + "The docId of the object which will be indexed in the message is " + docId + " for " + pid); try { @@ -118,13 +118,13 @@ public int getPriority() { } /** - * Get the file path of the object, which will be indexed, + * Get the docId of the object, which will be indexed, * after calling the parse method to parse the index queue message. - * @return the file path of the object. It can be null or blank, which - * means we don't have the object in the system. + * @return the docId of the object. DocId is an iternal id of Metacat, which is a file name in + * the system. It can be null or blank, which means we don't have the object in the system. */ - public String getObjectPath() { - return objectPath; + public String getDocId() { + return docId; } } From aab8f068c3a6b17c170dfd83fbec5f27dbbe7bc4 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Tue, 13 May 2025 11:33:04 -0700 Subject: [PATCH 03/36] Added the tests for the header doc_id. 
--- .../queue/IndexQueueMessageParserTest.java | 71 +++++++++++++++---- 1 file changed, 57 insertions(+), 14 deletions(-) diff --git a/src/test/java/org/dataone/indexer/queue/IndexQueueMessageParserTest.java b/src/test/java/org/dataone/indexer/queue/IndexQueueMessageParserTest.java index 201a55f7..2776bbe3 100644 --- a/src/test/java/org/dataone/indexer/queue/IndexQueueMessageParserTest.java +++ b/src/test/java/org/dataone/indexer/queue/IndexQueueMessageParserTest.java @@ -1,7 +1,7 @@ package org.dataone.indexer.queue; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertNull; import static org.junit.Assert.fail; import java.util.HashMap; @@ -24,6 +24,7 @@ public class IndexQueueMessageParserTest { private final static String HEADER_ID = "id"; //The header name in the message to store the index type private final static String HEADER_INDEX_TYPE = "index_type"; + private final static String HEADER_DOCID = "doc_id"; /** * Test the invalid messages @@ -34,7 +35,8 @@ public void testInvalidRequest() throws Exception { LongString id = null; LongString index_type = LongStringHelper.asLongString("create"); int priority = 1; - AMQP.BasicProperties properties = generateProperties(id, index_type, priority); + LongString docId = LongStringHelper.asLongString("foo.1.1"); + AMQP.BasicProperties properties = generateProperties(id, index_type, priority, null); byte[] body = null; IndexQueueMessageParser parser = new IndexQueueMessageParser(); try { @@ -47,7 +49,7 @@ public void testInvalidRequest() throws Exception { id = LongStringHelper.asLongString(" "); index_type = LongStringHelper.asLongString("create"); priority = 1; - properties = generateProperties(id, index_type, priority); + properties = generateProperties(id, index_type, priority, docId); try { parser.parse(properties, body); fail("Since the idenitifer is null, we shouldn't get here"); @@ -58,7 +60,7 @@ public void testInvalidRequest() throws 
Exception { id = LongStringHelper.asLongString("foo"); index_type = null; priority = 1; - properties = generateProperties(id, index_type, priority); + properties = generateProperties(id, index_type, priority, docId); try { parser.parse(properties, body); fail("Since the index type is null, we shouldn't get here"); @@ -69,7 +71,7 @@ public void testInvalidRequest() throws Exception { id = LongStringHelper.asLongString("foo"); index_type = LongStringHelper.asLongString(""); priority = 1; - properties = generateProperties(id, index_type, priority); + properties = generateProperties(id, index_type, priority, null); try { parser.parse(properties, body); fail("Since the index type is null, we shouldn't get here"); @@ -86,66 +88,107 @@ public void testInvalidRequest() throws Exception { public void testParse() throws Exception { String id = "doi:10.5063/F1HX1B4Q"; String indexType = "create"; + String docId = "foo.1.1"; int priority = 1; LongString longId = LongStringHelper.asLongString(id); LongString longIndexType = LongStringHelper.asLongString(indexType); - AMQP.BasicProperties properties = generateProperties(longId, longIndexType, priority); + LongString longDocId = LongStringHelper.asLongString(docId); + AMQP.BasicProperties properties = generateProperties(longId, longIndexType, priority, + longDocId); byte[] body = null; IndexQueueMessageParser parser = new IndexQueueMessageParser(); parser.parse(properties, body); assertEquals(id, parser.getIdentifier().getValue()); assertEquals(indexType, parser.getIndexType()); assertEquals(priority, parser.getPriority()); + assertEquals(docId, parser.getDocId()); id = "urn:uuid:45298965-f867-440c-841f-91d3abd729b7"; indexType = "delete"; priority = 2; + docId = "foo.2.1"; longId = LongStringHelper.asLongString(id); longIndexType = LongStringHelper.asLongString(indexType); - properties = generateProperties(longId, longIndexType, priority); + longDocId = LongStringHelper.asLongString(docId); + properties = 
generateProperties(longId, longIndexType, priority, longDocId); parser = new IndexQueueMessageParser(); parser.parse(properties, body); assertEquals(id, parser.getIdentifier().getValue()); assertEquals(indexType, parser.getIndexType()); assertEquals(priority, parser.getPriority()); + assertEquals(docId, parser.getDocId()); + + id = "urn:uuid:45298965-f867-440c-841f-000000"; + indexType = "create"; + priority = 1; + longId = LongStringHelper.asLongString(id); + longIndexType = LongStringHelper.asLongString(indexType); + properties = generateProperties(longId, longIndexType, priority, null); + parser = new IndexQueueMessageParser(); + parser.parse(properties, body); + assertEquals(id, parser.getIdentifier().getValue()); + assertEquals(indexType, parser.getIndexType()); + assertEquals(priority, parser.getPriority()); + assertNull(parser.getDocId()); + + id = "urn:uuid:45298965-f867-440c-841f-000000"; + indexType = "create"; + priority = 1; + docId = ""; + longId = LongStringHelper.asLongString(id); + longIndexType = LongStringHelper.asLongString(indexType); + longDocId = LongStringHelper.asLongString(docId); + properties = generateProperties(longId, longIndexType, priority, longDocId); + parser = new IndexQueueMessageParser(); + parser.parse(properties, body); + assertEquals(id, parser.getIdentifier().getValue()); + assertEquals(indexType, parser.getIndexType()); + assertEquals(priority, parser.getPriority()); + assertEquals(docId, parser.getDocId()); id = "test-foo"; indexType = "sysmeta"; priority = 10; + docId = "foo.3.1"; longId = LongStringHelper.asLongString(id); longIndexType = LongStringHelper.asLongString(indexType); - properties = generateProperties(longId, longIndexType, priority); + longDocId = LongStringHelper.asLongString(docId); + properties = generateProperties(longId, longIndexType, priority, longDocId); parser = new IndexQueueMessageParser(); parser.parse(properties, body); assertEquals(id, parser.getIdentifier().getValue()); assertEquals(indexType, 
parser.getIndexType()); assertEquals(priority, parser.getPriority()); + assertEquals(docId, parser.getDocId()); id = "test-foo2"; indexType = "sysmeta2"; priority = 10; longId = LongStringHelper.asLongString(id); longIndexType = LongStringHelper.asLongString(indexType); - properties = generateProperties(longId, longIndexType, priority); + properties = generateProperties(longId, longIndexType, priority, null); parser = new IndexQueueMessageParser(); parser.parse(properties, body); assertEquals(id, parser.getIdentifier().getValue()); assertEquals(indexType, parser.getIndexType()); assertEquals(priority, parser.getPriority()); + assertNull(parser.getDocId()); } - + /** * Generate the BasicProperties for the given values * @param id - * @param index_type + * @param indexType * @param priority - * @param filePath + * @param docId * @return */ - private AMQP.BasicProperties generateProperties(LongString id, LongString index_type, int priority) { + private AMQP.BasicProperties generateProperties(LongString id, LongString indexType, + int priority, LongString docId) { Map headers = new HashMap(); headers.put(HEADER_ID, id); - headers.put(HEADER_INDEX_TYPE, index_type); + headers.put(HEADER_INDEX_TYPE, indexType); + headers.put(HEADER_DOCID, docId); AMQP.BasicProperties basicProperties = new AMQP.BasicProperties.Builder() .contentType("text/plain") .deliveryMode(2) // set this message to persistent From 98f696fda03861261b38c9665230957f873a0819 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Tue, 13 May 2025 15:47:21 -0700 Subject: [PATCH 04/36] Splitted long statements. 
--- .../cn/indexer/object/ObjectManager.java | 36 +++++++++++-------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java index 4a0f1b7b..dca318d1 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java @@ -193,37 +193,43 @@ private static void refreshD1Node() throws ServiceFailure { //get the token DataONEauthToken = System.getenv(TOKEN_VARIABLE_NAME); if (DataONEauthToken == null || DataONEauthToken.trim().equals("")) { - //can't get the token from the env variable. So try to get it from a file specified in the property + //can't get the token from the env variable. So try to get it from a file specified + // in the property String tokenFilePath = Settings.getConfiguration().getString(TOKEN_FILE_PATH_PROP_NAME); if (tokenFilePath != null && !tokenFilePath.trim().equals("")) { - logger.info("ObjectManager.refreshD1Node - We can't get the token from the env variable so try to get the auth token from the file " + tokenFilePath); + logger.info( + "Can NOT get the token from the env variable so try to get the auth token " + + "from the file " + tokenFilePath); try { DataONEauthToken = FileUtils.readFileToString(new File(tokenFilePath), "UTF-8"); } catch (IOException e) { DataONEauthToken = null; - logger.warn("ObjectManager.refreshD1Node - can NOT get the authen token from the file " + tokenFilePath + " since " + e.getMessage()); + logger.warn("Can NOT get the authen token from the file " + tokenFilePath + + " since " + e.getMessage()); } if (DataONEauthToken != null && !DataONEauthToken.trim().equals("")) { - logger.info("ObjectManager.refreshD1Node - Got the auth token from the file "+ tokenFilePath); + logger.info("Got the auth token from the file "+ tokenFilePath); } } } else { - logger.info("ObjectManager.refreshD1Node - Got the auth token 
from an env. variable"); + logger.info("Got the auth token from an env. variable"); } - if (DataONEauthToken == null || DataONEauthToken.trim().equals("")) { - logger.warn("ObjectManager.refreshD1Node ------ Could NOT get an auth token from either an env. variable or the properties file. So it will act as the public user."); + logger.warn( + "Could NOT get an auth token from either an env. variable or the properties file." + + " So it will act as the public user."); } session = createSession(DataONEauthToken); - logger.info("ObjectManager.refreshD1Node ------ going to create the d1node with url " + nodeBaseURL); + logger.info("Going to create the d1node with url " + nodeBaseURL); try { d1Node = getMultipartD1Node(session, nodeBaseURL); } catch (IOException | ClientSideException e) { - logger.error("ObjectManager.refreshD1Node - couldn't create the d1node for the url " + nodeBaseURL + " since " + e.getMessage()); + logger.error("Couldn't create the d1node for the url " + nodeBaseURL + " since " + + e.getMessage()); throw new ServiceFailure("0000", e.getMessage()); } } - + /** * Get a DataONE authenticated session *

@@ -238,7 +244,8 @@ private static Session createSession(String authToken) { logger.info("ObjectManager.createSession - Creating the public session"); session = new Session(); } else { - logger.info("ObjectManger.createSession - Creating authentication session from token: " + authToken.substring(0, 5) + "..."); + logger.info("Creating authentication session from token: " + authToken.substring(0, 5) + + "..."); session = new AuthTokenSession(authToken); } return session; @@ -253,7 +260,8 @@ private static Session createSession(String authToken) { * @throws ClientSideException * @throws IOException */ - private static MultipartD1Node getMultipartD1Node(Session session, String serviceUrl) throws IOException, ClientSideException { + private static MultipartD1Node getMultipartD1Node(Session session, String serviceUrl) + throws IOException, ClientSideException { MultipartRestClient mrc = null; MultipartD1Node d1Node = null; // First create a default HTTP client @@ -262,10 +270,10 @@ private static MultipartD1Node getMultipartD1Node(Session session, String servic Boolean isCN = isCN(serviceUrl); // Now create a DataONE object that uses the rest client if (isCN) { - logger.info("ObjectManager.getMultipartD1Node - creating cn MultipartMNode from the url " + serviceUrl); + logger.info("Creating cn MultipartMNode from the url " + serviceUrl); d1Node = new MultipartCNode(mrc, serviceUrl, session); } else { - logger.info("ObjectManager.getMultipartD1Node - creating mn MultipartMNode from the url " + serviceUrl); + logger.info("Creating mn MultipartMNode from the url " + serviceUrl); d1Node = new MultipartMNode(mrc, serviceUrl, session); } return d1Node; From e71c01dfdfcd1294c690fc34c32c9e78c5713dfb Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 14 May 2025 10:16:02 -0700 Subject: [PATCH 05/36] Refactoried the code to make the DataManager class support both hashstore and the metacat legacy store. 
--- .../org/dataone/cn/indexer/IndexWorker.java | 15 ++-- .../org/dataone/cn/indexer/SolrIndex.java | 8 +- .../cn/indexer/object/ObjectManager.java | 85 +++--------------- .../indexer/object/ObjectManagerFactory.java | 52 +++++++++++ .../object/hashstore/HashStoreObjManager.java | 89 +++++++++++++++++++ .../resourcemap/ForesiteResourceMap.java | 11 +-- .../IndexVisibilityDelegateImpl.java | 26 +++++- .../org/dataone/cn/indexer/IndexWorkerIT.java | 3 +- .../cn/indexer/object/ObjectManagerTest.java | 4 +- 9 files changed, 199 insertions(+), 94 deletions(-) create mode 100644 src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java create mode 100644 src/main/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManager.java diff --git a/src/main/java/org/dataone/cn/indexer/IndexWorker.java b/src/main/java/org/dataone/cn/indexer/IndexWorker.java index 4c4b8d8f..7e170c1f 100644 --- a/src/main/java/org/dataone/cn/indexer/IndexWorker.java +++ b/src/main/java/org/dataone/cn/indexer/IndexWorker.java @@ -2,6 +2,7 @@ import java.io.File; import java.io.IOException; +import java.lang.reflect.InvocationTargetException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -18,14 +19,13 @@ import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPathExpressionException; -import com.rabbitmq.client.ShutdownSignalException; import org.apache.commons.codec.EncoderException; import org.apache.commons.configuration.ConfigurationException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.solr.client.solrj.SolrServerException; import org.dataone.cn.indexer.annotation.OntologyModelService; -import org.dataone.cn.indexer.object.ObjectManager; +import org.dataone.cn.indexer.object.ObjectManagerFactory; import org.dataone.configuration.Settings; import org.dataone.exceptions.MarshallingException; import org.dataone.indexer.queue.IndexQueueMessageParser; @@ -214,7 
+214,10 @@ public static void loadAdditionalPropertyFile(String propertyFile) { * @throws TimeoutException * @throws ServiceFailure */ - public IndexWorker() throws IOException, TimeoutException, ServiceFailure { + public IndexWorker() + throws IOException, TimeoutException, ServiceFailure, ClassNotFoundException, + InvocationTargetException, NoSuchMethodException, InstantiationException, + IllegalAccessException { this(true); } @@ -226,7 +229,9 @@ public IndexWorker() throws IOException, TimeoutException, ServiceFailure { * @throws TimeoutException * @throws ServiceFailure */ - public IndexWorker(Boolean initialize) throws IOException, TimeoutException { + public IndexWorker(Boolean initialize) + throws IOException, TimeoutException, ClassNotFoundException, InvocationTargetException, + NoSuchMethodException, InstantiationException, IllegalAccessException { String value = System.getenv("KUBERNETES_SERVICE_HOST"); // Java doc says: the string value of the variable, or null if the variable is not defined // in the system environment @@ -238,7 +243,7 @@ public IndexWorker(Boolean initialize) throws IOException, TimeoutException { initExecutorService();//initialize the executor first initIndexQueue(); initIndexParsers(); - ObjectManager.getInstance(); + ObjectManagerFactory.getInstance(); OntologyModelService.getInstance(); } } diff --git a/src/main/java/org/dataone/cn/indexer/SolrIndex.java b/src/main/java/org/dataone/cn/indexer/SolrIndex.java index 2848d237..c05dc2fa 100644 --- a/src/main/java/org/dataone/cn/indexer/SolrIndex.java +++ b/src/main/java/org/dataone/cn/indexer/SolrIndex.java @@ -21,7 +21,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.solr.client.solrj.SolrServerException; -import org.dataone.cn.indexer.object.ObjectManager; +import org.dataone.cn.indexer.object.ObjectManagerFactory; import org.dataone.cn.indexer.parser.BaseXPathDocumentSubprocessor; import 
org.dataone.cn.indexer.parser.IDocumentDeleteSubprocessor; import org.dataone.cn.indexer.parser.IDocumentSubprocessor; @@ -151,7 +151,8 @@ private Map process(String id, boolean isSysmetaChangeOnly) long start = System.currentTimeMillis(); Map docs = new HashMap<>(); // Load the System Metadata document - try (InputStream systemMetadataStream = ObjectManager.getInstance().getSystemMetadataStream(id)){ + try (InputStream systemMetadataStream = + ObjectManagerFactory.getInstance().getSystemMetadataStream(id)){ docs = systemMetadataProcessor.processDocument(id, docs, systemMetadataStream); } catch (Exception e) { log.error(e.getMessage(), e); @@ -188,7 +189,8 @@ private Map process(String id, boolean isSysmetaChangeOnly) if (subprocessor.canProcess(formatId)) { // if so, then extract the additional information from the // document. - try (InputStream dataStream = ObjectManager.getInstance().getObject(id)) { + try (InputStream dataStream = + ObjectManagerFactory.getInstance().getObject(id)) { // docObject = the resource map document or science // metadata document. 
// note that resource map processing touches all objects diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java index dca318d1..38743276 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java @@ -1,6 +1,5 @@ package org.dataone.cn.indexer.object; -import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -8,7 +7,6 @@ import java.security.NoSuchAlgorithmException; import org.apache.commons.io.FileUtils; -import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dataone.client.auth.AuthTokenSession; @@ -20,13 +18,11 @@ import org.dataone.client.v2.impl.MultipartMNode; import org.dataone.configuration.Settings; import org.dataone.exceptions.MarshallingException; -import org.dataone.indexer.storage.Storage; import org.dataone.service.exceptions.InvalidToken; import org.dataone.service.exceptions.NotAuthorized; import org.dataone.service.exceptions.NotFound; import org.dataone.service.exceptions.NotImplemented; import org.dataone.service.exceptions.ServiceFailure; -import org.dataone.service.types.v1.Identifier; import org.dataone.service.types.v1.Session; import org.dataone.service.types.v2.SystemMetadata; import org.dataone.service.util.TypeMarshaller; @@ -37,42 +33,15 @@ * @author tao * */ -public class ObjectManager { - private static ObjectManager manager = null; - private static String nodeBaseURL = Settings.getConfiguration().getString("dataone.mn.baseURL"); +public abstract class ObjectManager { + protected static String nodeBaseURL = Settings.getConfiguration().getString("dataone.mn.baseURL"); private static String DataONEauthToken = null; private static Log logger = LogFactory.getLog(ObjectManager.class); - private static 
Storage storage = null; private static final String TOKEN_VARIABLE_NAME = "DATAONE_AUTH_TOKEN"; private static final String TOKEN_FILE_PATH_PROP_NAME = "dataone.nodeToken.file"; - private static MultipartD1Node d1Node = null; - private static Session session = null; - - static { - try { - refreshD1Node(); - } catch (ServiceFailure e) { - logger.warn("Metacat cannot initialize the d1Node since " + e.getMessage()); - } - storage = Storage.getInstance(); - manager = new ObjectManager(); - } - - - /** - * Private constructor - */ - private ObjectManager() { - } - - /** - * Get an ObjectManager instance through the singleton pattern. - * @return the instance of ObjectManager - */ - public static ObjectManager getInstance() { - return manager; - } + protected static MultipartD1Node d1Node = null; + protected static Session session = null; /** * Get the system metadata for the given id @@ -87,40 +56,9 @@ public static ObjectManager getInstance() { * @throws IOException * @throws NoSuchAlgorithmException */ - public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAuthorized, - NotImplemented, ServiceFailure, NotFound, - NoSuchAlgorithmException, IOException, MarshallingException { - long start = System.currentTimeMillis(); - //try to get the system metadata from the storage system first - InputStream sysmetaInputStream = null; - try { - sysmetaInputStream = storage.retrieveSystemMetadata(id); - long end = System.currentTimeMillis(); - logger.info("Finish getting the system metadata via the file system for the pid " + id - + " and it took " + (end - start) + "milliseconds"); - } catch (FileNotFoundException exception ) { - if (d1Node != null) { - // Metacat can't find the system metadata from the storage system. 
- // So try to get it from the dataone api - SystemMetadata sysmeta = null; - Identifier identifier = new Identifier(); - identifier.setValue(id); - sysmeta = d1Node.getSystemMetadata(session, identifier); - logger.debug("Finish getting the system metadata via the DataONE API call for the pid " - + id); - if (sysmeta != null) { - ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream(); - TypeMarshaller.marshalTypeToOutputStream(sysmeta, systemMetadataOutputStream); - sysmetaInputStream = - new ByteArrayInputStream(systemMetadataOutputStream.toByteArray()); - } - long end = System.currentTimeMillis(); - logger.info("Finish getting the system metadata via DataONE API for the pid " + id - + " and it took " + (end - start) + "milliseconds"); - } - } - return sysmetaInputStream; - } + public abstract InputStream getSystemMetadataStream(String id) + throws InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, NotFound, + NoSuchAlgorithmException, IOException, MarshallingException; /** * Get the system metadata object for the given identifier @@ -171,10 +109,9 @@ public org.dataone.service.types.v1.SystemMetadata getSystemMetadata(String id) * @throws NoSuchAlgorithmException * @throws IOException */ - public InputStream getObject(String pid) throws IllegalArgumentException, FileNotFoundException, - NoSuchAlgorithmException, IOException { - return storage.retrieveObject(pid); - } + public abstract InputStream getObject(String pid) + throws IllegalArgumentException, FileNotFoundException, NoSuchAlgorithmException, + IOException; /** * Set the d1 node for this object manager. 
@@ -189,7 +126,7 @@ public static void setD1Node(MultipartD1Node node) { * In case the token expired, the method will retrieve the token and create a new d1 node * @throws ServiceFailure */ - private static void refreshD1Node() throws ServiceFailure { + protected static void refreshD1Node() throws ServiceFailure { //get the token DataONEauthToken = System.getenv(TOKEN_VARIABLE_NAME); if (DataONEauthToken == null || DataONEauthToken.trim().equals("")) { diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java new file mode 100644 index 00000000..5ff61832 --- /dev/null +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java @@ -0,0 +1,52 @@ +package org.dataone.cn.indexer.object; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import java.lang.reflect.InvocationTargetException; + +/** + * Class to create a concrete ObjectManager Object + * @author Tao + */ +public class ObjectManagerFactory { + private static volatile ObjectManager manager = null; + private static String className = "org.dataone.cn.indexer.object.hashstore.HashStoreObjManager"; + private static Log logger = LogFactory.getLog(ObjectManagerFactory.class); + private static final String OBJECT_MANAGER_CLASSNAME_ENV = + "DATAONE_INDEXER_OBJECT_MANAGER_CLASSNAME"; + + /** + * Create a Concrete ObjectManager object by the single pattern. + * First, Indexer will check if the env variable of DATAONE_INDEXER_OBJECT_MANAGER_CLASSNAME + * is defined. 
If it is defined, indexer will use it; otherwise it uses the default one - + * org.dataone.cn.indexer.object.hashstore.HashStoreObjManager + * @return an ObjectManager object + * @throws ClassNotFoundException + * @throws NoSuchMethodException + * @throws InvocationTargetException + * @throws InstantiationException + * @throws IllegalAccessException + */ + public static ObjectManager getInstance() + throws ClassNotFoundException, NoSuchMethodException, InvocationTargetException, + InstantiationException, IllegalAccessException { + String classNameFromEnv = System.getenv(OBJECT_MANAGER_CLASSNAME_ENV); + if (classNameFromEnv != null && !classNameFromEnv.isBlank()) { + logger.debug("The ObjectManager class name form env variable " + + OBJECT_MANAGER_CLASSNAME_ENV + " is " + classNameFromEnv); + className = classNameFromEnv; + } + if (manager == null) { + synchronized (ObjectManagerFactory.class) { + if (manager == null) { + logger.info("The final ObjectManager class name form env variable is " + + classNameFromEnv); + Class managerClass = Class.forName(className); + manager = (ObjectManager) managerClass.getDeclaredConstructor().newInstance(); + } + } + } + return manager; + } +} diff --git a/src/main/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManager.java b/src/main/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManager.java new file mode 100644 index 00000000..9b09443f --- /dev/null +++ b/src/main/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManager.java @@ -0,0 +1,89 @@ +package org.dataone.cn.indexer.object.hashstore; + +import org.apache.commons.io.output.ByteArrayOutputStream; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dataone.cn.indexer.object.ObjectManager; +import org.dataone.exceptions.MarshallingException; +import org.dataone.indexer.storage.Storage; +import org.dataone.service.exceptions.InvalidToken; +import org.dataone.service.exceptions.NotAuthorized; +import 
org.dataone.service.exceptions.NotFound; +import org.dataone.service.exceptions.NotImplemented; +import org.dataone.service.exceptions.ServiceFailure; +import org.dataone.service.types.v1.Identifier; +import org.dataone.service.types.v2.SystemMetadata; +import org.dataone.service.util.TypeMarshaller; + +import java.io.ByteArrayInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.security.NoSuchAlgorithmException; + +/** + * Implementation of ObjectManager based on a hash store + * @author Tao + */ +public class HashStoreObjManager extends ObjectManager { + private static Storage storage = null; + private static Log logger = LogFactory.getLog(ObjectManager.class); + static { + try { + refreshD1Node(); + } catch (ServiceFailure e) { + logger.warn("Metacat cannot initialize the d1Node since " + e.getMessage()); + } + storage = Storage.getInstance(); + } + + /** + * Constructor + */ + public HashStoreObjManager() { + + } + + @Override + public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAuthorized, + NotImplemented, ServiceFailure, NotFound, NoSuchAlgorithmException, IOException, + MarshallingException { + long start = System.currentTimeMillis(); + //try to get the system metadata from the storage system first + InputStream sysmetaInputStream = null; + try { + sysmetaInputStream = storage.retrieveSystemMetadata(id); + long end = System.currentTimeMillis(); + logger.info("Finish getting the system metadata via the file system for the pid " + id + + " and it took " + (end - start) + "milliseconds"); + } catch (FileNotFoundException exception ) { + if (d1Node != null) { + // Metacat can't find the system metadata from the storage system. 
+ // So try to get it from the dataone api + SystemMetadata sysmeta = null; + Identifier identifier = new Identifier(); + identifier.setValue(id); + sysmeta = d1Node.getSystemMetadata(session, identifier); + logger.debug("Finish getting the system metadata via the DataONE API call for the pid " + + id); + if (sysmeta != null) { + ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream(); + TypeMarshaller.marshalTypeToOutputStream(sysmeta, systemMetadataOutputStream); + sysmetaInputStream = + new ByteArrayInputStream(systemMetadataOutputStream.toByteArray()); + } + long end = System.currentTimeMillis(); + logger.info("Finish getting the system metadata via DataONE API for the pid " + id + + " and it took " + (end - start) + "milliseconds"); + } + } + return sysmetaInputStream; + } + + @Override + public InputStream getObject(String pid) + throws IllegalArgumentException, NoSuchAlgorithmException, IOException { + return storage.retrieveObject(pid); + } + +} diff --git a/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java b/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java index bbe4b59d..b4b83991 100644 --- a/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java +++ b/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java @@ -6,6 +6,7 @@ import java.io.InputStream; import java.io.StringReader; import java.io.UnsupportedEncodingException; +import java.lang.reflect.InvocationTargetException; import java.net.URISyntaxException; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; @@ -19,7 +20,7 @@ import org.apache.commons.io.input.ReaderInputStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.dataone.cn.indexer.object.ObjectManager; +import org.dataone.cn.indexer.object.ObjectManagerFactory; import org.dataone.cn.indexer.parser.utility.SeriesIdResolver; import 
org.dataone.cn.indexer.solrhttp.SolrDoc; import org.dataone.cn.indexer.solrhttp.SolrElementField; @@ -232,14 +233,14 @@ private boolean isHeadVersion(Identifier pid, Identifier sid) { } private SolrDoc _mergeMappedReference(ResourceEntry resourceEntry, SolrDoc mergeDocument) - throws InvalidToken, NotAuthorized, NotImplemented, - NoSuchAlgorithmException, ServiceFailure, NotFound, InstantiationException, - IllegalAccessException, IOException, MarshallingException { + throws InvalidToken, NotAuthorized, NotImplemented, NoSuchAlgorithmException, ServiceFailure, + NotFound, InstantiationException, IllegalAccessException, IOException, MarshallingException, + ClassNotFoundException, InvocationTargetException, NoSuchMethodException { Identifier identifier = new Identifier(); identifier.setValue(mergeDocument.getIdentifier()); try { - SystemMetadata sysMeta = (SystemMetadata) ObjectManager.getInstance() + SystemMetadata sysMeta = (SystemMetadata) ObjectManagerFactory.getInstance() .getSystemMetadata(identifier.getValue()); if (sysMeta.getSeriesId() != null && sysMeta.getSeriesId().getValue() != null && !sysMeta.getSeriesId().getValue().trim().equals("")) { diff --git a/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java b/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java index a105731f..cf3cb099 100644 --- a/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java +++ b/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java @@ -1,11 +1,12 @@ package org.dataone.cn.indexer.resourcemap; import java.io.IOException; +import java.lang.reflect.InvocationTargetException; import java.security.NoSuchAlgorithmException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.dataone.cn.indexer.object.ObjectManager; +import org.dataone.cn.indexer.object.ObjectManagerFactory; import org.dataone.cn.indexer.solrhttp.SolrDoc; import 
org.dataone.exceptions.MarshallingException; import org.dataone.service.exceptions.InvalidToken; @@ -27,7 +28,7 @@ public IndexVisibilityDelegateImpl() { public boolean isDocumentVisible(Identifier pid) { boolean visible = false; try { - SystemMetadata systemMetadata = ObjectManager.getInstance() + SystemMetadata systemMetadata = ObjectManagerFactory.getInstance() .getSystemMetadata(pid.getValue()); // TODO: Is pid Identifier a SID? if (systemMetadata == null) { @@ -58,6 +59,15 @@ public boolean isDocumentVisible(Identifier pid) { logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); } catch (NoSuchAlgorithmException e) { logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); + } catch (ClassNotFoundException e) { + logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " + + e.getMessage()); + } catch (InvocationTargetException e) { + logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " + + e.getMessage()); + } catch (NoSuchMethodException e) { + logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " + + e.getMessage()); } return visible; } @@ -65,7 +75,8 @@ public boolean isDocumentVisible(Identifier pid) { public boolean documentExists(Identifier pid) { boolean exists = false; try { - SystemMetadata systemMetadata = ObjectManager.getInstance().getSystemMetadata(pid.getValue()); + SystemMetadata systemMetadata = + ObjectManagerFactory.getInstance().getSystemMetadata(pid.getValue()); if (systemMetadata != null) { exists = true; } else { @@ -94,6 +105,15 @@ public boolean documentExists(Identifier pid) { logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); } catch (NoSuchAlgorithmException e) { logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); + } catch (ClassNotFoundException e) { + logger.warn("Could not 
get visible value for pid: " + pid.getValue() + " since " + + e.getMessage()); + } catch (InvocationTargetException e) { + logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " + + e.getMessage()); + } catch (NoSuchMethodException e) { + logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " + + e.getMessage()); } return exists; } diff --git a/src/test/java/org/dataone/cn/indexer/IndexWorkerIT.java b/src/test/java/org/dataone/cn/indexer/IndexWorkerIT.java index 9a92a6d7..3f964a24 100644 --- a/src/test/java/org/dataone/cn/indexer/IndexWorkerIT.java +++ b/src/test/java/org/dataone/cn/indexer/IndexWorkerIT.java @@ -36,8 +36,7 @@ public class IndexWorkerIT { * @throws InterruptedException */ @Test - public void testRestoreRabbitMQConnectionAndChannel() - throws ServiceFailure, IOException, TimeoutException, InterruptedException { + public void testRestoreRabbitMQConnectionAndChannel() throws Exception { IndexWorker worker = new IndexWorker(); worker.start(); Connection connection = worker.getRabbitMQconnection(); diff --git a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java index 120286b9..ad6e53f2 100644 --- a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java +++ b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java @@ -62,7 +62,7 @@ public void setUp() throws Exception { */ @Test public void testGetObjectAndSystemMetadata() throws Exception { - try (InputStream input = ObjectManager.getInstance().getObject(identifier)) { + try (InputStream input = ObjectManagerFactory.getInstance().getObject(identifier)) { assertNotNull(input); try (OutputStream os = new ByteArrayOutputStream()) { MessageDigest md5 = MessageDigest.getInstance("MD5"); @@ -77,7 +77,7 @@ public void testGetObjectAndSystemMetadata() throws Exception { assertEquals("1755a557c13be7af44d676bb09274b0e", md5Digest); } } - 
org.dataone.service.types.v1.SystemMetadata sysmeta = ObjectManager.getInstance() + org.dataone.service.types.v1.SystemMetadata sysmeta = ObjectManagerFactory.getInstance() .getSystemMetadata(identifier); assertEquals(identifier, sysmeta.getIdentifier().getValue()); assertEquals("1755a557c13be7af44d676bb09274b0e", sysmeta.getChecksum().getValue()); From de4e4353ae92690c7e8ff9d398346924f30786d2 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 14 May 2025 10:41:15 -0700 Subject: [PATCH 06/36] In the ObjectManagerFactory class, renamed the method from getInstance to getObjectManager. --- src/main/java/org/dataone/cn/indexer/IndexWorker.java | 2 +- src/main/java/org/dataone/cn/indexer/SolrIndex.java | 4 ++-- .../org/dataone/cn/indexer/object/ObjectManagerFactory.java | 2 +- .../dataone/cn/indexer/resourcemap/ForesiteResourceMap.java | 2 +- .../cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java | 4 ++-- .../java/org/dataone/cn/indexer/object/ObjectManagerTest.java | 4 ++-- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/IndexWorker.java b/src/main/java/org/dataone/cn/indexer/IndexWorker.java index 7e170c1f..8a1c59c0 100644 --- a/src/main/java/org/dataone/cn/indexer/IndexWorker.java +++ b/src/main/java/org/dataone/cn/indexer/IndexWorker.java @@ -243,7 +243,7 @@ public IndexWorker(Boolean initialize) initExecutorService();//initialize the executor first initIndexQueue(); initIndexParsers(); - ObjectManagerFactory.getInstance(); + ObjectManagerFactory.getObjectManager(); OntologyModelService.getInstance(); } } diff --git a/src/main/java/org/dataone/cn/indexer/SolrIndex.java b/src/main/java/org/dataone/cn/indexer/SolrIndex.java index c05dc2fa..1e5bbab9 100644 --- a/src/main/java/org/dataone/cn/indexer/SolrIndex.java +++ b/src/main/java/org/dataone/cn/indexer/SolrIndex.java @@ -152,7 +152,7 @@ private Map process(String id, boolean isSysmetaChangeOnly) Map docs = new HashMap<>(); // Load the System Metadata document try
(InputStream systemMetadataStream = - ObjectManagerFactory.getInstance().getSystemMetadataStream(id)){ + ObjectManagerFactory.getObjectManager().getSystemMetadataStream(id)){ docs = systemMetadataProcessor.processDocument(id, docs, systemMetadataStream); } catch (Exception e) { log.error(e.getMessage(), e); @@ -190,7 +190,7 @@ private Map process(String id, boolean isSysmetaChangeOnly) // if so, then extract the additional information from the // document. try (InputStream dataStream = - ObjectManagerFactory.getInstance().getObject(id)) { + ObjectManagerFactory.getObjectManager().getObject(id)) { // docObject = the resource map document or science // metadata document. // note that resource map processing touches all objects diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java index 5ff61832..ca485a25 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java @@ -28,7 +28,7 @@ public class ObjectManagerFactory { * @throws InstantiationException * @throws IllegalAccessException */ - public static ObjectManager getInstance() + public static ObjectManager getObjectManager() throws ClassNotFoundException, NoSuchMethodException, InvocationTargetException, InstantiationException, IllegalAccessException { String classNameFromEnv = System.getenv(OBJECT_MANAGER_CLASSNAME_ENV); diff --git a/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java b/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java index b4b83991..5b8892d7 100644 --- a/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java +++ b/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java @@ -240,7 +240,7 @@ private SolrDoc _mergeMappedReference(ResourceEntry resourceEntry, SolrDoc merge Identifier identifier = new Identifier(); 
identifier.setValue(mergeDocument.getIdentifier()); try { - SystemMetadata sysMeta = (SystemMetadata) ObjectManagerFactory.getInstance() + SystemMetadata sysMeta = (SystemMetadata) ObjectManagerFactory.getObjectManager() .getSystemMetadata(identifier.getValue()); if (sysMeta.getSeriesId() != null && sysMeta.getSeriesId().getValue() != null && !sysMeta.getSeriesId().getValue().trim().equals("")) { diff --git a/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java b/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java index cf3cb099..97ea7e75 100644 --- a/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java +++ b/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java @@ -28,7 +28,7 @@ public IndexVisibilityDelegateImpl() { public boolean isDocumentVisible(Identifier pid) { boolean visible = false; try { - SystemMetadata systemMetadata = ObjectManagerFactory.getInstance() + SystemMetadata systemMetadata = ObjectManagerFactory.getObjectManager() .getSystemMetadata(pid.getValue()); // TODO: Is pid Identifier a SID? 
if (systemMetadata == null) { @@ -76,7 +76,7 @@ public boolean documentExists(Identifier pid) { boolean exists = false; try { SystemMetadata systemMetadata = - ObjectManagerFactory.getInstance().getSystemMetadata(pid.getValue()); + ObjectManagerFactory.getObjectManager().getSystemMetadata(pid.getValue()); if (systemMetadata != null) { exists = true; } else { diff --git a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java index ad6e53f2..6748d367 100644 --- a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java +++ b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java @@ -62,7 +62,7 @@ public void setUp() throws Exception { */ @Test public void testGetObjectAndSystemMetadata() throws Exception { - try (InputStream input = ObjectManagerFactory.getInstance().getObject(identifier)) { + try (InputStream input = ObjectManagerFactory.getObjectManager().getObject(identifier)) { assertNotNull(input); try (OutputStream os = new ByteArrayOutputStream()) { MessageDigest md5 = MessageDigest.getInstance("MD5"); @@ -77,7 +77,7 @@ public void testGetObjectAndSystemMetadata() throws Exception { assertEquals("1755a557c13be7af44d676bb09274b0e", md5Digest); } } - org.dataone.service.types.v1.SystemMetadata sysmeta = ObjectManagerFactory.getInstance() + org.dataone.service.types.v1.SystemMetadata sysmeta = ObjectManagerFactory.getObjectManager() .getSystemMetadata(identifier); assertEquals(identifier, sysmeta.getIdentifier().getValue()); assertEquals("1755a557c13be7af44d676bb09274b0e", sysmeta.getChecksum().getValue()); From 565e45be199f8a8cb7a6d3c7858f221d438cb4fe Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 14 May 2025 11:40:38 -0700 Subject: [PATCH 07/36] Added the junit test for the ObjectManagerFactory class.
--- .../indexer/object/ObjectManagerFactory.java | 11 ++++- .../object/ObjectManagerFactoryTest.java | 48 +++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 src/test/java/org/dataone/cn/indexer/object/ObjectManagerFactoryTest.java diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java index ca485a25..eae93650 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java @@ -11,7 +11,8 @@ */ public class ObjectManagerFactory { private static volatile ObjectManager manager = null; - private static String className = "org.dataone.cn.indexer.object.hashstore.HashStoreObjManager"; + private static final String DEFAULT_ClASS_NAME = "org.dataone.cn.indexer.object.hashstore" + + ".HashStoreObjManager"; private static Log logger = LogFactory.getLog(ObjectManagerFactory.class); private static final String OBJECT_MANAGER_CLASSNAME_ENV = "DATAONE_INDEXER_OBJECT_MANAGER_CLASSNAME"; @@ -32,6 +33,7 @@ public static ObjectManager getObjectManager() throws ClassNotFoundException, NoSuchMethodException, InvocationTargetException, InstantiationException, IllegalAccessException { String classNameFromEnv = System.getenv(OBJECT_MANAGER_CLASSNAME_ENV); + String className = DEFAULT_ClASS_NAME; if (classNameFromEnv != null && !classNameFromEnv.isBlank()) { logger.debug("The ObjectManager class name form env variable " + OBJECT_MANAGER_CLASSNAME_ENV + " is " + classNameFromEnv); @@ -49,4 +51,11 @@ public static ObjectManager getObjectManager() } return manager; } + + /** + * This method is for testing only + */ + protected static void resetManagerNull() { + manager = null; + } } diff --git a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerFactoryTest.java b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerFactoryTest.java new file mode 100644 
index 00000000..e9c3416f --- /dev/null +++ b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerFactoryTest.java @@ -0,0 +1,48 @@ +package org.dataone.cn.indexer.object; + +import org.dataone.cn.indexer.object.hashstore.HashStoreObjManager; +import org.junit.Rule; +import org.junit.Test; +import uk.org.webcompere.systemstubs.rules.EnvironmentVariablesRule; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +/** + * A Junit test class for ObjectManagerFactory + * @author Tao + */ +public class ObjectManagerFactoryTest { + + private static final String envName = "DATAONE_INDEXER_OBJECT_MANAGER_CLASSNAME"; + @Rule + public EnvironmentVariablesRule environmentVariablesClassName = + new EnvironmentVariablesRule(envName, null); + /** + * Test to create a HashStoreObjManager + * @throws Exception + */ + @Test + public void testHashStoreObjManager() throws Exception { + environmentVariablesClassName.set(envName, null); + ObjectManagerFactory.resetManagerNull(); + ObjectManager manager = ObjectManagerFactory.getObjectManager(); + assertTrue(manager instanceof HashStoreObjManager); + } + + /** + * Test the failure with a wrong class name + * @throws Exception + */ + @Test + public void testWrongClassName() throws Exception { + environmentVariablesClassName.set(envName, "foo.foo1.className"); + ObjectManagerFactory.resetManagerNull(); + try { + ObjectManager manager = ObjectManagerFactory.getObjectManager(); + fail("Test shouldn't get here since the class doesn't exist with the given name."); + } catch (Exception e) { + assertTrue( e instanceof ClassNotFoundException); + } + } +} From 12c8dba236e2f1f1fff2352aa926fe5194c3df27 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 14 May 2025 15:06:05 -0700 Subject: [PATCH 08/36] Added a new class - LegacyStoreObjManager to handle the Metacat old style storage system. 
--- .../cn/indexer/object/ObjectManager.java | 49 +++-- .../object/hashstore/HashStoreObjManager.java | 35 +++- .../legacystore/LegacyStoreObjManager.java | 191 ++++++++++++++++++ .../cn/indexer/object/ObjectManagerTest.java | 55 ----- .../hashstore/HashStoreObjManagerTest.java | 86 ++++++++ 5 files changed, 330 insertions(+), 86 deletions(-) create mode 100644 src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java create mode 100644 src/test/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManagerTest.java diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java index 38743276..ce14a961 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java @@ -1,5 +1,6 @@ package org.dataone.cn.indexer.object; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -7,6 +8,7 @@ import java.security.NoSuchAlgorithmException; import org.apache.commons.io.FileUtils; +import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dataone.client.auth.AuthTokenSession; @@ -23,6 +25,7 @@ import org.dataone.service.exceptions.NotFound; import org.dataone.service.exceptions.NotImplemented; import org.dataone.service.exceptions.ServiceFailure; +import org.dataone.service.types.v1.Identifier; import org.dataone.service.types.v1.Session; import org.dataone.service.types.v2.SystemMetadata; import org.dataone.service.util.TypeMarshaller; @@ -75,30 +78,11 @@ public abstract InputStream getSystemMetadataStream(String id) * @throws MarshallingException * @throws NoSuchAlgorithmException */ - public org.dataone.service.types.v1.SystemMetadata getSystemMetadata(String id) + public abstract 
org.dataone.service.types.v1.SystemMetadata getSystemMetadata(String id) throws InvalidToken, NotAuthorized, NoSuchAlgorithmException, NotImplemented, ServiceFailure, NotFound, InstantiationException, IllegalAccessException, - IOException, MarshallingException { - org.dataone.service.types.v1.SystemMetadata sysmeta = null; - try (InputStream input = getSystemMetadataStream(id)) { - if (input != null) { - try { - SystemMetadata sysmeta2 = TypeMarshaller - .unmarshalTypeFromStream(SystemMetadata.class, input); - sysmeta = sysmeta2; - } catch (Exception e) { - try (InputStream input2 = getSystemMetadataStream(id)) { - if (input2 != null) { - sysmeta = TypeMarshaller.unmarshalTypeFromStream( - org.dataone.service.types.v1.SystemMetadata.class, input2); - } - } - } - } - } - return sysmeta; - } + IOException, MarshallingException; /** * Get the input stream of the content of the given pid @@ -108,10 +92,11 @@ public org.dataone.service.types.v1.SystemMetadata getSystemMetadata(String id) * @throws FileNotFoundException * @throws NoSuchAlgorithmException * @throws IOException + * @throws NotFound */ public abstract InputStream getObject(String pid) throws IllegalArgumentException, FileNotFoundException, NoSuchAlgorithmException, - IOException; + IOException, NotFound; /** * Set the d1 node for this object manager. @@ -129,7 +114,7 @@ public static void setD1Node(MultipartD1Node node) { protected static void refreshD1Node() throws ServiceFailure { //get the token DataONEauthToken = System.getenv(TOKEN_VARIABLE_NAME); - if (DataONEauthToken == null || DataONEauthToken.trim().equals("")) { + if (DataONEauthToken == null || DataONEauthToken.isBlank()) { //can't get the token from the env variable. 
So try to get it from a file specified // in the property String tokenFilePath = Settings.getConfiguration().getString(TOKEN_FILE_PATH_PROP_NAME); @@ -144,14 +129,14 @@ protected static void refreshD1Node() throws ServiceFailure { logger.warn("Can NOT get the authen token from the file " + tokenFilePath + " since " + e.getMessage()); } - if (DataONEauthToken != null && !DataONEauthToken.trim().equals("")) { + if (DataONEauthToken != null && !DataONEauthToken.isBlank()) { logger.info("Got the auth token from the file "+ tokenFilePath); } } } else { logger.info("Got the auth token from an env. variable"); } - if (DataONEauthToken == null || DataONEauthToken.trim().equals("")) { + if (DataONEauthToken == null || DataONEauthToken.isBlank()) { logger.warn( "Could NOT get an auth token from either an env. variable or the properties file." + " So it will act as the public user."); @@ -245,4 +230,18 @@ private static Boolean isCN(String nodeStr) { return isCN; } + protected SystemMetadata getSystemMetadataByAPI(String id) + throws ServiceFailure, InvalidToken, NotImplemented, NotAuthorized, NotFound { + if (d1Node != null) { + // Metacat can't find the system metadata from the storage system. 
+ // So try to get it from the dataone api + Identifier identifier = new Identifier(); + identifier.setValue(id); + return d1Node.getSystemMetadata(session, identifier); + } else { + throw new ServiceFailure("0000", "The d1Node is null and Indexer cannot get the " + + "systemmetadata by a API call."); + } + } + } diff --git a/src/main/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManager.java b/src/main/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManager.java index 9b09443f..d533136b 100644 --- a/src/main/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManager.java @@ -60,12 +60,9 @@ public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAu if (d1Node != null) { // Metacat can't find the system metadata from the storage system. // So try to get it from the dataone api - SystemMetadata sysmeta = null; - Identifier identifier = new Identifier(); - identifier.setValue(id); - sysmeta = d1Node.getSystemMetadata(session, identifier); - logger.debug("Finish getting the system metadata via the DataONE API call for the pid " - + id); + SystemMetadata sysmeta = getSystemMetadataByAPI(id); + logger.debug("Finish getting the system metadata via the DataONE API call for the" + + " pid " + id); if (sysmeta != null) { ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream(); TypeMarshaller.marshalTypeToOutputStream(sysmeta, systemMetadataOutputStream); @@ -80,6 +77,32 @@ public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAu return sysmetaInputStream; } + @Override + public org.dataone.service.types.v1.SystemMetadata getSystemMetadata(String id) + throws InvalidToken, NotAuthorized, NoSuchAlgorithmException, + NotImplemented, ServiceFailure, NotFound, + InstantiationException, IllegalAccessException, + IOException, MarshallingException { + org.dataone.service.types.v1.SystemMetadata sysmeta = 
null; + try (InputStream input = getSystemMetadataStream(id)) { + if (input != null) { + try { + SystemMetadata sysmeta2 = TypeMarshaller + .unmarshalTypeFromStream(SystemMetadata.class, input); + sysmeta = sysmeta2; + } catch (Exception e) { + try (InputStream input2 = getSystemMetadataStream(id)) { + if (input2 != null) { + sysmeta = TypeMarshaller.unmarshalTypeFromStream( + org.dataone.service.types.v1.SystemMetadata.class, input2); + } + } + } + } + } + return sysmeta; + } + @Override public InputStream getObject(String pid) throws IllegalArgumentException, NoSuchAlgorithmException, IOException { diff --git a/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java b/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java new file mode 100644 index 00000000..6e970b8c --- /dev/null +++ b/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java @@ -0,0 +1,191 @@ +package org.dataone.cn.indexer.object.legacystore; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.FileSystems; +import java.nio.file.Files; + +import org.apache.commons.io.output.ByteArrayOutputStream; +import org.apache.log4j.Logger; +import org.dataone.client.v2.formats.ObjectFormatCache; +import org.dataone.cn.indexer.object.ObjectManager; +import org.dataone.configuration.Settings; +import org.dataone.exceptions.MarshallingException; +import org.dataone.service.exceptions.InvalidToken; +import org.dataone.service.exceptions.NotAuthorized; +import org.dataone.service.exceptions.NotFound; +import org.dataone.service.exceptions.NotImplemented; +import org.dataone.service.exceptions.ServiceFailure; +import org.dataone.service.types.v2.ObjectFormat; +import org.dataone.service.types.v2.SystemMetadata; +import org.dataone.service.util.TypeMarshaller; + +/** + * The 
class to get objects and system metadata from Metacat legacy store + * @author Tao + */ +public class LegacyStoreObjManager extends ObjectManager { + + private static String dataRootDir = + Settings.getConfiguration().getString("index.data.root.directory"); + private static String documentRootDir = + Settings.getConfiguration().getString("index.document.root.directory"); + private static Logger logger = Logger.getLogger(LegacyStoreObjManager.class); + + private static boolean ifDataAndDocRootSame = false; + + /** + * Constructor + * @throws ServiceFailure + */ + public LegacyStoreObjManager() throws ServiceFailure { + if (dataRootDir == null || dataRootDir.trim().equals("")) { + throw new ServiceFailure( + "0000", + "The data root directory specified by the property index.data.root.directory is " + + "blank in the properties file"); + } + if (documentRootDir == null || documentRootDir.trim().equals("")) { + throw new ServiceFailure( + "0000", + "The metadata root directory specified by the property index.document.root" + + ".directory is blank in the properties file"); + } + if (!Files.exists(FileSystems.getDefault().getPath(dataRootDir))) { + throw new ServiceFailure("0000", "The data root directory " + dataRootDir + + " specified in the properties file doesn't exist"); + } + if (!Files.exists(FileSystems.getDefault().getPath(documentRootDir))) { + throw new ServiceFailure("0000", "The document root directory " + documentRootDir + + " specified in the properties file doesn't exist"); + } + if (!dataRootDir.endsWith("/")) { + dataRootDir = dataRootDir + "/"; + } + if (!documentRootDir.endsWith("/")) { + documentRootDir = documentRootDir + "/"; + } + + if (documentRootDir.equals(dataRootDir)) { + ifDataAndDocRootSame = true; + } + logger.info("ObjectManager.constructor - the root document directory is " + + documentRootDir + " and the root data directory is " + dataRootDir + + " Are they same?" 
+ ifDataAndDocRootSame); + + } + + + @Override + public InputStream getObject(String pid) + throws IllegalArgumentException, IOException, NotFound { + File object = new File(documentRootDir + pid); + if (!object.exists()) { + object = new File(dataRootDir + pid); + if (object.exists()) { + return new FileInputStream(object); + } else { + throw new FileNotFoundException( + "Neither " + documentRootDir + " nor " + dataRootDir + " have the docid " + + pid); + } + } else { + return new FileInputStream(object); + } + + } + + /** + * Get the absolute file path for a given relative path. If the relativePath is null or blank, + * null will be returned + * @param relativePath + * @return the absolute file path + * @throws NotFound + */ + private String getFilePath(String relativePath, String objectFormat) throws NotFound { + String absolutePath = null; + if (relativePath != null && !relativePath.isBlank()) { + if (ifDataAndDocRootSame) { + absolutePath = documentRootDir + relativePath; + } else if (objectFormat != null && !objectFormat.isBlank()) { + ObjectFormat format =ObjectFormatCache.getInstance().getFormat(objectFormat); + if (format.getFormatType().equals("METADATA")) { + absolutePath = documentRootDir + relativePath; + } else { + absolutePath = dataRootDir + relativePath; + } + } + } + logger.debug("The absolute file path for the relative file path " + + relativePath + " is " + absolutePath); + return absolutePath; + } + + @Override + public org.dataone.service.types.v1.SystemMetadata getSystemMetadata(String id) + throws InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, NotFound { + SystemMetadata sysmeta = null; + long start = System.currentTimeMillis(); + try { + for (int i=0; i<5; i++) { + try { + sysmeta = getSystemMetadataByAPI(id); + break; + } catch (ServiceFailure ee) { + logger.warn("The DataONE api call doesn't get the system metadata since " + + ee.getMessage() + ". 
This is " + i + + " try and Indexer will try again."); + try { + Thread.sleep(300); + } catch (InterruptedException ie) { + logger.info("The sleep of the thread was interrupted."); + } + } + } + logger.debug( + "ObjectManager.getSystemMetadata - finish getting the system metadata via the " + + "DataONE API call for the pid " + id); + } catch (NotAuthorized e) { + logger.info( + "ObjectManager.getSystemMetadata - failed to get the system metadata via the " + + "DataONE API call for the pid " + + id + " since it is not authorized. We will refresh the token and try again"); + refreshD1Node(); + sysmeta = getSystemMetadataByAPI(id); + } + long end = System.currentTimeMillis(); + logger.info( + "ObjectManager.getSystemMetadata - finish getting the system metadata via DataONE API" + + " for the pid " + + id + " and it took " + (end - start) + "milliseconds"); + + return sysmeta; + } + + @Override + public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAuthorized, + NotImplemented, ServiceFailure, NotFound, IOException, MarshallingException { + long start = System.currentTimeMillis(); + //try to get the system metadata from the storage system first + InputStream sysmetaInputStream = null; + SystemMetadata sysmeta = getSystemMetadataByAPI(id); + logger.debug("Finish getting the system metadata via the DataONE API call for the" + + " pid " + id); + if (sysmeta != null) { + ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream(); + TypeMarshaller.marshalTypeToOutputStream(sysmeta, systemMetadataOutputStream); + sysmetaInputStream = + new ByteArrayInputStream(systemMetadataOutputStream.toByteArray()); + } + long end = System.currentTimeMillis(); + logger.info("Finish getting the system metadata via DataONE API for the pid " + id + + " and it took " + (end - start) + "milliseconds"); + return sysmetaInputStream; + } + + +} diff --git a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java 
b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java index 6748d367..8032e07a 100644 --- a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java +++ b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java @@ -29,60 +29,5 @@ */ public class ObjectManagerTest { - private String identifier; - - @Before - public void setUp() throws Exception { - identifier = "ObjectManagerTest-" + System.currentTimeMillis(); - File objectFile = new File("src/test/resources/org/dataone/cn/index/resources/d1_testdocs/" - + "fgdc/nasa_d_FEDGPS1293.xml"); - try (InputStream object = new FileInputStream(objectFile)) { - Storage.getInstance().storeObject(object, identifier); - } - File sysmetaFile = new File("src/test/resources/org/dataone/cn/index/resources/" - + "d1_testdocs/fgdc/nasa_d_FEDGPS1293Sysmeta.xml"); - try (InputStream sysmetaStream = new FileInputStream(sysmetaFile)) { - SystemMetadata sysmeta = TypeMarshaller - .unmarshalTypeFromStream(SystemMetadata.class, sysmetaStream); - Identifier pid = new Identifier(); - pid.setValue(identifier); - sysmeta.setIdentifier(pid); - try (ByteArrayOutputStream output = new ByteArrayOutputStream()) { - TypeMarshaller.marshalTypeToOutputStream(sysmeta, output); - try (ByteArrayInputStream input = new ByteArrayInputStream(output.toByteArray())) { - Storage.getInstance().storeMetadata(input, identifier); - } - } - } - } - - /** - * Test the getObject and getSystemMetadata method - * @throws Exception - */ - @Test - public void testGetObjectAndSystemMetadata() throws Exception { - try (InputStream input = ObjectManagerFactory.getObjectManager().getObject(identifier)) { - assertNotNull(input); - try (OutputStream os = new ByteArrayOutputStream()) { - MessageDigest md5 = MessageDigest.getInstance("MD5"); - // Calculate hex digests - byte[] buffer = new byte[8192]; - int bytesRead; - while ((bytesRead = input.read(buffer)) != -1) { - os.write(buffer, 0, bytesRead); - md5.update(buffer, 0, bytesRead); - } - 
String md5Digest = DatatypeConverter.printHexBinary(md5.digest()).toLowerCase(); - assertEquals("1755a557c13be7af44d676bb09274b0e", md5Digest); - } - } - org.dataone.service.types.v1.SystemMetadata sysmeta = ObjectManagerFactory.getObjectManager() - .getSystemMetadata(identifier); - assertEquals(identifier, sysmeta.getIdentifier().getValue()); - assertEquals("1755a557c13be7af44d676bb09274b0e", sysmeta.getChecksum().getValue()); - assertEquals(14828, sysmeta.getSize().intValue()); - } - } diff --git a/src/test/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManagerTest.java b/src/test/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManagerTest.java new file mode 100644 index 00000000..382575b9 --- /dev/null +++ b/src/test/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManagerTest.java @@ -0,0 +1,86 @@ +package org.dataone.cn.indexer.object.hashstore; + +import org.dataone.cn.indexer.object.ObjectManager; +import org.dataone.cn.indexer.object.ObjectManagerFactory; +import org.dataone.indexer.storage.Storage; +import org.dataone.service.types.v1.Identifier; +import org.dataone.service.types.v2.SystemMetadata; +import org.dataone.service.util.TypeMarshaller; +import org.junit.Before; +import org.junit.Test; + +import javax.xml.bind.DatatypeConverter; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.io.OutputStream; +import java.security.MessageDigest; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +/** + * The test class for HashStoreObjManager + * @author Tao + */ +public class HashStoreObjManagerTest { + private String identifier; + + @Before + public void setUp() throws Exception { + identifier = "ObjectManagerTest-" + System.currentTimeMillis(); + File objectFile = new 
File("src/test/resources/org/dataone/cn/index/resources/d1_testdocs/" + + "fgdc/nasa_d_FEDGPS1293.xml"); + try (InputStream object = new FileInputStream(objectFile)) { + Storage.getInstance().storeObject(object, identifier); + } + File sysmetaFile = new File("src/test/resources/org/dataone/cn/index/resources/" + + "d1_testdocs/fgdc/nasa_d_FEDGPS1293Sysmeta.xml"); + try (InputStream sysmetaStream = new FileInputStream(sysmetaFile)) { + SystemMetadata sysmeta = TypeMarshaller + .unmarshalTypeFromStream(SystemMetadata.class, sysmetaStream); + Identifier pid = new Identifier(); + pid.setValue(identifier); + sysmeta.setIdentifier(pid); + try (ByteArrayOutputStream output = new ByteArrayOutputStream()) { + TypeMarshaller.marshalTypeToOutputStream(sysmeta, output); + try (ByteArrayInputStream input = new ByteArrayInputStream(output.toByteArray())) { + Storage.getInstance().storeMetadata(input, identifier); + } + } + } + } + + /** + * Test the getObject and getSystemMetadata method + * @throws Exception + */ + @Test + public void testGetObjectAndSystemMetadata() throws Exception { + ObjectManager manager = ObjectManagerFactory.getObjectManager(); + assertTrue(manager instanceof HashStoreObjManager); + try (InputStream input = manager.getObject(identifier)) { + assertNotNull(input); + try (OutputStream os = new ByteArrayOutputStream()) { + MessageDigest md5 = MessageDigest.getInstance("MD5"); + // Calculate hex digests + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = input.read(buffer)) != -1) { + os.write(buffer, 0, bytesRead); + md5.update(buffer, 0, bytesRead); + } + String md5Digest = DatatypeConverter.printHexBinary(md5.digest()).toLowerCase(); + assertEquals("1755a557c13be7af44d676bb09274b0e", md5Digest); + } + } + org.dataone.service.types.v1.SystemMetadata sysmeta = manager + .getSystemMetadata(identifier); + assertEquals(identifier, sysmeta.getIdentifier().getValue()); + assertEquals("1755a557c13be7af44d676bb09274b0e", 
sysmeta.getChecksum().getValue()); + assertEquals(14828, sysmeta.getSize().intValue()); + } +} From a1d16f4f27947e0aa2739da612e29f0ad919aee8 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 14 May 2025 17:02:16 -0700 Subject: [PATCH 09/36] Added test methods for ObjectManager. --- .../cn/indexer/object/ObjectManager.java | 59 +++++++++++++------ .../indexer/object/ObjectManagerFactory.java | 11 ++-- .../object/hashstore/HashStoreObjManager.java | 1 - .../legacystore/LegacyStoreObjManager.java | 1 + .../cn/indexer/object/ObjectManagerTest.java | 46 ++++++++++----- 5 files changed, 81 insertions(+), 37 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java index ce14a961..54f3d3a2 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java @@ -1,6 +1,5 @@ package org.dataone.cn.indexer.object; -import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -8,9 +7,9 @@ import java.security.NoSuchAlgorithmException; import org.apache.commons.io.FileUtils; -import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.dataone.client.D1Node; import org.dataone.client.auth.AuthTokenSession; import org.dataone.client.exception.ClientSideException; import org.dataone.client.rest.HttpMultipartRestClient; @@ -28,7 +27,6 @@ import org.dataone.service.types.v1.Identifier; import org.dataone.service.types.v1.Session; import org.dataone.service.types.v2.SystemMetadata; -import org.dataone.service.util.TypeMarshaller; /** @@ -37,10 +35,13 @@ * */ public abstract class ObjectManager { - protected static String nodeBaseURL = Settings.getConfiguration().getString("dataone.mn.baseURL"); - private static String DataONEauthToken = null; + + private 
static final String NODE_BASE_URL_ENV_NAME = "DATAONE_INDEXER_NODE_BASE_URL"; + private static final String TOKEN_ENV_NAME = "DATAONE_AUTH_TOKEN"; + + protected static String nodeBaseURL; + private static String dataONEauthToken = null; private static Log logger = LogFactory.getLog(ObjectManager.class); - private static final String TOKEN_VARIABLE_NAME = "DATAONE_AUTH_TOKEN"; private static final String TOKEN_FILE_PATH_PROP_NAME = "dataone.nodeToken.file"; protected static MultipartD1Node d1Node = null; @@ -112,9 +113,17 @@ public static void setD1Node(MultipartD1Node node) { * @throws ServiceFailure */ protected static void refreshD1Node() throws ServiceFailure { - //get the token - DataONEauthToken = System.getenv(TOKEN_VARIABLE_NAME); - if (DataONEauthToken == null || DataONEauthToken.isBlank()) { + nodeBaseURL = null; + nodeBaseURL = System.getenv(NODE_BASE_URL_ENV_NAME); + logger.debug("The node base url from env variable is " + nodeBaseURL); + if (nodeBaseURL == null || nodeBaseURL.isBlank()) { + nodeBaseURL = Settings.getConfiguration().getString("dataone.mn.baseURL"); + logger.debug("The node base url from the properties file is " + nodeBaseURL); + } + //get the token + dataONEauthToken = null; + dataONEauthToken = System.getenv(TOKEN_ENV_NAME); + if (dataONEauthToken == null || dataONEauthToken.isBlank()) { //can't get the token from the env variable. 
So try to get it from a file specified // in the property String tokenFilePath = Settings.getConfiguration().getString(TOKEN_FILE_PATH_PROP_NAME); @@ -123,25 +132,25 @@ protected static void refreshD1Node() throws ServiceFailure { "Can NOT get the token from the env variable so try to get the auth token " + "from the file " + tokenFilePath); try { - DataONEauthToken = FileUtils.readFileToString(new File(tokenFilePath), "UTF-8"); + dataONEauthToken = FileUtils.readFileToString(new File(tokenFilePath), "UTF-8"); } catch (IOException e) { - DataONEauthToken = null; + dataONEauthToken = null; logger.warn("Can NOT get the authen token from the file " + tokenFilePath + " since " + e.getMessage()); } - if (DataONEauthToken != null && !DataONEauthToken.isBlank()) { + if (dataONEauthToken != null && !dataONEauthToken.isBlank()) { logger.info("Got the auth token from the file "+ tokenFilePath); } } } else { logger.info("Got the auth token from an env. variable"); } - if (DataONEauthToken == null || DataONEauthToken.isBlank()) { + if (dataONEauthToken == null || dataONEauthToken.isBlank()) { logger.warn( "Could NOT get an auth token from either an env. variable or the properties file." 
+ " So it will act as the public user."); } - session = createSession(DataONEauthToken); + session = createSession(dataONEauthToken); logger.info("Going to create the d1node with url " + nodeBaseURL); try { d1Node = getMultipartD1Node(session, nodeBaseURL); @@ -172,7 +181,23 @@ private static Session createSession(String authToken) { } return session; } - + + /** + * Only for testing + * @return + */ + protected static String getDataONEauthToken() { + return dataONEauthToken; + } + + /** + * Only for testing + * @return + */ + protected static D1Node getD1Node() { + return d1Node; + } + /** * Get a DataONE MultipartCNode object, which will be used to communication with a CN * @@ -206,7 +231,7 @@ private static MultipartD1Node getMultipartD1Node(Session session, String servic * @param nodeStr either a DataONE node serviceURL (e.g. https://knb.ecoinformatics.org/knb/d1/mn) * or a DataONE node identifier (e.g. urn:node:CN) */ - private static Boolean isCN(String nodeStr) { + protected static Boolean isCN(String nodeStr) { Boolean isCN = false; // match node urn, e.g. "https://cn.dataone.org/cn" if (nodeStr.matches("^\\s*urn:node:.*")) { @@ -230,7 +255,7 @@ private static Boolean isCN(String nodeStr) { return isCN; } - protected SystemMetadata getSystemMetadataByAPI(String id) + protected static SystemMetadata getSystemMetadataByAPI(String id) throws ServiceFailure, InvalidToken, NotImplemented, NotAuthorized, NotFound { if (d1Node != null) { // Metacat can't find the system metadata from the storage system. 
diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java index eae93650..05cf3d61 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java @@ -10,12 +10,15 @@ * @author Tao */ public class ObjectManagerFactory { + + private static final String OBJECT_MANAGER_CLASSNAME_ENV_NAME = + "DATAONE_INDEXER_OBJECT_MANAGER_CLASS_NAME"; + private static volatile ObjectManager manager = null; private static final String DEFAULT_ClASS_NAME = "org.dataone.cn.indexer.object.hashstore" + ".HashStoreObjManager"; private static Log logger = LogFactory.getLog(ObjectManagerFactory.class); - private static final String OBJECT_MANAGER_CLASSNAME_ENV = - "DATAONE_INDEXER_OBJECT_MANAGER_CLASSNAME"; + /** * Create a Concrete ObjectManager object by the single pattern. @@ -32,11 +35,11 @@ public class ObjectManagerFactory { public static ObjectManager getObjectManager() throws ClassNotFoundException, NoSuchMethodException, InvocationTargetException, InstantiationException, IllegalAccessException { - String classNameFromEnv = System.getenv(OBJECT_MANAGER_CLASSNAME_ENV); + String classNameFromEnv = System.getenv(OBJECT_MANAGER_CLASSNAME_ENV_NAME); String className = DEFAULT_ClASS_NAME; if (classNameFromEnv != null && !classNameFromEnv.isBlank()) { logger.debug("The ObjectManager class name form env variable " - + OBJECT_MANAGER_CLASSNAME_ENV + " is " + classNameFromEnv); + + OBJECT_MANAGER_CLASSNAME_ENV_NAME + " is " + classNameFromEnv); className = classNameFromEnv; } if (manager == null) { diff --git a/src/main/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManager.java b/src/main/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManager.java index d533136b..ba650913 100644 --- a/src/main/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManager.java +++ 
b/src/main/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManager.java @@ -11,7 +11,6 @@ import org.dataone.service.exceptions.NotFound; import org.dataone.service.exceptions.NotImplemented; import org.dataone.service.exceptions.ServiceFailure; -import org.dataone.service.types.v1.Identifier; import org.dataone.service.types.v2.SystemMetadata; import org.dataone.service.util.TypeMarshaller; diff --git a/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java b/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java index 6e970b8c..888852f2 100644 --- a/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java @@ -103,6 +103,7 @@ public InputStream getObject(String pid) * Get the absolute file path for a given relative path. If the relativePath is null or blank, * null will be returned * @param relativePath + * @param objectFormat * @return the absolute file path * @throws NotFound */ diff --git a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java index 8032e07a..5d3ecda3 100644 --- a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java +++ b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java @@ -1,24 +1,13 @@ package org.dataone.cn.indexer.object; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.InputStream; -import java.io.OutputStream; -import java.security.MessageDigest; - -import javax.xml.bind.DatatypeConverter; - - -import org.dataone.indexer.storage.Storage; -import 
org.dataone.service.types.v1.Identifier; -import org.dataone.service.types.v2.SystemMetadata; -import org.dataone.service.util.TypeMarshaller; +import org.dataone.cn.indexer.IndexWorker; +import org.dataone.configuration.Settings; import org.junit.Before; import org.junit.Test; @@ -29,5 +18,32 @@ */ public class ObjectManagerTest { + @Before + public void setUp() throws Exception { + String propertyFilePath = + "./src/main/resources/org/dataone/configuration/index-processor.properties"; + Settings.augmentConfiguration(propertyFilePath); + } + /** + * Test the isCN method + * @throws Exception + */ + @Test + public void testIsCN() throws Exception { + String url = "https://knb.ecoinformatics.org/knb/d1/mn"; + assertFalse(ObjectManager.isCN(url)); + url = "https://cn-orc-1.dataone.org/cn"; + assertTrue(ObjectManager.isCN(url)); + } + + /** + * Test the getSystemMetadataByAPI based the settings from properties + */ + @Test + public void testRefreshD1NodeFromProperties() throws Exception { + ObjectManager.refreshD1Node(); + assertEquals("https://valley.duckdns.org/metacat/d1/mn/v2", + ObjectManager.getD1Node().getNodeBaseServiceUrl()); + } } From 23b67a1140ad8efd9955b19d73cfa42d1bc05893 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 14 May 2025 17:11:20 -0700 Subject: [PATCH 10/36] Changed the env variable name.
--- .../org/dataone/cn/indexer/object/ObjectManagerFactoryTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerFactoryTest.java b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerFactoryTest.java index e9c3416f..0616adf1 100644 --- a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerFactoryTest.java +++ b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerFactoryTest.java @@ -14,7 +14,7 @@ */ public class ObjectManagerFactoryTest { - private static final String envName = "DATAONE_INDEXER_OBJECT_MANAGER_CLASSNAME"; + private static final String envName = "DATAONE_INDEXER_OBJECT_MANAGER_CLASS_NAME"; @Rule public EnvironmentVariablesRule environmentVariablesClassName = new EnvironmentVariablesRule(envName, null); From e99406aaa69e43bcee05ab4821f4a8d50b29bde3 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Thu, 15 May 2025 11:08:05 -0700 Subject: [PATCH 11/36] Added the method to test the getSystemMetadataByAPI method. 
--- .../cn/indexer/object/ObjectManager.java | 4 +-- .../cn/indexer/object/ObjectManagerTest.java | 36 ++++++++++++++++--- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java index 54f3d3a2..e9c6c6f5 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java @@ -37,7 +37,7 @@ public abstract class ObjectManager { private static final String NODE_BASE_URL_ENV_NAME = "DATAONE_INDEXER_NODE_BASE_URL"; - private static final String TOKEN_ENV_NAME = "DATAONE_AUTH_TOKEN"; + private static final String TOKEN_ENV_NAME = "DATAONE_INDEXER_AUTH_TOKEN"; protected static String nodeBaseURL; private static String dataONEauthToken = null; @@ -113,7 +113,6 @@ public static void setD1Node(MultipartD1Node node) { * @throws ServiceFailure */ protected static void refreshD1Node() throws ServiceFailure { - nodeBaseURL = null; nodeBaseURL = System.getenv(NODE_BASE_URL_ENV_NAME); logger.debug("The node base url from env variable is " + nodeBaseURL); if (nodeBaseURL == null || nodeBaseURL.isBlank()) { @@ -121,7 +120,6 @@ protected static void refreshD1Node() throws ServiceFailure { logger.debug("The node base url from the properties file is " + nodeBaseURL); } //get the token - dataONEauthToken = null; dataONEauthToken = System.getenv(TOKEN_ENV_NAME); if (dataONEauthToken == null || dataONEauthToken.isBlank()) { //can't get the token from the env variable. 
So try to get it from a file specified diff --git a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java index 5d3ecda3..2534503e 100644 --- a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java +++ b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java @@ -5,18 +5,25 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; - -import org.dataone.cn.indexer.IndexWorker; import org.dataone.configuration.Settings; +import org.dataone.service.types.v2.SystemMetadata; import org.junit.Before; +import org.junit.Rule; import org.junit.Test; +import uk.org.webcompere.systemstubs.rules.EnvironmentVariablesRule; /** - * A junit test class for the ObjecManager class. + * A junit test class for the ObjectManager class. * @author tao * */ public class ObjectManagerTest { + private static final String NODE_BASE_URL_ENV_NAME = "DATAONE_INDEXER_NODE_BASE_URL"; + private static final String TOKEN_ENV_NAME = "DATAONE_INDEXER_AUTH_TOKEN"; + + @Rule + public EnvironmentVariablesRule environmentVariables = + new EnvironmentVariablesRule(TOKEN_ENV_NAME, null); @Before public void setUp() throws Exception { @@ -37,7 +44,7 @@ public void testIsCN() throws Exception { } /** - * Test the getSystemMetadataByAPI based the settings from properties + * Test the refreshD1Node method based the settings from properties */ @Test public void testRefreshD1NodeFromProperties() throws Exception { @@ -46,4 +53,25 @@ public void testRefreshD1NodeFromProperties() throws Exception { ObjectManager.getD1Node().getNodeBaseServiceUrl()); } + /** + * Test the getSystemMetadataByAPI method base the env values. 
+ * @throws Exception + */ + @Test + public void testGetSystemMetadataByAPI() throws Exception { + String url = "https://knb.ecoinformatics.org/knb/d1/mn"; + String token = "fake_token"; + environmentVariables.set(NODE_BASE_URL_ENV_NAME, url); + environmentVariables.set(TOKEN_ENV_NAME, token); + ObjectManager.refreshD1Node(); + assertEquals(url + "/v2", ObjectManager.getD1Node().getNodeBaseServiceUrl()); + assertEquals(token, ObjectManager.getDataONEauthToken()); + String id = "doi:10.5063/F1N0150S"; + SystemMetadata sys = ObjectManager.getSystemMetadataByAPI(id); + assertNotNull(sys); + assertEquals(id, sys.getIdentifier().getValue()); + environmentVariables.set(NODE_BASE_URL_ENV_NAME, null); + environmentVariables.set(TOKEN_ENV_NAME, null); + } + } From bd12080c6e44fc9c7f0c28e69079b0c48f182d00 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Thu, 15 May 2025 15:59:45 -0700 Subject: [PATCH 12/36] Env variables overwrite the properties values. --- .../cn/indexer/object/ObjectManager.java | 2 +- .../indexer/object/ObjectManagerFactory.java | 8 +-- .../legacystore/LegacyStoreObjManager.java | 58 +++++++++++++------ 3 files changed, 45 insertions(+), 23 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java index e9c6c6f5..79c9b058 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java @@ -35,7 +35,7 @@ * */ public abstract class ObjectManager { - + // environmental variables' names private static final String NODE_BASE_URL_ENV_NAME = "DATAONE_INDEXER_NODE_BASE_URL"; private static final String TOKEN_ENV_NAME = "DATAONE_INDEXER_AUTH_TOKEN"; diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java index 05cf3d61..f0c22a72 100644 ---
a/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java @@ -10,8 +10,8 @@ * @author Tao */ public class ObjectManagerFactory { - - private static final String OBJECT_MANAGER_CLASSNAME_ENV_NAME = + // environmental variables' names + private static final String OBJECT_MANAGER_CLASS_NAME_ENV_NAME = "DATAONE_INDEXER_OBJECT_MANAGER_CLASS_NAME"; private static volatile ObjectManager manager = null; @@ -35,11 +35,11 @@ public class ObjectManagerFactory { public static ObjectManager getObjectManager() throws ClassNotFoundException, NoSuchMethodException, InvocationTargetException, InstantiationException, IllegalAccessException { - String classNameFromEnv = System.getenv(OBJECT_MANAGER_CLASSNAME_ENV_NAME); + String classNameFromEnv = System.getenv(OBJECT_MANAGER_CLASS_NAME_ENV_NAME); String className = DEFAULT_ClASS_NAME; if (classNameFromEnv != null && !classNameFromEnv.isBlank()) { logger.debug("The ObjectManager class name form env variable " - + OBJECT_MANAGER_CLASSNAME_ENV_NAME + " is " + classNameFromEnv); + + OBJECT_MANAGER_CLASS_NAME_ENV_NAME + " is " + classNameFromEnv); className = classNameFromEnv; } if (manager == null) { diff --git a/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java b/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java index 888852f2..3d849928 100644 --- a/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java @@ -29,39 +29,61 @@ * @author Tao */ public class LegacyStoreObjManager extends ObjectManager { - - private static String dataRootDir = - Settings.getConfiguration().getString("index.data.root.directory"); - private static String documentRootDir = - Settings.getConfiguration().getString("index.document.root.directory"); + // environmental variables' names + private 
static final String DATA_ROOT_DIR_ENV_NAME = "DATAONE_INDEXER_METACAT_DATA_ROOT_DIR"; + private static final String DOCUMENT_ROOT_DIR_ENV_NAME = + "DATAONE_INDEXER_METACAT_DOCUMENT_ROOT_DIR"; + + private static final String DATA_ROOT_DIR_PROPERTY_NAME = "index.data.root.directory"; + private static final String DOCUMENT_ROOT_DIR_PROPERTY_NAME = "index.document.root.directory"; + private static String dataRootDir; + private static String documentRootDir; private static Logger logger = Logger.getLogger(LegacyStoreObjManager.class); private static boolean ifDataAndDocRootSame = false; /** * Constructor + * Read the Metacat legacy data and document directories from the environmental variables and + * the properties file. The values in the environmental variables overwrite the properties ones. * @throws ServiceFailure */ public LegacyStoreObjManager() throws ServiceFailure { - if (dataRootDir == null || dataRootDir.trim().equals("")) { - throw new ServiceFailure( - "0000", - "The data root directory specified by the property index.data.root.directory is " - + "blank in the properties file"); + dataRootDir = System.getenv(DATA_ROOT_DIR_ENV_NAME); + logger.debug("The data root dir from env " + DATA_ROOT_DIR_ENV_NAME + " is " + dataRootDir); + if (dataRootDir == null || dataRootDir.isBlank()) { + dataRootDir = Settings.getConfiguration().getString(DATA_ROOT_DIR_PROPERTY_NAME); + logger.debug("The data root dir from the properties is " + dataRootDir); + } + if (dataRootDir == null || dataRootDir.isBlank()) { + throw new ServiceFailure("0000", + "The data root directory specified by the env " + "variable " + + DATA_ROOT_DIR_ENV_NAME + " or the property " + + DATA_ROOT_DIR_PROPERTY_NAME + + " in the properties file is null/blank"); + } + documentRootDir = System.getenv(DOCUMENT_ROOT_DIR_ENV_NAME); + logger.debug("The document root dir from env " + DOCUMENT_ROOT_DIR_ENV_NAME + " is " + + documentRootDir); + if (documentRootDir == null || documentRootDir.isBlank()) { + 
documentRootDir = + Settings.getConfiguration().getString(DOCUMENT_ROOT_DIR_PROPERTY_NAME); + logger.debug("The document root dir from the properties is " + documentRootDir); } - if (documentRootDir == null || documentRootDir.trim().equals("")) { - throw new ServiceFailure( - "0000", - "The metadata root directory specified by the property index.document.root" - + ".directory is blank in the properties file"); + if (documentRootDir == null || documentRootDir.isBlank()) { + throw new ServiceFailure("0000", + "The document root directory specified by the env variable " + + DOCUMENT_ROOT_DIR_ENV_NAME + " or the property " + + DOCUMENT_ROOT_DIR_PROPERTY_NAME + + " in the properties file is blank."); } if (!Files.exists(FileSystems.getDefault().getPath(dataRootDir))) { throw new ServiceFailure("0000", "The data root directory " + dataRootDir + - " specified in the properties file doesn't exist"); + " specified in the env variable or the properties file doesn't exist"); } if (!Files.exists(FileSystems.getDefault().getPath(documentRootDir))) { throw new ServiceFailure("0000", "The document root directory " + documentRootDir + - " specified in the properties file doesn't exist"); + " specified in the env variable or the properties file doesn't exist"); } if (!dataRootDir.endsWith("/")) { dataRootDir = dataRootDir + "/"; @@ -173,7 +195,7 @@ public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAu long start = System.currentTimeMillis(); //try to get the system metadata from the storage system first InputStream sysmetaInputStream = null; - SystemMetadata sysmeta = getSystemMetadataByAPI(id); + SystemMetadata sysmeta = (SystemMetadata) getSystemMetadata(id); logger.debug("Finish getting the system metadata via the DataONE API call for the" + " pid " + id); if (sysmeta != null) { From 707af758bef0a3c8c5de96d44ac9f8ed7cae4c49 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Thu, 15 May 2025 16:24:28 -0700 Subject: [PATCH 13/36] Reworded some log statements. 
--- .../legacystore/LegacyStoreObjManager.java | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java b/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java index 3d849928..3c7487bb 100644 --- a/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java @@ -95,9 +95,9 @@ public LegacyStoreObjManager() throws ServiceFailure { if (documentRootDir.equals(dataRootDir)) { ifDataAndDocRootSame = true; } - logger.info("ObjectManager.constructor - the root document directory is " + - documentRootDir + " and the root data directory is " + dataRootDir + - " Are they same?" + ifDataAndDocRootSame); + logger.info( + "The root document directory is " + documentRootDir + " and the root data directory is " + + dataRootDir + " Are they same?" + ifDataAndDocRootSame); } @@ -169,22 +169,19 @@ public org.dataone.service.types.v1.SystemMetadata getSystemMetadata(String id) } } } - logger.debug( - "ObjectManager.getSystemMetadata - finish getting the system metadata via the " - + "DataONE API call for the pid " + id); + logger.debug("Finish getting the system metadata via the DataONE API call for the pid " + + id); } catch (NotAuthorized e) { logger.info( - "ObjectManager.getSystemMetadata - failed to get the system metadata via the " - + "DataONE API call for the pid " + "Failed to get the system metadata via the DataONE API call for the pid " + id + " since it is not authorized. 
We will refresh the token and try again"); refreshD1Node(); sysmeta = getSystemMetadataByAPI(id); } long end = System.currentTimeMillis(); logger.info( - "ObjectManager.getSystemMetadata - finish getting the system metadata via DataONE API" - + " for the pid " - + id + " and it took " + (end - start) + "milliseconds"); + "Finish getting the system metadata via DataONE API for the pid " + id + " and it took " + + (end - start) + "milliseconds"); return sysmeta; } From d34348e2b1f434e756ef2b1c9744f9a1e1b4da12 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Fri, 16 May 2025 14:19:06 -0700 Subject: [PATCH 14/36] Added a new class to test LegacyStoreObjManager. --- .../cn/indexer/object/ObjectManager.java | 2 +- .../legacystore/LegacyStoreObjManager.java | 15 +++ .../object/ObjectManagerFactoryTest.java | 18 ++- .../cn/indexer/object/ObjectManagerTest.java | 2 +- .../LegacyStoreObjManagerTest.java | 127 ++++++++++++++++++ 5 files changed, 161 insertions(+), 3 deletions(-) create mode 100644 src/test/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManagerTest.java diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java index 79c9b058..6096889a 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java @@ -112,7 +112,7 @@ public static void setD1Node(MultipartD1Node node) { * In case the token expired, the method will retrieve the token and create a new d1 node * @throws ServiceFailure */ - protected static void refreshD1Node() throws ServiceFailure { + public static void refreshD1Node() throws ServiceFailure { nodeBaseURL = System.getenv(NODE_BASE_URL_ENV_NAME); logger.debug("The node base url from env variable is " + nodeBaseURL); if (nodeBaseURL == null || nodeBaseURL.isBlank()) { diff --git a/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java 
b/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java index 3c7487bb..dfec3704 100644 --- a/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java @@ -207,5 +207,20 @@ public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAu return sysmetaInputStream; } + /** + * For testing only + * @return the data root dir + */ + protected String getDataRootDir() { + return dataRootDir; + } + + /** + * For testing only + * @return the document root dir + */ + protected String getDocumentRootDir() { + return documentRootDir; + } } diff --git a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerFactoryTest.java b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerFactoryTest.java index 0616adf1..24db829f 100644 --- a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerFactoryTest.java +++ b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerFactoryTest.java @@ -1,6 +1,7 @@ package org.dataone.cn.indexer.object; import org.dataone.cn.indexer.object.hashstore.HashStoreObjManager; +import org.dataone.cn.indexer.object.legacystore.LegacyStoreObjManager; import org.junit.Rule; import org.junit.Test; import uk.org.webcompere.systemstubs.rules.EnvironmentVariablesRule; @@ -19,7 +20,7 @@ public class ObjectManagerFactoryTest { public EnvironmentVariablesRule environmentVariablesClassName = new EnvironmentVariablesRule(envName, null); /** - * Test to create a HashStoreObjManager + * Test to create a HashStoreObjManager instance * @throws Exception */ @Test @@ -30,6 +31,20 @@ public void testHashStoreObjManager() throws Exception { assertTrue(manager instanceof HashStoreObjManager); } + /** + * Test to create a LegacyStroeObjectManager instance + * @throws Exception + */ + @Test + public void testLegacyStoreObjManager() throws Exception { + environmentVariablesClassName.set( + envName, 
"org.dataone.cn.indexer.object.legacystore.LegacyStoreObjManager"); + ObjectManagerFactory.resetManagerNull(); + ObjectManager manager = ObjectManagerFactory.getObjectManager(); + assertTrue(manager instanceof LegacyStoreObjManager); + environmentVariablesClassName.set(envName, null); + } + /** * Test the failure with a wrong class name * @throws Exception @@ -44,5 +59,6 @@ public void testWrongClassName() throws Exception { } catch (Exception e) { assertTrue( e instanceof ClassNotFoundException); } + environmentVariablesClassName.set(envName, null); } } diff --git a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java index 2534503e..615ae2bc 100644 --- a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java +++ b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java @@ -18,7 +18,7 @@ * */ public class ObjectManagerTest { - private static final String NODE_BASE_URL_ENV_NAME = "DATAONE_INDEXER_NODE_BASE_URL"; + public static final String NODE_BASE_URL_ENV_NAME = "DATAONE_INDEXER_NODE_BASE_URL"; private static final String TOKEN_ENV_NAME = "DATAONE_INDEXER_AUTH_TOKEN"; @Rule diff --git a/src/test/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManagerTest.java b/src/test/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManagerTest.java new file mode 100644 index 00000000..5a67d503 --- /dev/null +++ b/src/test/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManagerTest.java @@ -0,0 +1,127 @@ +package org.dataone.cn.indexer.object.legacystore; + +import org.apache.commons.io.IOUtils; +import org.dataone.cn.indexer.object.ObjectManager; +import org.dataone.cn.indexer.object.ObjectManagerTest; +import org.dataone.configuration.Settings; +import org.dataone.service.exceptions.NotFound; +import org.dataone.service.exceptions.ServiceFailure; +import org.dataone.service.types.v1.SystemMetadata; +import org.junit.Before; +import 
org.junit.Rule; +import org.junit.Test; +import uk.org.webcompere.systemstubs.rules.EnvironmentVariablesRule; + +import java.io.FileNotFoundException; +import java.io.InputStream; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +/** + * A junit test class for LegacyStoreObjManager + * @author Tao + */ +public class LegacyStoreObjManagerTest { + private static final String DATA_ROOT_DIR_ENV_NAME = "DATAONE_INDEXER_METACAT_DATA_ROOT_DIR"; + private static final String DOCUMENT_ROOT_DIR_ENV_NAME = + "DATAONE_INDEXER_METACAT_DOCUMENT_ROOT_DIR"; + + private static final String DATA_ROOT_DIR_PROPERTY_NAME = "index.data.root.directory"; + private static final String DOCUMENT_ROOT_DIR_PROPERTY_NAME = "index.document.root.directory"; + + @Rule + public EnvironmentVariablesRule environmentVariables = + new EnvironmentVariablesRule(DATA_ROOT_DIR_ENV_NAME, null); + + @Before + public void setUp() throws Exception { + String propertyFilePath = + "./src/main/resources/org/dataone/configuration/index-processor.properties"; + Settings.augmentConfiguration(propertyFilePath); + } + + /** + * Test the constructor from the properties + */ + @Test + public void testConstructorFromProperties() throws Exception { + String dataDir = "/var/metacat/data/"; + String documentDir = "/var/metacat/documents/"; + LegacyStoreObjManager manager = new LegacyStoreObjManager(); + assertEquals(documentDir, manager.getDocumentRootDir()); + assertEquals(dataDir, manager.getDataRootDir()); + Settings.getConfiguration().setProperty(DATA_ROOT_DIR_PROPERTY_NAME, null); + try { + manager = new LegacyStoreObjManager(); + fail("Test shouldn't get here since the previous statement should throw an exception"); + } catch (Exception e) { + assertTrue( e instanceof ServiceFailure); + } + Settings.getConfiguration().setProperty(DATA_ROOT_DIR_PROPERTY_NAME, dataDir); + manager = new 
LegacyStoreObjManager(); + assertEquals(documentDir, manager.getDocumentRootDir()); + assertEquals(dataDir, manager.getDataRootDir()); + Settings.getConfiguration().setProperty(DOCUMENT_ROOT_DIR_PROPERTY_NAME, null); + try { + manager = new LegacyStoreObjManager(); + fail("Test shouldn't get here since the previous statement should throw an exception"); + } catch (Exception e) { + assertTrue( e instanceof ServiceFailure); + } + } + + /** + * Test the getObject method + * @throws Exception + */ + @Test + public void testGetObject() throws Exception { + String dataDir = "src/test/resources/org/dataone/configuration/"; + String documentDir = "src/test/resources/"; + environmentVariables.set(DATA_ROOT_DIR_ENV_NAME, dataDir); + environmentVariables.set(DOCUMENT_ROOT_DIR_ENV_NAME, documentDir); + LegacyStoreObjManager manager = new LegacyStoreObjManager(); + assertEquals(documentDir, manager.getDocumentRootDir()); + assertEquals(dataDir, manager.getDataRootDir()); + InputStream inputData = manager.getObject("config.xml"); + assertNotNull(inputData); + InputStream inputDocument = manager.getObject("commons-logging.properties"); + assertNotNull(inputDocument); + try { + InputStream input = manager.getObject("foo"); + fail("Test shouldn't get here since the foo file doesn't exist"); + } catch (Exception e) { + assertTrue(e instanceof FileNotFoundException); + } + } + + /** + * Test the getSystemMetacat and getSystemMetadataStream methods + * @throws Exception + */ + @Test + public void testGetSystemMetadata() throws Exception { + String id = "doi:10.18739/A21J9795R"; + String url = "https://cn.dataone.org/cn"; + environmentVariables.set(ObjectManagerTest.NODE_BASE_URL_ENV_NAME, url); + ObjectManager.refreshD1Node(); + LegacyStoreObjManager manager = new LegacyStoreObjManager(); + SystemMetadata systemMetadata = manager.getSystemMetadata(id); + assertEquals(id, systemMetadata.getIdentifier().getValue()); + InputStream inputStream = manager.getSystemMetadataStream(id); + 
String sysStr = IOUtils.toString(inputStream, "UTF-8"); + assertTrue(sysStr.contains("checksum")); + assertTrue(sysStr.contains("rightsHolder")); + assertTrue(sysStr.contains("authoritativeMemberNode")); + try { + systemMetadata = manager.getSystemMetadata("fake-id-foo231"); + fail("Test should get here since the object doesn't exist"); + } catch (Exception e) { + assertTrue(e instanceof NotFound); + } + + } +} From e7939936f2a7d0ec9a2232a569705ae1fca9c4a3 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Fri, 16 May 2025 15:37:09 -0700 Subject: [PATCH 15/36] Added the tests of the failed scenarios of getSystemMetadata and getObject methods. --- .../hashstore/HashStoreObjManagerTest.java | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/test/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManagerTest.java b/src/test/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManagerTest.java index 382575b9..023916ea 100644 --- a/src/test/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManagerTest.java +++ b/src/test/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManagerTest.java @@ -3,6 +3,7 @@ import org.dataone.cn.indexer.object.ObjectManager; import org.dataone.cn.indexer.object.ObjectManagerFactory; import org.dataone.indexer.storage.Storage; +import org.dataone.service.exceptions.NotFound; import org.dataone.service.types.v1.Identifier; import org.dataone.service.types.v2.SystemMetadata; import org.dataone.service.util.TypeMarshaller; @@ -14,6 +15,7 @@ import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.InputStream; import java.io.OutputStream; import java.security.MessageDigest; @@ -21,6 +23,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; /** * The test class for HashStoreObjManager @@ -83,4 
+86,32 @@ public void testGetObjectAndSystemMetadata() throws Exception { assertEquals("1755a557c13be7af44d676bb09274b0e", sysmeta.getChecksum().getValue()); assertEquals(14828, sysmeta.getSize().intValue()); } + + /** + * Test the failed scenarios for the methods of getSystemMetadata and getObject. + * @throws Exception + */ + @Test + public void testFailuresInGetSystemMetadataAndGetObject() throws Exception { + String id = "foo-fake-id-123"; + ObjectManager manager = ObjectManagerFactory.getObjectManager(); + try { + InputStream stream = manager.getSystemMetadataStream(id); + fail("Test can't get here since the id doesn't exist"); + } catch (Exception e) { + assertTrue(e instanceof NotFound); + } + try { + manager.getSystemMetadata(id); + fail("Test can't get here since the id doesn't exist"); + } catch (Exception e) { + assertTrue(e instanceof NotFound); + } + try { + manager.getObject(id); + fail("Test can't get here since the id doesn't exist"); + } catch (Exception e) { + assertTrue(e instanceof FileNotFoundException); + } + } } From c2984c8aa037ecc62f03b010a00a9f5d6faf0d65 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Mon, 19 May 2025 12:53:33 -0700 Subject: [PATCH 16/36] Added the code to handle docid. 
--- .../org/dataone/cn/indexer/IndexWorker.java | 10 +- .../org/dataone/cn/indexer/SolrIndex.java | 97 ++++++++++++------- .../cn/index/DataONESolrJettyTestBase.java | 3 +- 3 files changed, 70 insertions(+), 40 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/IndexWorker.java b/src/main/java/org/dataone/cn/indexer/IndexWorker.java index 8a1c59c0..4f18e939 100644 --- a/src/main/java/org/dataone/cn/indexer/IndexWorker.java +++ b/src/main/java/org/dataone/cn/indexer/IndexWorker.java @@ -449,21 +449,22 @@ private void indexObject(IndexQueueMessageParser parser, boolean multipleThread) Identifier pid = parser.getIdentifier(); String indexType = parser.getIndexType(); int priority = parser.getPriority(); + String docId = parser.getDocId();// It can be null. try { long threadId = Thread.currentThread().getId(); logger.info("IndexWorker.consumer.indexObject by multiple thread? " + multipleThread + ", with the thread id " + threadId + " - Received the index task from the index queue with the identifier: " + pid.getValue() + " , the index type: " + indexType - + ", the priority: " + priority); + + ", the priority: " + priority + ", the docId(can be null): " + docId); switch (indexType) { case CREATE_INDEXT_TYPE -> { boolean sysmetaOnly = false; - solrIndex.update(pid, sysmetaOnly); + solrIndex.update(pid, sysmetaOnly, docId); } case SYSMETA_CHANGE_TYPE -> { boolean sysmetaOnly = true; - solrIndex.update(pid, sysmetaOnly); + solrIndex.update(pid, sysmetaOnly, docId); } case DELETE_INDEX_TYPE -> solrIndex.remove(pid); default -> throw new InvalidRequest( @@ -482,7 +483,8 @@ private void indexObject(IndexQueueMessageParser parser, boolean multipleThread) ServiceFailure | XPathExpressionException | UnsupportedType | SAXException | ParserConfigurationException | SolrServerException | MarshallingException | EncoderException | InterruptedException | IOException | InstantiationException | - IllegalAccessException e) { + IllegalAccessException | ClassNotFoundException | 
InvocationTargetException | + NoSuchMethodException e) { logger.error("Cannot index the task for identifier " + pid.getValue() + " since " + e.getMessage(), e); } diff --git a/src/main/java/org/dataone/cn/indexer/SolrIndex.java b/src/main/java/org/dataone/cn/indexer/SolrIndex.java index 1e5bbab9..58db570b 100644 --- a/src/main/java/org/dataone/cn/indexer/SolrIndex.java +++ b/src/main/java/org/dataone/cn/indexer/SolrIndex.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.io.InputStream; +import java.lang.reflect.InvocationTargetException; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -22,6 +23,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.solr.client.solrj.SolrServerException; import org.dataone.cn.indexer.object.ObjectManagerFactory; +import org.dataone.cn.indexer.object.legacystore.LegacyStoreObjManager; import org.dataone.cn.indexer.parser.BaseXPathDocumentSubprocessor; import org.dataone.cn.indexer.parser.IDocumentDeleteSubprocessor; import org.dataone.cn.indexer.parser.IDocumentSubprocessor; @@ -138,15 +140,22 @@ public void setDeleteSubprocessors( * Generate the index for the given information * @param id the id which will be indexed * @param isSysmetaChangeOnly if this is a change on the system metadata only + * @param docId the docId (file name) of the object. 
This is only for LegacyObjManager * @return a map of solr doc with ids * @throws IOException * @throws XPathExpressionException - * @throws SolrServerException * @throws EncoderException + * @throws SolrServerException + * @throws ClassNotFoundException + * @throws InvocationTargetException + * @throws NoSuchMethodException + * @throws InstantiationException + * @throws IllegalAccessException */ - private Map process(String id, boolean isSysmetaChangeOnly) - throws IOException, XPathExpressionException, EncoderException, - SolrServerException { + private Map process(String id, boolean isSysmetaChangeOnly, String docId) + throws IOException, XPathExpressionException, EncoderException, SolrServerException, + ClassNotFoundException, InvocationTargetException, NoSuchMethodException, + InstantiationException, IllegalAccessException { log.debug("SolrIndex.process - trying to generate the solr doc object for the pid "+id); long start = System.currentTimeMillis(); Map docs = new HashMap<>(); @@ -180,7 +189,15 @@ private Map process(String id, boolean isSysmetaChangeOnly) + " for the id " + id); //if the objectPath is null, we should skip the other processes if (!skipOtherProcessor) { - log.debug("SolrIndex.process - Start to use subprocessor list to process " + id); + // The default object id is the identifier of the object (the hashstore case) + String objectID = id; + if (ObjectManagerFactory.getObjectManager() instanceof LegacyStoreObjManager) { + // In the LegacyStoreObjManager class, dataone-indexer uses the docid (which + // always is the file name) to get the object + objectID = docId; + } + log.debug("Start to use subprocessor list to process " + id); + log.debug("The object id for " + id + " is " + objectID); // Determine if subprocessors are available for this ID if (subprocessors != null) { // for each subprocessor loaded from the spring config @@ -190,7 +207,7 @@ private Map process(String id, boolean isSysmetaChangeOnly) // if so, then extract the additional 
information from the // document. try (InputStream dataStream = - ObjectManagerFactory.getObjectManager().getObject(id)) { + ObjectManagerFactory.getObjectManager().getObject(objectID)) { // docObject = the resource map document or science // metadata document. // note that resource map processing touches all objects @@ -318,20 +335,26 @@ private void checkParams(Identifier pid) throws InvalidRequest { * Insert the indexes for a document. * @param pid the id of this document * @param isSysmetaChangeOnly if this change is only for systemmetadata + * @param docId the docId (file name) of the object. This is only for LegacyObjManager * @throws IOException * @throws InvalidRequest * @throws XPathExpressionException * @throws SolrServerException * @throws EncoderException + * @throws ClassNotFoundException + * @throws InvocationTargetException + * @throws NoSuchMethodException + * @throws InstantiationException + * @throws IllegalAccessException */ - private void insert(Identifier pid, boolean isSysmetaChangeOnly) - throws IOException, InvalidRequest, - XPathExpressionException, SolrServerException, - EncoderException { + private void insert(Identifier pid, boolean isSysmetaChangeOnly, String docId) + throws IOException, InvalidRequest, XPathExpressionException, SolrServerException, + EncoderException, ClassNotFoundException, InvocationTargetException, NoSuchMethodException, + InstantiationException, IllegalAccessException { checkParams(pid); log.debug("SolrIndex.insert - trying to insert the solrDoc for object "+pid.getValue()); long start = System.currentTimeMillis(); - Map docs = process(pid.getValue(), isSysmetaChangeOnly); + Map docs = process(pid.getValue(), isSysmetaChangeOnly, docId); long end = System.currentTimeMillis(); log.info("SolrIndex.insert - the subprocessor processing time of " + pid.getValue() + " is " + (end-start) + " milliseconds."); @@ -377,34 +400,38 @@ private void insertToIndex(SolrDoc doc) throws SolrServerException, IOException * the index 
for the doc. * @param pid the identifier of object which will be indexed * @param isSysmetaChangeOnly the flag indicating if the change is system metadata only - * @throws NotFound - * @throws ServiceFailure - * @throws NotImplemented - * @throws NotAuthorized - * @throws InvalidToken - * @throws EncoderException - * @throws MarshallingException - * @throws SolrServerException - * @throws ParserConfigurationException - * @throws SAXException - * @throws UnsupportedType - * @throws XPathExpressionException - * @throws InterruptedException - * @throws IOException + * @param docId the docId (file name) of the object. This is only for LegacyObjManager + * @throws InvalidToken + * @throws NotAuthorized + * @throws NotImplemented + * @throws ServiceFailure + * @throws NotFound + * @throws XPathExpressionException + * @throws UnsupportedType + * @throws SAXException + * @throws ParserConfigurationException + * @throws SolrServerException + * @throws MarshallingException + * @throws EncoderException + * @throws InterruptedException + * @throws IOException * @throws InvalidRequest - * @throws IllegalAccessException - * @throws InstantiationException + * @throws InstantiationException + * @throws IllegalAccessException + * @throws ClassNotFoundException + * @throws InvocationTargetException + * @throws NoSuchMethodException */ - public void update(Identifier pid, boolean isSysmetaChangeOnly) - throws InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, NotFound, - XPathExpressionException, UnsupportedType, SAXException, - ParserConfigurationException, SolrServerException, MarshallingException, - EncoderException, InterruptedException, IOException, InvalidRequest, - InstantiationException, IllegalAccessException { + public void update(Identifier pid, boolean isSysmetaChangeOnly, String docId) + throws InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, NotFound, + XPathExpressionException, UnsupportedType, SAXException, ParserConfigurationException, + 
SolrServerException, MarshallingException, EncoderException, InterruptedException, + IOException, InvalidRequest, InstantiationException, IllegalAccessException, + ClassNotFoundException, InvocationTargetException, NoSuchMethodException { log.debug("SolrIndex.update - trying to update(insert or remove) solr index of object " + pid.getValue()); try { - insert(pid, isSysmetaChangeOnly); + insert(pid, isSysmetaChangeOnly, docId); } catch (SolrServerException e) { if (e.getMessage().contains(VERSION_CONFLICT) && VERSION_CONFLICT_MAX_ATTEMPTS > 0) { log.info("SolrIndex.update - Indexer grabbed an older version (version conflict) " @@ -414,7 +441,7 @@ public void update(Identifier pid, boolean isSysmetaChangeOnly) for (int i=0; i Date: Tue, 20 May 2025 13:15:46 -0700 Subject: [PATCH 17/36] Evaluated the exception as ServiceFailure as well. --- .../cn/indexer/object/hashstore/HashStoreObjManagerTest.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/test/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManagerTest.java b/src/test/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManagerTest.java index 023916ea..3779f95c 100644 --- a/src/test/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManagerTest.java +++ b/src/test/java/org/dataone/cn/indexer/object/hashstore/HashStoreObjManagerTest.java @@ -4,6 +4,7 @@ import org.dataone.cn.indexer.object.ObjectManagerFactory; import org.dataone.indexer.storage.Storage; import org.dataone.service.exceptions.NotFound; +import org.dataone.service.exceptions.ServiceFailure; import org.dataone.service.types.v1.Identifier; import org.dataone.service.types.v2.SystemMetadata; import org.dataone.service.util.TypeMarshaller; @@ -99,13 +100,13 @@ public void testFailuresInGetSystemMetadataAndGetObject() throws Exception { InputStream stream = manager.getSystemMetadataStream(id); fail("Test can't get here since the id doesn't exist"); } catch (Exception e) { - assertTrue(e instanceof 
NotFound); + assertTrue(e instanceof ServiceFailure || e instanceof NotFound); } try { manager.getSystemMetadata(id); fail("Test can't get here since the id doesn't exist"); } catch (Exception e) { - assertTrue(e instanceof NotFound); + assertTrue(e instanceof ServiceFailure || e instanceof NotFound); } try { manager.getObject(id); From f9a05d081a244a04f3ca9ada093b3d077948db2e Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Tue, 20 May 2025 15:34:41 -0700 Subject: [PATCH 18/36] Added a test python client file. --- src/test/python/index_task_sender.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 src/test/python/index_task_sender.py diff --git a/src/test/python/index_task_sender.py b/src/test/python/index_task_sender.py new file mode 100644 index 00000000..ba9f3135 --- /dev/null +++ b/src/test/python/index_task_sender.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +import pika +import sys + +connection = pika.BlockingConnection( + pika.ConnectionParameters(host='localhost')) +channel = connection.channel() + +channel.queue_declare(queue='index', durable=True, arguments={'x-max-priority': 10}) +channel.queue_bind(exchange='dataone-index', + queue='index', + routing_key='index') +properties = pika.BasicProperties(headers={'index_type': 'create', 'id': sys.argv[1:], 'docId': sys.argv[2:]}) +message = '' +channel.basic_publish( + exchange='dataone-index', + routing_key='index', + body=message, + properties=pika.BasicProperties( + delivery_mode=pika.DeliveryMode.Persistent + )) +print(f" [x] Sent {message}") +connection.close() \ No newline at end of file From 269045a6f72aaf0bd28384b8afeeb86006d43b18 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Tue, 20 May 2025 16:25:05 -0700 Subject: [PATCH 19/36] Fixed a bug in the log statement when the identifier is null. 
--- src/main/java/org/dataone/cn/indexer/IndexWorker.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/IndexWorker.java b/src/main/java/org/dataone/cn/indexer/IndexWorker.java index 9673bb51..61057078 100644 --- a/src/main/java/org/dataone/cn/indexer/IndexWorker.java +++ b/src/main/java/org/dataone/cn/indexer/IndexWorker.java @@ -390,9 +390,11 @@ public void run() { indexObject(parser, multipleThread); } } catch (InvalidRequest e) { - logger.error( - "cannot index the task for identifier " + parser.getIdentifier().getValue() - + " since " + e.getMessage()); + String error = "Cannot index the task for the object since " + e.getMessage(); + if (parser.getIdentifier() != null) { + error = error + " with the identifier " + parser.getIdentifier().getValue(); + } + logger.error(error); boolean requeue = false; rabbitMQchannel.basicReject(envelope.getDeliveryTag(), requeue); } From 127095d2b6e30e4cfa637f6645c9327c62662187 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Tue, 20 May 2025 18:43:43 -0700 Subject: [PATCH 20/36] In the constructor, added the code to initialize the d1node. 
--- .../cn/indexer/object/legacystore/LegacyStoreObjManager.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java b/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java index dfec3704..4201348e 100644 --- a/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/legacystore/LegacyStoreObjManager.java @@ -49,6 +49,7 @@ public class LegacyStoreObjManager extends ObjectManager { * @throws ServiceFailure */ public LegacyStoreObjManager() throws ServiceFailure { + refreshD1Node(); dataRootDir = System.getenv(DATA_ROOT_DIR_ENV_NAME); logger.debug("The data root dir from env " + DATA_ROOT_DIR_ENV_NAME + " is " + dataRootDir); if (dataRootDir == null || dataRootDir.isBlank()) { From c1c95333141d8bb3cfca879beaed49afbc76f773 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Tue, 20 May 2025 18:45:40 -0700 Subject: [PATCH 21/36] Added the code to parse the command line. 
--- src/test/python/index_task_sender.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/test/python/index_task_sender.py b/src/test/python/index_task_sender.py index ba9f3135..d5423f08 100644 --- a/src/test/python/index_task_sender.py +++ b/src/test/python/index_task_sender.py @@ -1,6 +1,14 @@ #!/usr/bin/env python3 import pika import sys +arguments = sys.argv[1:] +if len(arguments) == 2: + print("Arguments:", arguments) + id = arguments[0] + doc_id = arguments[1] +else: + print("Usage: python3 index_task_sender.py pid docId") + sys.exit() connection = pika.BlockingConnection( pika.ConnectionParameters(host='localhost')) @@ -10,14 +18,14 @@ channel.queue_bind(exchange='dataone-index', queue='index', routing_key='index') -properties = pika.BasicProperties(headers={'index_type': 'create', 'id': sys.argv[1:], 'docId': sys.argv[2:]}) +headers={'index_type': 'create', 'id': id, 'doc_id': doc_id} +properties = pika.BasicProperties(headers=headers) message = '' channel.basic_publish( exchange='dataone-index', routing_key='index', body=message, - properties=pika.BasicProperties( - delivery_mode=pika.DeliveryMode.Persistent - )) + properties=properties + ) print(f" [x] Sent {message}") connection.close() \ No newline at end of file From 6505045dc4146f09001ef11ec56fb013f0fadb69 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 11 Jun 2025 14:56:10 -0700 Subject: [PATCH 22/36] Remove an unneeded log statement for error. 
--- src/main/java/org/dataone/cn/indexer/solrhttp/HTTPService.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/dataone/cn/indexer/solrhttp/HTTPService.java b/src/main/java/org/dataone/cn/indexer/solrhttp/HTTPService.java index 55fb59a1..4ed63cea 100644 --- a/src/main/java/org/dataone/cn/indexer/solrhttp/HTTPService.java +++ b/src/main/java/org/dataone/cn/indexer/solrhttp/HTTPService.java @@ -125,7 +125,6 @@ public void sendUpdate(String uri, SolrElementAdd data, String encoding, String ByteArrayOutputStream baosResponse = new ByteArrayOutputStream(); org.apache.commons.io.IOUtils.copy(inputStreamResponse, baosResponse); String error = new String(baosResponse.toByteArray()); - log.error(error); post.abort(); throw new SolrServerException("unable to update solr, non 200 response code." + error); } From 3b8d77a34da74ed25ac26aef6b198801d12dce14 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 11 Jun 2025 16:19:42 -0700 Subject: [PATCH 23/36] shorten the conflict version waiting time to 10 mini-seconds. 
Increase the conflict version max attempt to try to 25000 --- helm/config/dataone-indexer.properties | 2 +- src/main/java/org/dataone/cn/indexer/SolrIndex.java | 11 +++++++---- .../dataone/configuration/index-processor.properties | 2 +- .../configuration/index-processor-2.properties | 4 ++-- .../dataone/configuration/index-processor.properties | 4 ++-- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/helm/config/dataone-indexer.properties b/helm/config/dataone-indexer.properties index c4514edf..a572435e 100644 --- a/helm/config/dataone-indexer.properties +++ b/helm/config/dataone-indexer.properties @@ -38,7 +38,7 @@ dataone.mn.registration.serviceType.url={{ .Values.idxworker.d1_serviceType_url index.resourcemap.waitingComponent.time={{ default 800 .Values.idxworker.resourcemapWaitMs }} index.resourcemap.waitingComponent.max.attempts={{ default 25 .Values.idxworker.resourcemapMaxTries }} -index.solr.versionConflict.waiting.time={{ default 1000 .Values.idxworker.solrVerConflictWaitMs }} +index.solr.versionConflict.waiting.time={{ default 10 .Values.idxworker.solrVerConflictWaitMs }} index.solr.versionConflict.max.attempts={{ default 25000 .Values.idxworker.solrVerConflictMaxTries }} # Storage properties diff --git a/src/main/java/org/dataone/cn/indexer/SolrIndex.java b/src/main/java/org/dataone/cn/indexer/SolrIndex.java index 2848d237..f574cf14 100644 --- a/src/main/java/org/dataone/cn/indexer/SolrIndex.java +++ b/src/main/java/org/dataone/cn/indexer/SolrIndex.java @@ -54,9 +54,9 @@ public class SolrIndex { public static final String ID = "id"; private static final String VERSION_CONFLICT = "version conflict"; private static final int VERSION_CONFLICT_MAX_ATTEMPTS = Settings.getConfiguration().getInt( - "index.solr.versionConflict.max.attempts", 25); + "index.solr.versionConflict.max.attempts", 25000); private static final int VERSION_CONFLICT_WAITING = Settings.getConfiguration().getInt( - "index.solr.versionConflict.waiting.time", 500); 
//milliseconds + "index.solr.versionConflict.waiting.time", 10); //milliseconds private static final List resourceMapFormatIdList = Settings.getConfiguration().getList( "index.resourcemap.namespace"); private static List subprocessors = null; @@ -418,8 +418,11 @@ public void update(Identifier pid, boolean isSysmetaChangeOnly) if (ee.getMessage().contains(VERSION_CONFLICT)) { log.info("SolrIndex.update - Indexer grabbed an older version " + "(version conflict) of a solr doc when it processed object " - + pid.getValue() + ". It will process it again in oder to get " - + "the new solr doc copy. This is attempt number: " + (i+1)); + + pid.getValue() + ". It will wait " + VERSION_CONFLICT_WAITING + + " mini-seconds and process it again in oder to get " + + "the new solr doc copy. This is attempt number: " + (i+1) + + " and the max attempt number is " + + VERSION_CONFLICT_MAX_ATTEMPTS); if (i >= (VERSION_CONFLICT_MAX_ATTEMPTS - 1)) { log.error("SolrIndex.update - Indexer grabbed an older version of " + "a solr doc when it processed object " + pid.getValue() diff --git a/src/main/resources/org/dataone/configuration/index-processor.properties b/src/main/resources/org/dataone/configuration/index-processor.properties index 88aec825..de37d2be 100644 --- a/src/main/resources/org/dataone/configuration/index-processor.properties +++ b/src/main/resources/org/dataone/configuration/index-processor.properties @@ -28,7 +28,7 @@ index.resourcemap.waitingComponent.time=600 #The number of the attempts that the resource map processor tries to wait for the solr doc readiness of its components index.resourcemap.waitingComponent.max.attempts=15 #The time (millisecond) that indexer will wait to grab a newer version of solr doc when a version conflict happened -index.solr.versionConflict.waiting.time=1000 +index.solr.versionConflict.waiting.time=10 #The number of the attempts that indexer tries to grab a newer version of solr doc when a version conflict happened 
index.solr.versionConflict.max.attempts=25000 diff --git a/src/test/resources/org/dataone/configuration/index-processor-2.properties b/src/test/resources/org/dataone/configuration/index-processor-2.properties index 320eaa54..913432fc 100644 --- a/src/test/resources/org/dataone/configuration/index-processor-2.properties +++ b/src/test/resources/org/dataone/configuration/index-processor-2.properties @@ -19,9 +19,9 @@ index.resourcemap.waitingComponent.time=600 #The number of the attempts that the resource map processor tries to wait for the solr doc readiness of its components index.resourcemap.waitingComponent.max.attempts=15 #The time (millisecond) that indexer will wait to grab a newer version of solr doc when a version conflict happened -index.solr.versionConflict.waiting.time=500 +index.solr.versionConflict.waiting.time=10 #The number of the attempts that indexer tries to grab a newer version of solr doc when a version conflict happened -index.solr.versionConflict.max.attempts=25 +index.solr.versionConflict.max.attempts=25000 #You may specify the exact number of threads the indexer will use. #If you keep it blank, Metacat will use the default one - the system processors number minus one. If calculation result is 0, 1 will be used as the default value. #If the one you specify exceeds the default number or is less than 1, the default one will be used as well. 
diff --git a/src/test/resources/org/dataone/configuration/index-processor.properties b/src/test/resources/org/dataone/configuration/index-processor.properties index 256dd8d8..734a045a 100644 --- a/src/test/resources/org/dataone/configuration/index-processor.properties +++ b/src/test/resources/org/dataone/configuration/index-processor.properties @@ -19,9 +19,9 @@ index.resourcemap.waitingComponent.time=600 #The number of the attempts that the resource map processor tries to wait for the solr doc readiness of its components index.resourcemap.waitingComponent.max.attempts=15 #The time (millisecond) that indexer will wait to grab a newer version of solr doc when a version conflict happened -index.solr.versionConflict.waiting.time=500 +index.solr.versionConflict.waiting.time=10 #The number of the attempts that indexer tries to grab a newer version of solr doc when a version conflict happened -index.solr.versionConflict.max.attempts=25 +index.solr.versionConflict.max.attempts=25000 #You may specify the exact number of threads the indexer will use. #If you keep it blank, Metacat will use the default one - the system processors number minus one. If calculation result is 0, 1 will be used as the default value. #If the one you specify exceeds the default number or is less than 1, the default one will be used as well. From 3984d2d7b70d757ef55b68a2490670fe55034ddd Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Thu, 12 Jun 2025 08:55:06 -0700 Subject: [PATCH 24/36] Fixed a typo. 
--- src/main/java/org/dataone/cn/indexer/SolrIndex.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/cn/indexer/SolrIndex.java b/src/main/java/org/dataone/cn/indexer/SolrIndex.java index f574cf14..4974e544 100644 --- a/src/main/java/org/dataone/cn/indexer/SolrIndex.java +++ b/src/main/java/org/dataone/cn/indexer/SolrIndex.java @@ -419,7 +419,7 @@ public void update(Identifier pid, boolean isSysmetaChangeOnly) log.info("SolrIndex.update - Indexer grabbed an older version " + "(version conflict) of a solr doc when it processed object " + pid.getValue() + ". It will wait " + VERSION_CONFLICT_WAITING - + " mini-seconds and process it again in oder to get " + + " milliseconds and process it again in oder to get " + "the new solr doc copy. This is attempt number: " + (i+1) + " and the max attempt number is " + VERSION_CONFLICT_MAX_ATTEMPTS); From 3f6b77b6d310fe7406193add5ad6978ab670e468 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Thu, 12 Jun 2025 08:57:04 -0700 Subject: [PATCH 25/36] Change the default version conflict waiting time to 10 milliseconds. --- helm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm/values.yaml b/helm/values.yaml index e31cdaf3..dfd1c698 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -167,7 +167,7 @@ idxworker: ## @param idxworker.solrVerConflictWaitMs wait time (mS) before indexer grabs a newer version ## of solr doc after a version conflict ## - solrVerConflictWaitMs: 1000 + solrVerConflictWaitMs: 10 ## @param idxworker.solrVerConflictMaxTries Number of tries to get a newer version of solr doc ## after a version conflict From 9b7b06cdd4af378c52ece95be6b3772ec782b388 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Tue, 17 Jun 2025 11:15:41 -0700 Subject: [PATCH 26/36] Excluded the postgresql jar. 
--- pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pom.xml b/pom.xml index d982ced3..cff1f176 100644 --- a/pom.xml +++ b/pom.xml @@ -306,6 +306,12 @@ org.dataone hashstore 1.1.0 + + + org.postgresql + postgresql + + uk.org.webcompere From db6ff56019f38645ccd139abd076b46c59c45f92 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 18 Jun 2025 13:13:19 -0700 Subject: [PATCH 27/36] Added the documentation about switching the storage system. --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index e6777b45..54aa59ce 100644 --- a/README.md +++ b/README.md @@ -284,6 +284,14 @@ vhost = / # Used as default for declare / delete / list for n in $(seq 1 30); do echo $n; rabbitmqadmin -c rmq.conf -N default -U rmq -p $RMQPW publish exchange=testexchange routing_key=testqueue payload="Message: ${n}" --vhost=/; done ``` +## Switching the Storage System +The DataONE Indexer can be configured to use different storage systems by setting the environment +variable `DATAONE_INDEXER_OBJECT_MANAGER_CLASS_NAME`. +By default, this variable is not set, and the indexer uses +`org.dataone.cn.indexer.object.hashstore.HashStoreObjManager`, which enables support for Hashstore. +To use the legacy storage system instead, set the variable to +`org.dataone.cn.indexer.object.legacystore.LegacyStoreObjManager`. + ## History This is a refactored version of the original DataONE [d1_cn_index_processor](https://github.com/DataONEorg/d1_cn_index_processor) that runs From 0d5e2d0ac800be1179aade8ddb831dcba16f00f9 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Fri, 20 Jun 2025 10:21:33 -0700 Subject: [PATCH 28/36] Fixed a typo.
--- src/main/java/org/dataone/cn/indexer/IndexWorker.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/IndexWorker.java b/src/main/java/org/dataone/cn/indexer/IndexWorker.java index 61057078..1e412813 100644 --- a/src/main/java/org/dataone/cn/indexer/IndexWorker.java +++ b/src/main/java/org/dataone/cn/indexer/IndexWorker.java @@ -59,7 +59,7 @@ public class IndexWorker { //The create is the index task type for the action when a new object was created. So the solr index will be generated. //delete is the index task type for the action when an object was deleted. So the solr index will be deleted //sysmeta is the index task type for the action when the system metadata of an existing object was updated. - public final static String CREATE_INDEXT_TYPE = "create"; + public final static String CREATE_INDEX_TYPE = "create"; public final static String DELETE_INDEX_TYPE = "delete"; public final static String SYSMETA_CHANGE_TYPE = "sysmeta"; //this handle for resource map only @@ -465,7 +465,7 @@ private void indexObject(IndexQueueMessageParser parser, boolean multipleThread) + pid.getValue() + " , the index type: " + indexType + ", the priority: " + priority + ", the docId(can be null): " + docId); switch (indexType) { - case CREATE_INDEXT_TYPE -> { + case CREATE_INDEX_TYPE -> { boolean sysmetaOnly = false; solrIndex.update(pid, sysmetaOnly, docId); } From f773a2792f8acc77cc5a47e61853f86bc033ee94 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Fri, 20 Jun 2025 10:23:21 -0700 Subject: [PATCH 29/36] Fixed a typo. 
--- .../org/dataone/cn/indexer/object/ObjectManagerFactory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java index f0c22a72..b7e600a0 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java @@ -45,7 +45,7 @@ public static ObjectManager getObjectManager() if (manager == null) { synchronized (ObjectManagerFactory.class) { if (manager == null) { - logger.info("The final ObjectManager class name form env variable is " + logger.info("The final ObjectManager class name from env variable is " + classNameFromEnv); Class managerClass = Class.forName(className); manager = (ObjectManager) managerClass.getDeclaredConstructor().newInstance(); From 3641c9592bb9daf2f8afb0c855e7dbe978a8c8e3 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Fri, 20 Jun 2025 10:30:44 -0700 Subject: [PATCH 30/36] Changed the name of the env variable to DATAONE_INDEXER_AUTH_TOKEN --- helm/templates/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml index 9303fc66..b212fd30 100644 --- a/helm/templates/deployment.yaml +++ b/helm/templates/deployment.yaml @@ -117,7 +117,7 @@ spec: - name: IDX_JAVA_MEM value: {{ .Values.idxworker.javaMem | quote }} {{- end }} - - name: DATAONE_AUTH_TOKEN + - name: DATAONE_INDEXER_AUTH_TOKEN valueFrom: secretKeyRef: name: {{ .Release.Name }}-indexer-token From 4d6e1b4deebfe88d843ab5a9e9a5dfd085d03795 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Fri, 20 Jun 2025 10:39:55 -0700 Subject: [PATCH 31/36] Fixed a typo. 
--- src/main/java/org/dataone/cn/indexer/object/ObjectManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java index 6096889a..5b84c092 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java @@ -133,7 +133,7 @@ public static void refreshD1Node() throws ServiceFailure { dataONEauthToken = FileUtils.readFileToString(new File(tokenFilePath), "UTF-8"); } catch (IOException e) { dataONEauthToken = null; - logger.warn("Can NOT get the authen token from the file " + tokenFilePath + + logger.warn("Can NOT get the auth token from the file " + tokenFilePath + " since " + e.getMessage()); } if (dataONEauthToken != null && !dataONEauthToken.isBlank()) { From 07f8343eccfa3d40048957ce2365bd16e2e72e42 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Fri, 20 Jun 2025 11:00:56 -0700 Subject: [PATCH 32/36] Combined the catch clauses into one. 
--- .../IndexVisibilityDelegateImpl.java | 66 +++---------------- 1 file changed, 8 insertions(+), 58 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java b/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java index 97ea7e75..c5e60b30 100644 --- a/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java +++ b/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java @@ -37,35 +37,10 @@ public boolean isDocumentVisible(Identifier pid) { if (SolrDoc.visibleInIndex(systemMetadata)) { visible = true; } - } catch (NullPointerException npe) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " + npe.getMessage()); - } catch (InvalidToken e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (NotAuthorized e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " + e.getMessage()); - } catch (NotImplemented e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (ServiceFailure e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (NotFound e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (InstantiationException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (IllegalAccessException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (IOException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (MarshallingException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch 
(NoSuchAlgorithmException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (ClassNotFoundException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " - + e.getMessage()); - } catch (InvocationTargetException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " - + e.getMessage()); - } catch (NoSuchMethodException e) { + } catch (NullPointerException | InvalidToken | NotAuthorized | NotImplemented | + ServiceFailure | NotFound | InstantiationException | IllegalAccessException | + IOException | MarshallingException | NoSuchAlgorithmException | + ClassNotFoundException | InvocationTargetException | NoSuchMethodException e) { logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " + e.getMessage()); } @@ -83,35 +58,10 @@ public boolean documentExists(Identifier pid) { // TODO: Is pid Identifier a SID? return true; } - } catch (NullPointerException npe) { - logger.warn("Could not get visible value for pid: " + pid.getValue()); - } catch (InvalidToken e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (NotAuthorized e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (NotImplemented e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (ServiceFailure e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (NotFound e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (InstantiationException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (IllegalAccessException e) { - logger.warn("Could not get visible value for pid: " + 
pid.getValue() + " since " +e.getMessage()); - } catch (IOException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (MarshallingException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (NoSuchAlgorithmException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage()); - } catch (ClassNotFoundException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " - + e.getMessage()); - } catch (InvocationTargetException e) { - logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " - + e.getMessage()); - } catch (NoSuchMethodException e) { + } catch (NullPointerException | InvalidToken | NotAuthorized | NotImplemented | + ServiceFailure | NotFound | InstantiationException | IllegalAccessException | + IOException | MarshallingException | NoSuchAlgorithmException | + ClassNotFoundException | InvocationTargetException | NoSuchMethodException e) { logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " + e.getMessage()); } From a8813e9a9d8f0fa096b31a8e63fdded2b6e13111 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Fri, 20 Jun 2025 11:04:56 -0700 Subject: [PATCH 33/36] Rewored a sentence and changed a format of an assignment. 
--- .../org/dataone/indexer/queue/IndexQueueMessageParser.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java b/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java index 7ef6c171..0aaf2f99 100644 --- a/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java +++ b/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java @@ -85,8 +85,8 @@ public void parse(AMQP.BasicProperties properties, byte[] body) throws InvalidRe } catch (NullPointerException e) { logger.info( "IndexQueueMessageParser.parse - the priority is not set in the message and we " - + "will set it one."); - priority =1; + + "will set it to 1."); + priority = 1; } logger.debug( "IndexQueueMessageParser.parse - the priority in the message is " + priority + " for " From 02cdf8d2f281e518a9cbf91dc958f933e272448d Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Mon, 23 Jun 2025 14:19:45 -0700 Subject: [PATCH 34/36] If the ObjectManager is for the legacy store, it will use the error level to log the message if the token is null; Otherwise, it will use the warn level. --- .../dataone/cn/indexer/object/ObjectManager.java | 13 ++++++++++--- .../cn/indexer/object/ObjectManagerFactory.java | 10 +++++++++- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java index 5b84c092..7f133fd9 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java @@ -144,9 +144,16 @@ public static void refreshD1Node() throws ServiceFailure { logger.info("Got the auth token from an env. variable"); } if (dataONEauthToken == null || dataONEauthToken.isBlank()) { - logger.warn( - "Could NOT get an auth token from either an env. variable or the properties file." 
- + " So it will act as the public user."); + String message = + "Could NOT get an auth token from either an env. variable or the properties file" + + ".So it will act as the public user."; + String className = ObjectManagerFactory.getObjManagerClassNameFromEnv(); + if (className != null && className.equals("org.dataone.cn.indexer.object.legacystore" + + ".LegacyStoreObjManager")) { + logger.error(message); + } else { + logger.warn(message); + } } session = createSession(dataONEauthToken); logger.info("Going to create the d1node with url " + nodeBaseURL); diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java index b7e600a0..85a3af35 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManagerFactory.java @@ -35,7 +35,7 @@ public class ObjectManagerFactory { public static ObjectManager getObjectManager() throws ClassNotFoundException, NoSuchMethodException, InvocationTargetException, InstantiationException, IllegalAccessException { - String classNameFromEnv = System.getenv(OBJECT_MANAGER_CLASS_NAME_ENV_NAME); + String classNameFromEnv = getObjManagerClassNameFromEnv(); String className = DEFAULT_ClASS_NAME; if (classNameFromEnv != null && !classNameFromEnv.isBlank()) { logger.debug("The ObjectManager class name form env variable " @@ -55,6 +55,14 @@ public static ObjectManager getObjectManager() return manager; } + /** + * Get the object manager class name from the env variable. + * @return the class name. It can be null if the env variable isn't set. 
+ */ + protected static String getObjManagerClassNameFromEnv() { + return System.getenv(OBJECT_MANAGER_CLASS_NAME_ENV_NAME); + } + /** * This method is for testing only */ From fdde3286b3b0056b2ac00014aa7c0f9d1e932189 Mon Sep 17 00:00:00 2001 From: Matthew B <106352182+artntek@users.noreply.github.com> Date: Wed, 25 Jun 2025 15:30:18 -0700 Subject: [PATCH 35/36] version -> 3.1.5; chart 1.3.2 --- helm/Chart.yaml | 4 ++-- pom.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/helm/Chart.yaml b/helm/Chart.yaml index 62aa8ec1..d24abe8b 100644 --- a/helm/Chart.yaml +++ b/helm/Chart.yaml @@ -21,13 +21,13 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: "1.3.1" +version: "1.3.2" # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. 
-appVersion: "3.1.4" +appVersion: "3.1.5" # Chart dependencies dependencies: diff --git a/pom.xml b/pom.xml index cff1f176..56c8dbc5 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 org.dataone dataone-index-worker - 3.1.4 + 3.1.5 jar dataone-index-worker http://maven.apache.org From 7e0bec035cd8ce644892ced854192e3dbdf5b874 Mon Sep 17 00:00:00 2001 From: Matthew B <106352182+artntek@users.noreply.github.com> Date: Wed, 25 Jun 2025 16:05:51 -0700 Subject: [PATCH 36/36] release notes --- RELEASE-NOTES.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 195bf48f..e7521711 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,5 +1,27 @@ # dataone-indexer Release Notes +> [!CAUTION] +> **If upgrading from Helm chart v1.2.0 or earlier, note the section entitled: +> `Caution - ENSURE THAT THE RABBITMQ QUEUE IS EMPTY,` [in the release notes for helm chart v1.3.0 +> below!](#helm-chart-version-130)** + +## dataone-indexer version 3.1.5 & helm chart version 1.3.2 + +### Release date: 2025-06-26 + +### dataone-indexer version 3.1.5 + +This is a patch release with the following minor fixes and upgrades: + +- Dataone-indexer can handle legacy Metacat object repository ([Issue #222](https://github.com/DataONEorg/dataone-indexer/issues/222)) +- Remove some extra log statements (for version conflict retries) that are confusing to users ([Issue #243](https://github.com/DataONEorg/dataone-indexer/issues/243)) +- Indexer performance improvement: Decrease the re-try waiting time for a version conflict error ([Issue #245](https://github.com/DataONEorg/dataone-indexer/issues/245)) +- Remove unnecessary dependency on PostgreSQL jar ([Issue #247](https://github.com/DataONEorg/dataone-indexer/issues/247)) + +### helm chart version 1.3.2 +- Bump indexer App version to 3.1.5 + + ## dataone-indexer version 3.1.4 & helm chart version 1.3.1 ### Release date: 2025-05-20