Skip to content

Commit 08d453c

Browse files
authored
Merge pull request #254 from DataONEorg/develop
Release 3.1.5 develop->main
2 parents 362f847 + 439e616 commit 08d453c

28 files changed

+1161
-330
lines changed

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,14 @@ vhost = / # Used as default for declare / delete / list
284284
for n in $(seq 1 30); do echo $n; rabbitmqadmin -c rmq.conf -N default -U rmq -p $RMQPW publish exchange=testexchange routing_key=testqueue payload="Message: ${n}" --vhost=/; done
285285
```
286286
287+
## Switching the Storage System
288+
The DataONE Indexer can be configured to use different storage systems by setting the environment
289+
variable `DATAONE_INDEXER_OBJECT_MANAGER_CLASS_NAME`.
290+
By default, this variable is not set, and the indexer uses
291+
`org.dataone.cn.indexer.object.hashstore.HashStoreObjManager`, which enables support for HashStore.
292+
To use the legacy storage system instead, set the variable to
293+
`org.dataone.cn.indexer.object.legacystore.LegacyStoreObjManager`.
294+
287295
## History
288296
289297
This is a refactored version of the original DataONE [d1_cn_index_processor](https://github.com/DataONEorg/d1_cn_index_processor) that runs

RELEASE-NOTES.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,27 @@
11
# dataone-indexer Release Notes
22

3+
> [!CAUTION]
4+
> **If upgrading from Helm chart v1.2.0 or earlier, note the section entitled:
5+
> `Caution - ENSURE THAT THE RABBITMQ QUEUE IS EMPTY,` [in the release notes for helm chart v1.3.0
6+
> below!](#helm-chart-version-130)**
7+
8+
## dataone-indexer version 3.1.5 & helm chart version 1.3.2
9+
10+
### Release date: 2025-06-26
11+
12+
### dataone-indexer version 3.1.5
13+
14+
This is a patch release with the following minor fixes and upgrades:
15+
16+
- Dataone-indexer can handle legacy Metacat object repository ([Issue #222](https://github.com/DataONEorg/dataone-indexer/issues/222))
17+
- Remove some extra log statements (for version conflict retries) that are confusing to users ([Issue #243](https://github.com/DataONEorg/dataone-indexer/issues/243))
18+
- Indexer performance improvement: Decrease the re-try waiting time for a version conflict error ([Issue #245](https://github.com/DataONEorg/dataone-indexer/issues/245))
19+
- Remove unnecessary dependency on PostgreSQL jar ([Issue #247](https://github.com/DataONEorg/dataone-indexer/issues/247))
20+
21+
### helm chart version 1.3.2
22+
- Bump indexer App version to 3.1.5
23+
24+
325
## dataone-indexer version 3.1.4 & helm chart version 1.3.1
426

527
### Release date: 2025-05-20

helm/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,13 @@ type: application
2121
# This is the chart version. This version number should be incremented each time you make changes
2222
# to the chart and its templates, including the app version.
2323
# Versions are expected to follow Semantic Versioning (https://semver.org/)
24-
version: "1.3.1"
24+
version: "1.3.2"
2525

2626
# This is the version number of the application being deployed. This version number should be
2727
# incremented each time you make changes to the application. Versions are not expected to
2828
# follow Semantic Versioning. They should reflect the version the application is using.
2929
# It is recommended to use it with quotes.
30-
appVersion: "3.1.4"
30+
appVersion: "3.1.5"
3131

3232
# Chart dependencies
3333
dependencies:

helm/config/dataone-indexer.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ dataone.mn.registration.serviceType.url={{ .Values.idxworker.d1_serviceType_url
3838

3939
index.resourcemap.waitingComponent.time={{ default 800 .Values.idxworker.resourcemapWaitMs }}
4040
index.resourcemap.waitingComponent.max.attempts={{ default 25 .Values.idxworker.resourcemapMaxTries }}
41-
index.solr.versionConflict.waiting.time={{ default 1000 .Values.idxworker.solrVerConflictWaitMs }}
41+
index.solr.versionConflict.waiting.time={{ default 10 .Values.idxworker.solrVerConflictWaitMs }}
4242
index.solr.versionConflict.max.attempts={{ default 25000 .Values.idxworker.solrVerConflictMaxTries }}
4343

4444
# Storage properties

helm/templates/deployment.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ spec:
117117
- name: IDX_JAVA_MEM
118118
value: {{ .Values.idxworker.javaMem | quote }}
119119
{{- end }}
120-
- name: DATAONE_AUTH_TOKEN
120+
- name: DATAONE_INDEXER_AUTH_TOKEN
121121
valueFrom:
122122
secretKeyRef:
123123
name: {{ .Release.Name }}-indexer-token

helm/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ idxworker:
167167
## @param idxworker.solrVerConflictWaitMs wait time (ms) before indexer grabs a newer version
168168
## of solr doc after a version conflict
169169
##
170-
solrVerConflictWaitMs: 1000
170+
solrVerConflictWaitMs: 10
171171

172172
## @param idxworker.solrVerConflictMaxTries Number of tries to get a newer version of solr doc
173173
## after a version conflict

pom.xml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<modelVersion>4.0.0</modelVersion>
44
<groupId>org.dataone</groupId>
55
<artifactId>dataone-index-worker</artifactId>
6-
<version>3.1.4</version>
6+
<version>3.1.5</version>
77
<packaging>jar</packaging>
88
<name>dataone-index-worker</name>
99
<url>http://maven.apache.org</url>
@@ -306,6 +306,12 @@
306306
<groupId>org.dataone</groupId>
307307
<artifactId>hashstore</artifactId>
308308
<version>1.1.0</version>
309+
<exclusions>
310+
<exclusion>
311+
<groupId>org.postgresql</groupId>
312+
<artifactId>postgresql</artifactId>
313+
</exclusion>
314+
</exclusions>
309315
</dependency>
310316
<dependency>
311317
<groupId>uk.org.webcompere</groupId>

src/main/java/org/dataone/cn/indexer/IndexWorker.java

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import java.io.File;
44
import java.io.IOException;
5+
import java.lang.reflect.InvocationTargetException;
56
import java.nio.file.Files;
67
import java.nio.file.Path;
78
import java.nio.file.Paths;
@@ -18,14 +19,13 @@
1819
import javax.xml.parsers.ParserConfigurationException;
1920
import javax.xml.xpath.XPathExpressionException;
2021

21-
import com.rabbitmq.client.ShutdownSignalException;
2222
import org.apache.commons.codec.EncoderException;
2323
import org.apache.commons.configuration.ConfigurationException;
2424
import org.apache.commons.logging.Log;
2525
import org.apache.commons.logging.LogFactory;
2626
import org.apache.solr.client.solrj.SolrServerException;
2727
import org.dataone.cn.indexer.annotation.OntologyModelService;
28-
import org.dataone.cn.indexer.object.ObjectManager;
28+
import org.dataone.cn.indexer.object.ObjectManagerFactory;
2929
import org.dataone.configuration.Settings;
3030
import org.dataone.exceptions.MarshallingException;
3131
import org.dataone.indexer.queue.IndexQueueMessageParser;
@@ -59,7 +59,7 @@ public class IndexWorker {
5959
//create is the index task type for the action taken when a new object is created, so the solr index will be generated.
6060
//delete is the index task type for the action when an object was deleted. So the solr index will be deleted
6161
//sysmeta is the index task type for the action when the system metadata of an existing object was updated.
62-
public final static String CREATE_INDEXT_TYPE = "create";
62+
public final static String CREATE_INDEX_TYPE = "create";
6363
public final static String DELETE_INDEX_TYPE = "delete";
6464
public final static String SYSMETA_CHANGE_TYPE = "sysmeta"; //this handle for resource map only
6565

@@ -219,7 +219,10 @@ public static void loadAdditionalPropertyFile(String propertyFile) {
219219
* @throws TimeoutException
220220
* @throws ServiceFailure
221221
*/
222-
public IndexWorker() throws IOException, TimeoutException, ServiceFailure {
222+
public IndexWorker()
223+
throws IOException, TimeoutException, ServiceFailure, ClassNotFoundException,
224+
InvocationTargetException, NoSuchMethodException, InstantiationException,
225+
IllegalAccessException {
223226
this(true);
224227
}
225228

@@ -231,7 +234,9 @@ public IndexWorker() throws IOException, TimeoutException, ServiceFailure {
231234
* @throws TimeoutException
232235
* @throws ServiceFailure
233236
*/
234-
public IndexWorker(Boolean initialize) throws IOException, TimeoutException {
237+
public IndexWorker(Boolean initialize)
238+
throws IOException, TimeoutException, ClassNotFoundException, InvocationTargetException,
239+
NoSuchMethodException, InstantiationException, IllegalAccessException {
235240
String value = System.getenv("KUBERNETES_SERVICE_HOST");
236241
// Java doc says: the string value of the variable, or null if the variable is not defined
237242
// in the system environment
@@ -243,7 +248,7 @@ public IndexWorker(Boolean initialize) throws IOException, TimeoutException {
243248
initExecutorService();//initialize the executor first
244249
initIndexQueue();
245250
initIndexParsers();
246-
ObjectManager.getInstance();
251+
ObjectManagerFactory.getObjectManager();
247252
OntologyModelService.getInstance();
248253
}
249254
}
@@ -385,9 +390,11 @@ public void run() {
385390
indexObject(parser, multipleThread);
386391
}
387392
} catch (InvalidRequest e) {
388-
logger.error(
389-
"cannot index the task for identifier " + parser.getIdentifier().getValue()
390-
+ " since " + e.getMessage());
393+
String error = "Cannot index the task for the object since " + e.getMessage();
394+
if (parser.getIdentifier() != null) {
395+
error = error + " with the identifier " + parser.getIdentifier().getValue();
396+
}
397+
logger.error(error);
391398
boolean requeue = false;
392399
rabbitMQchannel.basicReject(envelope.getDeliveryTag(), requeue);
393400
}
@@ -449,21 +456,22 @@ private void indexObject(IndexQueueMessageParser parser, boolean multipleThread)
449456
Identifier pid = parser.getIdentifier();
450457
String indexType = parser.getIndexType();
451458
int priority = parser.getPriority();
459+
String docId = parser.getDocId();// It can be null.
452460
try {
453461
long threadId = Thread.currentThread().getId();
454462
logger.info("IndexWorker.consumer.indexObject by multiple thread? " + multipleThread
455463
+ ", with the thread id " + threadId
456464
+ " - Received the index task from the index queue with the identifier: "
457465
+ pid.getValue() + " , the index type: " + indexType
458-
+ ", the priority: " + priority);
466+
+ ", the priority: " + priority + ", the docId(can be null): " + docId);
459467
switch (indexType) {
460-
case CREATE_INDEXT_TYPE -> {
468+
case CREATE_INDEX_TYPE -> {
461469
boolean sysmetaOnly = false;
462-
solrIndex.update(pid, sysmetaOnly);
470+
solrIndex.update(pid, sysmetaOnly, docId);
463471
}
464472
case SYSMETA_CHANGE_TYPE -> {
465473
boolean sysmetaOnly = true;
466-
solrIndex.update(pid, sysmetaOnly);
474+
solrIndex.update(pid, sysmetaOnly, docId);
467475
}
468476
case DELETE_INDEX_TYPE -> solrIndex.remove(pid);
469477
default -> throw new InvalidRequest(
@@ -482,7 +490,8 @@ private void indexObject(IndexQueueMessageParser parser, boolean multipleThread)
482490
ServiceFailure | XPathExpressionException | UnsupportedType | SAXException |
483491
ParserConfigurationException | SolrServerException | MarshallingException |
484492
EncoderException | InterruptedException | IOException | InstantiationException |
485-
IllegalAccessException e) {
493+
IllegalAccessException | ClassNotFoundException | InvocationTargetException |
494+
NoSuchMethodException e) {
486495
logger.error("Cannot index the task for identifier " + pid.getValue()
487496
+ " since " + e.getMessage(), e);
488497
}

0 commit comments

Comments
 (0)