From 2ae700ddb8a7e208cc8fb2fb6bad62a8fe290aec Mon Sep 17 00:00:00 2001 From: Rahul Goswami Date: Thu, 1 May 2025 00:59:10 -0400 Subject: [PATCH 01/10] Changes for API to allow updating indexCreatedVersionMajor for SegmentsInfos under the right conditions --- .../org/apache/lucene/index/IndexWriter.java | 40 +++++++++++++++++++ .../org/apache/lucene/index/SegmentInfos.java | 36 ++++++++++++++++- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 65f79e51b0ef..b39dcf6c9a86 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -212,6 +212,13 @@ public class IndexWriter @SuppressWarnings("NonFinalStaticField") private static int actualMaxDocs = MAX_DOCS; + /** + * Sets signal to force set indexCreatedVersionMajor in SegmentInfos at commit. This also helps + * avoid slowdown in other commit threads since prepareCommitInternal doesn't need to iterate over + * the segments if this flag has not been set + */ + private volatile boolean updateIndexCreationVersion = false; + /** Used only for testing. */ static void setMaxDocs(int maxDocs) { if (maxDocs > MAX_DOCS) { @@ -3737,6 +3744,16 @@ private long prepareCommitInternal() throws IOException { // corresponding add from an updateDocument) can // sneak into the commit point: toCommit = segmentInfos.clone(); + + if (this.updateIndexCreationVersion + && toCommit.getIndexCreatedVersionMajor() < Version.LATEST.major) { + try { + toCommit.setIndexCreatedVersionMajorToLatest(); + } finally { + this.updateIndexCreationVersion = false; + } + } + pendingCommitChangeCount = changeCount.get(); // This protects the segmentInfos we are now going // to commit. This is important in case, eg, while @@ -4108,6 +4125,29 @@ public final long commit() throws IOException { return commitInternal(config.getMergePolicy()); } + /** + * Essentially does what {@link #commit} does. Additionally, if all segments belong to the latest + * version and if the index created major version in the existing {@link SegmentInfos} is older + * than the latest major version, resets the index created version to the latest major version in + * the newly synced {@link SegmentInfos} + * + *

If this method is called on an index where some of the segments belong to an older major + * version, it will throw an exception, and any partial changes done as part of the commit will be + * rolled back. + * + * @return The sequence number of the last operation in the commit. + * All sequence numbers <= this value will be reflected in the commit, and all others will + * not. + * @throws IOException + */ + public final long commitAndResetVersionCreatedMajor() throws IOException { + ensureOpen(); + synchronized (commitLock) { + updateIndexCreationVersion = true; + return commitInternal(config.getMergePolicy()); + } + } + /** * Returns true if there may be changes that have not been committed. There are cases where this * may return true when there are no actual "real" changes to the index, for example if you've diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 0418ef746f53..6bf7579802af 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -159,7 +159,7 @@ public final class SegmentInfos implements Cloneable, Iterable Date: Sun, 4 May 2025 00:49:40 -0400 Subject: [PATCH 02/10] better naming and comments --- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java | 2 +- lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index b39dcf6c9a86..010ac2df4d0a 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -4140,7 +4140,7 @@ public final long commit() throws IOException { * not. * @throws IOException */ - public final long commitAndResetVersionCreatedMajor() throws IOException { + public final long commitAndUpdateVersionCreatedMajor() throws IOException { ensureOpen(); synchronized (commitLock) { updateIndexCreationVersion = true; diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 6bf7579802af..242161c49b3a 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -1202,7 +1202,7 @@ public int getIndexCreatedVersionMajor() { return indexCreatedVersionMajor; } - // Intended to be only called by IndexWriter to enable strict checks + // Package private access to allow being called only by IndexWriter for greater control void setIndexCreatedVersionMajorToLatest() { if (!allSegmentsAreLatestVersion()) { throw new IllegalStateException( From 89758983be2cdf48443f6b5f52e688b535fe0631 Mon Sep 17 00:00:00 2001 From: Rahul Goswami Date: Mon, 14 Jul 2025 23:27:16 -0400 Subject: [PATCH 03/10] made changes more generic --- .../src/java/org/apache/lucene/index/IndexWriter.java | 8 ++++---- .../src/java/org/apache/lucene/index/SegmentInfos.java | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 010ac2df4d0a..126529e8df37 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -217,7 +217,7 @@ public class IndexWriter * avoid slowdown in other commit threads since prepareCommitInternal doesn't need to iterate over * the segments if this flag has not been set */ - private volatile boolean updateIndexCreationVersion = false; + private volatile boolean updateIndexSupportedVersion = false; /** Used only for testing. */ static void setMaxDocs(int maxDocs) { @@ -3745,12 +3745,12 @@ private long prepareCommitInternal() throws IOException { // sneak into the commit point: toCommit = segmentInfos.clone(); - if (this.updateIndexCreationVersion + if (this.updateIndexSupportedVersion && toCommit.getIndexCreatedVersionMajor() < Version.LATEST.major) { try { toCommit.setIndexCreatedVersionMajorToLatest(); } finally { - this.updateIndexCreationVersion = false; + this.updateIndexSupportedVersion = false; } } @@ -4143,7 +4143,7 @@ public final long commit() throws IOException { public final long commitAndUpdateVersionCreatedMajor() throws IOException { ensureOpen(); synchronized (commitLock) { - updateIndexCreationVersion = true; + updateIndexSupportedVersion = true; return commitInternal(config.getMergePolicy()); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 242161c49b3a..d9d725aee789 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -159,8 +159,9 @@ public final class SegmentInfos implements Cloneable, Iterable= 6, got: " + indexCreatedVersionMajor); } this.indexCreatedVersionMajor = indexCreatedVersionMajor; + this.maxIndexSupportedVersionMajor = this.indexCreatedVersionMajor+1; } /** Returns {@link SegmentCommitInfo} at the provided index. */ From ff5bde921f85981bb6b41b7af16677953b2e76cc Mon Sep 17 00:00:00 2001 From: Rahul Goswami Date: Mon, 13 Oct 2025 12:19:49 -0400 Subject: [PATCH 04/10] look at version of individual segments to make decision to open index instead of indexCreatedVersionMajor --- .../org/apache/lucene/index/SegmentInfos.java | 138 +++++++++++------- 1 file changed, 86 insertions(+), 52 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 204bd89b3500..66bb27e4968c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -346,30 +346,12 @@ public static final SegmentInfos readCommit( input); } - if (indexCreatedVersion < minSupportedMajorVersion) { - throw new IndexFormatTooOldException( - input, - "Index created with Lucene " - + indexCreatedVersion - + ".x is not supported by Lucene " - + Version.LATEST - + ". This Lucene version only supports indexes created with major version " - + minSupportedMajorVersion - + " or later (found: " - + indexCreatedVersion - + ", minimum: " - + minSupportedMajorVersion - + "). To resolve this issue: (1) Re-index your data using Lucene " - + Version.LATEST.major - + ".x, or (2) Use an older Lucene version that supports your index format."); - } - SegmentInfos infos = new SegmentInfos(indexCreatedVersion); infos.id = id; infos.generation = generation; infos.lastGeneration = generation; infos.luceneVersion = luceneVersion; - parseSegmentInfos(directory, input, infos, format); + parseSegmentInfos(directory, input, infos, format, minSupportedMajorVersion); return infos; } catch (Throwable t) { @@ -385,7 +367,12 @@ public static final SegmentInfos readCommit( } private static void parseSegmentInfos( - Directory directory, DataInput input, SegmentInfos infos, int format) throws IOException { + Directory directory, + DataInput input, + SegmentInfos infos, + int format, + int minSupportedMajorVersion) + throws IOException { infos.version = CodecUtil.readBELong(input); // System.out.println("READ sis version=" + infos.version); infos.counter = input.readVLong(); @@ -402,6 +389,7 @@ private static void parseSegmentInfos( } long totalDocs = 0; + for (int seg = 0; seg < numSegments; seg++) { String segName = input.readString(); byte[] segmentID = new byte[StringHelper.ID_LENGTH]; @@ -410,6 +398,84 @@ private static void parseSegmentInfos( SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READONCE); info.setCodec(codec); + Version segMinVersion = info.getMinVersion(); + Version segmentVersion = info.getVersion(); + + if (!segmentVersion.onOrAfter(infos.minSegmentLuceneVersion)) { + throw new CorruptIndexException( + "segments file recorded minSegmentLuceneVersion=" + + infos.minSegmentLuceneVersion + + " but segment=" + + info + + " has older version=" + + segmentVersion, + input); + } + + if (infos.indexCreatedVersionMajor >= 7 + && segmentVersion.major < infos.indexCreatedVersionMajor) { + throw new CorruptIndexException( + "segments file recorded indexCreatedVersionMajor=" + + infos.indexCreatedVersionMajor + + " but segment=" + + info + + " has older version=" + + segmentVersion, + input); + } + + if (infos.indexCreatedVersionMajor >= 7 && segMinVersion == null) { + throw new CorruptIndexException( + "segments infos must record minVersion with indexCreatedVersionMajor=" + + infos.indexCreatedVersionMajor, + input); + } + + if (segMinVersion == null) { + if (infos.indexCreatedVersionMajor < minSupportedMajorVersion) { + throw new IndexFormatTooOldException( + input, + "Index created with Lucene " + + infos.indexCreatedVersionMajor + + ".x is not supported by Lucene " + + Version.LATEST + + ". This Lucene version only supports indexes created with major version " + + minSupportedMajorVersion + + " or later (found: " + + infos.indexCreatedVersionMajor + + ", minimum: " + + minSupportedMajorVersion + + "). To resolve this issue: (1) Re-index your data using Lucene " + + Version.LATEST.major + + ".x, or (2) Use an older Lucene version that supports your index format."); + } else { + throw new CorruptIndexException( + "segments infos must record minVersion with indexCreatedVersionMajor=" + + infos.indexCreatedVersionMajor, + input); + } + } + + if (segMinVersion.major < minSupportedMajorVersion) { + throw new IndexFormatTooOldException( + input, + "Index has segment traces from Lucene version " + + segMinVersion.major + + ".x and is not supported by Lucene " + + Version.LATEST + + ". This Lucene version only supports indexes with major version " + + minSupportedMajorVersion + + " or later (found: " + + segMinVersion.major + + ", minimum supported: " + + minSupportedMajorVersion + + "). To resolve this issue: (1) Re-index your data using Lucene " + + minSupportedMajorVersion + + ".x or later (preferably " + + Version.LATEST.major + + ".x), or (2) Use an older Lucene version that supports your index format."); + } + totalDocs += info.maxDoc(); long delGen = CodecUtil.readBELong(input); int delCount = CodecUtil.readBEInt(input); @@ -463,38 +529,6 @@ private static void parseSegmentInfos( } siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles); infos.add(siPerCommit); - - Version segmentVersion = info.getVersion(); - - if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) { - throw new CorruptIndexException( - "segments file recorded minSegmentLuceneVersion=" - + infos.minSegmentLuceneVersion - + " but segment=" - + info - + " has older version=" - + segmentVersion, - input); - } - - if (infos.indexCreatedVersionMajor >= 7 - && segmentVersion.major < infos.indexCreatedVersionMajor) { - throw new CorruptIndexException( - "segments file recorded indexCreatedVersionMajor=" - + infos.indexCreatedVersionMajor - + " but segment=" - + info - + " has older version=" - + segmentVersion, - input); - } - - if (infos.indexCreatedVersionMajor >= 7 && info.getMinVersion() == null) { - throw new CorruptIndexException( - "segments infos must record minVersion with indexCreatedVersionMajor=" - + infos.indexCreatedVersionMajor, - input); - } } infos.userData = input.readMapOfStrings(); From f9b7ec25f9805b9d10ffacf4b12ef9ea740e699e Mon Sep 17 00:00:00 2001 From: Rahul Goswami Date: Tue, 14 Oct 2025 00:04:56 -0400 Subject: [PATCH 05/10] revert changes to overrite indexCreatedVersionMajor in IndexWriter and SegmentInfos --- .../org/apache/lucene/index/IndexWriter.java | 40 ------------------- .../org/apache/lucene/index/SegmentInfos.java | 36 ----------------- 2 files changed, 76 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 0ebc424549e0..aa2c92ffecca 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -214,13 +214,6 @@ public class IndexWriter @SuppressWarnings("NonFinalStaticField") private static int actualMaxDocs = MAX_DOCS; - /** - * Sets signal to force set indexCreatedVersionMajor in SegmentInfos at commit. This also helps - * avoid slowdown in other commit threads since prepareCommitInternal doesn't need to iterate over - * the segments if this flag has not been set - */ - private volatile boolean updateIndexSupportedVersion = false; - /** Used only for testing. */ static void setMaxDocs(int maxDocs) { if (maxDocs > MAX_DOCS) { @@ -3714,16 +3707,6 @@ private long prepareCommitInternal() throws IOException { // corresponding add from an updateDocument) can // sneak into the commit point: toCommit = segmentInfos.clone(); - - if (this.updateIndexSupportedVersion - && toCommit.getIndexCreatedVersionMajor() < Version.LATEST.major) { - try { - toCommit.setIndexCreatedVersionMajorToLatest(); - } finally { - this.updateIndexSupportedVersion = false; - } - } - pendingCommitChangeCount = changeCount.get(); // This protects the segmentInfos we are now going // to commit. This is important in case, eg, while @@ -4094,29 +4077,6 @@ public final long commit() throws IOException { return commitInternal(config.getMergePolicy()); } - /** - * Essentially does what {@link #commit} does. Additionally, if all segments belong to the latest - * version and if the index created major version in the existing {@link SegmentInfos} is older - * than the latest major version, resets the index created version to the latest major version in - * the newly synced {@link SegmentInfos} - * - *

If this method is called on an index where some of the segments belong to an older major - * version, it will throw an exception, and any partial changes done as part of the commit will be - * rolled back. - * - * @return The sequence number of the last operation in the commit. - * All sequence numbers <= this value will be reflected in the commit, and all others will - * not. - * @throws IOException - */ - public final long commitAndUpdateVersionCreatedMajor() throws IOException { - ensureOpen(); - synchronized (commitLock) { - updateIndexSupportedVersion = true; - return commitInternal(config.getMergePolicy()); - } - } - /** * Returns true if there may be changes that have not been committed. There are cases where this * may return true when there are no actual "real" changes to the index, for example if you've diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 377786f64a86..66bb27e4968c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -161,7 +161,6 @@ public final class SegmentInfos implements Cloneable, Iterable= 6, got: " + indexCreatedVersionMajor); } this.indexCreatedVersionMajor = indexCreatedVersionMajor; - this.maxIndexSupportedVersionMajor = this.indexCreatedVersionMajor+1; } /** Returns {@link SegmentCommitInfo} at the provided index. */ @@ -1232,38 +1230,4 @@ public Version getMinSegmentLuceneVersion() { public int getIndexCreatedVersionMajor() { return indexCreatedVersionMajor; } - - // Package private access to allow being called only by IndexWriter for greater control - void setIndexCreatedVersionMajorToLatest() { - if (!allSegmentsAreLatestVersion()) { - throw new IllegalStateException( - String.format( - "Cannot explicitly set the index created major version to %d since some of the segments were written in an older Lucene version.", - Version.LATEST.major)); - } - this.indexCreatedVersionMajor = Version.LATEST.major; - } - - private boolean allSegmentsAreLatestVersion() { - for (SegmentCommitInfo info : this) { - if (info.info.minVersion == null - || info.info.minVersion.major != Version.LATEST.major - || info.info.getVersion().major != Version.LATEST.major) { - if (infoStream != null) { - message( - "At least one segment was not written by the current Lucene version (" - + Version.LATEST.major - + ".x). Offending segment: [name:" - + info.info.name - + ", version:" - + info.info.getVersion() - + ", minVersion:" - + info.info.minVersion - + "]"); - } - return false; - } - } - return true; - } } From 8015f2e07ec0091eb2a25ea3811427c3cdc52b16 Mon Sep 17 00:00:00 2001 From: Rahul Goswami Date: Tue, 14 Oct 2025 00:11:17 -0400 Subject: [PATCH 06/10] changes.txt --- lucene/CHANGES.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 86518e6e9b65..9637e8103ac2 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -131,6 +131,7 @@ Other * GITHUB#14761: Use more Comparators for PriorityQueue implementations. (Simon Cooper) * GITHUB#14817: Refactor some complex uses of PriorityQueue to use Comparators. (Simon Cooper) +* GITHUB#14607: Revise strategy for opening an index for reading (Rahul Goswami) ======================= Lucene 10.4.0 ======================= API Changes From 96c278cb6ec1983624ef3a28c58c4e3904fab1c2 Mon Sep 17 00:00:00 2001 From: Rahul Goswami Date: Thu, 16 Oct 2025 00:21:02 -0400 Subject: [PATCH 07/10] backwards compatibility tests --- .../lucene/backward_index/TestBasicBackwardsCompatibility.java | 2 +- lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java index 77dc6d2412c0..15088e14fb61 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java @@ -864,7 +864,7 @@ public void testFailOpenOldIndex() throws IOException { assertTrue( ex.getMessage() .contains( - "This Lucene version only supports indexes created with major version " + "This Lucene version only supports indexes with major version " + Version.LATEST.major + " or later")); // now open with allowed min version diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 66bb27e4968c..1e129dfd2cf1 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -431,6 +431,7 @@ private static void parseSegmentInfos( input); } + // if trying to open some random old index (< Lucene 7) if (segMinVersion == null) { if (infos.indexCreatedVersionMajor < minSupportedMajorVersion) { throw new IndexFormatTooOldException( From 841e7bbe54fefd9978509dfa384f5bb5dc99fc8a Mon Sep 17 00:00:00 2001 From: Rahul Goswami Date: Sat, 25 Oct 2025 02:37:35 -0400 Subject: [PATCH 08/10] realign for better readability and simplify logic --- .../org/apache/lucene/index/SegmentInfos.java | 133 +++++++----------- 1 file changed, 54 insertions(+), 79 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 1e129dfd2cf1..1760b932cf9c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -398,85 +398,6 @@ private static void parseSegmentInfos( SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READONCE); info.setCodec(codec); - Version segMinVersion = info.getMinVersion(); - Version segmentVersion = info.getVersion(); - - if (!segmentVersion.onOrAfter(infos.minSegmentLuceneVersion)) { - throw new CorruptIndexException( - "segments file recorded minSegmentLuceneVersion=" - + infos.minSegmentLuceneVersion - + " but segment=" - + info - + " has older version=" - + segmentVersion, - input); - } - - if (infos.indexCreatedVersionMajor >= 7 - && segmentVersion.major < infos.indexCreatedVersionMajor) { - throw new CorruptIndexException( - "segments file recorded indexCreatedVersionMajor=" - + infos.indexCreatedVersionMajor - + " but segment=" - + info - + " has older version=" - + segmentVersion, - input); - } - - if (infos.indexCreatedVersionMajor >= 7 && segMinVersion == null) { - throw new CorruptIndexException( - "segments infos must record minVersion with indexCreatedVersionMajor=" - + infos.indexCreatedVersionMajor, - input); - } - - // if trying to open some random old index (< Lucene 7) - if (segMinVersion == null) { - if (infos.indexCreatedVersionMajor < minSupportedMajorVersion) { - throw new IndexFormatTooOldException( - input, - "Index created with Lucene " - + infos.indexCreatedVersionMajor - + ".x is not supported by Lucene " - + Version.LATEST - + ". This Lucene version only supports indexes created with major version " - + minSupportedMajorVersion - + " or later (found: " - + infos.indexCreatedVersionMajor - + ", minimum: " - + minSupportedMajorVersion - + "). To resolve this issue: (1) Re-index your data using Lucene " - + Version.LATEST.major - + ".x, or (2) Use an older Lucene version that supports your index format."); - } else { - throw new CorruptIndexException( - "segments infos must record minVersion with indexCreatedVersionMajor=" - + infos.indexCreatedVersionMajor, - input); - } - } - - if (segMinVersion.major < minSupportedMajorVersion) { - throw new IndexFormatTooOldException( - input, - "Index has segment traces from Lucene version " - + segMinVersion.major - + ".x and is not supported by Lucene " - + Version.LATEST - + ". This Lucene version only supports indexes with major version " - + minSupportedMajorVersion - + " or later (found: " - + segMinVersion.major - + ", minimum supported: " - + minSupportedMajorVersion - + "). To resolve this issue: (1) Re-index your data using Lucene " - + minSupportedMajorVersion - + ".x or later (preferably " - + Version.LATEST.major - + ".x), or (2) Use an older Lucene version that supports your index format."); - } - totalDocs += info.maxDoc(); long delGen = CodecUtil.readBELong(input); int delCount = CodecUtil.readBEInt(input); @@ -530,6 +451,60 @@ private static void parseSegmentInfos( } siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles); infos.add(siPerCommit); + + Version segmentVersion = info.getVersion(); + + if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) { + throw new CorruptIndexException( + "segments file recorded minSegmentLuceneVersion=" + + infos.minSegmentLuceneVersion + + " but segment=" + + info + + " has older version=" + + segmentVersion, + input); + } + + if (infos.indexCreatedVersionMajor >= 7 + && segmentVersion.major < infos.indexCreatedVersionMajor) { + throw new CorruptIndexException( + "segments file recorded indexCreatedVersionMajor=" + + infos.indexCreatedVersionMajor + + " but segment=" + + info + + " has older version=" + + segmentVersion, + input); + } + + if (infos.indexCreatedVersionMajor >= 7 && info.getMinVersion() == null) { + throw new CorruptIndexException( + "segments infos must record minVersion with indexCreatedVersionMajor=" + + infos.indexCreatedVersionMajor, + input); + } + + int segmentDerivedFromMajorVersion = + info.getMinVersion() == null + ? infos.indexCreatedVersionMajor + : info.getMinVersion().major; + if (info.getMinVersion() == null || info.getMinVersion().major < minSupportedMajorVersion) { + throw new IndexFormatTooOldException( + input, + "Index has segments derived from Lucene version " + + segmentDerivedFromMajorVersion + + ".x and is not supported by Lucene " + + Version.LATEST + + ". This Lucene version only supports indexes with major version " + + minSupportedMajorVersion + + " or later (found: " + + segmentDerivedFromMajorVersion + + ", minimum supported: " + + minSupportedMajorVersion + + "). To resolve this issue re-index your data using Lucene " + + minSupportedMajorVersion + + ".x or later."); + } } infos.userData = input.readMapOfStrings(); From ba9ab0c6fe2b17b3a27a65772fb88132a0af5ba2 Mon Sep 17 00:00:00 2001 From: Rahul Goswami Date: Sun, 26 Oct 2025 00:33:05 -0400 Subject: [PATCH 09/10] rename variable and add comment --- .../src/java/org/apache/lucene/index/SegmentInfos.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 1760b932cf9c..aae2833782b7 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -484,21 +484,23 @@ private static void parseSegmentInfos( input); } - int segmentDerivedFromMajorVersion = + int createdOrSegmentMinVersion = info.getMinVersion() == null ? infos.indexCreatedVersionMajor : info.getMinVersion().major; + + // version >=7 are expected to record minVersion if (info.getMinVersion() == null || info.getMinVersion().major < minSupportedMajorVersion) { throw new IndexFormatTooOldException( input, "Index has segments derived from Lucene version " - + segmentDerivedFromMajorVersion + + createdOrSegmentMinVersion + ".x and is not supported by Lucene " + Version.LATEST + ". This Lucene version only supports indexes with major version " + minSupportedMajorVersion + " or later (found: " - + segmentDerivedFromMajorVersion + + createdOrSegmentMinVersion + ", minimum supported: " + minSupportedMajorVersion + "). To resolve this issue re-index your data using Lucene " From c84c52681e44a3393d8275513b6f1513de55958b Mon Sep 17 00:00:00 2001 From: Rahul Goswami Date: Tue, 28 Oct 2025 02:12:51 -0400 Subject: [PATCH 10/10] 1)Push entire commit after previous reversion for clean diff 2)Handle ancient indices (with no supported codec) with graceful message --- lucene/CHANGES.txt | 2 + .../TestBasicBackwardsCompatibility.java | 2 +- .../org/apache/lucene/index/SegmentInfos.java | 80 ++++++++++++++----- 3 files changed, 62 insertions(+), 22 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index ea9db0d698bb..0195eae76271 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -131,6 +131,8 @@ Other * GITHUB#14761: Use more Comparators for PriorityQueue implementations. (Simon Cooper) * GITHUB#14817: Refactor some complex uses of PriorityQueue to use Comparators. (Simon Cooper) +* GITHUB#14607: Index open performs version check on each segment, ignores indexCreatedVersionMajor (Rahul Goswami) + ======================= Lucene 10.4.0 ======================= API Changes diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java index 77dc6d2412c0..15088e14fb61 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java @@ -864,7 +864,7 @@ public void testFailOpenOldIndex() throws IOException { assertTrue( ex.getMessage() .contains( - "This Lucene version only supports indexes created with major version " + "This Lucene version only supports indexes with major version " + Version.LATEST.major + " or later")); // now open with allowed min version diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 204bd89b3500..379a4e97b1c6 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -346,30 +346,12 @@ public static final SegmentInfos readCommit( input); } - if (indexCreatedVersion < minSupportedMajorVersion) { - throw new IndexFormatTooOldException( - input, - "Index created with Lucene " - + indexCreatedVersion - + ".x is not supported by Lucene " - + Version.LATEST - + ". This Lucene version only supports indexes created with major version " - + minSupportedMajorVersion - + " or later (found: " - + indexCreatedVersion - + ", minimum: " - + minSupportedMajorVersion - + "). To resolve this issue: (1) Re-index your data using Lucene " - + Version.LATEST.major - + ".x, or (2) Use an older Lucene version that supports your index format."); - } - SegmentInfos infos = new SegmentInfos(indexCreatedVersion); infos.id = id; infos.generation = generation; infos.lastGeneration = generation; infos.luceneVersion = luceneVersion; - parseSegmentInfos(directory, input, infos, format); + parseSegmentInfos(directory, input, infos, format, minSupportedMajorVersion); return infos; } catch (Throwable t) { @@ -385,7 +367,12 @@ public static final SegmentInfos readCommit( } private static void parseSegmentInfos( - Directory directory, DataInput input, SegmentInfos infos, int format) throws IOException { + Directory directory, + DataInput input, + SegmentInfos infos, + int format, + int minSupportedMajorVersion) + throws IOException { infos.version = CodecUtil.readBELong(input); // System.out.println("READ sis version=" + infos.version); infos.counter = input.readVLong(); @@ -402,11 +389,38 @@ private static void parseSegmentInfos( } long totalDocs = 0; + for (int seg = 0; seg < numSegments; seg++) { String segName = input.readString(); byte[] segmentID = new byte[StringHelper.ID_LENGTH]; input.readBytes(segmentID, 0, segmentID.length); - Codec codec = readCodec(input); + Codec codec = null; + try { + codec = readCodec(input); + } catch (IllegalArgumentException e) { + if (e.getMessage() != null && e.getMessage().contains("Could not load codec")) { + // maybe we tried loading an old default codec which isn't present in backward-codecs + // anymore. + // aka index is too old + throw new IndexFormatTooOldException( + input, + "Index has segments derived from Lucene version " + + infos.indexCreatedVersionMajor + + ".x and is not supported by Lucene " + + Version.LATEST + + ". This Lucene version only supports indexes with major version " + + minSupportedMajorVersion + + " or later (found: " + + infos.indexCreatedVersionMajor + + ", minimum supported: " + + minSupportedMajorVersion + + "). To resolve this issue re-index your data using Lucene " + + minSupportedMajorVersion + + ".x or later."); + } else { + throw e; + } + } SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READONCE); info.setCodec(codec); @@ -495,6 +509,30 @@ private static void parseSegmentInfos( + infos.indexCreatedVersionMajor, input); } + + int createdOrSegmentMinVersion = + info.getMinVersion() == null + ? infos.indexCreatedVersionMajor + : info.getMinVersion().major; + + // version >=7 are expected to record minVersion + if (info.getMinVersion() == null || info.getMinVersion().major < minSupportedMajorVersion) { + throw new IndexFormatTooOldException( + input, + "Index has segments derived from Lucene version " + + createdOrSegmentMinVersion + + ".x and is not supported by Lucene " + + Version.LATEST + + ". This Lucene version only supports indexes with major version " + + minSupportedMajorVersion + + " or later (found: " + + createdOrSegmentMinVersion + + ", minimum supported: " + + minSupportedMajorVersion + + "). To resolve this issue re-index your data using Lucene " + + minSupportedMajorVersion + + ".x or later."); + } } infos.userData = input.readMapOfStrings();