Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
import org.apache.hadoop.hbase.mob.MobFileName;
import org.apache.hadoop.hbase.mob.MobStoreEngine;
import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
Expand Down Expand Up @@ -110,6 +111,7 @@ public HMobStore(final HRegion region, final ColumnFamilyDescriptor family,
this.homePath = MobUtils.getMobHome(conf);
this.mobFamilyPath =
MobUtils.getMobFamilyPath(conf, this.getTableName(), getColumnFamilyName());
CommonFSUtils.setStoragePolicy(this.getFileSystem(), mobFamilyPath, this.policyName);
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CommonFSUtils.setStoragePolicy is invoked on mobFamilyPath during store construction, but the MOB family directory may not exist yet (e.g., DefaultMobStoreFlusher creates targetPath with mkdirs if it doesn't exist). If the path is missing, setStoragePolicy will log a warning and the policy won't be applied, so MOB files created later won't inherit it. Ensure the MOB family directory exists before setting the policy (or set the policy at the point where the directory is created).

Suggested change
CommonFSUtils.setStoragePolicy(this.getFileSystem(), mobFamilyPath, this.policyName);
FileSystem fs = this.getFileSystem();
if (!fs.exists(mobFamilyPath)) {
fs.mkdirs(mobFamilyPath);
}
CommonFSUtils.setStoragePolicy(fs, mobFamilyPath, this.policyName);

Copilot uses AI. Check for mistakes.
List<Path> locations = new ArrayList<>(2);
locations.add(mobFamilyPath);
TableName tn = region.getTableDescriptor().getTableName();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ public class HStore
private AtomicLong majorCompactedCellsSize = new AtomicLong();

private final StoreContext storeContext;
protected String policyName;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @xieyupei do we really need this? why not just do String policyName = family.getStoragePolicy(); even in HMobStore.java

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before this key "hbase.hmobstore.block.storage.policy", i think if we don't have this, we should write the same code in HMobStore.
"Optional.ofNullable(family.getStoragePolicy())
.orElseGet(() -> conf.get(BLOCK_STORAGE_POLICY_KEY, DEFAULT_BLOCK_STORAGE_POLICY))
.trim();
"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes we should do that right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, I think so


// Used to track the store files which are currently being written. For compaction, if we want to
// compact store file [a, b, c] to [d], then here we will record 'd'. And we will also use it to
Expand Down Expand Up @@ -269,11 +270,10 @@ protected HStore(final HRegion region, final ColumnFamilyDescriptor family,
region.getRegionFileSystem().createStoreDir(family.getNameAsString());

// set block storage policy for store directory
String policyName = family.getStoragePolicy();
if (null == policyName) {
policyName = this.conf.get(BLOCK_STORAGE_POLICY_KEY, DEFAULT_BLOCK_STORAGE_POLICY);
}
region.getRegionFileSystem().setStoragePolicy(family.getNameAsString(), policyName.trim());
this.policyName = Optional.ofNullable(family.getStoragePolicy())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there any change in logic here? Seems same as before just that we are using lambdas now?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, no logic change. just to simplify assigning values ​​to "this.policyName"

.orElseGet(() -> conf.get(BLOCK_STORAGE_POLICY_KEY, DEFAULT_BLOCK_STORAGE_POLICY))
.trim();
region.getRegionFileSystem().setStoragePolicy(family.getNameAsString(), policyName);

this.dataBlockEncoder = new HFileDataBlockEncoderImpl(family.getDataBlockEncoding());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
import org.apache.hadoop.hbase.testclassification.LargeTests;
Expand Down Expand Up @@ -183,6 +184,62 @@ public void testBlockStoragePolicy() throws Exception {
TEST_UTIL.shutdownMiniCluster();
}
}
@Test
public void testMobStoreStoragePolicy() throws Exception {
TEST_UTIL = new HBaseTestingUtil();
Configuration conf = TEST_UTIL.getConfiguration();
TEST_UTIL.startMiniCluster();
Table table = TEST_UTIL.createTable(TABLE_NAME, FAMILIES);
assertEquals("Should start with empty table", 0, TEST_UTIL.countRows(table));
HRegionFileSystem regionFs = getHRegionFS(TEST_UTIL.getConnection(), table, conf);
try (Admin admin = TEST_UTIL.getConnection().getAdmin()) {
ColumnFamilyDescriptorBuilder cfdA = ColumnFamilyDescriptorBuilder.newBuilder(FAMILIES[0]);
cfdA.setValue(HStore.BLOCK_STORAGE_POLICY_KEY, "ONE_SSD");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we add a new key: "hbase.hmobstore.block.storage.policy"

I am not sure if this JIRA is a bug or a new feature. But teh change will definitely change behaviour for exisdting users once this patch is merged

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, i agree. what about the "hbase.hmobstore.block.storage.policy" default value? equals hstore or just none?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need to add this new key, because when we creat a table, we set the STORAGE-POLICY configuration or ues the default policy (HStore.BLOCK_STORAGE_POLICY_KEY), which means that all data under this column family should follow this policy.
So, the storage strategy for MOB should be consistent with the column family.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We discovered this issue when we found incorrect storage capacity in our cluster. We believe this is a bug, so I fix it like this pr, but I'm not sure how this is determined in our community. @NihalJain @guluo2016

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We believe this is a bug, so I fix it like this pr

I agree with you.

but I'm not sure how this is determined in our community.

In my personal opinion, the storage strategy for MOB should be consistent with the column family.
Perhaps we can discuss it with Nihal and others together.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am fine with consolidating with mobstore as that is the right way. My only concern is we may unknowingly start moving data around if this silently lands with a new version upgrade. May be we are okay to just add to release note, since that would be a behavioral change (due to bug).

Hey @Apache9 WDYT?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR has been around for a while, may not have noticed, remind. @Apache9

cfdA.setMobEnabled(true);
cfdA.setMobThreshold(2L);
admin.modifyColumnFamily(TABLE_NAME, cfdA.build());
while (
TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
.hasRegionsInTransition()
) {
Comment on lines +201 to +204
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The wait loop calls getAssignmentManager().getRegionStates().hasRegionsInTransition(), but RegionStates does not define hasRegionsInTransition() in this codebase, so this won't compile. Use getAssignmentManager().hasRegionsInTransition() (as in testBlockStoragePolicy) or another existing API for RIT checks.

Suggested change
while (
TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
.hasRegionsInTransition()
) {
while (TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager()
.hasRegionsInTransition()) {

Copilot uses AI. Check for mistakes.
Thread.sleep(200);
LOG.debug("Waiting on table to finish schema altering");
}

// flush memstore snapshot into 3 files
for (long i = 0; i < 3; i++) {
Put put = new Put(Bytes.toBytes(i));
put.addColumn(FAMILIES[0], Bytes.toBytes(i), Bytes.toBytes(i));
put.addColumn(FAMILIES[0], Bytes.toBytes(i + "qf"), Bytes.toBytes(i + "value"));
table.put(put);
admin.flush(TABLE_NAME);
}
// there should be 3 files in store dir
FileSystem fs = TEST_UTIL.getDFSCluster().getFileSystem();
Path storePath = regionFs.getStoreDir(Bytes.toString(FAMILIES[0]));
Path mobStorePath = MobUtils.getMobFamilyPath(conf, TABLE_NAME, Bytes.toString(FAMILIES[0]));

FileStatus[] storeFiles = CommonFSUtils.listStatus(fs, storePath);
FileStatus[] mobStoreFiles = CommonFSUtils.listStatus(fs, mobStorePath);
assertNotNull(storeFiles);
assertEquals(3, storeFiles.length);
assertNotNull(mobStoreFiles);
assertEquals(3, mobStoreFiles.length);

for (FileStatus status : storeFiles) {
assertEquals("ONE_SSD",
((HFileSystem) regionFs.getFileSystem()).getStoragePolicyName(status.getPath()));
}
for (FileStatus status : mobStoreFiles) {
assertEquals("ONE_SSD",
((HFileSystem) regionFs.getFileSystem()).getStoragePolicyName(status.getPath()));
}
} finally {
table.close();
TEST_UTIL.deleteTable(TABLE_NAME);
TEST_UTIL.shutdownMiniCluster();
}
}

private HRegionFileSystem getHRegionFS(Connection conn, Table table, Configuration conf)
throws IOException {
Expand Down
Loading