Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 147 additions & 0 deletions tests/basic/ec/ec-sparsefile-heal.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
#!/bin/bash

. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../ec.rc

# compare_brick_stats FILE
#
# Check that FILE has the same size (stat %s) and the same number of
# allocated blocks (stat %b) on each of the six bricks $B0/${V0}0 ..
# $B0/${V0}5. Brick 0 is used as the reference and its size and block count
# are echoed to stdout.
#
# Returns 0 when every brick matches the reference, 1 when any brick differs
# or when FILE cannot be stat'ed on any brick.
function compare_brick_stats {
    local file_name=$1
    local size blocks size_in_brick blocks_in_brick b

    # Declaration and assignment are kept separate: "local v=$(cmd)" would
    # report the status of "local" and hide a failing stat.
    size=$(stat -c%s "$B0/${V0}0/$file_name") || return 1
    blocks=$(stat -c%b "$B0/${V0}0/$file_name") || return 1

    echo "$size"
    echo "$blocks"

    for b in {0..5}; do
        # A failed stat yields an empty string, which [[ -ne ]] would treat
        # as 0 and could match an empty reference file; fail explicitly.
        size_in_brick=$(stat -c%s "$B0/${V0}${b}/$file_name") || return 1
        blocks_in_brick=$(stat -c%b "$B0/${V0}${b}/$file_name") || return 1

        if [[ "$size" -ne "$size_in_brick" || "$blocks" -ne "$blocks_in_brick" ]]; then
            return 1
        fi
    done

    return 0
}

# compare_md5sum SUM1 SUM2
#
# Returns 0 when both checksums are non-empty and identical, 1 otherwise.
function compare_md5sum {
    # An empty (or missing) argument means the checksum could not be
    # computed; two empty strings must not count as a match.
    if [[ -z "$1" || -z "$2" ]]; then
        return 1
    fi

    [[ "$1" == "$2" ]]
}


# Scenario 1: write sparse-looking files while two bricks are down, bring the
# bricks back, heal, and check that every brick ends up with the same size
# and allocated block count.
# (cleanup, TEST, EXPECT_WITHIN, kill_brick, ... are not defined in this
# script; they come from the .rc files sourced at the top.)
cleanup


# NOTE(review): nothing in this script reads or writes TEST_DIR apart from the
# final "rm -rf" -- confirm whether it is still needed.
TEST_DIR="/tmp/glusterfs-sparse-test"
mkdir -p $TEST_DIR


TEST glusterd
TEST pidof glusterd


# Dispersed volume over six bricks (${V0}0 .. ${V0}5) with redundancy 2.
TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
TEST $CLI volume start $V0

# Mount the volume and wait until ec_child_up_count reports 6.
TEST $GFS -s $H0 --volfile-id $V0 $M0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0

# Bricks 0 and 1 are killed before any data is written (presumably so that
# they miss the writes below and have to be healed -- TODO confirm).
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST kill_brick $V0 $H0 $B0/${V0}1

TEST_FILE="sparse_test_file"

# 1 MiB of zeros, then 1 KiB of random data at 1 KiB-block offsets 0, 512 and
# 1023; conv=notrunc keeps the rest of the file in place.
TEST dd if=/dev/zero of=$M0/$TEST_FILE bs=1024 count=1024 seek=0
TEST dd if=/dev/urandom of=$M0/$TEST_FILE bs=1024 count=1 seek=0 conv=notrunc
TEST dd if=/dev/urandom of=$M0/$TEST_FILE bs=1024 count=1 seek=512 conv=notrunc
TEST dd if=/dev/urandom of=$M0/$TEST_FILE bs=1024 count=1 seek=1023 conv=notrunc

# Second file: extended to 5 MiB with truncate, then one 4 KiB block of random
# data written at 4 KiB-block offsets 100 and 500.
TEST_FILE2="sparse_test_file2"
TEST truncate -s 5M $M0/$TEST_FILE2
TEST dd if=/dev/urandom of=$M0/$TEST_FILE2 bs=4096 count=1 seek=100 conv=notrunc
TEST dd if=/dev/urandom of=$M0/$TEST_FILE2 bs=4096 count=1 seek=500 conv=notrunc

# NOTE(review): fixed delay; its purpose is not evident from this script.
sleep 2

# "start force" on the already-started volume, then wait for all six children
# again (presumably this restarts the killed bricks -- TODO confirm).
TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0

# Trigger a heal and wait until no heal is pending.
TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0

# Size and block count must now agree on all six bricks for both files.
TEST compare_brick_stats $TEST_FILE
TEST compare_brick_stats $TEST_FILE2


# Read $TEST_FILE back through three mounts, each with a different
# ec-read-mask (presumably the option restricts reads to the listed bricks --
# TODO confirm), and check that all three checksums agree. The second and
# third masks include brick 0 and brick 1, the bricks that were down during
# the writes.
#
# Each round performs exactly one mount. Mounting a second, unmasked client
# on top of the masked one would make md5sum read through the unmasked mount
# and would leave a stale mount behind after force_umount.
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST $GFS --xlator-option="*.ec-read-mask=5:2:3:4" -s $H0 --volfile-id $V0 $M0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
checksum1=$(md5sum $M0/$TEST_FILE | cut -d' ' -f1)

EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST $GFS --xlator-option="*.ec-read-mask=0:2:3:4" -s $H0 --volfile-id $V0 $M0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
checksum2=$(md5sum $M0/$TEST_FILE | cut -d' ' -f1)

EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST $GFS --xlator-option="*.ec-read-mask=1:2:3:4" -s $H0 --volfile-id $V0 $M0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
checksum3=$(md5sum $M0/$TEST_FILE | cut -d' ' -f1)

TEST compare_md5sum $checksum1 $checksum2
TEST compare_md5sum $checksum1 $checksum3


# Scenario 2: hole punching. Write a fully populated file while all bricks
# are up, punch a hole in it while two bricks are down, then heal and compare
# the per-brick size and block count.
TEST_FILE3="to_be_sparsefile"
# 100 MiB of random data.
TEST dd if=/dev/urandom of=$M0/$TEST_FILE3 bs=1M count=100

TEST kill_brick $V0 $H0 $B0/${V0}0
TEST kill_brick $V0 $H0 $B0/${V0}1

# Punch a 15 MiB (15728640-byte) hole starting at byte offset 6144;
# --keep-size leaves the file length unchanged.
TEST fallocate --punch-hole --keep-size -o 6144 -l 15728640 $M0/$TEST_FILE3


# "start force" and wait for all six children (presumably restarts the
# killed bricks -- TODO confirm).
TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0

# Trigger a heal and wait until no heal is pending.
TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0

TEST compare_brick_stats $TEST_FILE3

# Read $TEST_FILE3 back through three mounts, each with a different
# ec-read-mask (presumably the option restricts reads to the listed bricks --
# TODO confirm), and check that all three checksums agree. The second and
# third masks include brick 0 and brick 1, the bricks that were down while
# the hole was punched.
#
# Each round performs exactly one mount. Mounting a second, unmasked client
# on top of the masked one would make md5sum read through the unmasked mount
# and would leave a stale mount behind after force_umount.
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST $GFS --xlator-option="*.ec-read-mask=5:2:3:4" -s $H0 --volfile-id $V0 $M0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
checksum1=$(md5sum $M0/$TEST_FILE3 | cut -d' ' -f1)

EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST $GFS --xlator-option="*.ec-read-mask=0:2:3:4" -s $H0 --volfile-id $V0 $M0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
checksum2=$(md5sum $M0/$TEST_FILE3 | cut -d' ' -f1)

EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST $GFS --xlator-option="*.ec-read-mask=1:2:3:4" -s $H0 --volfile-id $V0 $M0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
checksum3=$(md5sum $M0/$TEST_FILE3 | cut -d' ' -f1)

TEST compare_md5sum $checksum1 $checksum2
TEST compare_md5sum $checksum1 $checksum3


# Remove the scratch directory created at the top of the script, then run the
# cleanup helper (defined outside this script).
rm -rf $TEST_DIR
cleanup
164 changes: 136 additions & 28 deletions xlators/cluster/ec/src/ec-heal.c
Original file line number Diff line number Diff line change
Expand Up @@ -1839,8 +1839,7 @@ __ec_heal_data_prepare(call_frame_t *frame, ec_t *ec, fd_t *fd,

for (i = 0; i < ec->nodes; i++) {
if (healed_sinks[i]) {
if (replies[i].stat.ia_size)
trim[i] = 1;
trim[i] = 1;
}
}

Expand Down Expand Up @@ -2045,9 +2044,90 @@ ec_sync_heal_block(call_frame_t *frame, xlator_t *this, ec_heal_t *heal)
return 0;
}

/*
 * Completion callback for the GF_SEEK_HOLE request issued during sparse
 * heal.
 *
 * On success the reported offset is stored in heal->hole_offset; on failure
 * op_errno is stored in heal->error. In both cases heal->barrier is woken so
 * that the waiter can continue. frame, this and xdata are not used.
 */
void
ec_heal_seek_hole_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
                      int32_t op_ret, int32_t op_errno, off_t offset,
                      dict_t *xdata)
{
    ec_fop_data_t *seek_fop = cookie;
    ec_heal_t *heal = seek_fop->data;

    if (op_ret >= 0) {
        heal->hole_offset = offset;
    } else {
        heal->error = op_errno;
    }

    syncbarrier_wake(&heal->barrier);
}

/*
 * Completion callback for the GF_SEEK_DATA request issued during sparse
 * heal.
 *
 * On success the reported offset becomes the new heal->offset. On failure
 * heal->done is set; additionally, any errno other than ENXIO is recorded in
 * heal->error so that the waiter can distinguish "no more data" from a real
 * failure. heal->barrier is always woken. frame, this and xdata are not
 * used.
 */
void
ec_heal_seek_data_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
                      int32_t op_ret, int32_t op_errno, off_t offset,
                      dict_t *xdata)
{
    ec_fop_data_t *fop = cookie;
    ec_heal_t *heal = fop->data;

    if (op_ret < 0) {
        /* lseek(2) semantics: SEEK_DATA fails with ENXIO when there is no
         * data at or after the requested offset, i.e. the rest of the file
         * is a hole. Every other error is a genuine failure and must not be
         * reported as a completed heal.
         * NOTE(review): assumes ec_seek forwards ENXIO unchanged -- confirm. */
        if (op_errno != ENXIO) {
            heal->error = op_errno;
        }
        heal->done = _gf_true;
        goto out;
    }
    heal->offset = offset;

out:
    syncbarrier_wake(&heal->barrier);
}

/*
 * Heal the next data region of a sparse file.
 *
 * Starting at heal->offset, a GF_SEEK_DATA request locates the start of the
 * next data region and a GF_SEEK_HOLE request locates its end. The region
 * [heal->offset, heal->hole_offset) is then healed with ec_sync_heal_block()
 * in steps of heal->size, and heal->offset is left at heal->hole_offset for
 * the next call.
 *
 * Returns 0 on success (including the case where no data is left, which is
 * signalled through heal->done) and a negative errno when a seek or a block
 * heal fails.
 *
 * NOTE(review): like the pre-existing check after the hole seek, this relies
 * on heal->error being 0 when nothing has failed -- confirm the caller
 * initialises it.
 */
int32_t
ec_sync_heal_sparse_region(call_frame_t *frame, ec_t *ec, ec_heal_t *heal)
{
    uint64_t original_heal_size = heal->size;
    uint64_t data_block_size = 0;
    int ret = 0;

    ec_seek(frame, ec->xl, heal->good, EC_MINIMUM_ONE, ec_heal_seek_data_cbk,
            heal, heal->fd, heal->offset, GF_SEEK_DATA, NULL);
    syncbarrier_wait(&heal->barrier, 1);

    /* A failed data seek (other than "no more data") aborts the heal
     * instead of silently ending it with success. */
    if (heal->error != 0) {
        ret = -heal->error;
        goto out;
    }

    if (heal->done)
        goto out;

    ec_seek(frame, ec->xl, heal->good, EC_MINIMUM_ONE, ec_heal_seek_hole_cbk,
            heal, heal->fd, heal->offset, GF_SEEK_HOLE, NULL);
    syncbarrier_wait(&heal->barrier, 1);

    if (heal->error != 0) {
        ret = -heal->error;
        goto out;
    }

    for (; (heal->offset < heal->hole_offset) && (!heal->done);
         heal->offset += heal->size) {
        /* Remaining bytes in this data region, but never less than one
         * stripe.
         * NOTE(review): offsets returned by the seeks are not aligned to
         * the stripe size here -- confirm ec_sync_heal_block copes. */
        data_block_size = heal->hole_offset - heal->offset;
        if (data_block_size < ec->stripe_size)
            data_block_size = ec->stripe_size;

        /* Shrink the last step so it does not run far into the hole. */
        if (data_block_size < heal->size)
            heal->size = data_block_size;

        ret = ec_sync_heal_block(frame, ec->xl, heal);

        /* Restore the configured block size on every path, including the
         * error path, so the caller never sees the shrunken value. */
        heal->size = original_heal_size;

        if (ret < 0)
            goto out;
    }
    heal->offset = heal->hole_offset;

out:
    return ret;
}

int
ec_rebuild_data(call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
unsigned char *sources, unsigned char *healed_sinks)
unsigned char *sources, unsigned char *healed_sinks, int hole_exists)
{
ec_heal_t obj, *heal = &obj;
int ret = 0;
Expand All @@ -2072,29 +2152,47 @@ ec_rebuild_data(call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
heal->ia_type = IA_IFREG;
LOCK_INIT(&heal->lock);

for (heal->offset = 0; (heal->offset < size) && !heal->done;
heal->offset += heal->size) {
/* We immediately abort any heal if a shutdown request has been
* received to avoid delays. The healing of this file will be
* restarted by another SHD or other client that accesses the
* file. */
if (ec->shutdown) {
if (!hole_exists) {
for (heal->offset = 0; (heal->offset < size) && !heal->done;
heal->offset += heal->size) {
/* We immediately abort any heal if a shutdown request has been
* received to avoid delays. The healing of this file will be
* restarted by another SHD or other client that accesses the
* file. */
if (ec->shutdown) {
gf_msg_debug(ec->xl->name, 0,
"Cancelling heal because "
"EC is stopping.");
ret = -ENOTCONN;
break;
}

gf_msg_debug(ec->xl->name, 0,
"Cancelling heal because "
"EC is stopping.");
ret = -ENOTCONN;
break;
"%s: sources: %d, sinks: "
"%d, offset: %" PRIu64 " bsize: %" PRIu64,
uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes),
EC_COUNT(healed_sinks, ec->nodes), heal->offset,
heal->size);
ret = ec_sync_heal_block(frame, ec->xl, heal);
if (ret < 0)
break;
}

} else {
heal->offset = 0;
while (!heal->done) {
if (ec->shutdown) {
gf_msg_debug(ec->xl->name, 0,
"Cancelling heal because "
"EC is stopping.");
ret = -ENOTCONN;

}
ret = ec_sync_heal_sparse_region(frame, ec, heal);
if (ret < 0)
break;
}

gf_msg_debug(ec->xl->name, 0,
"%s: sources: %d, sinks: "
"%d, offset: %" PRIu64 " bsize: %" PRIu64,
uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes),
EC_COUNT(healed_sinks, ec->nodes), heal->offset,
heal->size);
ret = ec_sync_heal_block(frame, ec->xl, heal);
if (ret < 0)
break;
}
memset(healed_sinks, 0, ec->nodes);
ec_mask_to_char_array(heal->bad, healed_sinks, ec->nodes);
Expand All @@ -2103,14 +2201,14 @@ ec_rebuild_data(call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
syncbarrier_destroy(&heal->barrier);
if (ret < 0)
gf_msg_debug(ec->xl->name, -ret, "%s: heal failed",
uuid_utoa(fd->inode->gfid));
uuid_utoa(fd->inode->gfid));
return ret;
}

int
__ec_heal_trim_sinks(call_frame_t *frame, ec_t *ec, fd_t *fd,
unsigned char *healed_sinks, unsigned char *trim,
uint64_t size)
uint64_t size, int file_has_holes)
{
default_args_cbk_t *replies = NULL;
unsigned char *output = NULL;
Expand All @@ -2127,6 +2225,10 @@ __ec_heal_trim_sinks(call_frame_t *frame, ec_t *ec, fd_t *fd,
}
trim_offset = size;
ec_adjust_offset_up(ec, &trim_offset, _gf_true);
if (file_has_holes) {
ret = cluster_ftruncate(ec->xl_list, trim, ec->nodes, replies, output,
frame, ec->xl, fd, 0, NULL);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Failures in this operation need to be handled the same way as in lines 2234-2242.

}
ret = cluster_ftruncate(ec->xl_list, trim, ec->nodes, replies, output,
frame, ec->xl, fd, trim_offset, NULL);
for (i = 0; i < ec->nodes; i++) {
Expand Down Expand Up @@ -2348,6 +2450,8 @@ __ec_heal_data(call_frame_t *frame, ec_t *ec, fd_t *fd, unsigned char *heal_on,
default_args_cbk_t *replies = NULL;
int ret = 0;
int source = 0;
int file_has_holes = 0;
struct iatt source_buf = {0};

locked_on = alloca0(ec->nodes);
output = alloca0(ec->nodes);
Expand All @@ -2371,9 +2475,13 @@ __ec_heal_data(call_frame_t *frame, ec_t *ec, fd_t *fd, unsigned char *heal_on,
}

ret = __ec_heal_data_prepare(frame, ec, fd, locked_on, versions, dirty,
size, sources, healed_sinks, trim, NULL);
size, sources, healed_sinks, trim, &source_buf);
if (ret < 0)
goto unlock;

if (source_buf.ia_blocks * source_buf.ia_blksize != source_buf.ia_size) {
file_has_holes = 1;
}

if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
ret = __ec_fd_data_adjust_versions(
Expand All @@ -2387,7 +2495,7 @@ __ec_heal_data(call_frame_t *frame, ec_t *ec, fd_t *fd, unsigned char *heal_on,
goto unlock;

ret = __ec_heal_trim_sinks(frame, ec, fd, healed_sinks, trim,
size[source]);
size[source], file_has_holes);
}
unlock:
cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
Expand All @@ -2404,7 +2512,7 @@ __ec_heal_data(call_frame_t *frame, ec_t *ec, fd_t *fd, unsigned char *heal_on,
uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes),
EC_COUNT(healed_sinks, ec->nodes));

ret = ec_rebuild_data(frame, ec, fd, size[source], sources, healed_sinks);
ret = ec_rebuild_data(frame, ec, fd, size[source], sources, healed_sinks, file_has_holes);
if (ret < 0)
goto out;

Expand Down
1 change: 1 addition & 0 deletions xlators/cluster/ec/src/ec-types.h
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,7 @@ struct _ec_heal {
uint64_t offset;
uint64_t size;
uint64_t total_size;
uint64_t hole_offset;
};

struct subvol_healer {
Expand Down