Skip to content

Commit 56ec897

Browse files
committed
Add format downgrade script
This script downgrades Bitcask data files to the format used before version 1.7.0 (to ship in Riak 2.0). In that version, a more compact encoding is used for key values, and also 2 new types of tombstones are introduced.
1 parent ad5a03a commit 56ec897

File tree

1 file changed

+136
-0
lines changed

1 file changed

+136
-0
lines changed

priv/scripts/downgrade_bitcask.erl

+136
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
% Run this script to downgrade Bitcask files from the format
2+
% introduced in Riak 2.0 to the format used in Riak 1.4
3+
% Run it by calling escript on it and pointing it to a data
4+
% directory after stopping the Riak node.
5+
% The script will recursively find all Bitcask files under that
6+
% directory and reformat them.
7+
% $ escript downgrade_bitcask.erl /my/riak/data/bitcask
8+
-module(downgrade_bitcask).
9+
-mode(compile).
10+
-export([main/1]).
11+
12+
% Size in bytes of the fixed header that precedes every data file entry,
% as matched in read_entry/1: CRC (4) + timestamp (4) + key size (2) +
% value size (4).
-define(HEADER_SIZE, 14).
13+
% One data file entry: the four header fields (crc, tstamp, keysz, valsz)
% followed by the raw key and value binaries.
-record(entry, { crc, tstamp, keysz, valsz, key, val}).
14+
15+
% Script entry point, called by escript with the command-line arguments.
%
% Expects exactly one argument: the directory to scan recursively for
% Bitcask data files. When the argument list has any other shape, or the
% given path is not a directory, a message is printed to standard_error
% and the script exits with status 1 instead of crashing with
% function_clause or silently doing nothing.
main([DataDir]) ->
    case filelib:is_dir(DataDir) of
        true ->
            downgrade_if_dir(DataDir);
        false ->
            io:format(standard_error, "~s is not a directory\n", [DataDir]),
            halt(1)
    end;
main(_) ->
    io:format(standard_error,
              "Usage: escript downgrade_bitcask.erl <data directory>\n", []),
    halt(1).
17+
18+
% Downgrade Path if its base name looks like a Bitcask data file.
% Returns false for any other path, otherwise whatever downgrade_file/1
% returns.
maybe_downgrade_file(Path) ->
    case is_bitcask_file(Path) of
        true -> downgrade_file(Path);
        false -> false
    end.
20+
21+
% Process Path with downgrade_dir/1 when it is a directory; anything else
% is ignored and yields ok.
downgrade_if_dir(Path) ->
    downgrade_if_dir(filelib:is_dir(Path), Path).

% Dispatch on the result of the directory test.
downgrade_if_dir(true, Path) ->
    downgrade_dir(Path);
downgrade_if_dir(false, _Path) ->
    ok.
28+
29+
% Walk one directory.
%
% If Dir directly contains at least one Bitcask data file, every child is
% offered to maybe_downgrade_file/1 (sub-directories are not descended
% into). Otherwise every child is offered to downgrade_if_dir/1, which
% recurses into sub-directories. Returns the list of per-child results.
downgrade_dir(Dir) ->
    {ok, Names} = file:list_dir(Dir),
    Paths = [filename:join(Dir, Name) || Name <- Names],
    Visit =
        case is_bitcask_dir(Dir) of
            true -> fun maybe_downgrade_file/1;
            false -> fun downgrade_if_dir/1
        end,
    [Visit(Path) || Path <- Paths].
38+
39+
% True when the base name of Path is all digits followed by
% ".bitcask.data". Any leading directory components are ignored.
is_bitcask_file(Path) ->
    case re:run(filename:basename(Path), "^\\d+\\.bitcask\\.data$") of
        nomatch -> false;
        _Match -> true
    end.
43+
44+
% True when Dir is a directory that directly contains at least one entry
% whose name satisfies is_bitcask_file/1. Returns false when Dir is not a
% directory.
is_bitcask_dir(Dir) ->
    filelib:is_dir(Dir) andalso
        begin
            {ok, Names} = file:list_dir(Dir),
            lists:any(fun is_bitcask_file/1, Names)
        end.
52+
53+
% Read the next entry from the open data file Fd.
%
% Returns {ok, #entry{}} on success, eof when the header read reports end
% of file, and error otherwise (for example a short header or a short
% key/value body). The stored CRC is returned as read; it is not checked
% against the entry contents here.
read_entry(Fd) ->
    case file:read(Fd, ?HEADER_SIZE) of
        {ok, <<Crc:32, Tstamp:32, KeySz:16, ValSz:32>>} ->
            Header = #entry{crc = Crc, tstamp = Tstamp,
                            keysz = KeySz, valsz = ValSz},
            read_entry_body(Fd, Header);
        eof ->
            eof;
        _Other ->
            io:format("Error reading entry\n"),
            error
    end.

% Read the key and value announced by Header and fill them in. Returns
% error when the file does not supply exactly keysz + valsz bytes.
read_entry_body(Fd, #entry{keysz = KeySz, valsz = ValSz} = Header) ->
    case file:read(Fd, KeySz + ValSz) of
        {ok, <<Key:KeySz/bytes, Val:ValSz/bytes>>} ->
            {ok, Header#entry{key = Key, val = Val}};
        _Short ->
            error
    end.
70+
71+
% Rewrite data file F in the pre-2.0 format and regenerate its hint file.
%
% The converted data and hint are first written to "<name>.new" files.
% Only after conversion succeeds are the originals moved to "<name>.bak",
% the new files renamed into place, and the backups deleted.
%
% Returns ok. If conversion fails, all file handles are closed, the
% partial ".new" output is removed, the original files are left untouched
% and the function crashes with {downgrade_failed, F}. It crashes with
% badmatch if a file cannot be opened, closed, renamed or deleted.
%
% A data file with no hint file next to it is accepted: there is then
% nothing to back up and the freshly generated hint file is installed.
downgrade_file(F) ->
    Dir = filename:dirname(F),
    NewF = F ++ ".new",
    HintFile = filename:join(Dir, filename:basename(F, ".data") ++ ".hint"),
    NewHF = HintFile ++ ".new",
    io:format("Downgrading file ~s\n", [F]),
    {ok, Fi} = file:open(F, [read, raw, binary]),
    {ok, Fo} = file:open(NewF, [write, raw, binary]),
    {ok, Fh} = file:open(NewHF, [write, raw, binary]),
    Converted =
        try
            convert_file(Fi, Fo, Fh, 0, 0, fun tx_pre_20/1)
        after
            % Always release the handles, even when conversion crashes.
            ok = file:close(Fi),
            ok = file:close(Fo),
            ok = file:close(Fh)
        end,
    case Converted of
        ok ->
            install_downgraded(F, NewF, HintFile, NewHF);
        error ->
            % Best-effort cleanup of the partial output; the delete
            % results are deliberately ignored so that the conversion
            % failure is what gets reported.
            _ = file:delete(NewF),
            _ = file:delete(NewHF),
            erlang:error({downgrade_failed, F})
    end.

% Swap the converted data and hint files into place. The originals are
% kept as ".bak" files until both new files have been renamed, then
% removed. A missing original hint file (enoent) is tolerated.
install_downgraded(F, NewF, HintFile, NewHF) ->
    HintBak = HintFile ++ ".bak",
    FBak = F ++ ".bak",
    HadHint =
        case file:rename(HintFile, HintBak) of
            ok -> true;
            {error, enoent} -> false
        end,
    ok = file:rename(F, FBak),
    ok = file:rename(NewF, F),
    ok = file:rename(NewHF, HintFile),
    case HadHint of
        true -> ok = file:delete(HintBak);
        false -> ok
    end,
    ok = file:delete(FBak),
    ok.
93+
94+
% Copy every entry from the input data file Fi to the output data file
% Fo, passing each one through Tx, and write a matching record to the
% hint file Fh.
%
% Ofs is the offset in Fo at which the next entry will be written and
% Crc is the running CRC over the hint records written so far. Returns
% ok once end of file is reached, or error if an entry cannot be read.
convert_file(Fi, Fo, Fh, Ofs, Crc, Tx) ->
    convert_step(read_entry(Fi), Fi, Fo, Fh, Ofs, Crc, Tx).

% Act on the result of one read_entry/1 call.
convert_step({ok, Entry}, In, Out, Hint, Offset, HintCrc, Tx) ->
    Converted = Tx(Entry),
    Written = write_entry(Out, Converted),
    NextCrc = write_hint_entry(Hint, Offset, Written, HintCrc, Converted),
    convert_file(In, Out, Hint, Offset + Written, NextCrc, Tx);
convert_step(eof, _In, _Out, Hint, _Offset, HintCrc, _Tx) ->
    % Final hint record: empty key, zero timestamp, the largest 64-bit
    % offset, and the accumulated hint CRC stored in the size field. The
    % CRC returned by this last call is not needed.
    write_hint_entry(Hint, 16#ffffFFFFffffFFFF, HintCrc, 0,
                     #entry{key = <<>>, tstamp = 0}),
    ok;
convert_step(_Failure, _In, _Out, _Hint, _Offset, _HintCrc, _Tx) ->
    io:format(standard_error, "Error reading file\n", []),
    error.
110+
111+
% Append one hint record to the hint file Fd: timestamp (32 bits), key
% size (16 bits), TotalSz (32 bits), Offset (64 bits), then the key.
% Returns PrevCrc extended with the CRC32 of the bytes just written.
write_hint_entry(Fd, Offset, TotalSz, PrevCrc,
                 #entry{key = Key, tstamp = Tstamp}) ->
    Fixed = <<Tstamp:32, (byte_size(Key)):16, TotalSz:32, Offset:64>>,
    Record = [Fixed, Key],
    ok = file:write(Fd, Record),
    erlang:crc32(PrevCrc, Record).
116+
117+
% Append one entry to the data file Fd and return the number of bytes
% written. The key and value sizes and the CRC are recomputed from the
% key and value; the crc, keysz and valsz fields of the record are not
% consulted.
write_entry(Fd, #entry{key = Key, val = Val, tstamp = Tstamp}) ->
    Payload = [<<Tstamp:32, (byte_size(Key)):16, (byte_size(Val)):32>>,
               Key, Val],
    Checksum = erlang:crc32(Payload),
    Record = [<<Checksum:32>> | Payload],
    ok = file:write(Fd, Record),
    iolist_size(Record).
124+
125+
% Translate one entry to the pre-2.0 representation.
%
% A key that starts with the byte 2 followed by a 16-bit bucket length
% is re-encoded as term_to_binary({Bucket, Key}); the entry is then
% examined again so its value can be translated as well. A value that
% starts with "bitcask_tombstone2" is replaced by the plain
% "bitcask_tombstone" value. Everything else is returned unchanged.
% The keysz and valsz fields are kept in step with the new key and value;
% the crc field is left as it was.
%
% NOTE(review): the commit message mentions two new tombstone types, but
% only the "bitcask_tombstone2" prefix is handled here - confirm whether
% another prefix also needs translating.
tx_pre_20(#entry{key = <<2, BucketSz:16, Bucket:BucketSz/binary,
                         Key/binary>>} = Entry) ->
    LegacyKey = term_to_binary({Bucket, Key}),
    tx_pre_20(Entry#entry{key = LegacyKey, keysz = byte_size(LegacyKey)});
tx_pre_20(#entry{val = <<"bitcask_tombstone2", _/binary>>} = Entry) ->
    Tombstone = <<"bitcask_tombstone">>,
    Entry#entry{val = Tombstone, valsz = byte_size(Tombstone)};
tx_pre_20(Entry) ->
    Entry.

0 commit comments

Comments
 (0)