Skip to content

Commit 394df55

Browse files
committed
initial encrypted zone support
1 parent ff27ef8 commit 394df55

File tree

8 files changed

+420
-8
lines changed

8 files changed

+420
-8
lines changed

aes.go

+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
package hdfs
2+
3+
import (
4+
"crypto/aes"
5+
"crypto/cipher"
6+
"encoding/binary"
7+
"fmt"
8+
)
9+
10+
const (
11+
// aesChunkSize is the maximum number of bytes FileWriter encrypts in one step.
12+
aesChunkSize = 1024 * 1024
13+
)
14+
15+
// calculateIV returns the AES-CTR counter block for the given byte offset of
// a stream whose first block uses initIV.
//
// The 16-byte IV is treated as a 128-bit big-endian integer and advanced by
// offset/aes.BlockSize, carrying from the low 64 bits into the high 64 bits.
// Based on calculateIV from AesCtrCryptoCodec.java. initIV is not modified;
// the result is a newly allocated slice.
//
// An error is returned if initIV is not exactly aes.BlockSize bytes long or
// if offset is negative.
func calculateIV(offset int64, initIV []byte) ([]byte, error) {
	if len(initIV) != aes.BlockSize {
		return nil, fmt.Errorf("calculateIV: invalid iv size: %v", len(initIV))
	}
	if offset < 0 {
		// A negative offset cannot address a position in the stream; without
		// this check it would be truncated or wrapped into an unrelated
		// counter value and silently produce the wrong IV.
		return nil, fmt.Errorf("calculateIV: invalid offset: %v", offset)
	}

	counter := uint64(offset / aes.BlockSize)
	high := binary.BigEndian.Uint64(initIV[:8])
	low := binary.BigEndian.Uint64(initIV[8:])

	sum := low + counter
	if sum < low { // unsigned wrap-around: carry into the high half
		high++
	}

	iv := make([]byte, aes.BlockSize)
	binary.BigEndian.PutUint64(iv[:8], high)
	binary.BigEndian.PutUint64(iv[8:], sum)
	return iv, nil
}
39+
40+
// aesCtrStep perform AES-CTR XOR operation on given byte string.
41+
// Once encryption and decryption are exactly the same operation for CTR mode,
42+
// this function can be used to perform both.
43+
func aesCtrStep(offset int64, enc *TransparentEncryptionInfo, b []byte) ([]byte, error) {
44+
iv, err := calculateIV(offset, enc.iv)
45+
if err != nil {
46+
return nil, err
47+
}
48+
49+
block, err := aes.NewCipher(enc.key)
50+
if err != nil {
51+
return nil, err
52+
}
53+
stream := cipher.NewCTR(block, iv)
54+
55+
padding := offset % aes.BlockSize
56+
if padding > 0 {
57+
tmp := make([]byte, padding)
58+
stream.XORKeyStream(tmp, tmp)
59+
}
60+
61+
text := make([]byte, len(b))
62+
stream.XORKeyStream(text, b)
63+
return text, nil
64+
}

aes_test.go

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
package hdfs
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
)
8+
9+
func TestAesChunks(t *testing.T) {
10+
originalText := []byte("some random plain text, nice to have it quite long")
11+
key := []byte("0123456789abcdef")
12+
13+
// Choose iv to hit counter overflow.
14+
iv := []byte("\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xf5")
15+
enc := &TransparentEncryptionInfo{iv: iv, key: key}
16+
17+
// Ensure that we can decrypt text after encryption.
18+
// In CTR mode, implementation for `encrypt` and `decrypt` actually the same
19+
// since we just XOR on input.
20+
encryptedText, err := aesCtrStep(0, enc, originalText)
21+
assert.Equal(t, err, nil)
22+
decryptedText, err := aesCtrStep(0, enc, encryptedText)
23+
assert.Equal(t, err, nil)
24+
assert.Equal(t, originalText, decryptedText)
25+
26+
// CTR mode allow us to encrypt/decrypt string by chunks
27+
// (using correct offset from start of string).
28+
// Ensure that result equal to one, produced in one step.
29+
encryptedByChunks := make([]byte, 0)
30+
var pos int64 = 0
31+
for _, x := range []int{5, 7, 6, 4, 28} {
32+
tmp, err := aesCtrStep(pos, enc, originalText[pos:pos+int64(x)])
33+
assert.Equal(t, err, nil)
34+
encryptedByChunks = append(encryptedByChunks, tmp...)
35+
pos += int64(x)
36+
}
37+
assert.Equal(t, encryptedByChunks, encryptedText)
38+
39+
// Decrypt string by chunks.
40+
// Ensure that result equal to one, produced in one step.
41+
decryptedByChunks := make([]byte, 0)
42+
pos = 0
43+
for _, x := range []int{5, 7, 6, 4, 28} {
44+
tmp, err := aesCtrStep(pos, enc, encryptedText[pos:pos+int64(x)])
45+
assert.Equal(t, err, nil)
46+
decryptedByChunks = append(decryptedByChunks, tmp...)
47+
pos += int64(x)
48+
}
49+
assert.Equal(t, decryptedByChunks, originalText)
50+
}

client.go

+14-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ import (
66
"io"
77
"io/ioutil"
88
"net"
9+
"net/http"
10+
"net/http/cookiejar"
911
"os"
1012
"os/user"
1113
"sort"
@@ -36,6 +38,8 @@ type Client struct {
3638

3739
defaults *hdfs.FsServerDefaultsProto
3840
encryptionKey *hdfs.DataEncryptionKeyProto
41+
42+
http *http.Client
3943
}
4044

4145
// ClientOptions represents the configurable options for a client.
@@ -203,7 +207,16 @@ func NewClient(options ClientOptions) (*Client, error) {
203207
return nil, err
204208
}
205209

206-
return &Client{namenode: namenode, options: options}, nil
210+
// We need cookies to access KMS (required for HDFS encrypted zone).
211+
jar, err := cookiejar.New(nil)
212+
if err != nil {
213+
return nil, errors.New("cant create cookie jar")
214+
}
215+
216+
// ClientOptions is not extended, to preserve compatibility, so no timeouts are configured here.
217+
http := &http.Client{Jar: jar}
218+
219+
return &Client{namenode: namenode, options: options, http: http}, nil
207220
}
208221

209222
// New returns Client connected to the namenode(s) specified by address, or an

file_reader.go

+42
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,15 @@ type FileReader struct {
2929
readdirLast string
3030

3131
closed bool
32+
33+
// encryption
34+
enc *TransparentEncryptionInfo
35+
}
36+
37+
// TransparentEncryptionInfo holds the key and IV used to encrypt or decrypt file data.
38+
type TransparentEncryptionInfo struct {
39+
key []byte
40+
iv []byte
3241
}
3342

3443
// Open returns an FileReader which can be used for reading.
@@ -38,11 +47,25 @@ func (c *Client) Open(name string) (*FileReader, error) {
3847
return nil, &os.PathError{"open", name, interpretException(err)}
3948
}
4049

50+
status, ok := info.Sys().(*FileStatus)
51+
if !ok {
52+
return nil, &os.PathError{"open", name, errors.New("internal error: fail to access file status")}
53+
}
54+
55+
var enc *TransparentEncryptionInfo
56+
if status.FileEncryptionInfo != nil {
57+
enc, err = c.kmsGetKey(status.FileEncryptionInfo)
58+
if err != nil {
59+
return nil, &os.PathError{"open", name, err}
60+
}
61+
}
62+
4163
return &FileReader{
4264
client: c,
4365
name: name,
4466
info: info,
4567
closed: false,
68+
enc: enc,
4669
}, nil
4770
}
4871

@@ -184,6 +207,25 @@ func (f *FileReader) Read(b []byte) (int, error) {
184207
return 0, io.ErrClosedPipe
185208
}
186209

210+
offset := f.offset
211+
n, err := f.readImpl(b)
212+
213+
// Decrypt data chunk if file from HDFS encrypted zone.
214+
if f.enc != nil && n > 0 {
215+
plaintext, err := aesCtrStep(offset, f.enc, b[:n])
216+
if err != nil {
217+
f.offset = offset
218+
return 0, err
219+
}
220+
for i := 0; i < n; i++ {
221+
b[i] = plaintext[i]
222+
}
223+
}
224+
225+
return n, err
226+
}
227+
228+
func (f *FileReader) readImpl(b []byte) (int, error) {
187229
if f.info.IsDir() {
188230
return 0, &os.PathError{
189231
"read",

file_writer.go

+61-7
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,13 @@ type FileWriter struct {
2727
name string
2828
replication int
2929
blockSize int64
30+
offset int64
3031

3132
blockWriter *transfer.BlockWriter
3233
deadline time.Time
34+
35+
// Key and IV for transparent encryption support.
36+
enc *TransparentEncryptionInfo
3337
}
3438

3539
// Create opens a new file in HDFS with the default replication, block size,
@@ -62,13 +66,14 @@ func (c *Client) Create(name string) (*FileWriter, error) {
6266
// very important that Close is called after all data has been written.
6367
func (c *Client) CreateFile(name string, replication int, blockSize int64, perm os.FileMode) (*FileWriter, error) {
6468
createReq := &hdfs.CreateRequestProto{
65-
Src: proto.String(name),
66-
Masked: &hdfs.FsPermissionProto{Perm: proto.Uint32(uint32(perm))},
67-
ClientName: proto.String(c.namenode.ClientName),
68-
CreateFlag: proto.Uint32(1),
69-
CreateParent: proto.Bool(false),
70-
Replication: proto.Uint32(uint32(replication)),
71-
BlockSize: proto.Uint64(uint64(blockSize)),
69+
Src: proto.String(name),
70+
Masked: &hdfs.FsPermissionProto{Perm: proto.Uint32(uint32(perm))},
71+
ClientName: proto.String(c.namenode.ClientName),
72+
CreateFlag: proto.Uint32(1),
73+
CreateParent: proto.Bool(false),
74+
Replication: proto.Uint32(uint32(replication)),
75+
BlockSize: proto.Uint64(uint64(blockSize)),
76+
CryptoProtocolVersion: []hdfs.CryptoProtocolVersionProto{hdfs.CryptoProtocolVersionProto_ENCRYPTION_ZONES},
7277
}
7378
createResp := &hdfs.CreateResponseProto{}
7479

@@ -77,11 +82,20 @@ func (c *Client) CreateFile(name string, replication int, blockSize int64, perm
7782
return nil, &os.PathError{"create", name, interpretCreateException(err)}
7883
}
7984

85+
var enc *TransparentEncryptionInfo
86+
if createResp.GetFs().GetFileEncryptionInfo() != nil {
87+
enc, err = c.kmsGetKey(createResp.GetFs().GetFileEncryptionInfo())
88+
if err != nil {
89+
return nil, &os.PathError{"create", name, err}
90+
}
91+
}
92+
8093
return &FileWriter{
8194
client: c,
8295
name: name,
8396
replication: replication,
8497
blockSize: blockSize,
98+
enc: enc,
8599
}, nil
86100
}
87101

@@ -106,11 +120,21 @@ func (c *Client) Append(name string) (*FileWriter, error) {
106120
return nil, &os.PathError{"append", name, interpretException(err)}
107121
}
108122

123+
var enc *TransparentEncryptionInfo
124+
if appendResp.GetStat().GetFileEncryptionInfo() != nil {
125+
enc, err = c.kmsGetKey(appendResp.GetStat().GetFileEncryptionInfo())
126+
if err != nil {
127+
return nil, &os.PathError{"append", name, err}
128+
}
129+
}
130+
109131
f := &FileWriter{
110132
client: c,
111133
name: name,
112134
replication: int(appendResp.Stat.GetBlockReplication()),
113135
blockSize: int64(appendResp.Stat.GetBlocksize()),
136+
offset: int64(*appendResp.Stat.Length),
137+
enc: enc,
114138
}
115139

116140
// This returns nil if there are no blocks (it's an empty file) or if the
@@ -176,6 +200,28 @@ func (f *FileWriter) SetDeadline(t time.Time) error {
176200
// of this, it is important that Close is called after all data has been
177201
// written.
178202
func (f *FileWriter) Write(b []byte) (int, error) {
203+
// Encrypt data chunk if file in HDFS encrypted zone.
204+
if f.enc != nil && len(b) > 0 {
205+
var offset int
206+
for offset < len(b) {
207+
size := min(len(b)-offset, aesChunkSize)
208+
ciphertext, err := aesCtrStep(f.offset, f.enc, b[offset:offset+size])
209+
if err != nil {
210+
return offset, err
211+
}
212+
writtenSize, err := f.writeImpl(ciphertext)
213+
offset += writtenSize
214+
if err != nil {
215+
return offset, err
216+
}
217+
}
218+
return offset, nil
219+
} else {
220+
return f.writeImpl(b)
221+
}
222+
}
223+
224+
func (f *FileWriter) writeImpl(b []byte) (int, error) {
179225
if f.blockWriter == nil {
180226
err := f.startNewBlock()
181227
if err != nil {
@@ -187,6 +233,7 @@ func (f *FileWriter) Write(b []byte) (int, error) {
187233
for off < len(b) {
188234
n, err := f.blockWriter.Write(b[off:])
189235
off += n
236+
f.offset += int64(n)
190237
if err == transfer.ErrEndOfBlock {
191238
err = f.startNewBlock()
192239
}
@@ -316,3 +363,10 @@ func (f *FileWriter) finalizeBlock() error {
316363
f.blockWriter = nil
317364
return nil
318365
}
366+
367+
// min returns the smaller of a and b.
// NOTE(review): Go 1.21+ provides a builtin min; this helper can be dropped
// once the module's minimum Go version allows it — confirm against go.mod.
func min(a, b int) int {
	smallest := b
	if a < b {
		smallest = a
	}
	return smallest
}

go.mod

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ require (
1515
github.com/jcmturner/gofork v1.0.0 // indirect
1616
github.com/jcmturner/goidentity/v6 v6.0.1 // indirect
1717
github.com/jcmturner/rpc/v2 v2.0.3 // indirect
18+
github.com/pkg/errors v0.9.1 // indirect
1819
github.com/pmezard/go-difflib v1.0.0 // indirect
1920
golang.org/x/crypto v0.0.0-20201112155050-0c6587e931a9 // indirect
2021
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa // indirect

go.sum

+2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZ
2323
github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc=
2424
github.com/pborman/getopt v1.1.0 h1:eJ3aFZroQqq0bWmraivjQNt6Dmm5M0h2JcDW38/Azb0=
2525
github.com/pborman/getopt v1.1.0/go.mod h1:FxXoW1Re00sQG/+KIkuSqRL/LwQgSkv7uyac+STFsbk=
26+
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
27+
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
2628
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
2729
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
2830
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=

0 commit comments

Comments
 (0)