Skip to content

Transparent encryption #281

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/scripts/fixtures.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,13 @@ set -e
hadoop fs -mkdir -p "/_test"
hadoop fs -chmod 777 "/_test"

if [ "$TRANSPARENT_ENCRYPTION" = "true" ]; then
echo "Prepare encrypted zone"
hadoop fs -mkdir /_test/kms
hadoop fs -chmod 777 "/_test/kms"
hadoop key create key1
hdfs crypto -createZone -keyName key1 -path /_test/kms
fi

hadoop fs -put ./testdata/foo.txt "/_test/foo.txt"
hadoop fs -Ddfs.block.size=1048576 -put ./testdata/mobydick.txt "/_test/mobydick.txt"
62 changes: 58 additions & 4 deletions .github/scripts/install-hdfs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

set -e

KERBEROS=${KERBEROS-"false"}
KERBEROS="${KERBEROS-false}"
AES=${AES-"false"}
if [ "$DATA_TRANSFER_PROTECTION" = "privacy" ]; then
KERBEROS="true"
Expand All @@ -15,11 +15,18 @@ else
ENCRYPT_DATA_TRANSFER="false"
fi

CONF_KMS_PROVIDER=""
TRANSPARENT_ENCRYPTION=false
if [ "$HADOOP_VERSION" != "2.10.1" ]; then
TRANSPARENT_ENCRYPTION=true
CONF_KMS_PROVIDER="kms://http@localhost:9600/kms"
fi

CONF_AUTHENTICATION="simple"
KERBEROS_REALM="EXAMPLE.COM"
KERBEROS_PRINCIPLE="administrator"
KERBEROS_PASSWORD="password1234"
if [ $KERBEROS = "true" ]; then
if [ "$KERBEROS" = "true" ]; then
CONF_AUTHENTICATION="kerberos"

HOSTNAME=$(hostname)
Expand Down Expand Up @@ -50,13 +57,16 @@ EOF
sudo apt-get install -y krb5-user krb5-kdc krb5-admin-server

printf "$KERBEROS_PASSWORD\n$KERBEROS_PASSWORD" | sudo kdb5_util -r "$KERBEROS_REALM" create -s
for p in nn dn $USER gohdfs1 gohdfs2; do
for p in nn dn kms $USER gohdfs1 gohdfs2; do
sudo kadmin.local -q "addprinc -randkey $p/$HOSTNAME@$KERBEROS_REALM"
sudo kadmin.local -q "addprinc -randkey $p/localhost@$KERBEROS_REALM"
sudo kadmin.local -q "xst -k /tmp/$p.keytab $p/$HOSTNAME@$KERBEROS_REALM"
sudo kadmin.local -q "xst -k /tmp/$p.keytab $p/localhost@$KERBEROS_REALM"
sudo chmod +rx /tmp/$p.keytab
done
# HTTP service for KMS
sudo kadmin.local -q "addprinc -randkey HTTP/localhost@$KERBEROS_REALM"
sudo kadmin.local -q "xst -k /tmp/kms.keytab HTTP/localhost@$KERBEROS_REALM"

echo "Restarting krb services..."
sudo service krb5-kdc restart
Expand Down Expand Up @@ -116,6 +126,10 @@ sudo tee $HADOOP_ROOT/etc/hadoop/core-site.xml <<EOF
<name>hadoop.rpc.protection</name>
<value>$RPC_PROTECTION</value>
</property>
<property>
<name>hadoop.security.key.provider.path</name>
<value>$CONF_KMS_PROVIDER</value>
</property>
</configuration>
EOF

Expand All @@ -125,6 +139,10 @@ sudo tee $HADOOP_ROOT/etc/hadoop/hdfs-site.xml <<EOF
<name>dfs.namenode.name.dir</name>
<value>/tmp/hdfs/name</value>
</property>
<property>
<name>dfs.namenode.fs-limits.min-block-size</name>
<value>131072</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/tmp/hdfs/data</value>
Expand Down Expand Up @@ -172,6 +190,41 @@ $HADOOP_ROOT/bin/hdfs namenode -format
sudo groupadd hadoop
sudo usermod -a -G hadoop $USER

sudo tee $HADOOP_ROOT/etc/hadoop/kms-site.xml <<EOF
<configuration>
<property>
<name>hadoop.kms.key.provider.uri</name>
<value>jceks://file@/tmp/hdfs/kms.keystore</value>
</property>
<property>
<name>hadoop.security.keystore.java-keystore-provider.password-file</name>
<value>kms.keystore.password</value>
</property>
<property>
<name>hadoop.kms.authentication.type</name>
<value>$CONF_AUTHENTICATION</value>
</property>
<property>
<name>hadoop.kms.authentication.kerberos.keytab</name>
<value>/tmp/kms.keytab</value>
</property>
<property>
<name>hadoop.kms.authentication.kerberos.principal</name>
<value>HTTP/localhost@$KERBEROS_REALM</value>
</property>
</configuration>
EOF

sudo tee $HADOOP_ROOT/etc/hadoop/kms.keystore.password <<EOF
123456
EOF

if [ "$TRANSPARENT_ENCRYPTION" = "true" ]; then
echo "Starting KMS..."
rm $HADOOP_ROOT/etc/hadoop/kms-log4j.properties
$HADOOP_ROOT/bin/hadoop kms > /tmp/hdfs/kms.log 2>&1 &
fi

echo "Starting namenode..."
$HADOOP_ROOT/bin/hdfs namenode > /tmp/hdfs/namenode.log 2>&1 &

Expand All @@ -184,4 +237,5 @@ echo "Waiting for cluster to exit safe mode..."
$HADOOP_ROOT/bin/hdfs dfsadmin -safemode wait

echo "HADOOP_CONF_DIR=$(pwd)/$HADOOP_ROOT/etc/hadoop" >> $GITHUB_ENV
echo "$(pwd)/$HADOOP_ROOT/bin" >> $GITHUB_PATH
echo "TRANSPARENT_ENCRYPTION=$TRANSPARENT_ENCRYPTION" >> $GITHUB_ENV
echo "$(pwd)/$HADOOP_ROOT/bin" >> $GITHUB_PATH
14 changes: 12 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ jobs:
go-version: 1.17

# This step downloads hadoop and starts a local cluster with one
# namenode and one datanode. It adds the hadoop binaries to GITHUB_PATH
# and HADOOP_CONF_DIR to GITHUB_ENV.
# namenode and one datanode. It adds the hadoop binaries to GITHUB_PATH,
# TRANSPARENT_ENCRYPTION and HADOOP_CONF_DIR to GITHUB_ENV.
- name: install-hdfs.sh
run: ./.github/scripts/install-hdfs.sh
env:
Expand All @@ -65,6 +65,16 @@ jobs:
run: |
make test

- name: cat kms.log
if: always()
run: |
if [ -f /tmp/hdfs/kms.log ]
then
cat /tmp/hdfs/kms.log
else
echo "not exists"
fi

- name: cat namenode.log
if: always()
run: cat /tmp/hdfs/namenode.log
Expand Down
58 changes: 58 additions & 0 deletions aes.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package hdfs

import (
"crypto/aes"
"crypto/cipher"
"encoding/binary"
"fmt"
)

// calculateIV derives the counter-mode IV for a given byte offset by
// adding offset/blockSize to the 128-bit initial IV, treated as a
// big-endian integer with carry from the low 64 bits into the high
// 64 bits. Ported from AesCtrCryptoCodec.java in Hadoop.
//
// It returns an error if initIV is not exactly one AES block long.
func calculateIV(offset int64, initIV []byte) ([]byte, error) {
	if len(initIV) != aes.BlockSize {
		return nil, fmt.Errorf("calculateIV: invalid iv size: %v", len(initIV))
	}

	high := binary.BigEndian.Uint64(initIV[:8])
	low := binary.BigEndian.Uint64(initIV[8:])

	counter := uint64(offset / aes.BlockSize)
	sum := low + counter
	if sum < low { // 64-bit overflow: carry into the high half.
		high++
	}

	iv := make([]byte, aes.BlockSize)
	binary.BigEndian.PutUint64(iv[:8], high)
	binary.BigEndian.PutUint64(iv[8:], sum)

	return iv, nil
}

// aesCreateCTRStream returns an AES-CTR stream positioned at the given
// byte offset, so data can be encrypted or decrypted starting anywhere
// in the file. The AES block cipher is created lazily from enc.key and
// cached on enc for reuse by later calls.
func aesCreateCTRStream(offset int64, enc *transparentEncryptionInfo) (cipher.Stream, error) {
	iv, err := calculateIV(offset, enc.iv)
	if err != nil {
		return nil, err
	}

	// Build the block cipher once; subsequent streams reuse it.
	if enc.cipher == nil {
		block, err := aes.NewCipher(enc.key)
		if err != nil {
			return nil, err
		}
		enc.cipher = block
	}

	stream := cipher.NewCTR(enc.cipher, iv)

	// The IV only positions the stream at a block boundary; consume and
	// discard keystream bytes to advance to the exact offset within the
	// block.
	if skip := offset % aes.BlockSize; skip > 0 {
		scratch := make([]byte, skip)
		stream.XORKeyStream(scratch, scratch)
	}

	return stream, nil
}
69 changes: 69 additions & 0 deletions aes_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package hdfs

import (
"bytes"
"crypto/cipher"
"testing"

"github.com/stretchr/testify/assert"
)

// aesCtrStep runs the AES-CTR XOR transform over b, starting at the
// given offset within the stream. Since encryption and decryption are
// exactly the same operation in CTR mode, this helper serves for both
// directions.
func aesCtrStep(offset int64, enc *transparentEncryptionInfo, b []byte) ([]byte, error) {
	stream, err := aesCreateCTRStream(offset, enc)
	if err != nil {
		return nil, err
	}

	out := make([]byte, len(b))
	reader := cipher.StreamReader{S: stream, R: bytes.NewReader(b)}
	if _, err := reader.Read(out); err != nil {
		return nil, err
	}

	return out, nil
}

// TestAesIV round-trips text through AES-CTR with an IV chosen so the
// 64-bit counter half overflows, and verifies that processing the data
// in uneven chunks (at the matching offsets) produces the same bytes
// as a single pass.
func TestAesIV(t *testing.T) {
	plaintext := []byte("some random plain text, nice to have it quite long")
	key := []byte("0123456789abcdef")

	// The low 8 bytes of the IV sit near 2^64 so that advancing the
	// counter triggers the overflow/carry path.
	iv := []byte("\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xf5")
	enc := &transparentEncryptionInfo{iv: iv, key: key}

	// In CTR mode encrypt and decrypt are the same XOR operation, so
	// applying the transform twice must return the original text.
	ciphertext, err := aesCtrStep(0, enc, plaintext)
	assert.NoError(t, err)
	roundTrip, err := aesCtrStep(0, enc, ciphertext)
	assert.NoError(t, err)
	assert.Equal(t, plaintext, roundTrip)

	// Transform src piece by piece, passing the offset of each chunk
	// from the start of the stream.
	chunkSizes := []int{5, 7, 6, 4, 28}
	byChunks := func(src []byte) []byte {
		out := make([]byte, 0, len(src))
		var off int64
		for _, size := range chunkSizes {
			piece, err := aesCtrStep(off, enc, src[off:off+int64(size)])
			assert.NoError(t, err)
			out = append(out, piece...)
			off += int64(size)
		}
		return out
	}

	// Chunked encryption and decryption must match the one-step results.
	assert.Equal(t, byChunks(plaintext), ciphertext)
	assert.Equal(t, byChunks(ciphertext), roundTrip)
}
15 changes: 14 additions & 1 deletion client.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import (
"io"
"io/ioutil"
"net"
"net/http"
"net/http/cookiejar"
"os"
"os/user"
"sort"
Expand Down Expand Up @@ -36,6 +38,8 @@ type Client struct {

defaults *hdfs.FsServerDefaultsProto
encryptionKey *hdfs.DataEncryptionKeyProto

http *http.Client
}

// ClientOptions represents the configurable options for a client.
Expand Down Expand Up @@ -203,7 +207,16 @@ func NewClient(options ClientOptions) (*Client, error) {
return nil, err
}

return &Client{namenode: namenode, options: options}, nil
// We need cookies to access KMS (required for HDFS encrypted zone).
jar, err := cookiejar.New(nil)
if err != nil {
return nil, errors.New("cant create cookie jar")
}

// Not extending ClientOptions to preserve compatibility, so timeouts not configured.
http := &http.Client{Jar: jar}

return &Client{namenode: namenode, options: options, http: http}, nil
}

// New returns Client connected to the namenode(s) specified by address, or an
Expand Down
1 change: 1 addition & 0 deletions cmd/hdfs/test/helper.bash
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/bin/bash

export HADOOP_FS=${HADOOP_FS-"hadoop fs"}
export HADOOP_KEY=${HADOOP_KEY-"hadoop key"}
export ROOT_TEST_DIR="$BATS_TEST_DIRNAME/../../.."
export HDFS="$ROOT_TEST_DIR/hdfs"

Expand Down
Loading