Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GCS backend: Detect and force-unlock stale locks. #17470

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
36 changes: 34 additions & 2 deletions backend/remote-state/gcs/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"fmt"
"os"
"strings"
"time"

"cloud.google.com/go/storage"
"github.com/hashicorp/terraform/backend"
Expand Down Expand Up @@ -35,6 +36,13 @@ type Backend struct {

projectID string
region string

// Time between consecutive heartbeats on the lock file.
lockHeartbeatInterval time.Duration

// The minimum duration that must have passed since the most recent
// recorded heartbeat before the lock file is considered stale/orphaned.
lockStaleAfter time.Duration
}

func New() backend.Backend {
Expand Down Expand Up @@ -88,6 +96,20 @@ func New() backend.Backend {
Description: "Region / location in which to create the bucket",
Default: "",
},

"lock_heartbeat_interval": {
Type: schema.TypeString,
Optional: true,
Description: "Time between consecutive heartbeats on the lock file as a duration string (cf. https://golang.org/pkg/time/#ParseDuration).",
Default: "1m",
},

"lock_stale_after": {
Type: schema.TypeString,
Optional: true,
Description: "Mininum duration (cf. https://golang.org/pkg/time/#ParseDuration) that must have passed since the youngest recorded heartbeat before the lock file is considered stale/orphaned.",
Default: "15m",
},
},
}

Expand Down Expand Up @@ -147,13 +169,13 @@ func (b *Backend) configure(ctx context.Context) error {
conf := jwt.Config{
Email: account.ClientEmail,
PrivateKey: []byte(account.PrivateKey),
Scopes: []string{storage.ScopeReadWrite},
Scopes: []string{storage.ScopeFullControl},
TokenURL: "https://accounts.google.com/o/oauth2/token",
}

opts = append(opts, option.WithHTTPClient(conf.Client(ctx)))
} else {
opts = append(opts, option.WithScopes(storage.ScopeReadWrite))
opts = append(opts, option.WithScopes(storage.ScopeFullControl))
}

opts = append(opts, option.WithUserAgent(httpclient.UserAgentString()))
Expand Down Expand Up @@ -188,6 +210,16 @@ func (b *Backend) configure(ctx context.Context) error {
b.encryptionKey = k
}

b.lockHeartbeatInterval, err = time.ParseDuration(data.Get("lock_heartbeat_interval").(string))
if err != nil {
return fmt.Errorf("Error parsing lock_heartbeat_interval: %s", err)
}

b.lockStaleAfter, err = time.ParseDuration(data.Get("lock_stale_after").(string))
if err != nil {
return fmt.Errorf("Error parsing lock_stale_after: %s", err)
}

return nil
}

Expand Down
14 changes: 8 additions & 6 deletions backend/remote-state/gcs/backend_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,14 @@ func (b *Backend) client(name string) (*remoteClient, error) {
}

return &remoteClient{
storageContext: b.storageContext,
storageClient: b.storageClient,
bucketName: b.bucketName,
stateFilePath: b.stateFile(name),
lockFilePath: b.lockFile(name),
encryptionKey: b.encryptionKey,
storageContext: b.storageContext,
storageClient: b.storageClient,
bucketName: b.bucketName,
stateFilePath: b.stateFile(name),
lockFilePath: b.lockFile(name),
encryptionKey: b.encryptionKey,
lockHeartbeatInterval: b.lockHeartbeatInterval,
lockStaleAfter: b.lockStaleAfter,
}, nil
}

Expand Down
69 changes: 69 additions & 0 deletions backend/remote-state/gcs/backend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (

"cloud.google.com/go/storage"
"github.com/hashicorp/terraform/backend"
"github.com/hashicorp/terraform/state"
"github.com/hashicorp/terraform/state/remote"
)

Expand Down Expand Up @@ -139,6 +140,7 @@ func TestBackend(t *testing.T) {
backend.TestBackendStates(t, be0)
backend.TestBackendStateLocks(t, be0, be1)
backend.TestBackendStateForceUnlock(t, be0, be1)
testStaleLocks(t, be0, be1)
}

func TestBackendWithPrefix(t *testing.T) {
Expand All @@ -154,7 +156,9 @@ func TestBackendWithPrefix(t *testing.T) {

backend.TestBackendStates(t, be0)
backend.TestBackendStateLocks(t, be0, be1)
testStaleLocks(t, be0, be1)
}

func TestBackendWithEncryption(t *testing.T) {
t.Parallel()

Expand All @@ -167,6 +171,71 @@ func TestBackendWithEncryption(t *testing.T) {

backend.TestBackendStates(t, be0)
backend.TestBackendStateLocks(t, be0, be1)
testStaleLocks(t, be0, be1)
}

// testStaleLocks checks that a lock whose heartbeat has stopped is eventually
// treated as stale and can be taken over by a second client.
//
// Client A (from b1) acquires the lock and is then told to stop heartbeating.
// Client B (from b2) is configured with a 20s staleness threshold and must
// fail to lock after 10s, then succeed after a further 20s (30s in total).
//
// Both state managers must be *remote.State values backed by *remoteClient;
// the type assertions below panic otherwise.
func testStaleLocks(t *testing.T, b1, b2 backend.Backend) {
	t.Helper()

	// Get the default state for each backend.
	b1StateMgr, err := b1.StateMgr(backend.DefaultStateName)
	if err != nil {
		t.Fatalf("error: %s", err)
	}
	if err := b1StateMgr.RefreshState(); err != nil {
		t.Fatalf("bad: %s", err)
	}

	b2StateMgr, err := b2.StateMgr(backend.DefaultStateName)
	if err != nil {
		t.Fatalf("error: %s", err)
	}
	if err := b2StateMgr.RefreshState(); err != nil {
		t.Fatalf("bad: %s", err)
	}

	// Reassign so it's obvious what's happening.
	lockerA := b1StateMgr.(state.Locker)
	lockerB := b2StateMgr.(state.Locker)

	// Reach into the GCS clients once, so the test can tune intervals and
	// stop the heartbeat without repeating the assertion chain.
	clientA := lockerA.(*remote.State).Client.(*remoteClient)
	clientB := lockerB.(*remote.State).Client.(*remoteClient)

	infoA := state.NewLockInfo()
	infoA.Operation = "test"
	infoA.Who = "clientA"

	infoB := state.NewLockInfo()
	infoB.Operation = "test"
	infoB.Who = "clientB"

	// For faster tests, reduce the duration until the lock is considered stale.
	clientB.lockHeartbeatInterval = 5 * time.Second
	clientB.lockStaleAfter = 20 * time.Second

	lockIDA, err := lockerA.Lock(infoA)
	if err != nil {
		t.Fatal("unable to get initial lock:", err)
	}

	// Stop heartbeating on the lock file. It will be considered stale after lockStaleAfter.
	clientA.stopHeartbeatCh <- true

	// Lock is still held by A after 10 seconds (below the 20s threshold).
	time.Sleep(10 * time.Second)
	_, err = lockerB.Lock(infoB)
	if err == nil {
		// Best-effort cleanup before failing; report rather than hide a
		// failed unlock so leftover lock files can be explained.
		if unlockErr := lockerA.Unlock(lockIDA); unlockErr != nil {
			t.Logf("error unlocking client A during cleanup: %s", unlockErr)
		}
		t.Fatal("client B obtained lock while held by client A")
	}

	// Wait a bit longer (30s total since the heartbeat stopped), and the
	// lock will become stale.
	time.Sleep(20 * time.Second)
	lockIDB, err := lockerB.Lock(infoB)
	if err != nil {
		t.Fatal("client B failed to obtain lock that was previously held by client A but that went stale:", err)
	}

	if err := lockerB.Unlock(lockIDB); err != nil {
		t.Fatal("error unlocking client B:", err)
	}
}

// setupBackend returns a new GCS backend.
Expand Down
Loading