Skip to content

Commit 1505efe

Browse files
committed
[CASCL-1386] Implement Karpenter user NodePool eviction
Part of a stack splitting #3026 (too large to review in one piece) into small pieces that each build and pass tests on their own. The command is fully functional only once the whole stack lands.
1 parent 4e1f9b8 commit 1505efe

2 files changed

Lines changed: 49 additions & 1 deletion

File tree

cmd/kubectl-datadog/autoscaling/cluster/evict/karpenter_user.go

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,25 @@ package evict
22

33
import (
44
"context"
5+
"errors"
6+
"fmt"
7+
"log"
58

69
"k8s.io/client-go/kubernetes"
710
)
811

912
// evictKarpenterUserNodePool cordons the given nodes and then drains every
// node returned by cordonNodes. Errors from both phases are accumulated and
// returned as a single joined error; nil is returned when none occurred.
// nodePoolName is used only in the final log message.
// NOTE(review): cordonNodes and drainNode are defined elsewhere in the
// package; presumably cordonNodes returns only the nodes it managed to cordon
// (or would cordon in dry-run) — confirm against its definition.
func evictKarpenterUserNodePool(ctx context.Context, clientset kubernetes.Interface, nodePoolName string, nodes []string, drainOpts nodeDrainOptions) error {
10-
// Placeholder removed by this commit (diff line marked "-").
panic("TODO: evictKarpenterUserNodePool — implemented in PR https://github.com/DataDog/datadog-operator/pull/3177")
13+
// Cordon every node up front so a pod evicted from one node is never
14+
// rescheduled onto another node of the same NodePool that is itself about
15+
// to be drained.
16+
cordoned, errs := cordonNodes(ctx, clientset, nodes, drainOpts.DryRun)
17+
// Drain failures are appended to errs instead of aborting, so one failing
// node does not prevent the remaining nodes from being drained.
for _, node := range cordoned {
18+
if err := drainNode(ctx, clientset, node.Name, drainOpts); err != nil {
19+
errs = append(errs, fmt.Errorf("drain node %s: %w", node.Name, err))
20+
}
21+
}
22+
// The success message is emitted only for a real (non-dry-run) run in which
// neither cordoning nor draining reported an error.
if !drainOpts.DryRun && len(errs) == 0 {
23+
log.Printf("Drained %d node(s) from user NodePool %s; Karpenter will terminate their NodeClaims once empty.", len(cordoned), nodePoolName)
24+
}
25+
// errors.Join yields nil when errs is empty, so a clean run returns nil.
return errors.Join(errs...)
1126
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
package evict
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
"github.com/stretchr/testify/require"
8+
corev1 "k8s.io/api/core/v1"
9+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10+
"k8s.io/client-go/kubernetes/fake"
11+
)
12+
13+
// TestEvictKarpenterUserNodePool runs evictKarpenterUserNodePool against a
// fake clientset seeded with a single node "n1", once normally and once in
// dry-run mode, and checks the node's Spec.Unschedulable flag afterwards.
func TestEvictKarpenterUserNodePool(t *testing.T) {
14+
for _, tc := range []struct {
15+
name string
16+
dryRun bool
17+
// wantUnschedulable is the expected value of Spec.Unschedulable on "n1"
// after the call.
wantUnschedulable bool
18+
}{
19+
{name: "cordons and drains", dryRun: false, wantUnschedulable: true},
20+
{name: "dry-run touches nothing", dryRun: true, wantUnschedulable: false},
21+
} {
22+
t.Run(tc.name, func(t *testing.T) {
23+
// Each subtest builds its own fake clientset, so the cases share no state.
node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "n1"}}
24+
client := fake.NewClientset(node)
25+
26+
// NOTE(review): newDrainOpts is defined elsewhere in the package;
// presumably it builds nodeDrainOptions with DryRun set from its argument — confirm.
require.NoError(t, evictKarpenterUserNodePool(t.Context(), client, "user-np", []string{"n1"}, newDrainOpts(tc.dryRun)))
27+
28+
// Re-read the node from the fake clientset to observe its scheduling state.
got, err := client.CoreV1().Nodes().Get(t.Context(), "n1", metav1.GetOptions{})
29+
require.NoError(t, err)
30+
assert.Equal(t, tc.wantUnschedulable, got.Spec.Unschedulable)
31+
})
32+
}
33+
}

0 commit comments

Comments
 (0)