Skip to content

Commit c9ae52e

Browse files
committed
add E2E test verifying OTEL tracing across Zenko services
- Deploy Jaeger all-in-one (memory-only, OTLP-enabled) in the kind CI cluster alongside existing dependencies.
- Patch the Zenko CR with `spec.otel` (enabled, sampling ratio 1.0) so every request is traced during CI — this also acts as a smoke test that OTEL doesn't break existing @premerge tests.
- Add a new @premerge CTST scenario that puts an S3 object and then polls the Jaeger query API to assert a trace exists with spans from both cloudserver and vault.

Issue: ZENKO-5258
1 parent 1ff3dcb commit c9ae52e

6 files changed

Lines changed: 224 additions & 1 deletion

File tree

.github/scripts/end2end/configure-e2e-ctst.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,19 @@ kubectl run kafka-topics \
111111
# Deploy PyKMIP server (infra only, does NOT patch the CR).
112112
# The CR is patched later, after file-backend SSE tests have run.
113113
bash "$(dirname "$0")/../mocks/setup-kmip.sh"
114+
115+
# Enable OTEL tracing on the Zenko CR (always-on in CI, acts as a smoke test).
# The merge patch turns tracing on with a sampling ratio of 1.0 and points the
# exporter at the Jaeger service's OTLP/HTTP port (4318) in the "default"
# namespace.
NAMESPACE="${NAMESPACE:-default}"
kubectl patch zenko "${ZENKO_NAME}" -n "${NAMESPACE}" --type merge -p '{
    "spec": {"otel": {
        "enabled": true,
        "samplingRatio": "1.0",
        "tracesEndpoint": "http://jaeger.default.svc.cluster.local:4318/v1/traces"
    }}
}'
# Wait for the operator to reconcile and roll pods with new OTEL env vars:
#   1. the CR reports that a deployment is in progress,
#   2. the CR reports no deployment failure,
#   3. the CR reports that the deployment is no longer in progress.
# NOTE(review): if the operator finishes reconciling before the first wait
# starts observing the CR, that wait may block until its timeout — confirm.
kubectl wait --for condition=DeploymentInProgress=true --timeout 10m "zenko/${ZENKO_NAME}" -n "${NAMESPACE}"
kubectl wait --for condition=DeploymentFailure=false --timeout 10m "zenko/${ZENKO_NAME}" -n "${NAMESPACE}"
kubectl wait --for condition=DeploymentInProgress=false --timeout 10m "zenko/${ZENKO_NAME}" -n "${NAMESPACE}"
# Additionally make sure the two services the tracing scenario inspects have
# finished rolling out.
kubectl rollout status deployment -l "app.kubernetes.io/name=connector-cloudserver,app.kubernetes.io/instance=${ZENKO_NAME}" -n "${NAMESPACE}" --timeout=5m
kubectl rollout status deployment -l "app.kubernetes.io/name=connector-vault,app.kubernetes.io/instance=${ZENKO_NAME}" -n "${NAMESPACE}" --timeout=5m

.github/scripts/end2end/install-kind-dependencies.sh

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,67 @@ helm upgrade --install --version ${KEYCLOAK_VERSION} keycloak codecentric/keyclo
153153

154154
kubectl rollout status sts/keycloak --timeout=10m
155155

156+
# jaeger all-in-one (OTLP collector + query UI, memory-only)
# A single-replica Deployment plus a Service, both created in the "default"
# namespace. The Service exposes the query port (16686) and the OTLP gRPC
# (4317) and OTLP HTTP (4318) ports.
kubectl apply -f - <<'EOF'
apiVersion: apps/v1
kind: Deployment
metadata:
  name: jaeger
  namespace: default
  labels:
    app: jaeger
spec:
  replicas: 1
  selector:
    matchLabels:
      app: jaeger
  template:
    metadata:
      labels:
        app: jaeger
    spec:
      containers:
        - name: jaeger
          image: jaegertracing/all-in-one:1.76.0
          env:
            - name: COLLECTOR_OTLP_ENABLED
              value: "true"
            - name: MEMORY_MAX_TRACES
              value: "10000"
          ports:
            - containerPort: 16686
              name: query
            - containerPort: 4317
              name: otlp-grpc
            - containerPort: 4318
              name: otlp-http
          readinessProbe:
            httpGet:
              path: /
              port: 16686
            initialDelaySeconds: 5
            periodSeconds: 5
---
apiVersion: v1
kind: Service
metadata:
  name: jaeger
  namespace: default
spec:
  selector:
    app: jaeger
  ports:
    - name: query
      port: 16686
      targetPort: 16686
    - name: otlp-grpc
      port: 4317
      targetPort: 4317
    - name: otlp-http
      port: 4318
      targetPort: 4318
EOF
# The manifest pins the namespace, so wait in that same namespace instead of
# relying on whatever namespace the current kubectl context defaults to.
kubectl rollout status deployment/jaeger -n default --timeout=5m
156217

157218
# TODO: use zenko-operator install-deps
158219
kubectl apply -f - <<EOF

.github/scripts/end2end/setup-e2e-env.sh

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,16 @@ else
235235
fi
236236
export PROMETHEUS_SERVICE="${PROMETHEUS_SVC}.${NAMESPACE}.svc.cluster.local"
237237

238+
# Jaeger query API — port-forward for OTEL tracing tests.
# If nothing is listening on the query port yet, start a background
# port-forward to the Jaeger service and wait (up to 10s) for the local port
# to open. The tracesEndpoint used for the Zenko CR addresses Jaeger as
# jaeger.default.svc, so the namespace is passed explicitly rather than
# inherited from the kubectl context.
JAEGER_QUERY_PORT=16686
if ! ss -tlnp 2>/dev/null | grep -q ":${JAEGER_QUERY_PORT}" && \
   ! lsof -i ":${JAEGER_QUERY_PORT}" &>/dev/null; then
    kubectl port-forward -n default "svc/jaeger" "${JAEGER_QUERY_PORT}:${JAEGER_QUERY_PORT}" &>/dev/null &
    _JAEGER_PF_PID=$!
    # Use the same ss-or-lsof detection as the check above, so the wait can
    # also succeed on hosts where `ss` is not available.
    timeout 10 bash -c "until ss -tlnp 2>/dev/null | grep -q ':${JAEGER_QUERY_PORT}' || lsof -i ':${JAEGER_QUERY_PORT}' &>/dev/null; do sleep 0.2; done"
fi
export JAEGER_QUERY_ENDPOINT="http://localhost:${JAEGER_QUERY_PORT}"
247+
238248
# --- 14. Zenko CR metadata ---
239249
export TIME_PROGRESSION_FACTOR=$(kubectl get zenko ${ZENKO_NAME} -o jsonpath="{.metadata.annotations.zenko\.io/time-progression-factor}")
240250
export INSTANCE_ID=$(kubectl get zenko ${ZENKO_NAME} -o jsonpath='{.status.instanceID}')
@@ -338,7 +348,8 @@ else
338348
"DRAdminSecretKey":"${ADMIN_PRA_SECRET_ACCESS_KEY}",
339349
"UtilizationServiceHost":"${UTILIZATION_SERVICE_HOST}",
340350
"UtilizationServicePort":"${UTILIZATION_SERVICE_PORT}",
341-
"KubeconfigPath":"${KUBECONFIG:-${HOME}/.kube/config}"
351+
"KubeconfigPath":"${KUBECONFIG:-${HOME}/.kube/config}",
352+
"JaegerQueryEndpoint":"${JAEGER_QUERY_ENDPOINT}"
342353
}
343354
EOF
344355
)"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# End-to-end check that a single S3 request is traced across services:
# the steps create a bucket and an object, then look the trace up in Jaeger.
@2.15.0 @PreMerge
Feature: OpenTelemetry Tracing
    Traces should propagate across Zenko services

    Scenario: S3 PutObject produces a trace spanning cloudserver and vault
        Given a "Non versioned" bucket
        And an object "otel-test-object" that "exists"
        Then a trace should exist in Jaeger for service "cloudserver"
        And the trace should contain spans from service "vault"
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
import { Then } from '@cucumber/cucumber';
2+
import { strict as assert } from 'assert';
3+
import { Utils } from 'cli-testing';
4+
import Zenko from 'world/Zenko';
5+
6+
/** Maximum time, in milliseconds, spent polling the Jaeger query API. */
const JAEGER_POLL_TIMEOUT = 30000;
/** Pause, in milliseconds, between two consecutive Jaeger queries. */
const JAEGER_POLL_INTERVAL = 2000;

/**
 * Key/value attribute attached to a span or to a process.
 * The value is left as `unknown` because the response is unvalidated JSON
 * and consumers must narrow it before use.
 */
interface JaegerTag {
    key: string;
    type?: string;
    value: unknown;
}

/** Process (service instance) that emitted spans, keyed by process ID in a trace. */
interface JaegerProcess {
    serviceName: string;
    tags: JaegerTag[];
}

/** Single span of a trace; `processID` is a key into `JaegerTrace.processes`. */
interface JaegerSpan {
    traceID: string;
    spanID: string;
    operationName: string;
    processID: string;
    tags: JaegerTag[];
}

/** One trace: its spans plus the processes those spans refer to. */
interface JaegerTrace {
    traceID: string;
    spans: JaegerSpan[];
    processes: Record<string, JaegerProcess>;
}

/** Body returned by the trace search endpoint. */
interface JaegerSearchResponse {
    data: JaegerTrace[];
}
31+
32+
/**
 * Repeatedly queries the Jaeger trace search endpoint until at least one
 * trace of `service` references `bucketName`, or until the timeout elapses.
 *
 * @param endpoint - Base URL of the Jaeger query API (no trailing slash).
 * @param service - Service name to search traces for.
 * @param bucketName - Bucket name that must appear in a span tag of a trace.
 * @param timeoutMs - Overall polling budget in milliseconds.
 * @param intervalMs - Pause between two attempts in milliseconds.
 * @returns The traces from the first response that contains a match.
 * @throws Error when no matching trace shows up before the deadline; the
 *   message includes the error of the most recent attempt, if it failed.
 */
async function pollJaegerForTraces(
    endpoint: string,
    service: string,
    bucketName: string,
    timeoutMs = JAEGER_POLL_TIMEOUT,
    intervalMs = JAEGER_POLL_INTERVAL,
): Promise<JaegerTrace[]> {
    const deadline = Date.now() + timeoutMs;
    // The service name comes from the feature file: encode it so characters
    // such as '&', '#' or spaces cannot corrupt the query string.
    const url = `${endpoint}/api/traces?service=${encodeURIComponent(service)}&lookback=1m&limit=100`;
    let lastError: Error | null = null;

    while (Date.now() < deadline) {
        try {
            const response = await fetch(url, {
                signal: AbortSignal.timeout(5000),
            });
            if (!response.ok) {
                throw new Error(`Jaeger query returned HTTP ${response.status}`);
            }
            const body = await response.json() as JaegerSearchResponse;
            // This attempt reached Jaeger successfully: forget earlier
            // failures so a later timeout does not report a stale error.
            lastError = null;
            const matching = (body?.data ?? []).filter(trace => traceMatchesBucket(trace, bucketName));
            if (matching.length > 0) {
                return matching;
            }
        } catch (err) {
            // Anything can be thrown; normalize to an Error for reporting.
            lastError = err instanceof Error ? err : new Error(String(err));
        }

        // Never sleep past the deadline: the caller's step timeout leaves
        // only a small margin on top of the polling budget.
        const remainingMs = deadline - Date.now();
        if (remainingMs <= 0) {
            break;
        }
        await Utils.sleep(Math.min(intervalMs, remainingMs));
    }

    throw new Error(
        `pollJaegerForTraces timed out after ${timeoutMs}ms waiting for traces ` +
        `from service "${service}" referencing bucket "${bucketName}"` +
        `${lastError ? `: ${lastError.message}` : ''}`,
    );
}
68+
69+
/**
 * Tells whether any span of `trace` carries a string tag whose value
 * contains `bucketName`.
 *
 * An empty bucket name never matches: every string contains the empty
 * string, so it would otherwise select every trace that has a string tag.
 * Traces or spans lacking `spans` / `tags` are treated as having none, so a
 * single malformed entry does not make the whole search throw.
 *
 * @param trace - Trace as returned by the Jaeger query API.
 * @param bucketName - Bucket name to look for (substring match).
 * @returns `true` when at least one string tag value contains the name.
 */
function traceMatchesBucket(trace: JaegerTrace, bucketName: string): boolean {
    if (!bucketName) {
        return false;
    }
    return (trace.spans ?? []).some(span =>
        (span.tags ?? []).some(tag =>
            typeof tag.value === 'string' && tag.value.includes(bucketName),
        ),
    );
}
76+
77+
/**
 * Returns the first trace containing a span with the given operation name.
 *
 * @param traces - Candidate traces, in the order they should be searched.
 * @param operationName - Span operation to look for; defaults to the
 *   operation name this test expects for an S3 PutObject.
 * @returns The first matching trace, or `undefined` when none matches.
 */
function findPutObjectTrace(
    traces: JaegerTrace[],
    operationName = 'api.object_put',
): JaegerTrace | undefined {
    return traces.find(trace =>
        (trace.spans ?? []).some(span => span.operationName === operationName),
    );
}
82+
83+
/**
 * Tells whether `trace` contains at least one span emitted by a process
 * whose service name is `serviceName`.
 *
 * A process that is listed in `trace.processes` but has no span pointing at
 * it does not count.
 *
 * @param trace - Trace as returned by the Jaeger query API.
 * @param serviceName - Service name to look for.
 * @returns `true` when a span references a process of that service.
 */
function traceHasServiceSpans(trace: JaegerTrace, serviceName: string): boolean {
    // Collect the IDs of every process belonging to the service; a Set keeps
    // the per-span lookup constant-time.
    const processIds = new Set<string>();
    for (const [id, proc] of Object.entries(trace.processes ?? {})) {
        if (proc.serviceName === serviceName) {
            processIds.add(id);
        }
    }
    if (processIds.size === 0) {
        return false;
    }

    return (trace.spans ?? []).some(span => processIds.has(span.processID));
}
90+
91+
/**
 * Step: waits until Jaeger holds a trace of `service` that references the
 * bucket saved by an earlier step and contains an `api.object_put` span,
 * then saves that trace as `jaegerTrace` for follow-up steps.
 *
 * Traces that reference the bucket but lack the PutObject span (for example
 * ones produced by earlier requests on the same bucket) can become visible
 * first, so the search is repeated until the polling budget is used up
 * instead of failing on the first non-empty result.
 */
Then('a trace should exist in Jaeger for service {string}',
    { timeout: JAEGER_POLL_TIMEOUT + 10000 },
    async function (this: Zenko, service: string) {
        const endpoint = this.parameters.JaegerQueryEndpoint;
        assert.ok(endpoint, 'JaegerQueryEndpoint is not configured in world parameters');

        const bucketName = this.getSaved<string>('bucketName');
        assert.ok(bucketName, 'No bucketName saved from a previous step');

        const deadline = Date.now() + JAEGER_POLL_TIMEOUT;
        let traces: JaegerTrace[] = [];
        let trace: JaegerTrace | undefined;
        for (;;) {
            const remainingMs = deadline - Date.now();
            if (remainingMs <= 0) {
                break;
            }
            // Throws if nothing references the bucket within the remaining budget.
            traces = await pollJaegerForTraces(endpoint, service, bucketName, remainingMs);
            trace = findPutObjectTrace(traces);
            // Stop on success, or when another pause would exhaust the budget.
            if (trace || deadline - Date.now() <= JAEGER_POLL_INTERVAL) {
                break;
            }
            await Utils.sleep(JAEGER_POLL_INTERVAL);
        }

        assert.ok(trace,
            `No trace with api.object_put operation found among ${traces.length} traces from ` +
            `service "${service}" referencing bucket "${bucketName}"`,
        );

        this.addToSaved('jaegerTrace', trace);
    },
);
110+
111+
/**
 * Step: checks that the trace saved as `jaegerTrace` by a previous step has
 * at least one span emitted by `service`. On failure, the message lists the
 * distinct service names present in the trace.
 */
Then('the trace should contain spans from service {string}',
    async function (this: Zenko, service: string) {
        const savedTrace = this.getSaved<JaegerTrace>('jaegerTrace');
        assert.ok(savedTrace, 'No trace saved from the previous step');

        const hasServiceSpans = traceHasServiceSpans(savedTrace, service);

        // De-duplicate service names while keeping first-seen order.
        const seenServices = new Set<string>();
        for (const proc of Object.values(savedTrace.processes)) {
            seenServices.add(proc.serviceName);
        }
        const serviceList = Array.from(seenServices).join(', ');

        assert.ok(
            hasServiceSpans,
            `Trace ${savedTrace.traceID} does not contain spans from service "${service}". ` +
            `Services in trace: ${serviceList}`,
        );
    },
);

tests/functional/ctst/world/Zenko.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ export interface ZenkoWorldParameters extends ClientOptions {
103103
SorbetdRestoreTimeout: string;
104104
UtilizationServiceHost: string;
105105
UtilizationServicePort: string;
106+
JaegerQueryEndpoint: string;
106107
[key: string]: unknown;
107108
}
108109

0 commit comments

Comments
 (0)