Prometheus Operator supports an auth proxy for Service Discovery
- 4 minutes read - 809 words
CRD linting
Returning to yesterday’s failing tests, it’s unclear how to introspect the E2E tests.
kubectl get namespaces
NAME STATUS AGE
...
allns-s2os2u-0-90f56669 Active 22h
allns-s2qhuw-0-6b33d5eb Active 4m23s
kubectl get all \
--namespace=allns-s2os2u-0-90f56669
No resources found in allns-s2os2u-0-90f56669 namespace.
kubectl get all \
--namespace=allns-s2qhuw-0-6b33d5eb
NAME READY STATUS RESTARTS AGE
pod/prometheus-operator-6c96477b9c-q6qm2 1/1 Running 0 4m12s
pod/prometheus-operator-admission-webhook-68bc9f885-nq6r8 0/1 ImagePullBackOff 0 4m7s
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
service/prometheus-operator ClusterIP 10.152.183.247 <none> 443/TCP 4m9s
NAME READY UP-TO-DATE AVAILABLE AGE
deployment.apps/prometheus-operator 1/1 1 1 4m12s
deployment.apps/prometheus-operator-admission-webhook 0/1 1 0 4m7s
NAME DESIRED CURRENT READY AGE
replicaset.apps/prometheus-operator-6c96477b9c 1 1 1 4m13s
replicaset.apps/prometheus-operator-admission-webhook-68bc9f885 1 1 0 4m8s
kubectl logs deployment/prometheus-operator-admission-webhook \
--namespace=allns-s2qhuw-0-6b33d5eb
Error from server (BadRequest): container "prometheus-operator-admission-webhook" in pod "prometheus-operator-admission-webhook-68bc9f885-nq6r8" is waiting to start: trying and failing to pull image
NAME="prometheus-operator-admission-webhook"
FILTER="{.spec.template.spec.containers[?(@.name==\"${NAME}\")].image}"
kubectl get deployment/prometheus-operator-admission-webhook \
--namespace=allns-s2qjz2-0-fad82c03 \
--output=jsonpath="${FILTER}"
quay.io/prometheus-operator/admission-webhook:52d1e55af
Want:
localhost:32000/admission-webhook:52d1e55af
So, that’s the first problem: the admission-webhook image
should also come from localhost:32000,
not quay.io/prometheus-operator.
So, it appears that only the operator image is passed to the tests:
go test \
-timeout 120m \
-v ./test/e2e/ \
$(TEST_RUN_ARGS) \
--kubeconfig=$(KUBECONFIG) \
--operator-image=$(IMAGE_OPERATOR):$(TAG) \
-count=1
So, I revised:
makefile
go test \
-timeout 120m \
-v ./test/e2e/ \
$(TEST_RUN_ARGS) \
--kubeconfig=$(KUBECONFIG) \
--operator-image=$(IMAGE_OPERATOR):$(TAG) \
--config-reloader=$(IMAGE_RELOADER):$(TAG) \
--admission-webhook=$(IMAGE_WEBHOOK):$(TAG) \
-count=1
main-test.go
var (
...
opImage *string
crImage *string
awImage *string
)
func TestMain(m *testing.M) {
...
opImage = flag.String(
"operator-image",
"",
"operator image, e.g. quay.io/prometheus-operator/prometheus-operator",
)
crImage = flag.String(
"config-reloader",
"",
"config-reloader image, e.g. quay.io/prometheus-operator/prometheus-config-reloader",
)
awImage = flag.String(
"admission-webhook",
"",
"admission-webhook image, e.g. quay.io/prometheus-operator/admission-webhook",
)
flag.Parse()
...
prevStableOpImage := fmt.Sprintf("%s:v%s",
"quay.io/prometheus-operator/prometheus-operator",
strings.TrimSpace(string(prevStableVersion)),
)
prevStableCRImage := fmt.Sprintf("%s:v%s",
"quay.io/prometheus-operator/prometheus-config-reloader",
strings.TrimSpace(string(prevStableVersion)),
)
prevStableAWImage := fmt.Sprintf("%s:v%s",
"quay.io/prometheus-operator/admission-webhook",
strings.TrimSpace(string(prevStableVersion)),
)
...
if framework, err = operatorFramework.New(
*kubeconfig,
*opImage,
*crImage,
*awImage,
exampleDir,
resourcesDir,
nextSemVer,
); err != nil {
logger.Printf("failed to setup framework: %v\n", err)
os.Exit(1)
}
framework.go
type Framework struct {
...
opImage string
crImage string
awImage string
...
}
func New(
kubeconfig,
opImage,
crImage,
awImage,
exampleDir,
resourcesDir string,
operatorVersion semver.Version,
) (*Framework, error) {
...
f := &Framework{
...
opImage: opImage,
crImage: crImage,
awImage: awImage,
...
}
}
func (f *Framework) CreateOrUpdatePrometheusOperator(
...
if f.opImage != "" {
parts := strings.Split(f.opImage, ":")
if len(parts) > 4 || len(parts) < 2 {
return nil, errors.Errorf(
"expected operator image '%v' split by colon to include the tag but got '%v'",
f.opImage,
parts,
)
}
// Override operator image used, if specified when running tests.
deploy.Spec.Template.Spec.Containers[0].Image = f.opImage
}
if f.crImage != "" {
parts := strings.Split(f.crImage, ":")
if len(parts) > 4 || len(parts) < 2 {
return nil, errors.Errorf(
"expected config-reloader image '%v' split by colon to include the tag but got '%v'",
f.crImage,
parts,
)
}
// Override Prometheus config reloader image
for i, arg := range deploy.Spec.Template.Spec.Containers[0].Args {
if strings.Contains(arg, "--prometheus-config-reloader=") {
deploy.Spec.Template.Spec.Containers[0].Args[i] = "--prometheus-config-reloader=" + f.crImage
}
}
}
var webhookServerImage string
if f.awImage != "" {
parts := strings.Split(f.awImage, ":")
if len(parts) > 4 || len(parts) < 2 {
return nil, errors.Errorf(
"expected admission-webhook image '%v' split by colon to include the tag but got '%v'",
f.awImage,
parts,
)
}
// Override admission webhook image
webhookServerImage = f.awImage
}
}
func (f *Framework) CreateOrUpdateAdmissionWebhookServer(
...
if image != "" {
// Override operator image used, if specified when running tests.
deploy.Spec.Template.Spec.Containers[0].Image = image
parts := strings.Split(image, ":")
if len(parts) > 4 || len(parts) < 2 {
return nil, nil, errors.Errorf(
"expected image '%v' split by colon to include the image tag but got '%v'",
image,
parts,
)
}
}
}
That got me further through the E2E tests but still failures:
alertmanager_test.go:64: alertmanager allns-x-amcreatedeletecluster-s2qk0c-0-e229d475/test failed to become available: context deadline exceeded: expected 3 replicas, got 2
alertmanager_test.go:308: alertmanager allns-x-amexposingwithkubernetesapi-s2qk8p-0-a86fee0c/test-alertmanager failed to become available: context deadline exceeded: expected 1 replicas, got 0
alertmanager_test.go:248: client rate limiter Wait returned an error: context deadline exceeded
alertmanager_test.go:564: alertmanager allns-x-amreloadconfig-s2qkzg-0-de59dc20/reload-config failed to become available: context deadline exceeded: expected Available condition to be 'True', got "False" (reason NoPodReady, "pod alertmanager-reload-config-0: containers with incomplete status: [init-config-reloader]")
alertmanager_test.go:354: waiting for service alertmanager-test to become ready timed out: requesting endpoints for service alertmanager-test failed: endpoints "alertmanager-test" not found
alertmanager_test.go:153: failed to update Alertmanager: alertmanager allns-x-amscaling-s2qlro-0-b131bbf6/test failed to become available: context deadline exceeded: expected 5 replicas, got 0
alertmanager_test.go:756: waiting for service alertmanager-webhook to become ready timed out: requesting endpoints for service alertmanager-webhook failed: endpoints "alertmanager-webhook" not found
alertmanager_test.go:2140: Operation cannot be fulfilled on statefulsets.apps "alertmanager-test": the object has been modified; please apply your changes to the latest version and try again
alertmanager_test.go:2572: alertmanager allns-x-alertmanagercrd-time-in-milli-seconds-s2qm7o-0-7ca9d0a1/test failed to become available: context deadline exceeded: expected 1 replicas, got 0
alertmanager_test.go:2572: alertmanager allns-x-alertmanagercrd-time-in-seconds-s2qm83-0-b4484f85/test failed to become available: context deadline exceeded: expected Available condition to be 'True', got "False" (reason NoPodReady, "pod alertmanager-test-0: containers with incomplete status: [init-config-reloader]")
alertmanager_test.go:2090: failed to update Alertmanager: alertmanager allns-x-ampreserveuseraddedmetadata-s2qm7o-0-d744e1ad/test failed to become available: context deadline exceeded: expected 2 replicas, got 0
&c.
So, clearly there’s an issue with the Alertmanager configuration. Tomorrow…