Skip to content

Commit 4056ba3

Browse files
csvirimetacosm
andauthored
feat: error handler improvements (#1033)
* feat: adding context to error handler; using exception instead runtime exception * fix: error handling compilation issue * feat: added error status update control * Update operator-framework-core/src/main/java/io/javaoperatorsdk/operator/api/reconciler/ErrorStatusHandler.java Co-authored-by: Chris Laprun <[email protected]> * fix: improvements on error handling * fix: exception wrapping * fix: addind missing override * fix: unit tests * docs: update Co-authored-by: Chris Laprun <[email protected]>
1 parent dd6e19d commit 4056ba3

File tree

18 files changed

+230
-114
lines changed

18 files changed

+230
-114
lines changed

docs/documentation/features.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -275,9 +275,9 @@ In order to facilitate error reporting Reconciler can implement the following
275275
[interface](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/api/reconciler/ErrorStatusHandler.java):
276276

277277
```java
278-
public interface ErrorStatusHandler<T extends HasMetadata> {
278+
public interface ErrorStatusHandler<P extends HasMetadata> {
279279

280-
Optional<T> updateErrorStatus(T resource, RetryInfo retryInfo, RuntimeException e);
280+
ErrorStatusUpdateControl<P> updateErrorStatus(P resource, Context<P> context, Exception e);
281281

282282
}
283283
```
@@ -294,6 +294,12 @@ will also produce an event, and will result in a reconciliation if the controlle
294294
The scope of this feature is only the `reconcile` method of the reconciler, since there should not be updates on custom
295295
resource after it is marked for deletion.
296296

297+
Retry can be skipped for the cases of unrecoverable errors:
298+
299+
```java
300+
ErrorStatusUpdateControl.updateStatus(customResource).withNoRetry();
301+
```
302+
297303
### Correctness and Automatic Retries
298304

299305
There is a possibility to turn off the automatic retries. This is not desirable, unless there is a very specific reason.

micrometer-support/src/main/java/io/javaoperatorsdk/operator/monitoring/micrometer/MicrometerMetrics.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import java.util.Map;
77
import java.util.Optional;
88

9+
import io.javaoperatorsdk.operator.OperatorException;
910
import io.javaoperatorsdk.operator.api.monitoring.Metrics;
1011
import io.javaoperatorsdk.operator.api.reconciler.RetryInfo;
1112
import io.javaoperatorsdk.operator.processing.event.Event;
@@ -33,7 +34,13 @@ public <T> T timeControllerExecution(ControllerExecution<T> execution) {
3334
.publishPercentileHistogram()
3435
.register(registry);
3536
try {
36-
final var result = timer.record(execution::execute);
37+
final var result = timer.record(() -> {
38+
try {
39+
return execution.execute();
40+
} catch (Exception e) {
41+
throw new OperatorException(e);
42+
}
43+
});
3744
final var successType = execution.successTypeName(result);
3845
registry
3946
.counter(execName + ".success", "controller", name, "type", successType)
@@ -58,6 +65,7 @@ public void cleanupDoneFor(ResourceID customResourceUid) {
5865
incrementCounter(customResourceUid, "events.delete");
5966
}
6067

68+
@Override
6169
public void reconcileCustomResource(ResourceID resourceID, RetryInfo retryInfoNullable) {
6270
Optional<RetryInfo> retryInfo = Optional.ofNullable(retryInfoNullable);
6371
incrementCounter(resourceID, RECONCILIATIONS + "started",
@@ -72,7 +80,7 @@ public void finishedReconciliation(ResourceID resourceID) {
7280
incrementCounter(resourceID, RECONCILIATIONS + "success");
7381
}
7482

75-
public void failedReconciliation(ResourceID resourceID, RuntimeException exception) {
83+
public void failedReconciliation(ResourceID resourceID, Exception exception) {
7684
var cause = exception.getCause();
7785
if (cause == null) {
7886
cause = exception;

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/OperatorException.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ public OperatorException(String message) {
88
super(message);
99
}
1010

11+
public OperatorException(Throwable cause) {
12+
super(cause);
13+
}
14+
1115
public OperatorException(String message, Throwable cause) {
1216
super(message, cause);
1317
}

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/api/monitoring/Metrics.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ default void receivedEvent(Event event) {}
1313

1414
default void reconcileCustomResource(ResourceID resourceID, RetryInfo retryInfo) {}
1515

16-
default void failedReconciliation(ResourceID resourceID, RuntimeException exception) {}
16+
default void failedReconciliation(ResourceID resourceID, Exception exception) {}
1717

1818
default void cleanupDoneFor(ResourceID customResourceUid) {}
1919

@@ -27,10 +27,10 @@ interface ControllerExecution<T> {
2727

2828
String successTypeName(T result);
2929

30-
T execute();
30+
T execute() throws Exception;
3131
}
3232

33-
default <T> T timeControllerExecution(ControllerExecution<T> execution) {
33+
default <T> T timeControllerExecution(ControllerExecution<T> execution) throws Exception {
3434
return execution.execute();
3535
}
3636

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/api/reconciler/DefaultContext.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
public class DefaultContext<P extends HasMetadata> implements Context<P> {
1111

12-
private final RetryInfo retryInfo;
12+
private RetryInfo retryInfo;
1313
private final Controller<P> controller;
1414
private final P primaryResource;
1515
private final ControllerConfiguration<P> controllerConfiguration;
@@ -44,4 +44,9 @@ public ControllerConfiguration<P> getControllerConfiguration() {
4444
public ManagedDependentResourceContext managedDependentResourceContext() {
4545
return managedDependentResourceContext;
4646
}
47+
48+
public DefaultContext<P> setRetryInfo(RetryInfo retryInfo) {
49+
this.retryInfo = retryInfo;
50+
return this;
51+
}
4752
}
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,14 @@
11
package io.javaoperatorsdk.operator.api.reconciler;
22

3-
import java.util.Optional;
4-
53
import io.fabric8.kubernetes.api.model.HasMetadata;
64

7-
public interface ErrorStatusHandler<T extends HasMetadata> {
5+
public interface ErrorStatusHandler<P extends HasMetadata> {
86

97
/**
108
* <p>
119
* Reconciler can implement this interface in order to update the status sub-resource in the case
1210
* an exception in thrown. In that case
13-
* {@link #updateErrorStatus(HasMetadata, RetryInfo, RuntimeException)} is called automatically.
11+
* {@link #updateErrorStatus(HasMetadata, Context, Exception)} is called automatically.
1412
* <p>
1513
* The result of the method call is used to make a status update on the custom resource. This is
1614
* always a sub-resource update request, so no update on custom resource itself (like spec of
@@ -21,10 +19,10 @@ public interface ErrorStatusHandler<T extends HasMetadata> {
2119
* should not be updates on custom resource after it is marked for deletion.
2220
*
2321
* @param resource to update the status on
24-
* @param retryInfo the current retry status
22+
* @param context the current context
2523
* @param e exception thrown from the reconciler
2624
* @return the updated resource
2725
*/
28-
Optional<T> updateErrorStatus(T resource, RetryInfo retryInfo, RuntimeException e);
26+
ErrorStatusUpdateControl<P> updateErrorStatus(P resource, Context<P> context, Exception e);
2927

3028
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package io.javaoperatorsdk.operator.api.reconciler;
2+
3+
import java.util.Optional;
4+
5+
import io.fabric8.kubernetes.api.model.HasMetadata;
6+
7+
public class ErrorStatusUpdateControl<P extends HasMetadata> {
8+
9+
private final P resource;
10+
private boolean noRetry = false;
11+
12+
public static <T extends HasMetadata> ErrorStatusUpdateControl<T> updateStatus(T resource) {
13+
return new ErrorStatusUpdateControl<>(resource);
14+
}
15+
16+
public static <T extends HasMetadata> ErrorStatusUpdateControl<T> noStatusUpdate() {
17+
return new ErrorStatusUpdateControl<>(null);
18+
}
19+
20+
private ErrorStatusUpdateControl(P resource) {
21+
this.resource = resource;
22+
}
23+
24+
/**
25+
* Instructs the controller to not retry the error. This is useful for non-recoverable errors.
26+
*
27+
* @return ErrorStatusUpdateControl
28+
*/
29+
public ErrorStatusUpdateControl<P> withNoRetry() {
30+
this.noRetry = true;
31+
return this;
32+
}
33+
34+
public Optional<P> getResource() {
35+
return Optional.ofNullable(resource);
36+
}
37+
38+
public boolean isNoRetry() {
39+
return noRetry;
40+
}
41+
}

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/api/reconciler/Reconciler.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ public interface Reconciler<R extends HasMetadata> {
1313
* @return UpdateControl to manage updates on the custom resource (usually the status) after
1414
* reconciliation.
1515
*/
16-
UpdateControl<R> reconcile(R resource, Context<R> context);
16+
UpdateControl<R> reconcile(R resource, Context<R> context) throws Exception;
1717

1818
/**
1919
* Note that this method is used in combination with finalizers. If automatic finalizer handling

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/Controller.java

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -60,32 +60,36 @@ public Controller(Reconciler<P> reconciler,
6060
public DeleteControl cleanup(P resource, Context<P> context) {
6161
dependents.cleanup(resource, context);
6262

63-
return metrics().timeControllerExecution(
64-
new ControllerExecution<>() {
65-
@Override
66-
public String name() {
67-
return "cleanup";
68-
}
63+
try {
64+
return metrics().timeControllerExecution(
65+
new ControllerExecution<>() {
66+
@Override
67+
public String name() {
68+
return "cleanup";
69+
}
6970

70-
@Override
71-
public String controllerName() {
72-
return configuration.getName();
73-
}
71+
@Override
72+
public String controllerName() {
73+
return configuration.getName();
74+
}
7475

75-
@Override
76-
public String successTypeName(DeleteControl deleteControl) {
77-
return deleteControl.isRemoveFinalizer() ? "delete" : "finalizerNotRemoved";
78-
}
76+
@Override
77+
public String successTypeName(DeleteControl deleteControl) {
78+
return deleteControl.isRemoveFinalizer() ? "delete" : "finalizerNotRemoved";
79+
}
7980

80-
@Override
81-
public DeleteControl execute() {
82-
return reconciler.cleanup(resource, context);
83-
}
84-
});
81+
@Override
82+
public DeleteControl execute() {
83+
return reconciler.cleanup(resource, context);
84+
}
85+
});
86+
} catch (Exception e) {
87+
throw new OperatorException(e);
88+
}
8589
}
8690

8791
@Override
88-
public UpdateControl<P> reconcile(P resource, Context<P> context) {
92+
public UpdateControl<P> reconcile(P resource, Context<P> context) throws Exception {
8993
dependents.reconcile(resource, context);
9094

9195
return metrics().timeControllerExecution(
@@ -113,7 +117,7 @@ public String successTypeName(UpdateControl<P> result) {
113117
}
114118

115119
@Override
116-
public UpdateControl<P> execute() {
120+
public UpdateControl<P> execute() throws Exception {
117121
return reconciler.reconcile(resource, context);
118122
}
119123
});

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/event/EventProcessor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ TimerEventSource<R> retryEventSource() {
242242
* according to the retry timing if there was an exception.
243243
*/
244244
private void handleRetryOnException(
245-
ExecutionScope<R> executionScope, RuntimeException exception) {
245+
ExecutionScope<R> executionScope, Exception exception) {
246246
RetryExecution execution = getOrInitRetryExecution(executionScope);
247247
var customResourceID = executionScope.getCustomResourceID();
248248
boolean eventPresent = eventMarker.eventPresent(customResourceID);

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/event/PostExecutionControl.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@ final class PostExecutionControl<R extends HasMetadata> {
88

99
private final boolean onlyFinalizerHandled;
1010
private final R updatedCustomResource;
11-
private final RuntimeException runtimeException;
11+
private final Exception runtimeException;
1212

1313
private Long reScheduleDelay = null;
1414

1515
private PostExecutionControl(
1616
boolean onlyFinalizerHandled,
1717
R updatedCustomResource,
18-
RuntimeException runtimeException) {
18+
Exception runtimeException) {
1919
this.onlyFinalizerHandled = onlyFinalizerHandled;
2020
this.updatedCustomResource = updatedCustomResource;
2121
this.runtimeException = runtimeException;
@@ -35,7 +35,7 @@ public static <R extends HasMetadata> PostExecutionControl<R> customResourceUpda
3535
}
3636

3737
public static <R extends HasMetadata> PostExecutionControl<R> exceptionDuringExecution(
38-
RuntimeException exception) {
38+
Exception exception) {
3939
return new PostExecutionControl<>(false, null, exception);
4040
}
4141

@@ -60,7 +60,7 @@ public PostExecutionControl<R> withReSchedule(long delay) {
6060
return this;
6161
}
6262

63-
public Optional<RuntimeException> getRuntimeException() {
63+
public Optional<Exception> getRuntimeException() {
6464
return Optional.ofNullable(runtimeException);
6565
}
6666

0 commit comments

Comments
 (0)