View Javadoc
1   package emissary.grpc.exceptions;
2   
3   import io.grpc.Status;
4   import io.grpc.StatusRuntimeException;
5   import org.apache.commons.lang3.StringUtils;
6   
7   /**
8    * Encapsulates bad data errors related to an external service. These errors should be treated as unrecoverable and
9    * should not be made due to temporal server state.
10   */
11  public class ServiceException extends RuntimeException {
12  
13      private static final long serialVersionUID = 1371863576664142288L;
14  
15      public static final String GRPC_ERROR_PREFIX = "Encountered gRPC runtime status error. ";
16  
17      public ServiceException(String errorMessage) {
18          super(errorMessage);
19      }
20  
21      public ServiceException(String errorMessage, Throwable err) {
22          super(errorMessage, err);
23      }
24  
25      public static void handleGrpcStatusRuntimeException(StatusRuntimeException e) {
26          if (e == null) {
27              throw new ServiceException("Service returned a null exception");
28          }
29          Status status = e.getStatus();
30          if (status == null) {
31              throw new ServiceException("Service returned a null status: " + e.getMessage(), e);
32          }
33          // code shouldn't ever be null, but we check for safety
34          Status.Code code = status.getCode();
35          if (code == null) {
36              throw new ServiceException("Service returned a status with a null code: " + e.getMessage(), e);
37          }
38  
39          switch (code) {
40              case DEADLINE_EXCEEDED:
41                  throw new ServiceException(GRPC_ERROR_PREFIX + "gRPC client connection has timed out: " + e.getMessage(), e);
42              case UNAVAILABLE: {
43                  // Likely server has gone down. Could be a crash or resources were scaled down
44                  String desc = status.getDescription();
45                  if (StringUtils.isNotEmpty(desc) && desc.contains("Network closed for unknown reason")) {
46                      // So-called "poison pill" files have resulted in crashes for unknown reasons.
47                      // Out of an abundance of caution, we consider these files as failures.
48                      throw new ServiceException(GRPC_ERROR_PREFIX +
49                              "It's possible service crashed due to a misbehaving file: " + e.getMessage(), e);
50                  }
51                  // Otherwise, we indicate the server is not live
52                  throw new ServiceNotLiveException(GRPC_ERROR_PREFIX + "It's likely service crashed: " + e.getMessage(), e);
53              }
54              case CANCELLED:
55                  throw new ServiceException(GRPC_ERROR_PREFIX + "It's likely a client side interrupt occurred: " + e.getMessage(), e);
56              case RESOURCE_EXHAUSTED:
57                  // Likely we've exceeded the maximum number of concurrent requests
58                  throw new ServiceNotReadyException(GRPC_ERROR_PREFIX +
59                          "It's likely we've exceeded the maximum number of requests: " + e.getMessage(), e);
60              case INTERNAL:
61                  // Likely server killed itself due to OOM or other conditions
62                  throw new ServiceException(GRPC_ERROR_PREFIX +
63                          "It's likely a gpu OOM error or other resource error has occurred: " + e.getMessage(), e);
64              default:
65                  throw new ServiceException(GRPC_ERROR_PREFIX +
66                          "This is an unhandled code type. Please add it to the list of gRPC exceptions: " + e.getMessage(), e);
67          }
68      }
69  }