Skip to content
This repository was archived by the owner on Dec 6, 2025. It is now read-only.

Commit baffb92

Browse files
authored
panic recover (#876)
1 parent 3aa7035 commit baffb92

File tree

4 files changed

+160
-0
lines changed

4 files changed

+160
-0
lines changed

libs/common/grpc.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ func StartNewGRPCServer(ctx context.Context, addr string, registerServerHook fun
9090
func DefaultUnaryInterceptors(metrics *prometheusGrpcProvider.ServerMetrics) []grpc.UnaryServerInterceptor {
9191
return []grpc.UnaryServerInterceptor{
9292
metrics.UnaryServerInterceptor(),
93+
hwgrpc.UnaryPanicRecoverInterceptor(),
9394
hwgrpc.UnaryLoggingInterceptor,
9495
hwgrpc.UnaryErrorQualityControlInterceptor,
9596
hwgrpc.UnaryLocaleInterceptor,
@@ -106,6 +107,7 @@ func DefaultUnaryInterceptors(metrics *prometheusGrpcProvider.ServerMetrics) []g
106107
func DefaultStreamInterceptors(metrics *prometheusGrpcProvider.ServerMetrics) []grpc.StreamServerInterceptor {
107108
return []grpc.StreamServerInterceptor{
108109
metrics.StreamServerInterceptor(),
110+
hwgrpc.StreamPanicRecoverInterceptor(),
109111
hwgrpc.StreamLoggingInterceptor,
110112
hwgrpc.StreamErrorQualityControlInterceptor,
111113
hwgrpc.StreamLocaleInterceptor,
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package hwgrpc
2+
3+
import (
4+
"context"
5+
"runtime/debug"
6+
"telemetry"
7+
8+
"common/hwerr"
9+
"common/locale"
10+
11+
"github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/recovery"
12+
"github.com/prometheus/client_golang/prometheus"
13+
zlog "github.com/rs/zerolog/log"
14+
"google.golang.org/grpc"
15+
"google.golang.org/grpc/codes"
16+
)
17+
18+
var panicsRecovered = telemetry.NewLazyCounter(prometheus.CounterOpts{
19+
Name: "services_panics_recovered_total",
20+
Help: "Total number of panics recovered by PanicRecoverInterceptor",
21+
})
22+
23+
func recoveryHandlerFn() recovery.RecoveryHandlerFuncContext {
24+
return func(ctx context.Context, recovered any) (err error) {
25+
zlog.Ctx(ctx).
26+
Error().
27+
Any("recovered", recovered).
28+
Str("stack", string(debug.Stack())).
29+
Msg("recovered a panic")
30+
31+
panicsRecovered.Counter().Inc()
32+
33+
return hwerr.NewStatusError(ctx, codes.Internal, "panic recovered", locale.GenericError(ctx))
34+
}
35+
}
36+
37+
func UnaryPanicRecoverInterceptor() grpc.UnaryServerInterceptor {
38+
panicsRecovered.Ensure()
39+
40+
return recovery.UnaryServerInterceptor(
41+
recovery.WithRecoveryHandlerContext(recoveryHandlerFn()),
42+
)
43+
}
44+
45+
func StreamPanicRecoverInterceptor() grpc.StreamServerInterceptor {
46+
panicsRecovered.Ensure()
47+
48+
return recovery.StreamServerInterceptor(
49+
recovery.WithRecoveryHandlerContext(recoveryHandlerFn()),
50+
)
51+
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
package hwgrpc
2+
3+
import (
4+
"context"
5+
"telemetry"
6+
"testing"
7+
8+
"github.com/grpc-ecosystem/go-grpc-middleware/v2/testing/testpb"
9+
"github.com/stretchr/testify/suite"
10+
"google.golang.org/grpc"
11+
"google.golang.org/grpc/codes"
12+
"google.golang.org/grpc/status"
13+
)
14+
15+
type recoveryAssertService struct {
16+
testpb.TestServiceServer
17+
}
18+
19+
func (s *recoveryAssertService) Ping(ctx context.Context, ping *testpb.PingRequest) (*testpb.PingResponse, error) {
20+
if ping.GetValue() == "panic" {
21+
panic("very bad thing happened")
22+
}
23+
return s.TestServiceServer.Ping(ctx, ping)
24+
}
25+
26+
func (s *recoveryAssertService) PingList(ping *testpb.PingListRequest, stream testpb.TestService_PingListServer) error {
27+
if ping.Value == "panic" {
28+
panic("very bad thing happened")
29+
}
30+
return s.TestServiceServer.PingList(ping, stream)
31+
}
32+
33+
type RecoverySuite struct {
34+
*testpb.InterceptorTestSuite
35+
}
36+
37+
func TestPanicRecoverInterceptor(t *testing.T) {
38+
telemetry.SetupMetrics(context.Background(), nil)
39+
s := &RecoverySuite{
40+
InterceptorTestSuite: &testpb.InterceptorTestSuite{
41+
TestService: &recoveryAssertService{TestServiceServer: &testpb.TestPingService{}},
42+
ServerOpts: []grpc.ServerOption{
43+
grpc.StreamInterceptor(StreamPanicRecoverInterceptor()),
44+
grpc.UnaryInterceptor(UnaryPanicRecoverInterceptor()),
45+
},
46+
},
47+
}
48+
suite.Run(t, s)
49+
}
50+
51+
func (s *RecoverySuite) TestUnary_SuccessfulRequest() {
52+
_, err := s.Client.Ping(s.SimpleCtx(), testpb.GoodPing)
53+
s.Require().NoError(err)
54+
}
55+
56+
func (s *RecoverySuite) TestUnary_PanicRequest() {
57+
_, err := s.Client.Ping(s.SimpleCtx(), &testpb.PingRequest{Value: "panic"})
58+
s.Require().Error(err)
59+
st, ok := status.FromError(err)
60+
s.Require().True(ok, "not a status error")
61+
s.Require().Equal(codes.Internal, st.Code())
62+
}
63+
64+
func (s *RecoverySuite) TestStream_SuccessfulReceive() {
65+
stream, err := s.Client.PingList(s.SimpleCtx(), testpb.GoodPingList)
66+
s.Require().NoError(err, "should not fail on establishing the stream")
67+
pong, err := stream.Recv()
68+
s.Require().NoError(err, "no error must occur")
69+
s.Require().NotNil(pong, "pong must not be nil")
70+
}
71+
72+
func (s *RecoverySuite) TestStream_PanickingReceive() {
73+
stream, err := s.Client.PingList(s.SimpleCtx(), &testpb.PingListRequest{Value: "panic"})
74+
s.Require().NoError(err, "should not fail on establishing the stream")
75+
_, err = stream.Recv()
76+
s.Require().Error(err)
77+
st, ok := status.FromError(err)
78+
s.Require().True(ok, "not a status error")
79+
s.Require().Equal(codes.Internal, st.Code())
80+
}

libs/telemetry/setup.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package telemetry
33
import (
44
"context"
55
"errors"
6+
"github.com/prometheus/client_golang/prometheus/promauto"
67
"hwutil"
78
"net/http"
89
"os"
@@ -106,3 +107,29 @@ func SetupMetrics(ctx context.Context, shutdown func(error)) {
106107
// PrometheusRegistry returns the package-level prometheusRegistry value.
// NOTE(review): presumably only populated after SetupMetrics has run; confirm
// before calling it during package initialization.
func PrometheusRegistry() *prometheus.Registry {
107108
return prometheusRegistry
108109
}
110+
111+
// LazyCounter prevents access to PrometheusRegistry, before it is initialized
112+
// by creating the counter only when it is needed
113+
type LazyCounter struct {
114+
opts prometheus.CounterOpts
115+
counter *prometheus.Counter
116+
}
117+
118+
func NewLazyCounter(opts prometheus.CounterOpts) LazyCounter {
119+
return LazyCounter{
120+
opts: opts,
121+
counter: nil,
122+
}
123+
}
124+
125+
func (lc *LazyCounter) Counter() prometheus.Counter {
126+
if lc.counter != nil {
127+
return *lc.counter
128+
}
129+
lc.counter = hwutil.PtrTo(promauto.With(prometheusRegistry).NewCounter(lc.opts))
130+
return *lc.counter
131+
}
132+
133+
func (lc *LazyCounter) Ensure() {
134+
lc.Counter()
135+
}

0 commit comments

Comments
 (0)