Skip to content

Commit 89e048e

Browse files
authored
feat(gRPC): retry & healthcheck (#200)
1 parent 372c7c0 commit 89e048e

File tree

3 files changed

+583
-41
lines changed

3 files changed

+583
-41
lines changed

lib/resty/etcd/v3.lua

Lines changed: 132 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ local now = ngx.now
1313
local sub_str = string.sub
1414
local str_byte = string.byte
1515
local str_char = string.char
16+
local str_find = string.find
1617
local ipairs = ipairs
1718
local pairs = pairs
1819
local pcall = pcall
@@ -281,6 +282,45 @@ local function serialize_and_encode_base64(serialize_fn, data)
281282
end
282283

283284

285+
-- Select an etcd endpoint through choose_endpoint() and open a gRPC
-- connection to it using the TLS settings stored on the client.
--
-- @param self  client table; reads ssl_verify, ssl_cert_path, ssl_key_path,
--              trusted_ca, unix_socket_proxy and grpc
-- @return conn on success, with conn.http_host set to the endpoint's http_host
-- @return nil, err, nil        when no endpoint could be chosen
-- @return nil, err, http_host  when an endpoint was chosen but connecting failed
local function choose_grpc_endpoint(self)
    local picked, pick_err = choose_endpoint(self)
    if not picked then
        return nil, pick_err, nil
    end

    local grpc_opts = {
        max_recv_msg_size = 2147483647,
        tls_verify = self.ssl_verify,
        client_cert = self.ssl_cert_path,
        client_key = self.ssl_key_path,
        trusted_ca = self.trusted_ca,
    }

    -- `insecure` is only set (to false) for https endpoints; otherwise it is
    -- left absent from the options table
    if picked.scheme == "https" then
        grpc_opts.insecure = false
    end

    -- a configured unix socket proxy takes precedence over the endpoint address
    local dial_target = self.unix_socket_proxy
    if not dial_target then
        dial_target = picked.address .. ":" .. picked.port
    end

    utils.log_info("etcd grpc connect to ", dial_target)

    local grpc_conn, dial_err = self.grpc.connect(dial_target, grpc_opts)
    if not grpc_conn then
        return nil, dial_err, picked.http_host
    end

    -- health check is disabled when proxying via unix socket, so http_host
    -- always refers to a real address whenever a failure gets reported
    grpc_conn.http_host = picked.http_host
    return grpc_conn
end
322+
323+
284324
function _M.new(opts)
285325
local timeout = opts.timeout
286326
local ttl = opts.ttl
@@ -396,6 +436,7 @@ function _M.new(opts)
396436

397437
ssl_cert_path = opts.ssl_cert_path,
398438
ssl_key_path = opts.ssl_key_path,
439+
trusted_ca = opts.trusted_ca,
399440
extra_headers = extra_headers,
400441
sni = sni,
401442
unix_socket_proxy = unix_socket_proxy,
@@ -418,48 +459,13 @@ function _M.new(opts)
418459
cli.grpc = grpc
419460
cli.call_opts = {}
420461

421-
local connect_opts = {
422-
max_recv_msg_size = 2147483647,
423-
}
424-
425-
local endpoint, err = choose_endpoint(cli)
426-
if not endpoint then
427-
return nil, err
428-
end
429-
430-
if endpoint.scheme == "https" then
431-
connect_opts.insecure = false
432-
end
433-
434-
connect_opts.tls_verify = cli.ssl_verify
435-
connect_opts.client_cert = cli.ssl_cert_path
436-
connect_opts.client_key = cli.ssl_key_path
437-
connect_opts.trusted_ca = opts.trusted_ca
438-
439-
local conn, err
440-
if unix_socket_proxy then
441-
conn, err = grpc.connect(unix_socket_proxy, connect_opts)
442-
else
443-
conn, err = grpc.connect(endpoint.address .. ":" .. endpoint.port, connect_opts)
444-
end
462+
local conn, err = choose_grpc_endpoint(cli)
445463
if not conn then
446464
return nil, err
447465
end
448466
cli.conn = conn
449467

450-
cli = setmetatable(cli, grpc_mt)
451-
452-
if cli.user then
453-
local auth_req = {name = cli.user, password = cli.password}
454-
local res, err = cli:grpc_call("etcdserverpb.Auth", "Authenticate", auth_req)
455-
if not res then
456-
return nil, err
457-
end
458-
459-
cli.grpc_token = res.body.token
460-
end
461-
462-
return cli
468+
return setmetatable(cli, grpc_mt)
463469
end
464470

465471
local sema, err = semaphore.new()
@@ -1017,6 +1023,17 @@ do
10171023
{"token", ""}
10181024
}
10191025
function get_grpc_metadata(self)
1026+
if not self.grpc_token and self.user then
1027+
local auth_req = {name = self.user, password = self.password}
1028+
local res, err = self:grpc_call("etcdserverpb.Auth",
1029+
"Authenticate", auth_req)
1030+
if not res then
1031+
return nil, err
1032+
end
1033+
1034+
self.grpc_token = res.body.token
1035+
end
1036+
10201037
if self.grpc_token then
10211038
metadata[1][2] = self.grpc_token
10221039
return metadata
@@ -1043,15 +1060,22 @@ function _grpc_M.create_grpc_watch_stream(self, key, attr, opts)
10431060
self.call_opts.timeout = self.timeout * 1000
10441061
end
10451062

1046-
self.call_opts.metadata = get_grpc_metadata(self)
1063+
local data, err = get_grpc_metadata(self)
1064+
if err then
1065+
return nil, err
1066+
end
1067+
self.call_opts.metadata = data
10471068

10481069
local st, err = conn:new_server_stream("etcdserverpb.Watch", "Watch", req, self.call_opts)
10491070
if not st then
1071+
-- report the failure but don't retry here - APISIX will retry syncing after a failure
1072+
health_check.report_failure(conn.http_host)
10501073
return nil, err
10511074
end
10521075

10531076
local res, err = st:recv()
10541077
if not res then
1078+
health_check.report_failure(conn.http_host)
10551079
return nil, err
10561080
end
10571081

@@ -1062,6 +1086,7 @@ end
10621086
function _grpc_M.read_grpc_watch_stream(self, watching_stream)
10631087
local res, err = watching_stream:recv()
10641088
if not res then
1089+
health_check.report_failure(self.conn.http_host)
10651090
return nil, err
10661091
end
10671092

@@ -1151,8 +1176,16 @@ function _grpc_M.convert_grpc_to_http_res(self, res)
11511176
end
11521177

11531178

1179+
-- Tell whether a failed gRPC call carries an error that must not be retried.
--
-- @param err  error value returned by the failed call
-- @return err unchanged when it contains the text "key is not provided";
--         nil otherwise (the caller is free to retry)
local function filter_out_no_retry_err(err)
    -- a missing or non-string error cannot match the text; without this guard
    -- string.find would raise "bad argument #1" instead of letting the caller
    -- continue with its failure handling
    if type(err) ~= "string" then
        return nil
    end

    -- plain (non-pattern) search starting at the first byte
    if str_find(err, "key is not provided", 1, true) then
        return err
    end

    return nil
end
1186+
1187+
11541188
function _grpc_M.grpc_call(self, serv, meth, attr, key, val, opts)
1155-
local conn = self.conn
11561189
attr.key = key
11571190
if val then
11581191
attr.value = serialize_grpc_value(self.serializer.serialize, val)
@@ -1165,9 +1198,67 @@ function _grpc_M.grpc_call(self, serv, meth, attr, key, val, opts)
11651198
self.call_opts.timeout = self.timeout * 1000
11661199
end
11671200
self.call_opts.int64_encoding = self.grpc.INT64_AS_STRING
1168-
self.call_opts.metadata = get_grpc_metadata(self)
11691201

1170-
local res, err = conn:call(serv, meth, attr, self.call_opts)
1202+
if meth ~= "Authenticate" then
1203+
local data, err = get_grpc_metadata(self)
1204+
if err then
1205+
return nil, err
1206+
end
1207+
self.call_opts.metadata = data
1208+
end
1209+
1210+
local conn = self.conn
1211+
local http_host
1212+
local res, err
1213+
if health_check.conf.retry then
1214+
local max_retry = #self.endpoints * health_check.conf.max_fails + 1
1215+
for i = 1, max_retry do
1216+
if conn then
1217+
http_host = conn.http_host
1218+
res, err = conn:call(serv, meth, attr, self.call_opts)
1219+
if res then
1220+
self.conn = conn
1221+
break
1222+
end
1223+
1224+
if filter_out_no_retry_err(err) then
1225+
return nil, err
1226+
end
1227+
end
1228+
1229+
health_check.report_failure(http_host)
1230+
1231+
if i < max_retry then
1232+
utils.log_warn("Tried ", http_host, " failed: ",
1233+
err, ". Retrying")
1234+
end
1235+
1236+
conn, err, http_host = choose_grpc_endpoint(self)
1237+
if not conn and not http_host then
1238+
-- no endpoint can be retried
1239+
return nil, err
1240+
end
1241+
end
1242+
else
1243+
res, err = self.conn:call(serv, meth, attr, self.call_opts)
1244+
if not res then
1245+
if filter_out_no_retry_err(err) then
1246+
return nil, err
1247+
end
1248+
1249+
health_check.report_failure(self.conn.http_host)
1250+
1251+
local conn, new_err = choose_grpc_endpoint(self)
1252+
if not conn then
1253+
utils.log_info("failed to use next connection: ", new_err)
1254+
return nil, err
1255+
end
1256+
1257+
self.conn = conn
1258+
return nil, err
1259+
end
1260+
end
1261+
11711262
return self:convert_grpc_to_http_res(res), err
11721263
end
11731264

t/v3/add-auth.sh

100644100755
File mode changed.

0 commit comments

Comments
 (0)