Skip to content

Commit 1b8ade7

Browse files
author
OpenShift Bot
authored
Merge pull request #14790 from smarterclayton/router_metrics_port
Merged by openshift-bot
2 parents 75c6d6e + 778c6d4 commit 1b8ade7

File tree

8 files changed

+164
-30
lines changed

8 files changed

+164
-30
lines changed

images/router/haproxy/conf/haproxy-config.template

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,11 @@ defaults
140140
{{- end }}
141141
{{- end }}
142142

143+
{{ if (gt .StatsPort -1) }}
143144
{{ if (gt .StatsPort 0) }}
144-
listen stats :{{.StatsPort}}
145+
listen stats :{{.StatsPort}}
145146
{{- else }}
146-
listen stats :1936
147+
listen stats :1936
147148
{{- end }}
148149
mode http
149150
# Health check monitoring uri.
@@ -158,6 +159,7 @@ defaults
158159
stats uri /
159160
stats auth {{.StatsUser}}:{{.StatsPassword}}
160161
{{- end }}
162+
{{- end }}
161163

162164
{{ if .BindPorts -}}
163165
frontend public
@@ -166,6 +168,10 @@ frontend public
166168
tcp-request inspect-delay 5s
167169
tcp-request content accept if HTTP
168170

171+
{{- if (eq .StatsPort -1) }}
172+
monitor-uri /_______internal_router_healthz
173+
{{- end }}
174+
169175
# check if we need to redirect/force using https.
170176
acl secure_redirect base,map_reg(/var/lib/haproxy/conf/os_route_http_redirect.map) -m found
171177
redirect scheme https if secure_redirect

images/router/haproxy/reload-haproxy

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@ readonly numeric_re='^[0-9]+$'
1212

1313
function haproxyHealthCheck() {
1414
local wait_time=${MAX_RELOAD_WAIT_TIME:-$max_wait_time}
15-
local port=${STATS_PORT:-"1936"}
15+
local port=${ROUTER_SERVICE_HTTP_PORT:-"80"}
16+
local url="http://localhost:${port}"
1617
local retries=0
1718
local start_ts=$(date +"%s")
19+
local proxy_proto="${ROUTER_USE_PROXY_PROTOCOL-}"
1820

1921
if ! [[ $wait_time =~ $numeric_re ]]; then
2022
echo " - Invalid max reload wait time, using default $max_wait_time ..."
@@ -23,18 +25,39 @@ function haproxyHealthCheck() {
2325

2426
local end_ts=$((start_ts + wait_time))
2527

26-
local proxy_proto="${ROUTER_USE_PROXY_PROTOCOL:-FALSE}"
27-
echo " - Proxy protocol '${proxy_proto}'. Checking HAProxy /healthz on port $port ..."
28+
# test with proxy protocol on
29+
if [[ "${proxy_proto}" == "TRUE" || "${proxy_proto}" == "true" ]]; then
30+
echo " - Proxy protocol on, checking ${url} ..."
31+
while true; do
32+
local statusline=$(echo $'PROXY UNKNOWN\r\nGET / HTTP/1.1\r\n' | socat tcp-connect:localhost:${port} stdio | head -1)
33+
34+
if [[ "$statusline" == *" 503 "* ]]; then
35+
echo " - Health check ok : $retries retry attempt(s)."
36+
return 0
37+
fi
38+
39+
if [ $(date +"%s") -ge $end_ts ]; then
40+
echo " - Exceeded max wait time ($wait_time) in health check - $retries retry attempt(s)."
41+
return 1
42+
fi
43+
44+
sleep 0.5
45+
retries=$((retries + 1))
46+
done
47+
return 0
48+
fi
49+
50+
echo " - Checking ${url} ..."
2851
while true; do
29-
local httpcode=$(curl $timeout_opts -s -o /dev/null -I -w "%{http_code}" http://localhost:${port}/healthz)
52+
local httpcode=$(curl $timeout_opts -s -o /dev/null -I -w "%{http_code}" ${url})
3053

31-
if [ "$httpcode" == "200" ]; then
32-
echo " - HAProxy port $port health check ok : $retries retry attempt(s)."
54+
if [ "$httpcode" == "503" ]; then
55+
echo " - Health check ok : $retries retry attempt(s)."
3356
return 0
3457
fi
3558

3659
if [ $(date +"%s") -ge $end_ts ]; then
37-
echo " - Exceeded max wait time ($wait_time) in HAProxy health check - $retries retry attempt(s)."
60+
echo " - Exceeded max wait time ($wait_time) in health check - $retries retry attempt(s)."
3861
return 1
3962
fi
4063

pkg/cmd/admin/router/router.go

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -682,13 +682,8 @@ func RunCmdRouter(f *clientcmd.Factory, cmd *cobra.Command, out, errout io.Write
682682
}
683683
// automatically start the internal metrics agent if we are handling a known type
684684
if cfg.Type == "haproxy-router" {
685-
env["ROUTER_LISTEN_ADDR"] = fmt.Sprintf("0.0.0.0:%d", defaultStatsPort-1)
685+
env["ROUTER_LISTEN_ADDR"] = fmt.Sprintf("0.0.0.0:%d", cfg.StatsPort)
686686
env["ROUTER_METRICS_TYPE"] = "haproxy"
687-
ports = append(ports, kapi.ContainerPort{
688-
Name: "router-stats",
689-
ContainerPort: int32(defaultStatsPort - 1),
690-
Protocol: kapi.ProtocolTCP,
691-
})
692687
}
693688
env.Add(secretEnv)
694689
if len(defaultCert) > 0 {
@@ -803,9 +798,9 @@ func RunCmdRouter(f *clientcmd.Factory, cmd *cobra.Command, out, errout io.Write
803798
t.Annotations = make(map[string]string)
804799
}
805800
t.Annotations["prometheus.io/scrape"] = "true"
806-
t.Annotations["prometheus.io/port"] = "1935"
807-
t.Annotations["prometheus.io/username"] = cfg.StatsUsername
808-
t.Annotations["prometheus.io/password"] = cfg.StatsPassword
801+
t.Annotations["prometheus.io/port"] = "1936"
802+
t.Annotations["prometheus.openshift.io/username"] = cfg.StatsUsername
803+
t.Annotations["prometheus.openshift.io/password"] = cfg.StatsPassword
809804
t.Spec.ClusterIP = clusterIP
810805
for j, servicePort := range t.Spec.Ports {
811806
for _, targetPort := range ports {

pkg/cmd/infra/router/router.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ func (o *RouterSelection) Bind(flag *pflag.FlagSet) {
7575
flag.BoolVar(&o.AllowWildcardRoutes, "allow-wildcard-routes", cmdutil.Env("ROUTER_ALLOW_WILDCARD_ROUTES", "") == "true", "Allow wildcard host names for routes")
7676
flag.BoolVar(&o.DisableNamespaceOwnershipCheck, "disable-namespace-ownership-check", cmdutil.Env("ROUTER_DISABLE_NAMESPACE_OWNERSHIP_CHECK", "") == "true", "Disables the namespace ownership checks for a route host with different paths or for overlapping host names in the case of wildcard routes. Please be aware that if namespace ownership checks are disabled, routes in a different namespace can use this mechanism to 'steal' sub-paths for existing domains. This is only safe if route creation privileges are restricted, or if all the users can be trusted.")
7777
flag.BoolVar(&o.EnableIngress, "enable-ingress", cmdutil.Env("ROUTER_ENABLE_INGRESS", "") == "true", "Enable configuration via ingress resources")
78-
flag.StringVar(&o.ListenAddr, "listen-addr", cmdutil.Env("ROUTER_LISTEN_ADDR", ""), "The name of an interface to listen on to expose metrics and health checking. If not specified, will not listen.")
78+
flag.StringVar(&o.ListenAddr, "listen-addr", cmdutil.Env("ROUTER_LISTEN_ADDR", ""), "The name of an interface to listen on to expose metrics and health checking. If not specified, will not listen. Overrides stats port.")
7979
}
8080

8181
// RouteSelectionFunc returns a func that identifies the host for a route.

pkg/cmd/infra/router/template.go

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package router
33
import (
44
"errors"
55
"fmt"
6+
"net"
7+
"net/url"
68
"os"
79
"strconv"
810
"strings"
@@ -116,7 +118,7 @@ type RouterStats struct {
116118
}
117119

118120
func (o *RouterStats) Bind(flag *pflag.FlagSet) {
119-
flag.StringVar(&o.StatsPortString, "stats-port", util.Env("STATS_PORT", ""), "If the underlying router implementation can provide statistics this is a hint to expose it on this port.")
121+
flag.StringVar(&o.StatsPortString, "stats-port", util.Env("STATS_PORT", ""), "If the underlying router implementation can provide statistics this is a hint to expose it on this port. Ignored if listen-addr is specified.")
120122
flag.StringVar(&o.StatsPassword, "stats-password", util.Env("STATS_PASSWORD", ""), "If the underlying router implementation can provide statistics this is the requested password for auth.")
121123
flag.StringVar(&o.StatsUsername, "stats-user", util.Env("STATS_USERNAME", ""), "If the underlying router implementation can provide statistics this is the requested username for auth.")
122124
}
@@ -179,6 +181,22 @@ func (o *TemplateRouterOptions) Complete() error {
179181
}
180182
o.StatsPort = statsPort
181183
}
184+
if len(o.ListenAddr) > 0 {
185+
_, port, err := net.SplitHostPort(o.ListenAddr)
186+
if err != nil {
187+
return fmt.Errorf("listen-addr is not valid: %v", err)
188+
}
189+
// stats port on listen-addr overrides stats port argument
190+
statsPort, err := strconv.Atoi(port)
191+
if err != nil {
192+
return fmt.Errorf("listen-addr port is not valid: %v", err)
193+
}
194+
o.StatsPort = statsPort
195+
} else {
196+
if o.StatsPort != 0 {
197+
o.ListenAddr = fmt.Sprintf("0.0.0.0:%d", o.StatsPort)
198+
}
199+
}
182200

183201
if nsecs := int(o.ReloadInterval.Seconds()); nsecs < 1 {
184202
return fmt.Errorf("invalid reload interval: %v - must be a positive duration", nsecs)
@@ -222,9 +240,10 @@ func (o *TemplateRouterOptions) Validate() error {
222240

223241
// Run launches a template router using the provided options. It never exits.
224242
func (o *TemplateRouterOptions) Run() error {
225-
var reloadCallbacks []func()
243+
statsPort := o.StatsPort
244+
226245
switch {
227-
case o.MetricsType == "haproxy" && len(o.ListenAddr) > 0:
246+
case o.MetricsType == "haproxy":
228247
if len(o.StatsUsername) == 0 || len(o.StatsPassword) == 0 {
229248
glog.Warningf("Metrics were requested but no username or password has been provided - the metrics endpoint will not be accessible to prevent accidental security breaches")
230249
}
@@ -266,6 +285,7 @@ func (o *TemplateRouterOptions) Run() error {
266285
exported = append(exported, i)
267286
}
268287
}
288+
269289
_, err := haproxy.NewPrometheusCollector(haproxy.PrometheusOptions{
270290
// Only template router customizers who alter the image should need this
271291
ScrapeURI: util.Env("ROUTER_METRICS_HAPROXY_SCRAPE_URI", ""),
@@ -279,23 +299,32 @@ func (o *TemplateRouterOptions) Run() error {
279299
if err != nil {
280300
return err
281301
}
282-
//reloadCallbacks = append(reloadCallbacks, e.CollectNow)
283-
}
284-
if len(o.ListenAddr) > 0 {
285-
metrics.Listen(o.ListenAddr, o.StatsUsername, o.StatsPassword)
302+
303+
// Metrics will handle healthz on the stats port, and instruct the template router to disable stats completely.
304+
// The underlying router must provide a custom health check if customized which will be called into.
305+
statsPort = -1
306+
httpURL := util.Env("ROUTER_METRICS_READY_HTTP_URL", fmt.Sprintf("http://%s:%s/_______internal_router_healthz", "localhost", util.Env("ROUTER_SERVICE_HTTP_PORT", "80")))
307+
u, err := url.Parse(httpURL)
308+
if err != nil {
309+
return fmt.Errorf("ROUTER_METRICS_READY_HTTP_URL must be a valid URL or empty: %v", err)
310+
}
311+
check := metrics.HTTPBackendAvailable(u)
312+
if useProxy := util.Env("ROUTER_USE_PROXY_PROTOCOL", ""); useProxy == "true" || useProxy == "TRUE" {
313+
check = metrics.ProxyProtocolHTTPBackendAvailable(u)
314+
}
315+
metrics.Listen(o.ListenAddr, o.StatsUsername, o.StatsPassword, check)
286316
}
287317

288318
pluginCfg := templateplugin.TemplatePluginConfig{
289319
WorkingDir: o.WorkingDir,
290320
TemplatePath: o.TemplateFile,
291321
ReloadScriptPath: o.ReloadScript,
292322
ReloadInterval: o.ReloadInterval,
293-
ReloadCallbacks: reloadCallbacks,
294323
DefaultCertificate: o.DefaultCertificate,
295324
DefaultCertificatePath: o.DefaultCertificatePath,
296325
DefaultCertificateDir: o.DefaultCertificateDir,
297326
DefaultDestinationCAPath: o.DefaultDestinationCAPath,
298-
StatsPort: o.StatsPort,
327+
StatsPort: statsPort,
299328
StatsUsername: o.StatsUsername,
300329
StatsPassword: o.StatsPassword,
301330
PeerService: o.RouterService,

pkg/router/metrics/health.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package metrics
2+
3+
import (
4+
"bufio"
5+
"fmt"
6+
"io"
7+
"io/ioutil"
8+
"net"
9+
"net/http"
10+
"net/url"
11+
"time"
12+
13+
"github.com/golang/glog"
14+
15+
"k8s.io/apiserver/pkg/server/healthz"
16+
"k8s.io/kubernetes/pkg/probe"
17+
probehttp "k8s.io/kubernetes/pkg/probe/http"
18+
)
19+
20+
var errBackend = fmt.Errorf("backend reported failure")
21+
22+
// HTTPBackendAvailable returns a healthz check that verifies a backend responds to a GET to
23+
// the provided URL with 2xx or 3xx response.
24+
func HTTPBackendAvailable(u *url.URL) healthz.HealthzChecker {
25+
p := probehttp.New()
26+
return healthz.NamedCheck("backend-http", func(r *http.Request) error {
27+
result, _, err := p.Probe(u, nil, 2*time.Second)
28+
if err != nil {
29+
return err
30+
}
31+
if result != probe.Success {
32+
return errBackend
33+
}
34+
return nil
35+
})
36+
}
37+
38+
// ProxyProtocolHTTPBackendAvailable returns a healthz check that verifies a backend supporting
39+
// the HAProxy PROXY protocol responds to a GET to the provided URL with 2xx or 3xx response.
40+
func ProxyProtocolHTTPBackendAvailable(u *url.URL) healthz.HealthzChecker {
41+
dialer := &net.Dialer{
42+
Timeout: 2 * time.Second,
43+
DualStack: true,
44+
}
45+
return healthz.NamedCheck("backend-proxy-http", func(r *http.Request) error {
46+
conn, err := dialer.Dial("tcp", u.Host)
47+
if err != nil {
48+
return err
49+
}
50+
conn.SetDeadline(time.Now().Add(2 * time.Second))
51+
br := bufio.NewReader(conn)
52+
if _, err := conn.Write([]byte("PROXY UNKNOWN\r\n")); err != nil {
53+
return err
54+
}
55+
req := &http.Request{Method: "GET", URL: u, Proto: "HTTP/1.1", ProtoMajor: 1, ProtoMinor: 1}
56+
if err := req.Write(conn); err != nil {
57+
return err
58+
}
59+
res, err := http.ReadResponse(br, req)
60+
if err != nil {
61+
return err
62+
}
63+
64+
// read full body
65+
defer res.Body.Close()
66+
if _, err := io.Copy(ioutil.Discard, res.Body); err != nil {
67+
glog.V(4).Infof("Error discarding probe body contents: %v", err)
68+
}
69+
70+
if res.StatusCode < http.StatusOK && res.StatusCode >= http.StatusBadRequest {
71+
glog.V(4).Infof("Probe failed for %s, Response: %v", u.String(), res)
72+
return errBackend
73+
}
74+
glog.V(4).Infof("Probe succeeded for %s, Response: %v", u.String(), res)
75+
return nil
76+
})
77+
}

pkg/router/metrics/metrics.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@ import (
1313
// Listen starts a server for health, metrics, and profiling on the provided listen port.
1414
// It will terminate the process if the server fails. Metrics and profiling are only exposed
1515
// if username and password are provided and the user's input matches.
16-
func Listen(listenAddr string, username, password string) {
16+
func Listen(listenAddr string, username, password string, checks ...healthz.HealthzChecker) {
1717
go func() {
1818
mux := http.NewServeMux()
19-
healthz.InstallHandler(mux)
19+
healthz.InstallHandler(mux, checks...)
2020

2121
// TODO: exclude etcd and other unused metrics
2222

test/integration/router_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1673,6 +1673,10 @@ func TestRouterBindsPortsAfterSync(t *testing.T) {
16731673
err := wait.Poll(time.Millisecond*100, time.Duration(reloadInterval)*2*time.Second, func() (bool, error) {
16741674
_, err := getRoute(routeAddress, routeAddress, scheme, nil, "")
16751675
lastErr = nil
1676+
1677+
if err != nil && strings.Contains(err.Error(), "connection refused") {
1678+
err = ErrUnavailable
1679+
}
16761680
switch err {
16771681
case ErrUnavailable:
16781682
return true, nil

0 commit comments

Comments
 (0)