Skip to content

Add proper error message to OVS health check #17890

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions pkg/network/node/healthcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func waitForOVS(network, addr string) error {
// from the OVS server and then checks healthFn, and one that periodically checks
// healthFn. If healthFn returns false in either of these two cases while the OVS
// server is responsive the node process will terminate.
func runOVSHealthCheck(network, addr string, healthFn func() bool) {
func runOVSHealthCheck(network, addr string, healthFn func() error) {
// this loop holds an open socket connection to OVS until it times out, then
// checks for health
go utilwait.Until(func() {
Expand All @@ -67,8 +67,8 @@ func runOVSHealthCheck(network, addr string, healthFn func() bool) {
glog.V(2).Infof("SDN healthcheck unable to ping OVS server: %v", err)
return false, nil
}
if !healthFn() {
return false, fmt.Errorf("OVS health check failed")
if err := healthFn(); err != nil {
return false, fmt.Errorf("OVS health check failed: %v", err)
}
return true, nil
})
Expand All @@ -92,7 +92,7 @@ func runOVSHealthCheck(network, addr string, healthFn func() bool) {
glog.V(2).Infof("SDN healthcheck unable to ping OVS server: %v", err)
return
}
if !healthFn() {
if err := healthFn(); err != nil {
glog.Fatalf("SDN healthcheck detected unhealthy OVS server, restarting: %v", err)
}
glog.V(4).Infof("SDN healthcheck succeeded")
Expand Down
25 changes: 13 additions & 12 deletions pkg/network/node/sdn_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package node

import (
"errors"
"fmt"
"net"
"time"
Expand All @@ -20,17 +21,17 @@ import (
"github.com/vishvananda/netlink"
)

func (plugin *OsdnNode) alreadySetUp(localSubnetGatewayCIDR string, clusterNetworkCIDR []string) bool {
func (plugin *OsdnNode) alreadySetUp(localSubnetGatewayCIDR string, clusterNetworkCIDR []string) error {
var found bool

l, err := netlink.LinkByName(Tun0)
if err != nil {
return false
return err
}

addrs, err := netlink.AddrList(l, netlink.FAMILY_V4)
if err != nil {
return false
return err
}
found = false
for _, addr := range addrs {
Expand All @@ -40,12 +41,12 @@ func (plugin *OsdnNode) alreadySetUp(localSubnetGatewayCIDR string, clusterNetwo
}
}
if !found {
return false
return errors.New("local subnet gateway CIDR not found")
}

routes, err := netlink.RouteList(l, netlink.FAMILY_V4)
if err != nil {
return false
return err
}
for _, clusterCIDR := range clusterNetworkCIDR {
found = false
Expand All @@ -56,15 +57,15 @@ func (plugin *OsdnNode) alreadySetUp(localSubnetGatewayCIDR string, clusterNetwo
}
}
if !found {
return false
return errors.New("cluster CIDR not found")
}
}

if !plugin.oc.AlreadySetUp() {
return false
return errors.New("plugin is not setup")
}

return true
return nil
}

func deleteLocalSubnetRoute(device, localSubnetCIDR string) {
Expand Down Expand Up @@ -96,7 +97,7 @@ func deleteLocalSubnetRoute(device, localSubnetCIDR string) {
})

if err != nil {
glog.Errorf("Error removing %s route from dev %s: %v; if the route appears later it will not be deleted.", localSubnetCIDR, device, err)
glog.Errorf("error removing %s route from dev %s: %v; if the route appears later it will not be deleted.", localSubnetCIDR, device, err)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Our convention is user returned errors start with lower case and log messages start with upper case. In this case 'Error removing...' is fine as this is a log msg.

}
}

Expand Down Expand Up @@ -133,8 +134,8 @@ func (plugin *OsdnNode) SetupSDN() (bool, error) {
}

var changed bool
if plugin.alreadySetUp(gwCIDR, clusterNetworkCIDRs) {
glog.V(5).Infof("[SDN setup] no SDN setup required")
if err := plugin.alreadySetUp(gwCIDR, clusterNetworkCIDRs); err == nil {
glog.V(5).Infof("[SDN setup] no SDN setup required: %v", err)
} else {
glog.Infof("[SDN setup] full SDN setup required")
if err := plugin.setup(clusterNetworkCIDRs, localSubnetCIDR, localSubnetGateway, gwCIDR); err != nil {
Expand All @@ -145,7 +146,7 @@ func (plugin *OsdnNode) SetupSDN() (bool, error) {

// TODO: make it possible to safely reestablish node configuration after restart
// If OVS goes down and fails the health check, restart the entire process
healthFn := func() bool { return plugin.alreadySetUp(gwCIDR, clusterNetworkCIDRs) }
healthFn := func() error { return plugin.alreadySetUp(gwCIDR, clusterNetworkCIDRs) }
runOVSHealthCheck(ovsDialDefaultNetwork, ovsDialDefaultAddress, healthFn)

return changed, nil
Expand Down