Skip to content

Commit a21509b

Browse files
committed
Rearrange egressip internals, add duplication tests
There should never be multiple HostSubnets or multiple NetNamespaces claiming the same egress IP, but if there are, we need to track them carefully so we don't get out sync with reality after things are fixed.
1 parent b10a138 commit a21509b

File tree

2 files changed

+295
-124
lines changed

2 files changed

+295
-124
lines changed

pkg/network/node/egressip.go

Lines changed: 166 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -20,22 +20,25 @@ import (
2020
)
2121

2222
type nodeEgress struct {
23-
nodeIP string
24-
25-
// requestedIPs are the EgressIPs listed on the node's HostSubnet
23+
nodeIP string
2624
requestedIPs sets.String
27-
// assignedIPs are the IPs actually in use on the node
28-
assignedIPs sets.String
2925
}
3026

3127
type namespaceEgress struct {
32-
vnid uint32
33-
34-
// requestedIP is the egress IP it wants (NetNamespace.EgressIPs[0])
28+
vnid uint32
3529
requestedIP string
36-
// assignedIP is an egress IP actually in use on nodeIP
37-
assignedIP string
38-
nodeIP string
30+
}
31+
32+
type egressIPInfo struct {
33+
ip string
34+
35+
nodes []*nodeEgress
36+
namespaces []*namespaceEgress
37+
38+
assignedNodeIP string
39+
assignedIPTablesMark string
40+
assignedVNID uint32
41+
blockedVNIDs map[uint32]bool
3942
}
4043

4144
type egressIPWatcher struct {
@@ -48,13 +51,9 @@ type egressIPWatcher struct {
4851
networkInformers networkinformers.SharedInformerFactory
4952
iptables *NodeIPTables
5053

51-
// from HostSubnets
52-
nodesByNodeIP map[string]*nodeEgress
53-
nodesByEgressIP map[string]*nodeEgress
54-
55-
// From NetNamespaces
56-
namespacesByVNID map[uint32]*namespaceEgress
57-
namespacesByEgressIP map[string]*namespaceEgress
54+
nodesByNodeIP map[string]*nodeEgress
55+
namespacesByVNID map[uint32]*namespaceEgress
56+
egressIPs map[string]*egressIPInfo
5857

5958
localEgressLink netlink.Link
6059
localEgressNet *net.IPNet
@@ -67,11 +66,9 @@ func newEgressIPWatcher(oc *ovsController, localIP string, masqueradeBit *int32)
6766
oc: oc,
6867
localIP: localIP,
6968

70-
nodesByNodeIP: make(map[string]*nodeEgress),
71-
nodesByEgressIP: make(map[string]*nodeEgress),
72-
73-
namespacesByVNID: make(map[uint32]*namespaceEgress),
74-
namespacesByEgressIP: make(map[string]*namespaceEgress),
69+
nodesByNodeIP: make(map[string]*nodeEgress),
70+
namespacesByVNID: make(map[uint32]*namespaceEgress),
71+
egressIPs: make(map[string]*egressIPInfo),
7572
}
7673
if masqueradeBit != nil {
7774
eip.masqueradeBit = 1 << uint32(*masqueradeBit)
@@ -106,6 +103,47 @@ func getMarkForVNID(vnid, masqueradeBit uint32) string {
106103
return fmt.Sprintf("0x%08x", vnid)
107104
}
108105

106+
func (eip *egressIPWatcher) ensureEgressIPInfo(egressIP string) *egressIPInfo {
107+
eg := eip.egressIPs[egressIP]
108+
if eg == nil {
109+
eg = &egressIPInfo{ip: egressIP}
110+
eip.egressIPs[egressIP] = eg
111+
}
112+
return eg
113+
}
114+
115+
func (eg *egressIPInfo) addNode(node *nodeEgress) {
116+
if len(eg.nodes) != 0 {
117+
utilruntime.HandleError(fmt.Errorf("Multiple nodes claiming EgressIP %q (nodes %q, %q)", eg.ip, node.nodeIP, eg.nodes[0].nodeIP))
118+
}
119+
eg.nodes = append(eg.nodes, node)
120+
}
121+
122+
func (eg *egressIPInfo) deleteNode(node *nodeEgress) {
123+
for i := range eg.nodes {
124+
if eg.nodes[i] == node {
125+
eg.nodes = append(eg.nodes[:i], eg.nodes[i+1:]...)
126+
return
127+
}
128+
}
129+
}
130+
131+
func (eg *egressIPInfo) addNamespace(ns *namespaceEgress) {
132+
if len(eg.namespaces) != 0 {
133+
utilruntime.HandleError(fmt.Errorf("Multiple namespaces claiming EgressIP %q (NetIDs %d, %d)", eg.ip, ns.vnid, eg.namespaces[0].vnid))
134+
}
135+
eg.namespaces = append(eg.namespaces, ns)
136+
}
137+
138+
func (eg *egressIPInfo) deleteNamespace(ns *namespaceEgress) {
139+
for i := range eg.namespaces {
140+
if eg.namespaces[i] == ns {
141+
eg.namespaces = append(eg.namespaces[:i], eg.namespaces[i+1:]...)
142+
return
143+
}
144+
}
145+
}
146+
109147
func (eip *egressIPWatcher) watchHostSubnets() {
110148
funcs := common.InformerFuncs(&networkapi.HostSubnet{}, eip.handleAddOrUpdateHostSubnet, eip.handleDeleteHostSubnet)
111149
eip.networkInformers.Network().InternalVersion().HostSubnets().Informer().AddEventHandler(funcs)
@@ -137,7 +175,6 @@ func (eip *egressIPWatcher) updateNodeEgress(nodeIP string, nodeEgressIPs []stri
137175
node = &nodeEgress{
138176
nodeIP: nodeIP,
139177
requestedIPs: sets.NewString(),
140-
assignedIPs: sets.NewString(),
141178
}
142179
eip.nodesByNodeIP[nodeIP] = node
143180
} else if len(nodeEgressIPs) == 0 {
@@ -148,89 +185,19 @@ func (eip *egressIPWatcher) updateNodeEgress(nodeIP string, nodeEgressIPs []stri
148185

149186
// Process new EgressIPs
150187
for _, ip := range node.requestedIPs.Difference(oldRequestedIPs).UnsortedList() {
151-
if oldNode := eip.nodesByEgressIP[ip]; oldNode != nil {
152-
utilruntime.HandleError(fmt.Errorf("Multiple nodes claiming EgressIP %q (nodes %q, %q)", ip, node.nodeIP, oldNode.nodeIP))
153-
continue
154-
}
155-
156-
eip.nodesByEgressIP[ip] = node
157-
eip.maybeAddEgressIP(ip)
188+
eg := eip.ensureEgressIPInfo(ip)
189+
eg.addNode(node)
190+
eip.syncEgressIP(eg)
158191
}
159192

160193
// Process removed EgressIPs
161194
for _, ip := range oldRequestedIPs.Difference(node.requestedIPs).UnsortedList() {
162-
if oldNode := eip.nodesByEgressIP[ip]; oldNode != node {
163-
// User removed a duplicate EgressIP
195+
eg := eip.egressIPs[ip]
196+
if eg == nil {
164197
continue
165198
}
166-
167-
eip.deleteEgressIP(ip)
168-
delete(eip.nodesByEgressIP, ip)
169-
}
170-
}
171-
172-
func (eip *egressIPWatcher) maybeAddEgressIP(egressIP string) {
173-
node := eip.nodesByEgressIP[egressIP]
174-
ns := eip.namespacesByEgressIP[egressIP]
175-
if ns == nil {
176-
return
177-
}
178-
179-
mark := getMarkForVNID(ns.vnid, eip.masqueradeBit)
180-
nodeIP := ""
181-
182-
if node != nil && !node.assignedIPs.Has(egressIP) {
183-
node.assignedIPs.Insert(egressIP)
184-
nodeIP = node.nodeIP
185-
if node.nodeIP == eip.localIP {
186-
if err := eip.assignEgressIP(egressIP, mark); err != nil {
187-
utilruntime.HandleError(fmt.Errorf("Error assigning Egress IP %q: %v", egressIP, err))
188-
nodeIP = ""
189-
}
190-
}
191-
}
192-
193-
if ns.assignedIP != egressIP || ns.nodeIP != nodeIP {
194-
ns.assignedIP = egressIP
195-
ns.nodeIP = nodeIP
196-
197-
err := eip.oc.SetNamespaceEgressViaEgressIP(ns.vnid, ns.nodeIP, mark)
198-
if err != nil {
199-
utilruntime.HandleError(fmt.Errorf("Error updating Namespace egress rules: %v", err))
200-
}
201-
}
202-
}
203-
204-
func (eip *egressIPWatcher) deleteEgressIP(egressIP string) {
205-
node := eip.nodesByEgressIP[egressIP]
206-
ns := eip.namespacesByEgressIP[egressIP]
207-
if node == nil || ns == nil {
208-
return
209-
}
210-
211-
mark := getMarkForVNID(ns.vnid, eip.masqueradeBit)
212-
if node.nodeIP == eip.localIP {
213-
if err := eip.releaseEgressIP(egressIP, mark); err != nil {
214-
utilruntime.HandleError(fmt.Errorf("Error releasing Egress IP %q: %v", egressIP, err))
215-
}
216-
node.assignedIPs.Delete(egressIP)
217-
}
218-
219-
if ns.assignedIP == egressIP {
220-
ns.assignedIP = ""
221-
ns.nodeIP = ""
222-
}
223-
224-
var err error
225-
if ns.requestedIP == "" {
226-
// Namespace no longer wants EgressIP
227-
err = eip.oc.SetNamespaceEgressNormal(ns.vnid)
228-
} else {
229-
// Namespace still wants EgressIP but no node provides it
230-
err = eip.oc.SetNamespaceEgressDropped(ns.vnid)
231-
}
232-
if err != nil {
233-
utilruntime.HandleError(fmt.Errorf("Error updating Namespace egress rules: %v", err))
199+
eg.deleteNode(node)
200+
eip.syncEgressIP(eg)
234201
}
235202
}
236203

@@ -266,45 +233,120 @@ func (eip *egressIPWatcher) updateNamespaceEgress(vnid uint32, egressIP string)
266233

267234
ns := eip.namespacesByVNID[vnid]
268235
if ns == nil {
236+
if egressIP == "" {
237+
return
238+
}
269239
ns = &namespaceEgress{vnid: vnid}
270240
eip.namespacesByVNID[vnid] = ns
241+
} else if egressIP == "" {
242+
delete(eip.namespacesByVNID, vnid)
271243
}
244+
272245
if ns.requestedIP == egressIP {
273246
return
274247
}
275-
if oldNS := eip.namespacesByEgressIP[egressIP]; oldNS != nil {
276-
utilruntime.HandleError(fmt.Errorf("Multiple NetNamespaces claiming EgressIP %q (NetIDs %d, %d)", egressIP, ns.vnid, oldNS.vnid))
277-
return
278-
}
279248

280-
if ns.assignedIP != "" {
281-
oldEgressIP := ns.assignedIP
282-
eip.deleteEgressIP(oldEgressIP)
283-
delete(eip.namespacesByEgressIP, oldEgressIP)
284-
ns.assignedIP = ""
285-
ns.nodeIP = ""
249+
if ns.requestedIP != "" {
250+
eg := eip.egressIPs[ns.requestedIP]
251+
if eg != nil {
252+
eg.deleteNamespace(ns)
253+
eip.syncEgressIP(eg)
254+
}
286255
}
256+
287257
ns.requestedIP = egressIP
288-
eip.namespacesByEgressIP[egressIP] = ns
289-
eip.maybeAddEgressIP(egressIP)
258+
if egressIP == "" {
259+
return
260+
}
261+
262+
eg := eip.ensureEgressIPInfo(egressIP)
263+
eg.addNamespace(ns)
264+
eip.syncEgressIP(eg)
290265
}
291266

292267
func (eip *egressIPWatcher) deleteNamespaceEgress(vnid uint32) {
293-
eip.Lock()
294-
defer eip.Unlock()
268+
eip.updateNamespaceEgress(vnid, "")
269+
}
295270

296-
ns := eip.namespacesByVNID[vnid]
297-
if ns == nil {
298-
return
271+
func (eip *egressIPWatcher) syncEgressIP(eg *egressIPInfo) {
272+
assignedNodeIPChanged := eip.syncEgressIPTablesState(eg)
273+
eip.syncEgressOVSState(eg, assignedNodeIPChanged)
274+
}
275+
276+
func (eip *egressIPWatcher) syncEgressIPTablesState(eg *egressIPInfo) bool {
277+
// The egressIPInfo should have an assigned node IP if and only if the
278+
// egress IP is active (ie, it is assigned to exactly 1 node and exactly
279+
// 1 namespace).
280+
egressIPActive := (len(eg.nodes) == 1 && len(eg.namespaces) == 1)
281+
assignedNodeIPChanged := false
282+
if egressIPActive && eg.assignedNodeIP != eg.nodes[0].nodeIP {
283+
eg.assignedNodeIP = eg.nodes[0].nodeIP
284+
eg.assignedIPTablesMark = getMarkForVNID(eg.namespaces[0].vnid, eip.masqueradeBit)
285+
assignedNodeIPChanged = true
286+
if eg.assignedNodeIP == eip.localIP {
287+
if err := eip.assignEgressIP(eg.ip, eg.assignedIPTablesMark); err != nil {
288+
utilruntime.HandleError(fmt.Errorf("Error assigning Egress IP %q: %v", eg.ip, err))
289+
eg.assignedNodeIP = ""
290+
}
291+
}
292+
} else if !egressIPActive && eg.assignedNodeIP != "" {
293+
if eg.assignedNodeIP == eip.localIP {
294+
if err := eip.releaseEgressIP(eg.ip, eg.assignedIPTablesMark); err != nil {
295+
utilruntime.HandleError(fmt.Errorf("Error releasing Egress IP %q: %v", eg.ip, err))
296+
}
297+
}
298+
eg.assignedNodeIP = ""
299+
eg.assignedIPTablesMark = ""
300+
assignedNodeIPChanged = true
301+
}
302+
return assignedNodeIPChanged
303+
}
304+
305+
func (eip *egressIPWatcher) syncEgressOVSState(eg *egressIPInfo, assignedNodeIPChanged bool) {
306+
var blockedVNIDs map[uint32]bool
307+
308+
// If multiple namespaces are assigned to the same EgressIP, we need to block
309+
// outgoing traffic from all of them.
310+
if len(eg.namespaces) > 1 {
311+
eg.assignedVNID = 0
312+
blockedVNIDs = make(map[uint32]bool)
313+
for _, ns := range eg.namespaces {
314+
blockedVNIDs[ns.vnid] = true
315+
if !eg.blockedVNIDs[ns.vnid] {
316+
err := eip.oc.SetNamespaceEgressDropped(ns.vnid)
317+
if err != nil {
318+
utilruntime.HandleError(fmt.Errorf("Error updating Namespace egress rules: %v", err))
319+
}
320+
}
321+
}
322+
}
323+
324+
// If we have, or had, a single egress namespace, then update the OVS flows if
325+
// something has changed
326+
var err error
327+
if len(eg.namespaces) == 1 && (eg.assignedVNID != eg.namespaces[0].vnid || assignedNodeIPChanged) {
328+
eg.assignedVNID = eg.namespaces[0].vnid
329+
delete(eg.blockedVNIDs, eg.assignedVNID)
330+
err = eip.oc.SetNamespaceEgressViaEgressIP(eg.assignedVNID, eg.assignedNodeIP, getMarkForVNID(eg.assignedVNID, eip.masqueradeBit))
331+
} else if len(eg.namespaces) == 0 && eg.assignedVNID != 0 {
332+
err = eip.oc.SetNamespaceEgressNormal(eg.assignedVNID)
333+
eg.assignedVNID = 0
334+
}
335+
if err != nil {
336+
utilruntime.HandleError(fmt.Errorf("Error updating Namespace egress rules: %v", err))
299337
}
300338

301-
if ns.assignedIP != "" {
302-
ns.requestedIP = ""
303-
egressIP := ns.assignedIP
304-
eip.deleteEgressIP(egressIP)
305-
delete(eip.namespacesByEgressIP, egressIP)
339+
// If we previously had blocked VNIDs, we need to unblock any that have been removed
340+
// from the duplicates list
341+
for vnid := range eg.blockedVNIDs {
342+
if !blockedVNIDs[vnid] {
343+
err := eip.oc.SetNamespaceEgressNormal(vnid)
344+
if err != nil {
345+
utilruntime.HandleError(fmt.Errorf("Error updating Namespace egress rules: %v", err))
346+
}
347+
}
306348
}
307-
delete(eip.namespacesByVNID, vnid)
349+
eg.blockedVNIDs = blockedVNIDs
308350
}
309351

310352
func (eip *egressIPWatcher) assignEgressIP(egressIP, mark string) error {

0 commit comments

Comments
 (0)