@@ -25,6 +25,7 @@ import (
25
25
"k8s.io/client-go/tools/record"
26
26
"k8s.io/klog/v2"
27
27
utilnet "k8s.io/utils/net"
28
+ "sigs.k8s.io/knftables"
28
29
29
30
"github.com/ovn-org/libovsdb/client"
30
31
@@ -40,6 +41,7 @@ import (
40
41
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/egressservice"
41
42
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/linkmanager"
42
43
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/managementport"
44
+ nodenft "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/nftables"
43
45
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/ovspinning"
44
46
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/routemanager"
45
47
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/apbroute"
@@ -117,6 +119,9 @@ type DefaultNodeNetworkController struct {
117
119
// retry framework for endpoint slices, used for the removal of stale conntrack entries for services
118
120
retryEndpointSlices * retry.RetryFramework
119
121
122
+ // retry framework for nodes, used for updating routes/nftables rules for node PMTUD guarding
123
+ retryNodes * retry.RetryFramework
124
+
120
125
apbExternalRouteNodeController * apbroute.ExternalGatewayNodeController
121
126
122
127
networkManager networkmanager.Interface
@@ -181,12 +186,23 @@ func NewDefaultNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo, net
181
186
182
187
nc .initRetryFrameworkForNode ()
183
188
189
+ err = setupPMTUDNFTSets ()
190
+ if err != nil {
191
+ return nil , fmt .Errorf ("failed to setup PMTUD nftables sets: %w" , err )
192
+ }
193
+
194
+ err = setupPMTUDNFTChain ()
195
+ if err != nil {
196
+ return nil , fmt .Errorf ("failed to setup PMTUD nftables chain: %w" , err )
197
+ }
198
+
184
199
return nc , nil
185
200
}
186
201
187
202
// initRetryFrameworkForNode builds the retry frameworks owned by the default
// node network controller and stores them on the controller. One framework is
// created per watched resource type: namespaces, endpoint slices (used for the
// removal of stale conntrack entries for services) and nodes (used for node
// PMTUD guarding).
func (nc *DefaultNodeNetworkController) initRetryFrameworkForNode() {
	// Each framework is assigned as soon as it is created, in the original order.
	nc.retryNamespaces = nc.newRetryFrameworkNode(factory.NamespaceExGwType)
	nc.retryEndpointSlices = nc.newRetryFrameworkNode(factory.EndpointSliceForStaleConntrackRemovalType)
	nc.retryNodes = nc.newRetryFrameworkNode(factory.NodeType)
}
191
207
192
208
func (oc * DefaultNodeNetworkController ) shouldReconcileNetworkChange (old , new util.NetInfo ) bool {
@@ -1238,6 +1254,10 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error {
1238
1254
if err != nil {
1239
1255
return fmt .Errorf ("failed to watch endpointSlices: %w" , err )
1240
1256
}
1257
+ err = nc .WatchNodes ()
1258
+ if err != nil {
1259
+ return fmt .Errorf ("failed to watch nodes: %w" , err )
1260
+ }
1241
1261
}
1242
1262
1243
1263
if nc .healthzServer != nil {
@@ -1445,6 +1465,144 @@ func (nc *DefaultNodeNetworkController) WatchNamespaces() error {
1445
1465
return err
1446
1466
}
1447
1467
1468
+ func (nc * DefaultNodeNetworkController ) WatchNodes () error {
1469
+ _ , err := nc .retryNodes .WatchResource ()
1470
+ return err
1471
+ }
1472
+
1473
+ // addOrUpdateNode handles creating flows or nftables rules for each node to handle PMTUD
1474
+ func (nc * DefaultNodeNetworkController ) addOrUpdateNode (node * corev1.Node ) error {
1475
+ var nftElems []* knftables.Element
1476
+ var addrs []string
1477
+ for _ , address := range node .Status .Addresses {
1478
+ if address .Type != corev1 .NodeInternalIP {
1479
+ continue
1480
+ }
1481
+ nodeIP := net .ParseIP (address .Address )
1482
+ if nodeIP == nil {
1483
+ continue
1484
+ }
1485
+
1486
+ addrs = append (addrs , nodeIP .String ())
1487
+ klog .Infof ("Adding remote node %q, IP: %s to PMTUD blocking rules" , node .Name , nodeIP )
1488
+ if utilnet .IsIPv4 (nodeIP ) {
1489
+ nftElems = append (nftElems , & knftables.Element {
1490
+ Set : types .NFTNoPMTUDRemoteNodeIPsv4 ,
1491
+ Key : []string {nodeIP .String ()},
1492
+ })
1493
+ } else {
1494
+ nftElems = append (nftElems , & knftables.Element {
1495
+ Set : types .NFTNoPMTUDRemoteNodeIPsv6 ,
1496
+ Key : []string {nodeIP .String ()},
1497
+ })
1498
+ }
1499
+ }
1500
+
1501
+ gw := nc .Gateway .(* gateway )
1502
+ gw .openflowManager .updateBridgePMTUDFlowCache (getPMTUDKey (node .Name ), addrs )
1503
+
1504
+ if len (nftElems ) > 0 {
1505
+ if err := nodenft .UpdateNFTElements (nftElems ); err != nil {
1506
+ return fmt .Errorf ("unable to update NFT elements for node %q, error: %w" , node .Name , err )
1507
+ }
1508
+ }
1509
+
1510
+ return nil
1511
+ }
1512
+
1513
+ func removePMTUDNodeNFTRules (nodeIPs []net.IP ) error {
1514
+ var nftElems []* knftables.Element
1515
+ for _ , nodeIP := range nodeIPs {
1516
+ // Remove IPs from NFT sets
1517
+ if utilnet .IsIPv4 (nodeIP ) {
1518
+ nftElems = append (nftElems , & knftables.Element {
1519
+ Set : types .NFTNoPMTUDRemoteNodeIPsv4 ,
1520
+ Key : []string {nodeIP .String ()},
1521
+ })
1522
+ } else {
1523
+ nftElems = append (nftElems , & knftables.Element {
1524
+ Set : types .NFTNoPMTUDRemoteNodeIPsv6 ,
1525
+ Key : []string {nodeIP .String ()},
1526
+ })
1527
+ }
1528
+ }
1529
+ if len (nftElems ) > 0 {
1530
+ if err := nodenft .DeleteNFTElements (nftElems ); err != nil {
1531
+ return err
1532
+ }
1533
+ }
1534
+ return nil
1535
+ }
1536
+
1537
+ func (nc * DefaultNodeNetworkController ) deleteNode (node * corev1.Node ) {
1538
+ gw := nc .Gateway .(* gateway )
1539
+ gw .openflowManager .deleteFlowsByKey (getPMTUDKey (node .Name ))
1540
+ ipsToRemove := make ([]net.IP , 0 )
1541
+ for _ , address := range node .Status .Addresses {
1542
+ if address .Type != corev1 .NodeInternalIP {
1543
+ continue
1544
+ }
1545
+ nodeIP := net .ParseIP (address .Address )
1546
+ if nodeIP == nil {
1547
+ continue
1548
+ }
1549
+ ipsToRemove = append (ipsToRemove , nodeIP )
1550
+ }
1551
+
1552
+ klog .Infof ("Deleting NFT elements for node: %s" , node .Name )
1553
+ if err := removePMTUDNodeNFTRules (ipsToRemove ); err != nil {
1554
+ klog .Errorf ("Failed to delete nftables rules for PMTUD blocking for node %q: %v" , node .Name , err )
1555
+ }
1556
+ }
1557
+
1558
+ func (nc * DefaultNodeNetworkController ) syncNodes (objs []interface {}) error {
1559
+ var keepNFTSetElemsV4 , keepNFTSetElemsV6 []* knftables.Element
1560
+ var errors []error
1561
+ klog .Infof ("Starting node controller node sync" )
1562
+ start := time .Now ()
1563
+ for _ , obj := range objs {
1564
+ node , ok := obj .(* corev1.Node )
1565
+ if ! ok {
1566
+ klog .Errorf ("Spurious object in syncNodes: %v" , obj )
1567
+ continue
1568
+ }
1569
+ if node .Name == nc .name {
1570
+ continue
1571
+ }
1572
+ for _ , address := range node .Status .Addresses {
1573
+ if address .Type != corev1 .NodeInternalIP {
1574
+ continue
1575
+ }
1576
+ nodeIP := net .ParseIP (address .Address )
1577
+ if nodeIP == nil {
1578
+ continue
1579
+ }
1580
+
1581
+ // Remove IPs from NFT sets
1582
+ if utilnet .IsIPv4 (nodeIP ) {
1583
+ keepNFTSetElemsV4 = append (keepNFTSetElemsV4 , & knftables.Element {
1584
+ Set : types .NFTNoPMTUDRemoteNodeIPsv4 ,
1585
+ Key : []string {nodeIP .String ()},
1586
+ })
1587
+ } else {
1588
+ keepNFTSetElemsV6 = append (keepNFTSetElemsV6 , & knftables.Element {
1589
+ Set : types .NFTNoPMTUDRemoteNodeIPsv6 ,
1590
+ Key : []string {nodeIP .String ()},
1591
+ })
1592
+ }
1593
+ }
1594
+ }
1595
+ if err := recreateNFTSet (types .NFTNoPMTUDRemoteNodeIPsv4 , keepNFTSetElemsV4 ); err != nil {
1596
+ errors = append (errors , err )
1597
+ }
1598
+ if err := recreateNFTSet (types .NFTNoPMTUDRemoteNodeIPsv6 , keepNFTSetElemsV6 ); err != nil {
1599
+ errors = append (errors , err )
1600
+ }
1601
+
1602
+ klog .Infof ("Node controller node sync done. Time taken: %s" , time .Since (start ))
1603
+ return utilerrors .Join (errors ... )
1604
+ }
1605
+
1448
1606
// validateVTEPInterfaceMTU checks if the MTU of the interface that has ovn-encap-ip is big
1449
1607
// enough to carry the `config.Default.MTU` and the Geneve header. If the MTU is not big
1450
1608
// enough, it will return an error
@@ -1485,6 +1643,10 @@ func (nc *DefaultNodeNetworkController) validateVTEPInterfaceMTU() error {
1485
1643
return nil
1486
1644
}
1487
1645
1646
// getPMTUDKey returns the key under which a node's PMTUD flows are stored:
// the node name followed by the "_pmtud" suffix.
func getPMTUDKey(nodeName string) string {
	return nodeName + "_pmtud"
}
1649
+
1488
1650
// configureSvcRouteViaBridge delegates to configureSvcRouteViaInterface for the
// given bridge, passing the result of DummyNextHopIPs as the final argument.
func configureSvcRouteViaBridge(routeManager *routemanager.Controller, bridge string) error {
	nextHops := DummyNextHopIPs()
	return configureSvcRouteViaInterface(routeManager, bridge, nextHops)
}
0 commit comments