Skip to content

Commit d5e025d

Browse files
author
Aritra Basu
committed
vpp-manager: move IPv6 LL discovery after Linux tap up
This patch fixes intermittent DHCPv6/link-local punt loops caused by a startup race in IPv6 link-local (LL) discovery. LL discovery ran in configureVppUplinkInterface() before Linux had brought the tap UP, so the LL address was intermittently missing. Without the LL /128 route in the punt table, punted link-local packets matched fe80::/10 (ip6-link-local DPO), got redirected back to the per-interface LL FIB, and looped until the VPP recursion guard dropped them.

- Add configureIPv6LinkLocal() and call it from runVpp() after configureLinuxTap() (LinkSetUp) and before InterfaceAdminUp().
- Poll for the tap LL address with bounded retries after the tap is UP; if it is not found, return an error so runVpp() terminates VPP.
- Program LL-specific state in configureIPv6LinkLocal():
  * punt-table LL /128 route
  * LL address on uplink in VPP
  * ND proxy for LL
- Remove old LL handling from the previous setup path.

Signed-off-by: Aritra Basu <aritrbas@cisco.com>
1 parent a3a531d commit d5e025d

File tree

1 file changed

+84
-49
lines changed

1 file changed

+84
-49
lines changed

vpp-manager/vpp_runner.go

Lines changed: 84 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -177,19 +177,81 @@ func (v *VppRunner) configurePunt(tapSwIfIndex uint32, ifState config.LinuxInter
177177
return errors.Wrapf(err, "error adding vpp side routes for interface")
178178
}
179179
}
180-
if ifState.IPv6LinkLocal.IPNet != nil {
181-
err = v.vpp.RouteAdd(&types.Route{
182-
Table: common.PuntTableID,
183-
Dst: common.FullyQualified(ifState.IPv6LinkLocal.IP),
184-
Paths: []types.RoutePath{{
185-
Gw: ifState.IPv6LinkLocal.IP,
186-
SwIfIndex: tapSwIfIndex,
187-
}},
188-
})
180+
return nil
181+
}
182+
183+
// configureIPv6LinkLocal discovers the IPv6 link-local address for the tap
184+
// interface and configures the associated VPP routes and proxy entries.
185+
// This MUST be called AFTER configureLinuxTap() has brought the tap UP,
186+
// because the kernel only generates the link-local address on NETDEV_UP
187+
// (via addrconf_notify). Calling it earlier would race with address
188+
// generation and intermittently fail, leaving the punt table without a
189+
// /128 entry for the link-local address. Without that entry, punted
190+
// link-local traffic (e.g. DHCPv6 on UDP/546) matches the built-in
191+
// fe80::/10 → ip6-link-local DPO, which redirects packets back to the
192+
// per-interface link-local FIB, creating a receive → punt → redirect →
193+
// ip6-link-local → receive loop that VPP drops after 5 iterations.
194+
func (v *VppRunner) configureIPv6LinkLocal(ifSpec *config.UplinkInterfaceSpec, ifState *config.LinuxInterfaceState) error {
195+
if !ifState.HasNodeIP6() {
196+
return nil
197+
}
198+
199+
// Poll for the link-local address. The tap is already UP so the kernel
200+
// should assign it within a few seconds (typically < 1s, DAD may add ~1s).
201+
for i := uint32(0); i < *config.GetCalicoVppDebug().FetchV6LLntries; i++ {
202+
time.Sleep(time.Second)
203+
link, err := netlink.LinkByName(ifSpec.InterfaceName)
189204
if err != nil {
190-
return errors.Wrapf(err, "error adding vpp side routes for interface")
205+
log.WithError(err).Warnf("configureIPv6LinkLocal: cannot find interface %s", ifSpec.InterfaceName)
206+
continue
207+
}
208+
addresses, err := netlink.AddrList(link, netlink.FAMILY_V6)
209+
if err != nil {
210+
log.WithError(err).Warnf("configureIPv6LinkLocal: could not list v6 addresses on %s", ifSpec.InterfaceName)
211+
continue
191212
}
213+
for _, addr := range addresses {
214+
if addr.IP.IsLinkLocalUnicast() {
215+
log.Infof("configureIPv6LinkLocal: using link-local addr %s for %s",
216+
common.FullyQualified(addr.IP), ifSpec.InterfaceName)
217+
ifState.IPv6LinkLocal = addr
218+
goto found
219+
}
220+
}
221+
}
222+
223+
return errors.Errorf("could not find v6 LL address for %s after %ds",
224+
ifSpec.InterfaceName, *config.GetCalicoVppDebug().FetchV6LLntries)
225+
226+
found:
227+
// Add LL /128 route to punt table so that punted link-local traffic
228+
// reaches the host via tap instead of hitting fe80::/10 → ip6-link-local.
229+
err := v.vpp.RouteAdd(&types.Route{
230+
Table: common.PuntTableID,
231+
Dst: common.FullyQualified(ifState.IPv6LinkLocal.IP),
232+
Paths: []types.RoutePath{{
233+
Gw: ifState.IPv6LinkLocal.IP,
234+
SwIfIndex: ifState.TapSwIfIndex,
235+
}},
236+
})
237+
if err != nil {
238+
return errors.Wrapf(err, "error adding LL punt route for %s", ifSpec.InterfaceName)
239+
}
240+
241+
// Add LL address to the uplink interface
242+
err = v.vpp.AddInterfaceAddress(ifSpec.SwIfIndex, common.FullyQualified(ifState.IPv6LinkLocal.IP))
243+
if err != nil {
244+
return errors.Wrapf(err, "Error adding LL address %s to uplink interface %d",
245+
common.FullyQualified(ifState.IPv6LinkLocal.IP), ifSpec.SwIfIndex)
246+
}
247+
248+
// Enable ND proxy for the LL address
249+
err = v.vpp.EnableIP6NdProxy(ifState.TapSwIfIndex, ifState.IPv6LinkLocal.IP)
250+
if err != nil {
251+
return errors.Wrapf(err, "Error configuring ND proxy for LL address %s",
252+
ifState.IPv6LinkLocal.IP.String())
192253
}
254+
193255
return nil
194256
}
195257

@@ -588,34 +650,6 @@ func (v *VppRunner) configureVppUplinkInterface(
588650

589651
ifState.TapSwIfIndex = tapSwIfIndex
590652

591-
if ifState.HasNodeIP6() {
592-
// wait 5s for the interface creation in linux and fetch its LL address
593-
doublebreak:
594-
for i := uint32(0); i <= *config.GetCalicoVppDebug().FetchV6LLntries; i++ {
595-
time.Sleep(time.Second)
596-
link, err := netlink.LinkByName(ifSpec.InterfaceName)
597-
if err != nil {
598-
log.WithError(err).Warnf("cannot find interface %s", ifSpec.InterfaceName)
599-
continue
600-
}
601-
addresses, err := netlink.AddrList(link, netlink.FAMILY_V6)
602-
if err != nil {
603-
log.WithError(err).Warnf("could not find v6 address on link %s", ifSpec.InterfaceName)
604-
continue
605-
}
606-
for _, addr := range addresses { // addresses are only v6 here see above
607-
if addr.IP.IsLinkLocalUnicast() {
608-
log.Infof("Using link-local addr %s for %s", common.FullyQualified(addr.IP), ifSpec.InterfaceName)
609-
ifState.IPv6LinkLocal = addr
610-
break doublebreak
611-
}
612-
}
613-
if i == *config.GetCalicoVppDebug().FetchV6LLntries-1 {
614-
log.Warnf("Could not find v6 LL address for %s after %ds", ifSpec.InterfaceName, *config.GetCalicoVppDebug().FetchV6LLntries)
615-
}
616-
}
617-
}
618-
619653
vrfs, err := v.setupTapVRF(&ifSpec, ifState, tapSwIfIndex)
620654
if err != nil {
621655
return errors.Wrap(err, "error configuring VRF for tap")
@@ -662,17 +696,6 @@ func (v *VppRunner) configureVppUplinkInterface(
662696
}
663697
}
664698

665-
if ifState.IPv6LinkLocal.IPNet != nil {
666-
err = v.vpp.AddInterfaceAddress(ifSpec.SwIfIndex, common.FullyQualified(ifState.IPv6LinkLocal.IP))
667-
if err != nil {
668-
return errors.Wrapf(err, "Error adding address %s to uplink interface: %d", common.FullyQualified(ifState.IPv6LinkLocal.IP), ifSpec.SwIfIndex)
669-
}
670-
err = v.vpp.EnableIP6NdProxy(tapSwIfIndex, ifState.IPv6LinkLocal.IP)
671-
if err != nil {
672-
return errors.Wrapf(err, "Error configuring nd proxy for address %s", ifState.IPv6LinkLocal.IP.String())
673-
}
674-
}
675-
676699
/*
677700
* Add ND proxy for IPv6 gateway addresses.
678701
* Without ND proxy for gateway, host's NS for gateway is dropped with "neighbor
@@ -1079,6 +1102,18 @@ func (v *VppRunner) runVpp() (err error) {
10791102
}
10801103
}
10811104

1105+
// Discover IPv6 link-local addresses and configure punt table routes,
1106+
// uplink addresses, and ND proxy. This runs after configureLinuxTap()
1107+
// has brought the taps UP, so the kernel has generated the LL addresses.
1108+
for idx := 0; idx < len(v.params.UplinksSpecs); idx++ {
1109+
err = v.configureIPv6LinkLocal(&v.params.UplinksSpecs[idx], v.conf[idx])
1110+
if err != nil {
1111+
terminateVpp("Error configuring IPv6 link-local for %s: %v", v.params.UplinksSpecs[idx].InterfaceName, err)
1112+
<-vppDeadChan
1113+
return errors.Wrapf(err, "Error configuring IPv6 link-local for %s", v.params.UplinksSpecs[idx].InterfaceName)
1114+
}
1115+
}
1116+
10821117
// Set the TAP interfaces admin-up in VPP last, after all Linux-side
10831118
// configuration is complete.
10841119
for idx := 0; idx < len(v.params.UplinksSpecs); idx++ {

0 commit comments

Comments
 (0)