Patch looks good to me. Applied.

Thanks for that.


Simon.

On 08.05.2026 22:46, Sagie D. wrote:
Declaring struct iovec buff as static indeed resolves the memory
ballooning issue, and is the better approach because it minimizes the
heap fragmentation, as you pointed out.

Following is a patch that adds IPv6 neighbor support. Note the diff is
against the previous bpf.c version (without the static declarations).

------------------------------------------------------

The arp_enumerate() function (for *BSD) is extended into a per-family
helper arp_enumerate_family(), called sequentially for AF_INET and
AF_INET6, allowing the NDP neighbour cache to be enumerated alongside
the ARP table. An empty table for either family is treated as vacuous
success rather than an error. Both tables are acquired in *BSD via
PF_ROUTE sysctl calls that return raw kernel structures; consequently,
for IPv6, the sockaddr_in6 peer address extracted from them has an
embedded link-local scope ID. This ID is extracted into sin6_scope_id,
and bytes 2-3 of the address are then cleared, per the KAME API
contract.

Signed-off-by: Sagie D.
---
diff --git a/src/bpf.c b/src/bpf.c
index dd67735..8543b26 100644
--- a/src/bpf.c
+++ b/src/bpf.c
@@ -47,56 +47,84 @@ static union all_addr del_addr;

  #if defined(HAVE_BSD_NETWORK) && !defined(__APPLE__)

-int arp_enumerate(void *parm, callback_t callback)
+static int arp_enumerate_family(int family, void *parm, callback_t callback)
  {
    int mib[6];
    size_t needed;
    char *next;
    struct rt_msghdr *rtm;
-  struct sockaddr_inarp *sin2;
    struct sockaddr_dl *sdl;
-  struct iovec buff;
+  static struct iovec buff = { NULL, 0 };
    int rc;

-  buff.iov_base = NULL;
-  buff.iov_len = 0;
-
    mib[0] = CTL_NET;
    mib[1] = PF_ROUTE;
    mib[2] = 0;
-  mib[3] = AF_INET;
+  mib[3] = family;
    mib[4] = NET_RT_FLAGS;
  #ifdef RTF_LLINFO
    mib[5] = RTF_LLINFO;
  #else
    mib[5] = 0;
-#endif
+#endif
+
    if (sysctl(mib, 6, NULL, &needed, NULL, 0) == -1 || needed == 0)
-    return 0;
+    return 1;  /* not a failure: unsupported or empty table */

-  while (1)
+  while (1)
      {
        if (!expand_buf(&buff, needed))
-    return 0;
+        return 0;
        if ((rc = sysctl(mib, 6, buff.iov_base, &needed, NULL, 0)) == 0 ||
-      errno != ENOMEM)
-    break;
+          errno != ENOMEM)
+        break;
        needed += needed / 8;
      }
+
    if (rc == -1)
      return 0;
-
-  for (next = buff.iov_base ; next < (char *)buff.iov_base + needed;
next += rtm->rtm_msglen)
+
+  for (next = buff.iov_base; next < (char *)buff.iov_base + needed;
next += rtm->rtm_msglen)
      {
        rtm = (struct rt_msghdr *)next;
-      sin2 = (struct sockaddr_inarp *)(rtm + 1);
-      sdl = (struct sockaddr_dl *)((char *)sin2 + SA_SIZE(sin2));
-      if (!callback.af_unspec(AF_INET, &sin2->sin_addr, LLADDR(sdl),
sdl->sdl_alen, parm))
-    return 0;
+
+      if (family == AF_INET)
+        {
+          struct sockaddr_inarp *sin2 = (struct sockaddr_inarp *)(rtm + 1);
+          sdl = (struct sockaddr_dl *)((char *)sin2 + SA_SIZE(sin2));
+          if (!callback.af_unspec(AF_INET, &sin2->sin_addr,
+                                  LLADDR(sdl), sdl->sdl_alen, parm))
+            return 0;
+        }
+      else
+        {
+          struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(rtm + 1);
+          sdl = (struct sockaddr_dl *)((char *)sin6 + SA_SIZE(sin6));
+          if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
+            {
+              /* PF_ROUTE sysctl returns raw kernel structures with
the interface
+                 index embedded in bytes 2-3 of link-local addresses.
Extract it
+                 into sin6_scope_id per the KAME API contract before
clearing. */
+              sin6->sin6_scope_id =
((uint32_t)(sin6->sin6_addr.s6_addr[2]) << 8) |
+                                    sin6->sin6_addr.s6_addr[3];
+              sin6->sin6_addr.s6_addr[2] = 0;
+              sin6->sin6_addr.s6_addr[3] = 0;
+            }
+          if (!callback.af_unspec(AF_INET6, &sin6->sin6_addr,
+                                  LLADDR(sdl), sdl->sdl_alen, parm))
+            return 0;
+        }
      }

    return 1;
  }
+
+static int arp_enumerate(void *parm, callback_t callback)
+{
+  if (!arp_enumerate_family(AF_INET, parm, callback))
+    return 0;
+  return arp_enumerate_family(AF_INET6, parm, callback);
+}
  #endif /* defined(HAVE_BSD_NETWORK) && !defined(__APPLE__) */



On Mon, 4 May 2026 at 18:45, Simon Kelley <[email protected]> wrote:

Good catch.

That code has been there eating memory for 15 years, and was added just
before the dnsmasq git repository started, so how it ended up like that
is a bit of a mystery.

The code looks like it's using a common design pattern in dnsmasq, where
the buffer is stored in a long-lived iovec and expand_buf() only does
anything if the existing buffer is too small. This avoids lots of
malloc()/free() calls in hot code paths and resulting heap
fragmentation. The only problem is that the iovec in this case is not
long-lived. My guess is that this code got copied from elsewhere, and
the importance of that detail was missed.

The fix is to declare struct iovec buff as static. Then the buffer
becomes long-lived and all the rest of the code, without any free()
calls, makes sense.

I just committed

https://thekelleys.org.uk/gitweb/?p=dnsmasq.git;a=commit;h=4bcc9650fac9a48502679cd793d269ef60caef07

which does exactly that. It would be great if you could check it works
OK on your tests.


What would be _really_ nice (hint, hint) would be to extend this code to
return IPv6 neighbours.


Cheers,

Simon.




On 26.04.2026 17:18, Sagie Duchovne-Nave wrote:
arp_enumerate() allocates a heap buffer via expand_buf() to hold the
kernel ARP table dump retrieved through sysctl(NET_RT_FLAGS). This
buffer is never freed on any return path -- neither the early error
returns nor the normal return after iteration -- causing a leak on
every call.

The leak is most acute in the DHCPv6 path. get_client_mac() calls
find_mac() up to five times per packet with lazy=0. Because the
'updated' flag is local to each find_mac() invocation, a cached
ARP_EMPTY entry for an unresolvable IPv6 address does not short-
circuit the kernel lookup: each call falls through to iface_enumerate()
-> arp_enumerate(), leaking one buffer per call. This yields up to
five leaked allocations per DHCPv6 SOLICIT packet. The leak size per
call equals the full system-wide IPv4 ARP table dump across all
interfaces.

The condition is readily triggered by a DHCPv6 client whose MAC
address cannot be resolved via NDP -- which is the common case on
FreeBSD, because arp_enumerate() queries NET_RT_FLAGS/RTF_LLINFO,
which returns IPv4 ARP entries only; IPv6 NDP neighbour entries are
not included. As a result every IPv6 MAC lookup fails unconditionally
on FreeBSD, every failed lookup produces an ARP_EMPTY record that is
never promoted, and every subsequent packet for that client leaks five
buffers.

Fix: free buff.iov_base on all return paths in arp_enumerate(),
including the early returns inside the retry loop where iov_base may
already be non-NULL from a prior expand_buf() call.

Reported against: FreeBSD 14, dnsmasq 2.91
Observed symptom: steady process memory growth correlated with DHCPv6
SOLICIT traffic from a client whose NDP entry is absent from the
kernel table (confirmed by disabling the client stopping the balloon).

Signed-off-by: Sagie D.
---
   bpf.c | 8 +++++++-
   1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/bpf.c b/bpf.c
index XXXXXXX..XXXXXXX 100644
--- a/bpf.c
+++ b/bpf.c
@@ -xx,12 +xx,18 @@ int arp_enumerate(void *parm, callback_t callback)
     while (1)
       {
         if (!expand_buf(&buff, needed))
-        return 0;
+        {
+          free(buff.iov_base);
+          return 0;
+        }
         if ((rc = sysctl(mib, 6, buff.iov_base, &needed, NULL, 0)) == 0 ||
             errno != ENOMEM)
           break;
         needed += needed / 8;
       }
     if (rc == -1)
-    return 0;
+    {
+      free(buff.iov_base);
+      return 0;
+    }

     for (next = buff.iov_base ; next < (char *)buff.iov_base + needed;
next += rtm->rtm_msglen)
       {
         rtm = (struct rt_msghdr *)next;
         sin2 = (struct sockaddr_inarp *)(rtm + 1);
         sdl = (struct sockaddr_dl *)((char *)sin2 + SA_SIZE(sin2));
         if (!callback.af_unspec(AF_INET, &sin2->sin_addr, LLADDR(sdl),
sdl->sdl_alen, parm))
-        return 0;
+        {
+          free(buff.iov_base);
+          return 0;
+        }
       }

-  return 1;
+  free(buff.iov_base);
+  return 1;
   }

_______________________________________________
Dnsmasq-discuss mailing list
[email protected]
https://lists.thekelleys.org.uk/cgi-bin/mailman/listinfo/dnsmasq-discuss





_______________________________________________
Dnsmasq-discuss mailing list
[email protected]
https://lists.thekelleys.org.uk/cgi-bin/mailman/listinfo/dnsmasq-discuss

Reply via email to