[PATCH] iphlpapi: fix the usage of Linux's unprivileged ICMP sockets

Damjan Jovanovic damjan.jov at gmail.com
Sun Jun 20 04:47:18 CDT 2021


Linux's unprivileged ICMP sockets are completely broken in Wine, and
radically differ from macOS's. Linux doesn't provide IP headers, but
reports some of the IP header data in the recvmsg() ancillary data. It
overwrites icmp_id with its own value (a "port" number you can bind()
to but can't getsockname()). On error, a MSG_ERRQUEUE read returns the
offending packet's ICMP instead of the reply's, while the ancillary data
provides some of the reply's ICMP fields.

This patch fixes these issues and gets Linux's parody implementation
to a largely functional level.

Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=8332
Signed-off-by: Damjan Jovanovic <damjan.jov at gmail.com>
---
 dlls/iphlpapi/icmp.c | 173 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 168 insertions(+), 5 deletions(-)
-------------- next part --------------
diff --git a/dlls/iphlpapi/icmp.c b/dlls/iphlpapi/icmp.c
index 8ef03219e52..6305b3ca9ad 100644
--- a/dlls/iphlpapi/icmp.c
+++ b/dlls/iphlpapi/icmp.c
@@ -62,6 +62,9 @@
 #ifdef HAVE_SYS_POLL_H
 # include <sys/poll.h>
 #endif
+#if defined(__linux__)
+# include <linux/errqueue.h>
+#endif
 
 #define USE_WS_PREFIX
 
@@ -107,6 +110,7 @@ WINE_DECLARE_DEBUG_CHANNEL(winediag);
 typedef struct {
     int sid;
     IP_OPTION_INFORMATION default_opts;
+    BOOL unprivileged;
 } icmp_t;
 
 #define IP_OPTS_UNKNOWN     0
@@ -145,7 +149,7 @@ static int in_cksum(u_short *addr, int len)
 }
 
 /* Receive a reply (IPv4); this function uses, takes ownership of and will always free `buffer` */
-static DWORD icmp_get_reply(int sid, unsigned char *buffer, DWORD send_time, void *reply_buf, DWORD reply_size, DWORD timeout)
+static DWORD icmp_get_reply(int sid, unsigned char *buffer, DWORD send_time, void *reply_buf, DWORD reply_size, DWORD timeout, BOOL unprivileged)
 {
     int repsize = MAXIPLEN + MAXICMPLEN + min(65535, reply_size);
     struct icmp *icmp_header = (struct icmp*)buffer;
@@ -155,9 +159,11 @@ static DWORD icmp_get_reply(int sid, unsigned char *buffer, DWORD send_time, voi
     unsigned short id, seq, cksum;
     struct sockaddr_in addr;
     int ip_header_len = 0;
-    socklen_t addrlen;
     struct pollfd fdr;
     DWORD recv_time;
+    struct msghdr msg;
+    struct iovec iov;
+    char cbuf[512];
     int res;
 
     id = icmp_header->icmp_id;
@@ -165,13 +171,128 @@ static DWORD icmp_get_reply(int sid, unsigned char *buffer, DWORD send_time, voi
     cksum = icmp_header->icmp_cksum;
     fdr.fd = sid;
     fdr.events = POLLIN;
-    addrlen = sizeof(addr);
 
     while (poll(&fdr,1,timeout)>0) {
         recv_time = GetTickCount();
-        res=recvfrom(sid, buffer, repsize, 0, (struct sockaddr*)&addr, &addrlen);
+
+        iov.iov_base = buffer;
+        iov.iov_len = repsize;
+        msg.msg_name = &addr;
+        msg.msg_namelen = sizeof(addr);
+        msg.msg_iov = &iov;
+        msg.msg_iovlen = 1;
+        msg.msg_flags = 0;
+        msg.msg_control = cbuf;
+        msg.msg_controllen = sizeof(cbuf);
+        res=recvmsg(sid, &msg, MSG_DONTWAIT);
         TRACE("received %d bytes from %s\n",res, inet_ntoa(addr.sin_addr));
         ier->Status=IP_REQ_TIMED_OUT;
+#if defined(__linux__)
+        if (unprivileged) {
+            if (res < 0) {
+                res = recvmsg(sid, &msg, MSG_DONTWAIT | MSG_ERRQUEUE);
+            }
+            /* What recvmsg() gave us for:
+             * - Linux's unprivileged ICMP sockets:
+             *    - On success:                 | reply's ICMP            | reply's payload               |
+             *    - MSG_ERRQUEUE:               | offending packet's ICMP | offending packet's payload    |
+             *                                   with some of the ICMP reply's data in the ancillary data
+             * - Everything else:
+             *    - On success:    | reply's IP | reply's ICMP            | reply's payload               |
+             *    - On error:      | reply's IP | reply's ICMP            | offending IP | offending ICMP |
+             *                                                            ----often found in payload-------
+             *
+             * So for Linux's parody implementation, we generate some semblance of the reply's IP header,
+             * and for errors, ICMP header, from the recvmsg() ancillary data, but ignore the offending
+             * packet's data for ICMP errors, as it's hard to reconstruct, and Windows doesn't seem to
+             * provide any ICMP error data anyway:
+             *     On FreeBSD router: route add -host 1.2.3.4 127.0.0.1 -reject
+             *     Ping 1.2.3.4 from Windows 7, the reply is ICMP host unreachable, IcmpSendEcho() returns 1
+             *     with:
+             *         ICMP_ECHO_REPLY: {
+             *             Status: 11003 (IP_DEST_HOST_UNREACHABLE)
+             *             DataSize: 0   <----- NO ICMP error data!!!
+             *             Options: {
+             *                 Ttl: 64
+             *                 Tos: 0
+             *                 Flags: 0
+             *             }
+             *         }
+             */
+            if (res >= 0) {
+                struct cmsghdr *cmsg;
+                int ttl = 0;
+                u_char tos = 0;
+                struct in_addr ip_dst = { 0 };
+                u_char *options = NULL;
+                int options_len = 0;
+                int err_type = 0;
+                int err_code = 0;
+                int err_info = 0;
+                int icmp_err_size = 0;
+                for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+                    if (cmsg->cmsg_level == SOL_IP) {
+                        if (cmsg->cmsg_type == IP_OPTIONS) {
+                            struct ip_opts *opts;
+                            u_char *option;
+                            opts = (struct ip_opts*) CMSG_DATA(cmsg);
+                            option = (u_char*) opts->ip_opts;
+                            while (IPOPT_NUMBER(*option) != IPOPT_EOL) {
+                                option += 1 + option[1];
+                            }
+                            options = (u_char*) opts->ip_opts;
+                            options_len = option - (u_char*)CMSG_DATA(cmsg);
+                        } else if (cmsg->cmsg_type == IP_PKTINFO) {
+                            struct in_pktinfo pktinfo;
+                            memcpy(&pktinfo, CMSG_DATA(cmsg), sizeof(pktinfo));
+                            ip_dst = pktinfo.ipi_addr;
+                        } else if (cmsg->cmsg_type == IP_TTL) {
+                            memcpy(&ttl, CMSG_DATA(cmsg), sizeof(int));
+                        } else if (cmsg->cmsg_type == IP_TOS) {
+                            tos = *(u_char*)CMSG_DATA(cmsg);
+                        } else if (cmsg->cmsg_type == IP_RECVERR) {
+                            struct sock_extended_err *err = (struct sock_extended_err*) CMSG_DATA(cmsg);
+                            res = 0; /* on error, trim all reply data, like Windows seems to */
+                            if (err->ee_origin == SO_EE_ORIGIN_ICMP) {
+                                err_type = err->ee_type;
+                                err_code = err->ee_code;
+                                err_info = err->ee_info;
+                                icmp_err_size = ICMP_MINLEN;
+                            } else {
+                                FIXME("unsupported ee_origin %d\n", err->ee_origin);
+                                break;
+                            }
+                        }
+                    }
+                }
+                memmove(&buffer[sizeof(struct ip) + options_len + icmp_err_size], buffer, res);
+                ip_header->ip_v = 4;
+                ip_header->ip_hl = (sizeof(struct ip) + options_len) >> 2;
+                ip_header->ip_tos = tos;
+                ip_header->ip_len = sizeof(struct ip) + options_len + icmp_err_size + res;
+                ip_header->ip_id = 0;
+                ip_header->ip_off = 0;
+                ip_header->ip_ttl = ttl;
+                ip_header->ip_p = IPPROTO_ICMP;
+                ip_header->ip_src = addr.sin_addr;
+                ip_header->ip_dst = ip_dst;
+                if (options)
+                    memcpy(&buffer[sizeof(struct ip)], options, options_len);
+                icmp_header = (struct icmp*)(((char*)ip_header) + (ip_header->ip_hl << 2));
+                if (icmp_err_size) {
+                    icmp_header->icmp_type = err_type;
+                    icmp_header->icmp_code = err_code;
+                    icmp_header->icmp_hun.ih_void = err_info; /* FIXME: check */
+                } else {
+                    /* Linux kernel overwrites the icmp_id with its own one, but only gives us replies that match it:
+                     * https://lwn.net/Articles/443051/
+                     * Rewrite it back so it matches what we expect later: */
+                    icmp_header->icmp_id = id;
+                }
+                res += sizeof(struct ip) + options_len + icmp_err_size;
+            }
+        }
+#endif
 
         /* Check whether we should ignore this packet */
         if ((ip_header->ip_p==IPPROTO_ICMP) && (res>=sizeof(struct ip)+ICMP_MINLEN)) {
@@ -229,7 +350,11 @@ static DWORD icmp_get_reply(int sid, unsigned char *buffer, DWORD send_time, voi
                     ier->Status=IP_SOURCE_QUENCH;
                     break;
                 }
+#if defined(__linux__)
+                if (!unprivileged && ier->Status!=IP_REQ_TIMED_OUT) {
+#else
                 if (ier->Status!=IP_REQ_TIMED_OUT) {
+#endif
                     struct ip* rep_ip_header;
                     struct icmp* rep_icmp_header;
                     /* The ICMP header size of all the packets we accept is the same */
@@ -359,6 +484,7 @@ done:
 HANDLE WINAPI Icmp6CreateFile(VOID)
 {
     icmp_t* icp;
+    BOOL unprivileged = FALSE;
 
     int sid=socket(AF_INET6,SOCK_RAW,IPPROTO_ICMPV6);
     if (sid < 0)
@@ -366,6 +492,23 @@ HANDLE WINAPI Icmp6CreateFile(VOID)
         /* Some systems (e.g. Linux 3.0+ and Mac OS X) support
            non-privileged ICMP via SOCK_DGRAM type. */
         sid=socket(AF_INET6,SOCK_DGRAM,IPPROTO_ICMPV6);
+        if (sid >= 0)
+        {
+#if defined(__linux__)
+            int on = 1;
+            if (setsockopt(sid, SOL_IP, IP_PKTINFO, &on, sizeof(on)))
+                ERR("setsockopt IP_PKTINFO failed, errno %d\n", errno);
+            if (setsockopt(sid, SOL_IP, IP_RECVERR, &on, sizeof(on)))
+                ERR("setsockopt IP_RECVERR failed, errno %d\n", errno);
+            if (setsockopt(sid, SOL_IP, IP_RECVOPTS, &on, sizeof(on)))
+                ERR("setsockopt IP_RECVOPTS failed, errno %d\n", errno);
+            if (setsockopt(sid, SOL_IP, IP_RECVTOS, &on, sizeof(on)))
+                ERR("setsockopt IP_RECVTOS failed, errno %d\n", errno);
+            if (setsockopt(sid, SOL_IP, IP_RECVTTL, &on, sizeof(on)))
+                ERR("setsockopt IP_RECVTTL failed, errno %d\n", errno);
+#endif
+            unprivileged = TRUE;
+        }
     }
     if (sid < 0) {
         ERR_(winediag)("Failed to use ICMPV6 (network ping), this requires special permissions.\n");
@@ -381,6 +524,7 @@ HANDLE WINAPI Icmp6CreateFile(VOID)
     }
     icp->sid=sid;
     icp->default_opts.OptionsSize=IP_OPTS_UNKNOWN;
+    icp->unprivileged = unprivileged;
     return (HANDLE)icp;
 }
 
@@ -417,6 +561,7 @@ DWORD WINAPI Icmp6SendEcho2(
 HANDLE WINAPI IcmpCreateFile(VOID)
 {
     icmp_t* icp;
+    BOOL unprivileged = FALSE;
 
     int sid=socket(AF_INET,SOCK_RAW,IPPROTO_ICMP);
     if (sid < 0)
@@ -424,6 +569,23 @@ HANDLE WINAPI IcmpCreateFile(VOID)
         /* Some systems (e.g. Linux 3.0+ and Mac OS X) support
            non-privileged ICMP via SOCK_DGRAM type. */
         sid=socket(AF_INET,SOCK_DGRAM,IPPROTO_ICMP);
+        if (sid >= 0)
+        {
+#if defined(__linux__)
+            int on = 1;
+            if (setsockopt(sid, SOL_IP, IP_PKTINFO, &on, sizeof(on)))
+                ERR("setsockopt IP_PKTINFO failed, errno %d\n", errno);
+            if (setsockopt(sid, SOL_IP, IP_RECVERR, &on, sizeof(on)))
+                ERR("setsockopt IP_RECVERR failed, errno %d\n", errno);
+            if (setsockopt(sid, SOL_IP, IP_RECVOPTS, &on, sizeof(on)))
+                ERR("setsockopt IP_RECVOPTS failed, errno %d\n", errno);
+            if (setsockopt(sid, SOL_IP, IP_RECVTOS, &on, sizeof(on)))
+                ERR("setsockopt IP_RECVTOS failed, errno %d\n", errno);
+            if (setsockopt(sid, SOL_IP, IP_RECVTTL, &on, sizeof(on)))
+                ERR("setsockopt IP_RECVTTL failed, errno %d\n", errno);
+#endif
+            unprivileged = TRUE;
+        }
     }
     if (sid < 0) {
         ERR_(winediag)("Failed to use ICMP (network ping), this requires special permissions.\n");
@@ -439,6 +601,7 @@ HANDLE WINAPI IcmpCreateFile(VOID)
     }
     icp->sid=sid;
     icp->default_opts.OptionsSize=IP_OPTS_UNKNOWN;
+    icp->unprivileged = unprivileged;
     return (HANDLE)icp;
 }
 
@@ -664,7 +827,7 @@ DWORD WINAPI IcmpSendEcho2Ex(
         return 0;
     }
 
-    return icmp_get_reply(icp->sid, buffer, send_time, ReplyBuffer, ReplySize, Timeout);
+    return icmp_get_reply(icp->sid, buffer, send_time, ReplyBuffer, ReplySize, Timeout, icp->unprivileged);
 }
 
 /*


More information about the wine-devel mailing list