[PATCH v3 5/9] winedbg: Escape special characters in GDB packet reply.

Rémi Bernon rbernon at codeweavers.com
Tue Nov 16 12:30:45 CST 2021


On 11/16/21 17:51, Jinoh Kang wrote:
> There are four special characters in GDB's remote serial protocol:
> 
> - '$' (0x24): start of packet
> - '}' (0x7D): escape
> - '*' (0x2A): run-length encoding repeat count delimiter
> - '#' (0x23): end of packet; start of checksum
> 
> In particular, the '#' and '}' characters are problematic since they
> are often used in library filenames.  A few examples:
> 
> - %SystemRoot%\assembly\NativeImages_v[.NET ver]\[module+hash]#\*\*.dll
> - {CLSID or UUID}\*\.dll
> 
> To make GDB happy with those filenames, we scan for those characters and
> escape them properly.
> 
> While we are at it, also remove the assert in the packet_reply function
> that checks for '$' and '#' in the packet payload.
> 
> Signed-off-by: Jinoh Kang <jinoh.kang.kr at gmail.com>
> ---
>   programs/winedbg/gdbproxy.c | 68 ++++++++++++++++++++++++++++++++-----
>   1 file changed, 60 insertions(+), 8 deletions(-)
> 
> diff --git a/programs/winedbg/gdbproxy.c b/programs/winedbg/gdbproxy.c
> index e6b1d998f7d..bdb73659ade 100644
> --- a/programs/winedbg/gdbproxy.c
> +++ b/programs/winedbg/gdbproxy.c
> @@ -752,18 +752,71 @@ static void packet_reply_val(struct gdb_context* gdbctx, ULONG_PTR val, int len)
>       }
>   }
>   
> +static const unsigned char gdb_special_chars_lookup_table[4] = {
> +    /* The characters should be sorted by its value modulo table length. */
> +
> +    0x24, /* $: 001001|00 */
> +    0x7D, /* }: 011111|01 */
> +    0x2A, /* *: 001010|10 */
> +    0x23  /* #: 001000|11 */
> +};
> +
> +static inline BOOL is_gdb_special_char(unsigned char val)
> +{
> +    /* A note on the GDB special character scanning code:
> +     *
> +     * We cannot use strcspn() since we plan to transmit binary data in
> +     * packet reply, which can contain NULL (0x00) bytes.  We also don't want
> +     * to slow down memory dump transfers.  Therefore, we use a tiny lookup
> +     * table that contains all the four special characters to speed up scanning.
> +     *
> +     * Note: strcspn() uses lookup tables as well, but as of Wine 6.19
> +     * msvcrt!strcspn allocates 1024 bytes (sizeof(BOOL)*256) of table
> +     * on the stack and populates it on the fly.  It would be slower and less
> +     * cache-friendly than a preallocated, tiny static lookup table.
> +     */
> +
> +    const size_t length = ARRAY_SIZE(gdb_special_chars_lookup_table);
> +    return gdb_special_chars_lookup_table[val % length] == val;
> +}
> +
> +static void packet_reply_add_raw(struct gdb_context* gdbctx, const void* data, size_t len)
> +{
> +    const unsigned char *ptr = data, *sptr, *end = ptr + len;
> +    size_t seg_len;
> +    int is_end;
> +

I'm not sure what "seg_" is supposed to mean? How about just "len", and 
"tmp" instead of sptr?

> +    while (ptr != end)
> +    {
> +        for (sptr = ptr; sptr != end && !is_gdb_special_char(*sptr); sptr++)
> +            ;
> +
> +        seg_len = sptr - ptr;
> +        is_end = sptr == end;
> +
> +        packet_reply_grow(gdbctx, is_end ? seg_len : seg_len + 2);
> +        memcpy(&gdbctx->out_buf[gdbctx->out_len], ptr, seg_len);
> +        gdbctx->out_len += seg_len;
> +        ptr += seg_len;
> +
> +        if (is_end)
> +            break;
> +
> +        gdbctx->out_buf[gdbctx->out_len++] = 0x7D;
> +        gdbctx->out_buf[gdbctx->out_len++] = 0x20 ^ *ptr++;
> +    }
> +}
> +

What about something like:

     const unsigned char *begin = data, *end = begin + len, *ptr = begin;

     while (ptr != end)
     {
         while (ptr != end && !is_gdb_special_char(*ptr)) ptr++;

         packet_reply_grow(gdbctx, ptr - begin + 2);
         memcpy(&gdbctx->out_buf[gdbctx->out_len], begin, ptr - begin);
         gdbctx->out_len += ptr - begin;
         if (ptr == end) break;

         gdbctx->out_buf[gdbctx->out_len++] = 0x7D;
         gdbctx->out_buf[gdbctx->out_len++] = 0x20 ^ *ptr++;
         begin = ptr;
     }

Could do with a len variable too if you feel like it's better.

>   static inline void packet_reply_add(struct gdb_context* gdbctx, const char* str)
>   {
> -    int len = strlen(str);
> -    packet_reply_grow(gdbctx, len);
> -    memcpy(&gdbctx->out_buf[gdbctx->out_len], str, len);
> -    gdbctx->out_len += len;
> +    packet_reply_add_raw(gdbctx, str, strlen(str));
>   }
>   
>   static void packet_reply_open(struct gdb_context* gdbctx)
>   {
>       assert(gdbctx->out_curr_packet == -1);
> -    packet_reply_add(gdbctx, "$");
> +    packet_reply_grow(gdbctx, 1);
> +    gdbctx->out_buf[gdbctx->out_len++] = '$';
>       gdbctx->out_curr_packet = gdbctx->out_len;
>   }
>   
> @@ -773,7 +826,8 @@ static void packet_reply_close(struct gdb_context* gdbctx)
>       int plen;
>   
>       plen = gdbctx->out_len - gdbctx->out_curr_packet;
> -    packet_reply_add(gdbctx, "#");
> +    packet_reply_grow(gdbctx, 1);
> +    gdbctx->out_buf[gdbctx->out_len++] = '#';
>       cksum = checksum(&gdbctx->out_buf[gdbctx->out_curr_packet], plen);
>       packet_reply_hex_to(gdbctx, &cksum, 1);
>       gdbctx->out_curr_packet = -1;
> @@ -812,8 +866,6 @@ static enum packet_return packet_reply(struct gdb_context* gdbctx, const char* p
>   {
>       packet_reply_open(gdbctx);
>   
> -    assert(strchr(packet, '$') == NULL && strchr(packet, '#') == NULL);
> -
>       packet_reply_add(gdbctx, packet);
>   
>       packet_reply_close(gdbctx);
> 

After this change packet_reply_add helper seems a little useless, I 
suggest maybe:

* Keep packet_reply_add like it is, and use it everywhere some static 
strings are added, including in open/close here, where we know we don't 
need escaping.

* Introduce the new packet_reply_add_raw, and use it when appending 
binary or dynamic data.
-- 
Rémi Bernon <rbernon at codeweavers.com>



More information about the wine-devel mailing list