[5/7] ntdll: Implement RtlDecompressFragment.

Thu Jul 9 08:35:07 CDT 2015

On 09.07.2015 14:04, Matteo Bruni wrote:
> Hi,
> 
> a bunch of nitpicks which you can take or ignore:
> 

Hi Matteo,

thanks for the review.

> 2015-07-09 3:07 GMT+02:00 Sebastian Lackner <sebastian at fds-team.de>:
>> Based on a patch by Michael Müller.
>>
>> For https://bugs.winehq.org/show_bug.cgi?id=37449
>>
>> ---
>>  dlls/ntdll/ntdll.spec               |    2
>>  dlls/ntdll/rtl.c                    |  214 ++++++++++++++++++++++++++++++++++--
>>  dlls/ntdll/tests/rtl.c              |    3
>>  dlls/ntoskrnl.exe/ntoskrnl.exe.spec |    2
>>  4 files changed, 210 insertions(+), 11 deletions(-)
>>
>> diff --git a/dlls/ntdll/ntdll.spec b/dlls/ntdll/ntdll.spec
>> index 75dc647..ca3561d 100644
>> --- a/dlls/ntdll/ntdll.spec
>> +++ b/dlls/ntdll/ntdll.spec
>> @@ -513,7 +513,7 @@
>>  @ stdcall RtlDecodePointer(ptr)
>>  # @ stub RtlDecodeSystemPointer
>>  @ stdcall RtlDecompressBuffer(long ptr long ptr long ptr)
>> -@ stub RtlDecompressFragment
>> +@ stdcall RtlDecompressFragment(long ptr long ptr long long ptr ptr)
>>  @ stub RtlDefaultNpAcl
>>  @ stub RtlDelete
>>  @ stdcall RtlDeleteAce(ptr long)
>> diff --git a/dlls/ntdll/rtl.c b/dlls/ntdll/rtl.c
>> index d9e448a..9a196d2 100644
>> --- a/dlls/ntdll/rtl.c
>> +++ b/dlls/ntdll/rtl.c
>> @@ -1309,17 +1309,219 @@ NTSTATUS WINAPI RtlCompressBuffer(USHORT format, PUCHAR uncompressed, ULONG unco
>>      }
>>  }
>>
>> +/* decompress a single LZNT1 chunk */
>> +static PUCHAR lznt1_decompress_chunk(UCHAR *dst, ULONG dst_size, UCHAR *src, ULONG src_size)
> 
> PUCHAR -> UCHAR *

Good catch. I'll wait for the result of the commit round, when this patch wasn't accepted
yet I'll resend a version with this style issue fixed.

> 
>> +{
>> +    UCHAR *src_cur = src, *src_end = src + src_size;
>> +    UCHAR *dst_cur = dst, *dst_end = dst + dst_size;
>> +    ULONG displacement_bits, length_bits;
>> +    ULONG code_displacement, code_length;
>> +    WORD flags, code;
>> +
>> +    while (src_cur < src_end && dst_cur < dst_end)
>> +    {
>> +        flags = 0x8000 | *src_cur++;
>> +        while ((flags & 0xFF00) && src_cur < src_end)
> 
> It looks like we're mostly using lowercase in hex literals.

Same here (although I wasn't really aware that we have a convention for that ^^).

> 
>> +        {
>> +            if (flags & 1)
>> +            {
>> +                /* backwards reference */
>> +                if (src_cur + sizeof(WORD) > src_end)
>> +                    return NULL;
>> +
>> +                code = *(WORD *)src_cur;
>> +                src_cur += sizeof(WORD);
>> +
>> +                /* find length / displacement bits */
>> +                for (displacement_bits = 12; displacement_bits > 4; displacement_bits--)
>> +                    if ((1 << (displacement_bits - 1)) < dst_cur - dst) break;
>> +
>> +                length_bits       = 16 - displacement_bits;
>> +                code_length       = (code & ((1 << length_bits) - 1)) + 3;
>> +                code_displacement = (code >> length_bits) + 1;
>> +
>> +                if (dst_cur < dst + code_displacement)
>> +                    return NULL;
>> +
>> +                /* copy bytes of chunk - we can't use memcpy()
>> +                 * since source and dest can be overlapping */
> 
> I would also mention that you can't use memmove() either because you
> actually want to copy any overlapping byte over and over again, as
> necessary.

Yep. I agree, the comment could probably be a bit more detailed here because
its a very tricky.

> 
>> +                while (code_length--)
>> +                {
>> +                    if (dst_cur >= dst_end) return dst_cur;
>> +                    *dst_cur = *(dst_cur - code_displacement);
>> +                    dst_cur++;
>> +                }
>> +            }
>> +            else
>> +            {
>> +                /* uncompressed data */
>> +                if (dst_cur >= dst_end) return dst_cur;
>> +                *dst_cur++ = *src_cur++;
>> +            }
>> +            flags >>= 1;
>> +        }
>> +    }
>> +
>> +    return dst_cur;
>> +}
>> +
>> +/* decompress data encoded with LZNT1 */
>> +static NTSTATUS lznt1_decompress(UCHAR *dst, ULONG dst_size, UCHAR *src, ULONG src_size,
>> +                                 ULONG offset, ULONG *final_size, UCHAR *workspace)
>> +{
>> +    UCHAR *src_cur = src, *src_end = src + src_size;
>> +    UCHAR *dst_cur = dst, *dst_end = dst + dst_size;
>> +    ULONG chunk_size, block_size;
>> +    WORD chunk_header;
>> +    UCHAR *ptr;
>> +
>> +    if (src_cur + sizeof(WCHAR) > src_end)
>> +        return STATUS_BAD_COMPRESSION_BUFFER;
>> +
>> +    /* skip over chunks which have a distance >= 0x1000 to the destination offset */
>> +    while (offset >= 0x1000 && src_cur + sizeof(WCHAR) <= src_end)
>> +    {
>> +        chunk_header = *(WORD *)src_cur;
>> +        src_cur += sizeof(WCHAR);
> 
> Why this mixing of WORD and WCHAR? They do match in practice but still...

Oops. That is still remaining from a previous iteration of the patchset, but it shouldn't
matter in practice. In this case it probably also makes sense to fix it, even if the patch
was already committed, otherwise its confusing for future devs.

> 
>> +        if (!chunk_header) goto out;
>> +        chunk_size = (chunk_header & 0xFFF) + 1;
>> +
>> +        if (src_cur + chunk_size > src_end)
>> +            return STATUS_BAD_COMPRESSION_BUFFER;
>> +
>> +        src_cur += chunk_size;
>> +        offset  -= 0x1000;
>> +    }
>> +
>> +    /* check if a chunk is included partially */
>> +    if (offset && src_cur + sizeof(WCHAR) <= src_end)
>> +    {
>> +        chunk_header = *(WORD *)src_cur;
>> +        src_cur += sizeof(WCHAR);
>> +        if (!chunk_header) goto out;
>> +        chunk_size = (chunk_header & 0xFFF) + 1;
>> +
>> +        if (src_cur + chunk_size > src_end)
>> +            return STATUS_BAD_COMPRESSION_BUFFER;
>> +
>> +        if (dst_cur >= dst_end)
>> +            goto out;
>> +
>> +        if (chunk_header & 0x8000)
>> +        {
>> +            /* compressed chunk */
>> +            if (!workspace) return STATUS_ACCESS_VIOLATION;
>> +            ptr = lznt1_decompress_chunk(workspace, 0x1000, src_cur, chunk_size);
> 
> I guess in theory you could avoid decompressing the whole chunk if you
> need less than the whole chunk to be decompressed. It's not strictly
> required though so if doing that makes the code too convoluted then
> ignore it.

It might be possible, but it shouldn't matter too much in practice. This part is only
executed when offset != 0 for the first (partial) chunk, so always less than a page.

> 
>> +            if (!ptr) return STATUS_BAD_COMPRESSION_BUFFER;
>> +            if (ptr - workspace > offset)
>> +            {
>> +                block_size = min((ptr - workspace) - offset, dst_end - dst_cur);
>> +                memcpy(dst_cur, workspace + offset, block_size);
>> +                dst_cur += block_size;
>> +            }
>> +        }
>> +        else
>> +        {
>> +            /* uncompressed chunk */
>> +            if (chunk_size > offset)
>> +            {
>> +                block_size = min(chunk_size - offset, dst_end - dst_cur);
>> +                memcpy(dst_cur, src_cur + offset, block_size);
>> +                dst_cur += block_size;
>> +            }
>> +        }
>> +
>> +        src_cur += chunk_size;
>> +    }
>> +
>> +    /* remaining blocks */
>> +    while (src_cur + sizeof(WCHAR) <= src_end)
>> +    {
>> +        chunk_header = *(WORD *)src_cur;
>> +        src_cur += sizeof(WCHAR);
>> +        if (!chunk_header) goto out;
>> +        chunk_size = (chunk_header & 0xFFF) + 1;
>> +
>> +        if (src_cur + chunk_size > src_end)
>> +            return STATUS_BAD_COMPRESSION_BUFFER;
>> +
>> +        /* fill space with padding */
>> +        block_size = ((dst_cur - dst) + offset) & 0xFFF;
>> +        if (block_size)
>> +        {
>> +            block_size = 0x1000 - block_size;
>> +            if (dst_cur + block_size >= dst_end)
>> +                goto out;
>> +            memset(dst_cur, 0, block_size);
>> +            dst_cur += block_size;
>> +        }
> 
> I'm probably dense but I don't follow this padding part. Don't you
> still need to memset to 0 the "remaining part" even if dst_end <=
> dst_cur + block_size (obviously stopping at dst_end) and then proceed
> to decompress the chunk? Unless you're not supposed to decompress the
> last chunk in that case?

Padding is only required when the previous chunk was decompressed to less than 4096
bytes. It is not required for the last chunk, and not when the next chunk would not
fit into the buffer anymore - thats why this looks a bit complicated. According to
the tests this seems to match the Windows behaviour.

> 
>> +
>> +        if (dst_cur >= dst_end)
>> +            goto out;
>> +
>> +        if (chunk_header & 0x8000)
>> +        {
>> +            /* compressed chunk */
>> +            dst_cur = lznt1_decompress_chunk(dst_cur, dst_end - dst_cur, src_cur, chunk_size);
>> +            if (!dst_cur) return STATUS_BAD_COMPRESSION_BUFFER;
>> +        }
>> +        else
>> +        {
>> +            /* uncompressed chunk */
>> +            block_size = min(chunk_size, dst_end - dst_cur);
>> +            memcpy(dst_cur, src_cur, block_size);
>> +            dst_cur += block_size;
>> +        }
>> +
>> +        src_cur += chunk_size;
>> +    }
>> +
>> +out:
>> +    if (final_size)
>> +        *final_size = dst_cur - dst;
>> +
>> +    return STATUS_SUCCESS;
>> +
>> +}
>> +
>> +/******************************************************************************
>> + *  RtlDecompressFragment      [NTDLL.@]
>> + */
>> +NTSTATUS RtlDecompressFragment(USHORT format, PUCHAR uncompressed, ULONG uncompressed_size,
>> +                               PUCHAR compressed, ULONG compressed_size, ULONG offset,
>> +                               PULONG final_size, PVOID workspace)
>> +{
>> +    TRACE("0x%04x, %p, %u, %p, %u, %u, %p, %p\n", format, uncompressed,
>> +          uncompressed_size, compressed, compressed_size, offset, final_size, workspace);
>> +
>> +    switch (format & ~COMPRESSION_ENGINE_MAXIMUM)
>> +    {
>> +        case COMPRESSION_FORMAT_LZNT1:
>> +            return lznt1_decompress(uncompressed, uncompressed_size, compressed,
>> +                                    compressed_size, offset, final_size, workspace);
>> +
>> +        case COMPRESSION_FORMAT_NONE:
>> +        case COMPRESSION_FORMAT_DEFAULT:
>> +            return STATUS_INVALID_PARAMETER;
>> +
>> +        default:
>> +            FIXME("format %u not implemented\n", format);
>> +            return STATUS_UNSUPPORTED_COMPRESSION;
>> +    }
>> +}
>> +
>> +
>>  /******************************************************************************
>>   *  RtlDecompressBuffer                [NTDLL.@]
>>   */
>> -NTSTATUS WINAPI RtlDecompressBuffer(USHORT CompressionFormat, PUCHAR UncompressedBuffer,
>> -                                    ULONG UncompressedBufferSize, PUCHAR CompressedBuffer,
>> -                                    ULONG CompressedBufferSize, PULONG FinalUncompressedSize)
>> +NTSTATUS WINAPI RtlDecompressBuffer(USHORT format, PUCHAR uncompressed, ULONG uncompressed_size,
>> +                                    PUCHAR compressed, ULONG compressed_size, PULONG final_size)
>>  {
>> -    FIXME("0x%04x, %p, %u, %p, %u, %p :stub\n", CompressionFormat, UncompressedBuffer, UncompressedBufferSize,
>> -         CompressedBuffer, CompressedBufferSize, FinalUncompressedSize);
>> +    TRACE("0x%04x, %p, %u, %p, %u, %p\n", format, uncompressed,
>> +        uncompressed_size, compressed, compressed_size, final_size);
>>
>> -    return STATUS_NOT_IMPLEMENTED;
>> +    return RtlDecompressFragment(format, uncompressed, uncompressed_size,
>> +                                 compressed, compressed_size, 0, final_size, NULL);
>>  }
>>
>>  /***********************************************************************
>> diff --git a/dlls/ntdll/tests/rtl.c b/dlls/ntdll/tests/rtl.c
>> index c6a7023..99fd413 100644
>> --- a/dlls/ntdll/tests/rtl.c
>> +++ b/dlls/ntdll/tests/rtl.c
>> @@ -1663,11 +1663,8 @@ static void test_RtlCompressBuffer(void)
>>      memset(buf2, 0x11, sizeof(buf2));
>>      status = pRtlDecompressBuffer(COMPRESSION_FORMAT_LZNT1, buf2, sizeof(buf2),
>>                                    buf1, buf_size, &final_size);
>> -    todo_wine
>>      ok(status == STATUS_SUCCESS, "got wrong status 0x%08x\n", status);
>> -    todo_wine
>>      ok(final_size == sizeof(test_buffer), "got wrong final_size %u\n", final_size);
>> -    todo_wine
>>      ok(!memcmp(buf2, test_buffer, sizeof(test_buffer)), "got wrong decoded data\n");
>>      ok(buf2[sizeof(test_buffer)] == 0x11, "too many bytes written\n");
>>
>> diff --git a/dlls/ntoskrnl.exe/ntoskrnl.exe.spec b/dlls/ntoskrnl.exe/ntoskrnl.exe.spec
>> index 11d1c62..0bb7d04 100644
>> --- a/dlls/ntoskrnl.exe/ntoskrnl.exe.spec
>> +++ b/dlls/ntoskrnl.exe/ntoskrnl.exe.spec
>> @@ -977,7 +977,7 @@
>>  @ stub RtlCustomCPToUnicodeN
>>  @ stdcall RtlDecompressBuffer(long ptr long ptr long ptr) ntdll.RtlDecompressBuffer
>>  @ stub RtlDecompressChunks
>> -@ stub RtlDecompressFragment
>> +@ stdcall RtlDecompressFragment(long ptr long ptr long long ptr ptr) ntdll.RtlDecompressFragment
>>  @ stub RtlDelete
>>  @ stdcall RtlDeleteAce(ptr long) ntdll.RtlDeleteAce
>>  @ stdcall RtlDeleteAtomFromAtomTable(ptr long) ntdll.RtlDeleteAtomFromAtomTable
>> --
>> 2.4.5
>>
>>
> 
>