oleaut32: Add ARM support to DispCallFunc().

Mon Oct 23 16:07:43 CDT 2017

Hi Donna and Welcome to the Wine Project!

Whats your motivation for this patch?
You did a very good job for a firsttimer!

Some more notes below:

Am 22.10.2017 um 03:04 schrieb Donna Whisnant:
> Adds ARM ABI support to DispCallFunc() to allow IDispatch invoke calls
> to succeed on ARM platforms.  This change specifically targets only
> 32-bit little-endian (ARMEL) platform CPUs.  It's believed to likely be
> compatible with 64-bit and/or big-endian (ARMEB) platforms, but testing
> for those other platforms should be completed before enabling.
> 
> Tested on Raspbian Stretch 2017-09-07 RPi image using a Qemu systemd
> container to compile and run it on an x86-64 Host..  When running on
> Raspberry Pi hardware, a 3G/1G split kernel needs to be used for Wine
> to function.
> 
> Signed-off-by: Donna Whisnant <dewhisna at dewtronics.com>
> ---
>  dlls/oleaut32/typelib.c | 221 +++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 219 insertions(+), 2 deletions(-)
> 
> diff --git a/dlls/oleaut32/typelib.c b/dlls/oleaut32/typelib.c
> index ebf6d85..d7ef212 100644
> --- a/dlls/oleaut32/typelib.c
> +++ b/dlls/oleaut32/typelib.c
> @@ -6302,8 +6302,48 @@ static HRESULT WINAPI ITypeInfo_fnGetIDsOfNames( ITypeInfo2 *iface,
>      return DISP_E_UNKNOWNNAME;
>  }
>  
> +#if defined(__arm__) && defined(__ARM_32BIT_STATE) && !defined(__ARMEB__)
> +/* Note: this may work OK on 64-bit and/or ARMEB, but has currently only been tested for 32-bit ARMEL */

While i doubt there is something risky here regarding endianess you can keep the ARMEB check, but get rid of the 32bit check, it might be about Thunk(16bit) vs. ARM(32bit) but I'm not sure.
Anyway, this won't work on 64bit, because the instruction set is totally different, but it inspires me to do the 64bit version as soon this lands upstream ;)

>  
> -#ifdef __i386__
> +extern LONGLONG call_method( void *func, int nb_stk_args, const DWORD *stk_args, const DWORD *reg_args );
> +__ASM_GLOBAL_FUNC( call_method,
> +                    /* r0 = *func
> +                     * r1 = nb_stk_args
> +                     * r2 = *stk_args (pointer to 'nb_stk_args' DWORD values to push on stack)
> +                     * r3 = *reg_args (pointer to 8, 64-bit d0-d7 (double) values OR as 16, 32-bit s0-s15 (float) values, followed by 4, 32-bit (DWORD) r0-r3 values)
> +                     */
> +
> +                    "push {fp, lr}\n\t"              /* Save frame pointer and return address (stack still aligned to 8 bytes) */
> +                    __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
> +                    __ASM_CFI(".cfi_rel_offset %fp,0\n\t")
> +                    "add fp, sp, #4\n\t"            /* Setup frame pointer */

Is this adjusted FP used somewhere? (I forgot a lot of things regarding arm32...)

> +                    __ASM_CFI(".cfi_def_cfa_register %fp\n\t")
> +
> +                    "lsls r1, r1, #2\n\t"           /* r1 = nb_stk_args * sizeof(DWORD) */
> +                    "beq 1f\n\t"                    /* Skip allocation if no stack args */
> +                    "add r2, r2, r1\n"              /* Calculate ending address of incoming stack data */
> +                    "2:\tldr ip, [r2, #-4]!\n\t"    /* Get next value */
> +                    "str ip, [sp, #-4]!\n\t"        /* Push it on the stack */
> +                    "subs r1, r1, #4\n\t"           /* Decrement count */
> +                    "bgt 2b\n\t"                    /* Loop till done */
> +
> +                    "1:\tvldm r3!, {s0-s15}\n\t"    /* Load the s0-s15/d0-d7 arguments */
> +                    "mov ip, r0\n\t"                /* Save the function call address to ip before we nuke r0 with arguments to pass */
> +                    "ldm r3, {r0-r3}\n\t"           /* Load the r0-r3 arguments */
> +
> +                    "blx ip\n\t"                    /* Call the target function */
> +
> +                    "sub sp, fp, #4\n\t"            /* Clean the stack using 'fp' to allow for either stdcall or cdecl calls */
> +                    __ASM_CFI(".cfi_def_cfa %sp,4\n\t")
> +                    "pop {fp, pc}\n\t"              /* Restore FP and return */
> +                    __ASM_CFI(".cfi_same_value %fp\n\t")    /* Here for completeness and symmetry with other cpus, but won't be reached */
> +                )
> +
> +/* same function but returning single/double floating point */
> +static float (* const call_float_method)(void *, int, const DWORD *, const DWORD *) = (void *)call_method;
> +static double (* const call_double_method)(void *, int, const DWORD *, const DWORD *) = (void *)call_method;
> +
> +#elif defined(__i386__)
>  
>  extern LONGLONG call_method( void *func, int nb_args, const DWORD *args, int *stack_offset );
>  __ASM_GLOBAL_FUNC( call_method,
> @@ -6637,7 +6677,184 @@ DispCallFunc(
>      void* pvInstance, ULONG_PTR oVft, CALLCONV cc, VARTYPE vtReturn, UINT cActuals,
>      VARTYPE* prgvt, VARIANTARG** prgpvarg, VARIANT* pvargResult)
>  {
> -#ifdef __i386__
> +#if defined(__arm__) && defined(__ARM_32BIT_STATE) && !defined(__ARMEB__)
> +/* Note: this may work OK on 64-bit and/or ARMEB, but has currently only been tested for 32-bit ARMEL */

Same as above

> +    int argspos;
> +    void *func;
> +    UINT i;
> +    DWORD *args;
> +    struct {
> +        union {
> +            float s[16];
> +            double d[8];
> +        } sd;
> +        DWORD r[4];
> +    } regs;
> +    int rcount;     /* 32-bit register index count */
> +    int scount;     /* single-precision float register index count (will be incremented twice for doubles, plus alignment) */
> +
> +    TRACE("(%p, %ld, %d, %d, %d, %p, %p, %p (vt=%d))\n",
> +        pvInstance, oVft, cc, vtReturn, cActuals, prgvt, prgpvarg,
> +        pvargResult, V_VT(pvargResult));

You can go up to 100 chars per line, so be free to rearrange this

> +
> +    if (cc != CC_STDCALL && cc != CC_CDECL)
> +    {
> +        FIXME("unsupported calling convention %d\n",cc);
> +        return E_INVALIDARG;
> +    }
> +
> +    argspos = 0;
> +    rcount = 0;
> +    scount = 0;
> +
> +    /* Determine if we need to pass a pointer for the return value as arg 0.  If so, do that */
> +    /*  first as it will need to be in the 'r' registers:                                    */
> +    switch (vtReturn)
> +    {
> +    case VT_DECIMAL:
> +    case VT_VARIANT:
> +        regs.r[rcount++] = (DWORD)pvargResult;  /* arg 0 is a pointer to the result */
> +        break;
> +    case VT_HRESULT:
> +        WARN("invalid return type %u\n", vtReturn);
> +        return E_INVALIDARG;
> +    default:                    /* And all others are in 'r', 's', or 'd' registers or have no return value */
> +        break;
> +    }
> +
> +    if (pvInstance)
> +    {
> +        const FARPROC *vtable = *(FARPROC **)pvInstance;
> +        func = vtable[oVft/sizeof(void *)];
> +        regs.r[rcount++] = (DWORD)pvInstance; /* the This pointer is always the first parameter */
> +    }
> +    else func = (void *)oVft;
> +
> +    /* maximum size for an argument is sizeof(VARIANT).  Also allow for return pointer and stack alignment. */
> +    args = heap_alloc(sizeof(VARIANT) * cActuals + sizeof(DWORD) * 4 );
> +
> +    for (i = 0; i < cActuals; i++)
> +    {
> +        VARIANT *arg = prgpvarg[i];
> +        DWORD *pdwarg = (DWORD *)(arg);     /* a reinterpret_cast of the variant, used for copying structures when they are split between registers and stack */
> +        int ntemp;              /* Used for counting words split between registers and stack */
> +
> +        switch (prgvt[i])
> +        {
> +        case VT_EMPTY:
> +            break;
> +        case VT_R4:             /* these must be 4-byte aligned, and put in 's' regs or stack, as they are single-floats */
> +            if (scount < 16)
> +            {
> +                regs.sd.s[scount++] = V_R4(arg);
> +            } else {
> +                args[argspos++] = V_UI4(arg);
> +            }

I think no curly brackets are necessary here at all, but we usually don't open them in the same line as the if/else statement for new code 

> +            break;
> +        case VT_R8:             /* these must be 8-byte aligned, and put in 'd' regs or stack, as they are double-floats */
> +        case VT_DATE:
> +            if (scount < 15)
> +            {
> +                scount += (scount % 2); /* align scount to next whole double */
> +                regs.sd.d[scount/2] = V_R8(arg);
> +                scount += 2;
> +            } else {
> +                scount = 16;                /* Make sure we flag that all 's' regs are full */
> +                argspos += (argspos % 2);   /* align argspos to 8-bytes */
> +                memcpy( &args[argspos], &V_R8(arg), sizeof(V_R8(arg)) );
> +                argspos += sizeof(V_R8(arg)) / sizeof(DWORD);
> +            }
> +            break;
> +        case VT_I8:             /* these must be 8-byte aligned, and put in 'r' regs or stack, as they are long-longs */
> +        case VT_UI8:
> +        case VT_CY:
> +            if (rcount < 3)
> +            {
> +                rcount += (rcount % 2);     /* align rcount to 8-byte register pair */
> +                memcpy( &regs.r[rcount], &V_UI8(arg), sizeof(V_UI8(arg)) );
> +                rcount += sizeof(V_UI8(arg)) / sizeof(DWORD);
> +            } else {
> +                rcount = 4;                 /* Make sure we flag that all 'r' regs are full */
> +                argspos += (argspos % 2);   /* align argspos to 8-bytes */
> +                memcpy( &args[argspos], &V_UI8(arg), sizeof(V_UI8(arg)) );
> +                argspos += sizeof(V_UI8(arg)) / sizeof(DWORD);
> +            }
> +            break;
> +        case VT_DECIMAL:        /* these structures are 8-byte aligned, and put in 'r' regs or stack, can be split between the two */
> +        case VT_VARIANT:
> +            /* 8-byte align 'r' and/or stack: */
> +            if (rcount < 3)
> +            {
> +                rcount += (rcount % 2);
> +            } else {
> +                rcount = 4;
> +                argspos += (argspos % 2);
> +            }
> +            ntemp = sizeof(*arg) / sizeof(DWORD);
> +            while (ntemp > 0)
> +            {
> +                if (rcount < 4)
> +                {
> +                    regs.r[rcount++] = *pdwarg++;
> +                } else {
> +                    args[argspos++] = *pdwarg++;
> +                }
> +                --ntemp;
> +            }
> +            break;
> +        case VT_BOOL:  /* VT_BOOL is 16-bit but BOOL is 32-bit, needs to be extended */
> +            if (rcount < 4)
> +            {
> +                regs.r[rcount++] = V_BOOL(arg);
> +            } else {
> +                args[argspos++] = V_BOOL(arg);
> +            }
> +            break;
> +        default:
> +            if (rcount < 4)
> +            {
> +                regs.r[rcount++] = V_UI4(arg);
> +            } else {
> +                args[argspos++] = V_UI4(arg);
> +            }
> +            break;
> +        }
> +        TRACE("arg %u: type %s %s\n", i, debugstr_vt(prgvt[i]), debugstr_variant(arg));
> +    }
> +
> +    argspos += (argspos % 2);   /* Make sure stack function alignment is 8-byte */
> +
> +    TRACE("rcount: %d, scount: %d, argspos: %d\n", rcount, scount, argspos);
> +
> +    switch (vtReturn)
> +    {
> +    case VT_EMPTY:      /* EMPTY = no return value */
> +    case VT_DECIMAL:    /* DECIMAL and VARIANT already have a pointer argument passed (see above) */
> +    case VT_VARIANT:
> +        call_method( func, argspos, args, (DWORD*)&regs );
> +        break;
> +    case VT_R4:
> +        V_R4(pvargResult) = call_float_method( func, argspos, args, (DWORD*)&regs );
> +        break;
> +    case VT_R8:
> +    case VT_DATE:
> +        V_R8(pvargResult) = call_double_method( func, argspos, args, (DWORD*)&regs );
> +        break;
> +    case VT_I8:
> +    case VT_UI8:
> +    case VT_CY:
> +        V_UI8(pvargResult) = call_method(func, argspos, args, (DWORD*)&regs );
> +        break;
> +    default:
> +        V_UI4(pvargResult) = call_method( func, argspos, args, (DWORD*)&regs );
> +        break;
> +    }
> +    heap_free( args );
> +    if (vtReturn != VT_VARIANT) V_VT(pvargResult) = vtReturn;
> +    TRACE("retval: %s\n", debugstr_variant(pvargResult));
> +    return S_OK;
> +
> +#elif defined(__i386__)
>      int argspos, stack_offset;
>      void *func;
>      UINT i;
>