From 5cc1a57aede7c920d105fec1ac738ba21f144787 Mon Sep 17 00:00:00 2001
From: Nozomi Kodama
Date: Mon, 25 Feb 2013 01:26:13 -1000
Subject: Explicit computations to speed them up

Expand the cofactors of the 4x4 matrix explicitly in D3DXMatrixDeterminant
and D3DXMatrixInverse instead of building them with D3DXVec4Cross() and
pow(). D3DXMatrixInverse no longer calls D3DXMatrixDeterminant; it derives
the determinant from the first four cofactors it computes.
---
Note: the post-image blob id on the "index" line below was not regenerated
after the TRACE() call in D3DXMatrixInverse was kept and the whitespace-only
changes were dropped.

 dlls/d3dx9_36/math.c | 198 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 162 insertions(+), 36 deletions(-)

diff --git a/dlls/d3dx9_36/math.c b/dlls/d3dx9_36/math.c
index 69c3297..99360b6 100644
--- a/dlls/d3dx9_36/math.c
+++ b/dlls/d3dx9_36/math.c
@@ -255,54 +255,180 @@ HRESULT WINAPI D3DXMatrixDecompose(D3DXVECTOR3 *poutscale, D3DXQUATERNION *poutr
 
 FLOAT WINAPI D3DXMatrixDeterminant(const D3DXMATRIX *pm)
 {
-    D3DXVECTOR4 minor, v1, v2, v3;
-    FLOAT det;
+    FLOAT v[4];
 
     TRACE("pm %p\n", pm);
 
-    v1.x = pm->u.m[0][0]; v1.y = pm->u.m[1][0]; v1.z = pm->u.m[2][0]; v1.w = pm->u.m[3][0];
-    v2.x = pm->u.m[0][1]; v2.y = pm->u.m[1][1]; v2.z = pm->u.m[2][1]; v2.w = pm->u.m[3][1];
-    v3.x = pm->u.m[0][2]; v3.y = pm->u.m[1][2]; v3.z = pm->u.m[2][2]; v3.w = pm->u.m[3][2];
-    D3DXVec4Cross(&minor, &v1, &v2, &v3);
-    det = - (pm->u.m[0][3] * minor.x + pm->u.m[1][3] * minor.y + pm->u.m[2][3] * minor.z + pm->u.m[3][3] * minor.w);
-    return det;
+    /* v[j] is the cofactor of pm->u.m[0][j]. */
+    v[0] = pm->u.m[1][1] * pm->u.m[2][2] * pm->u.m[3][3] -
+        pm->u.m[1][1] * pm->u.m[2][3] * pm->u.m[3][2] -
+        pm->u.m[2][1] * pm->u.m[1][2] * pm->u.m[3][3] +
+        pm->u.m[2][1] * pm->u.m[1][3] * pm->u.m[3][2] +
+        pm->u.m[3][1] * pm->u.m[1][2] * pm->u.m[2][3] -
+        pm->u.m[3][1] * pm->u.m[1][3] * pm->u.m[2][2];
+
+    v[1] = -pm->u.m[1][0] * pm->u.m[2][2] * pm->u.m[3][3] +
+        pm->u.m[1][0] * pm->u.m[2][3] * pm->u.m[3][2] +
+        pm->u.m[2][0] * pm->u.m[1][2] * pm->u.m[3][3] -
+        pm->u.m[2][0] * pm->u.m[1][3] * pm->u.m[3][2] -
+        pm->u.m[3][0] * pm->u.m[1][2] * pm->u.m[2][3] +
+        pm->u.m[3][0] * pm->u.m[1][3] * pm->u.m[2][2];
+
+    v[2] = pm->u.m[1][0] * pm->u.m[2][1] * pm->u.m[3][3] -
+        pm->u.m[1][0] * pm->u.m[2][3] * pm->u.m[3][1] -
+        pm->u.m[2][0] * pm->u.m[1][1] * pm->u.m[3][3] +
+        pm->u.m[2][0] * pm->u.m[1][3] * pm->u.m[3][1] +
+        pm->u.m[3][0] * pm->u.m[1][1] * pm->u.m[2][3] -
+        pm->u.m[3][0] * pm->u.m[1][3] * pm->u.m[2][1];
+
+    v[3] = -pm->u.m[1][0] * pm->u.m[2][1] * pm->u.m[3][2] +
+        pm->u.m[1][0] * pm->u.m[2][2] * pm->u.m[3][1] +
+        pm->u.m[2][0] * pm->u.m[1][1] * pm->u.m[3][2] -
+        pm->u.m[2][0] * pm->u.m[1][2] * pm->u.m[3][1] -
+        pm->u.m[3][0] * pm->u.m[1][1] * pm->u.m[2][2] +
+        pm->u.m[3][0] * pm->u.m[1][2] * pm->u.m[2][1];
+
+    return pm->u.m[0][0] * v[0] + pm->u.m[0][1] * v[1] +
+        pm->u.m[0][2] * v[2] + pm->u.m[0][3] * v[3];
 }
 
 D3DXMATRIX* WINAPI D3DXMatrixInverse(D3DXMATRIX *pout, FLOAT *pdeterminant, const D3DXMATRIX *pm)
 {
-    int a, i, j;
-    D3DXMATRIX out;
-    D3DXVECTOR4 v, vec[3];
-    FLOAT det;
+    FLOAT v[16], det;
+    UINT i, j;
 
     TRACE("pout %p, pdeterminant %p, pm %p\n", pout, pdeterminant, pm);
 
-    det = D3DXMatrixDeterminant(pm);
-    if ( !det ) return NULL;
-    if ( pdeterminant ) *pdeterminant = det;
-    for (i=0; i<4; i++)
-    {
-        for (j=0; j<4; j++)
-        {
-            if (j != i )
-            {
-                a = j;
-                if ( j > i ) a = a-1;
-                vec[a].x = pm->u.m[j][0];
-                vec[a].y = pm->u.m[j][1];
-                vec[a].z = pm->u.m[j][2];
-                vec[a].w = pm->u.m[j][3];
-            }
-        }
-        D3DXVec4Cross(&v, &vec[0], &vec[1], &vec[2]);
-        out.u.m[0][i] = pow(-1.0f, i) * v.x / det;
-        out.u.m[1][i] = pow(-1.0f, i) * v.y / det;
-        out.u.m[2][i] = pow(-1.0f, i) * v.z / det;
-        out.u.m[3][i] = pow(-1.0f, i) * v.w / det;
-    }
+    /* Cofactors of the first row of pm; they are enough to compute the determinant. */
+    v[0] = pm->u.m[1][1] * pm->u.m[2][2] * pm->u.m[3][3] -
+        pm->u.m[1][1] * pm->u.m[2][3] * pm->u.m[3][2] -
+        pm->u.m[2][1] * pm->u.m[1][2] * pm->u.m[3][3] +
+        pm->u.m[2][1] * pm->u.m[1][3] * pm->u.m[3][2] +
+        pm->u.m[3][1] * pm->u.m[1][2] * pm->u.m[2][3] -
+        pm->u.m[3][1] * pm->u.m[1][3] * pm->u.m[2][2];
+
+    v[4] = -pm->u.m[1][0] * pm->u.m[2][2] * pm->u.m[3][3] +
+        pm->u.m[1][0] * pm->u.m[2][3] * pm->u.m[3][2] +
+        pm->u.m[2][0] * pm->u.m[1][2] * pm->u.m[3][3] -
+        pm->u.m[2][0] * pm->u.m[1][3] * pm->u.m[3][2] -
+        pm->u.m[3][0] * pm->u.m[1][2] * pm->u.m[2][3] +
+        pm->u.m[3][0] * pm->u.m[1][3] * pm->u.m[2][2];
+
+    v[8] = pm->u.m[1][0] * pm->u.m[2][1] * pm->u.m[3][3] -
+        pm->u.m[1][0] * pm->u.m[2][3] * pm->u.m[3][1] -
+        pm->u.m[2][0] * pm->u.m[1][1] * pm->u.m[3][3] +
+        pm->u.m[2][0] * pm->u.m[1][3] * pm->u.m[3][1] +
+        pm->u.m[3][0] * pm->u.m[1][1] * pm->u.m[2][3] -
+        pm->u.m[3][0] * pm->u.m[1][3] * pm->u.m[2][1];
+
+    v[12] = -pm->u.m[1][0] * pm->u.m[2][1] * pm->u.m[3][2] +
+        pm->u.m[1][0] * pm->u.m[2][2] * pm->u.m[3][1] +
+        pm->u.m[2][0] * pm->u.m[1][1] * pm->u.m[3][2] -
+        pm->u.m[2][0] * pm->u.m[1][2] * pm->u.m[3][1] -
+        pm->u.m[3][0] * pm->u.m[1][1] * pm->u.m[2][2] +
+        pm->u.m[3][0] * pm->u.m[1][2] * pm->u.m[2][1];
+
+    det = pm->u.m[0][0] * v[0] + pm->u.m[0][1] * v[4] + pm->u.m[0][2] * v[8] +
+        pm->u.m[0][3] * v[12];
+
+    if (det == 0.0f)
+        return NULL;
+    if (pdeterminant)
+        *pdeterminant = det;
+
+    /* Remaining cofactors, only computed once the determinant is known to be non-zero. */
+    v[1] = -pm->u.m[0][1] * pm->u.m[2][2] * pm->u.m[3][3] +
+        pm->u.m[0][1] * pm->u.m[2][3] * pm->u.m[3][2] +
+        pm->u.m[2][1] * pm->u.m[0][2] * pm->u.m[3][3] -
+        pm->u.m[2][1] * pm->u.m[0][3] * pm->u.m[3][2] -
+        pm->u.m[3][1] * pm->u.m[0][2] * pm->u.m[2][3] +
+        pm->u.m[3][1] * pm->u.m[0][3] * pm->u.m[2][2];
+
+    v[5] = pm->u.m[0][0] * pm->u.m[2][2] * pm->u.m[3][3] -
+        pm->u.m[0][0] * pm->u.m[2][3] * pm->u.m[3][2] -
+        pm->u.m[2][0] * pm->u.m[0][2] * pm->u.m[3][3] +
+        pm->u.m[2][0] * pm->u.m[0][3] * pm->u.m[3][2] +
+        pm->u.m[3][0] * pm->u.m[0][2] * pm->u.m[2][3] -
+        pm->u.m[3][0] * pm->u.m[0][3] * pm->u.m[2][2];
+
+    v[9] = -pm->u.m[0][0] * pm->u.m[2][1] * pm->u.m[3][3] +
+        pm->u.m[0][0] * pm->u.m[2][3] * pm->u.m[3][1] +
+        pm->u.m[2][0] * pm->u.m[0][1] * pm->u.m[3][3] -
+        pm->u.m[2][0] * pm->u.m[0][3] * pm->u.m[3][1] -
+        pm->u.m[3][0] * pm->u.m[0][1] * pm->u.m[2][3] +
+        pm->u.m[3][0] * pm->u.m[0][3] * pm->u.m[2][1];
+
+    v[13] = pm->u.m[0][0] * pm->u.m[2][1] * pm->u.m[3][2] -
+        pm->u.m[0][0] * pm->u.m[2][2] * pm->u.m[3][1] -
+        pm->u.m[2][0] * pm->u.m[0][1] * pm->u.m[3][2] +
+        pm->u.m[2][0] * pm->u.m[0][2] * pm->u.m[3][1] +
+        pm->u.m[3][0] * pm->u.m[0][1] * pm->u.m[2][2] -
+        pm->u.m[3][0] * pm->u.m[0][2] * pm->u.m[2][1];
+
+    v[2] = pm->u.m[0][1] * pm->u.m[1][2] * pm->u.m[3][3] -
+        pm->u.m[0][1] * pm->u.m[1][3] * pm->u.m[3][2] -
+        pm->u.m[1][1] * pm->u.m[0][2] * pm->u.m[3][3] +
+        pm->u.m[1][1] * pm->u.m[0][3] * pm->u.m[3][2] +
+        pm->u.m[3][1] * pm->u.m[0][2] * pm->u.m[1][3] -
+        pm->u.m[3][1] * pm->u.m[0][3] * pm->u.m[1][2];
+
+    v[6] = -pm->u.m[0][0] * pm->u.m[1][2] * pm->u.m[3][3] +
+        pm->u.m[0][0] * pm->u.m[1][3] * pm->u.m[3][2] +
+        pm->u.m[1][0] * pm->u.m[0][2] * pm->u.m[3][3] -
+        pm->u.m[1][0] * pm->u.m[0][3] * pm->u.m[3][2] -
+        pm->u.m[3][0] * pm->u.m[0][2] * pm->u.m[1][3] +
+        pm->u.m[3][0] * pm->u.m[0][3] * pm->u.m[1][2];
+
+    v[10] = pm->u.m[0][0] * pm->u.m[1][1] * pm->u.m[3][3] -
+        pm->u.m[0][0] * pm->u.m[1][3] * pm->u.m[3][1] -
+        pm->u.m[1][0] * pm->u.m[0][1] * pm->u.m[3][3] +
+        pm->u.m[1][0] * pm->u.m[0][3] * pm->u.m[3][1] +
+        pm->u.m[3][0] * pm->u.m[0][1] * pm->u.m[1][3] -
+        pm->u.m[3][0] * pm->u.m[0][3] * pm->u.m[1][1];
+
+    v[14] = -pm->u.m[0][0] * pm->u.m[1][1] * pm->u.m[3][2] +
+        pm->u.m[0][0] * pm->u.m[1][2] * pm->u.m[3][1] +
+        pm->u.m[1][0] * pm->u.m[0][1] * pm->u.m[3][2] -
+        pm->u.m[1][0] * pm->u.m[0][2] * pm->u.m[3][1] -
+        pm->u.m[3][0] * pm->u.m[0][1] * pm->u.m[1][2] +
+        pm->u.m[3][0] * pm->u.m[0][2] * pm->u.m[1][1];
+
+    v[3] = -pm->u.m[0][1] * pm->u.m[1][2] * pm->u.m[2][3] +
+        pm->u.m[0][1] * pm->u.m[1][3] * pm->u.m[2][2] +
+        pm->u.m[1][1] * pm->u.m[0][2] * pm->u.m[2][3] -
+        pm->u.m[1][1] * pm->u.m[0][3] * pm->u.m[2][2] -
+        pm->u.m[2][1] * pm->u.m[0][2] * pm->u.m[1][3] +
+        pm->u.m[2][1] * pm->u.m[0][3] * pm->u.m[1][2];
+
+    v[7] = pm->u.m[0][0] * pm->u.m[1][2] * pm->u.m[2][3] -
+        pm->u.m[0][0] * pm->u.m[1][3] * pm->u.m[2][2] -
+        pm->u.m[1][0] * pm->u.m[0][2] * pm->u.m[2][3] +
+        pm->u.m[1][0] * pm->u.m[0][3] * pm->u.m[2][2] +
+        pm->u.m[2][0] * pm->u.m[0][2] * pm->u.m[1][3] -
+        pm->u.m[2][0] * pm->u.m[0][3] * pm->u.m[1][2];
+
+    v[11] = -pm->u.m[0][0] * pm->u.m[1][1] * pm->u.m[2][3] +
+        pm->u.m[0][0] * pm->u.m[1][3] * pm->u.m[2][1] +
+        pm->u.m[1][0] * pm->u.m[0][1] * pm->u.m[2][3] -
+        pm->u.m[1][0] * pm->u.m[0][3] * pm->u.m[2][1] -
+        pm->u.m[2][0] * pm->u.m[0][1] * pm->u.m[1][3] +
+        pm->u.m[2][0] * pm->u.m[0][3] * pm->u.m[1][1];
+
+    v[15] = pm->u.m[0][0] * pm->u.m[1][1] * pm->u.m[2][2] -
+        pm->u.m[0][0] * pm->u.m[1][2] * pm->u.m[2][1] -
+        pm->u.m[1][0] * pm->u.m[0][1] * pm->u.m[2][2] +
+        pm->u.m[1][0] * pm->u.m[0][2] * pm->u.m[2][1] +
+        pm->u.m[2][0] * pm->u.m[0][1] * pm->u.m[1][2] -
+        pm->u.m[2][0] * pm->u.m[0][2] * pm->u.m[1][1];
+
+    /* Every read of pm is done by now, so pout may alias pm. */
+    det = 1.0f / det;
+
+    for (i = 0; i < 4; i++)
+        for (j = 0; j < 4; j++)
+            pout->u.m[i][j] = v[4 * i + j] * det;
 
-    *pout = out;
     return pout;
 }
 
 D3DXMATRIX* WINAPI D3DXMatrixLookAtLH(D3DXMATRIX *pout, const D3DXVECTOR3 *peye, const D3DXVECTOR3 *pat, const D3DXVECTOR3 *pup)
-- 
1.7.10.4