Nozomi Kodama : d3dx9: Save multiplications for better performance.

Alexandre Julliard julliard at winehq.org
Thu Mar 7 13:57:15 CST 2013


Module: wine
Branch: master
Commit: 479195ea41e6360182b6f61f6a3262caa1e24255
URL:    http://source.winehq.org/git/wine.git/?a=commit;h=479195ea41e6360182b6f61f6a3262caa1e24255

Author: Nozomi Kodama <nozomi.kodama at yahoo.com>
Date:   Mon Mar  4 00:56:53 2013 -1000

d3dx9: Save multiplications for better performance.

---

 dlls/d3dx9_36/math.c |   80 ++++++++++++++++++++++++++++----------------------
 1 files changed, 45 insertions(+), 35 deletions(-)

diff --git a/dlls/d3dx9_36/math.c b/dlls/d3dx9_36/math.c
index 5603c0b..8adabb3 100644
--- a/dlls/d3dx9_36/math.c
+++ b/dlls/d3dx9_36/math.c
@@ -2378,60 +2378,70 @@ HRESULT WINAPI D3DXSHEvalConeLight(UINT order, const D3DXVECTOR3 *dir, FLOAT rad
 
 FLOAT* WINAPI D3DXSHEvalDirection(FLOAT *out, UINT order, const D3DXVECTOR3 *dir)
 {
+    const FLOAT dirxx = dir->x * dir->x;
+    const FLOAT dirxy = dir->x * dir->y;
+    const FLOAT dirxz = dir->x * dir->z;
+    const FLOAT diryy = dir->y * dir->y;
+    const FLOAT diryz = dir->y * dir->z;
+    const FLOAT dirzz = dir->z * dir->z;
+    const FLOAT dirxxxx = dirxx * dirxx;
+    const FLOAT diryyyy = diryy * diryy;
+    const FLOAT dirzzzz = dirzz * dirzz;
+    const FLOAT dirxyxy = dirxy * dirxy;
 
     TRACE("out %p, order %u, dir %p\n", out, order, dir);
 
-    if ( (order < D3DXSH_MINORDER) || (order > D3DXSH_MAXORDER) )
+    if ((order < D3DXSH_MINORDER) || (order > D3DXSH_MAXORDER))
         return out;
 
     out[0] = 0.5f / sqrt(D3DX_PI);
     out[1] = -0.5f / sqrt(D3DX_PI / 3.0f) * dir->y;
     out[2] = 0.5f / sqrt(D3DX_PI / 3.0f) * dir->z;
     out[3] = -0.5f / sqrt(D3DX_PI / 3.0f) * dir->x;
-    if ( order == 2 )
+    if (order == 2)
         return out;
 
-    out[4] = 0.5f / sqrt(D3DX_PI / 15.0f) * dir->x * dir->y;
-    out[5] = -0.5f / sqrt(D3DX_PI / 15.0f) * dir->y * dir->z;
-    out[6] = 0.25f / sqrt(D3DX_PI / 5.0f) * ( 3.0f * dir->z * dir->z - 1.0f );
-    out[7] = -0.5f / sqrt(D3DX_PI / 15.0f) * dir->x * dir->z;
-    out[8] = 0.25f / sqrt(D3DX_PI / 15.0f) * ( dir->x * dir->x - dir->y * dir->y );
-    if ( order == 3 )
+    out[4] = 0.5f / sqrt(D3DX_PI / 15.0f) * dirxy;
+    out[5] = -0.5f / sqrt(D3DX_PI / 15.0f) * diryz;
+    out[6] = 0.25f / sqrt(D3DX_PI / 5.0f) * (3.0f * dirzz - 1.0f);
+    out[7] = -0.5f / sqrt(D3DX_PI / 15.0f) * dirxz;
+    out[8] = 0.25f / sqrt(D3DX_PI / 15.0f) * (dirxx - diryy);
+    if (order == 3)
         return out;
 
-    out[9] = -sqrt(70.0f / D3DX_PI) / 8.0f * dir->y * (3.0f * dir->x * dir->x - dir->y * dir->y );
-    out[10] = sqrt(105.0f / D3DX_PI) / 2.0f * dir->x * dir->y * dir->z;
-    out[11] = -sqrt(42.0 / D3DX_PI) / 8.0f * dir->y * ( -1.0f + 5.0f * dir->z * dir->z );
-    out[12] = sqrt(7.0f / D3DX_PI) / 4.0f * dir->z * ( 5.0f * dir->z * dir->z - 3.0f );
-    out[13] = sqrt(42.0 / D3DX_PI) / 8.0f * dir->x * ( 1.0f - 5.0f * dir->z * dir->z );
-    out[14] = sqrt(105.0f / D3DX_PI) / 4.0f * dir->z * ( dir->x * dir->x - dir->y * dir->y );
-    out[15] = -sqrt(70.0f / D3DX_PI) / 8.0f * dir->x * ( dir->x * dir->x - 3.0f * dir->y * dir->y );
-    if ( order == 4 )
+    out[9] = -sqrt(70.0f / D3DX_PI) / 8.0f * dir->y * (3.0f * dirxx - diryy);
+    out[10] = sqrt(105.0f / D3DX_PI) / 2.0f * dirxy * dir->z;
+    out[11] = -sqrt(42.0 / D3DX_PI) / 8.0f * dir->y * (-1.0f + 5.0f * dirzz);
+    out[12] = sqrt(7.0f / D3DX_PI) / 4.0f * dir->z * (5.0f * dirzz - 3.0f);
+    out[13] = sqrt(42.0 / D3DX_PI) / 8.0f * dir->x * (1.0f - 5.0f * dirzz);
+    out[14] = sqrt(105.0f / D3DX_PI) / 4.0f * dir->z * (dirxx - diryy);
+    out[15] = -sqrt(70.0f / D3DX_PI) / 8.0f * dir->x * (dirxx - 3.0f * diryy);
+    if (order == 4)
         return out;
 
-    out[16] = 0.75f * sqrt(35.0f / D3DX_PI) * dir->x * dir->y * (dir->x * dir->x - dir->y * dir->y );
+    out[16] = 0.75f * sqrt(35.0f / D3DX_PI) * dirxy * (dirxx - diryy);
     out[17] = 3.0f * dir->z * out[9];
-    out[18] = 0.75f * sqrt(5.0f / D3DX_PI) * dir->x * dir->y * ( 7.0f * dir->z * dir->z - 1.0f );
-    out[19] = 0.375f * sqrt(10.0f / D3DX_PI) * dir->y * dir->z * ( 3.0f - 7.0f * dir->z * dir->z );
-    out[20] = 3.0f / ( 16.0f * sqrt(D3DX_PI) ) * ( 35.0f * dir->z * dir->z * dir->z * dir->z - 30.f * dir->z * dir->z + 3.0f );
-    out[21] = 0.375f * sqrt(10.0f / D3DX_PI) * dir->x * dir->z * ( 3.0f - 7.0f * dir->z * dir->z );
-    out[22] = 0.375f * sqrt(5.0f / D3DX_PI) * ( dir->x * dir->x - dir->y * dir->y ) * ( 7.0f * dir->z * dir->z - 1.0f);
+    out[18] = 0.75f * sqrt(5.0f / D3DX_PI) * dirxy * (7.0f * dirzz - 1.0f);
+    out[19] = 0.375f * sqrt(10.0f / D3DX_PI) * diryz * (3.0f - 7.0f * dirzz);
+    out[20] = 3.0f / (16.0f * sqrt(D3DX_PI)) * (35.0f * dirzzzz - 30.f * dirzz + 3.0f);
+    out[21] = 0.375f * sqrt(10.0f / D3DX_PI) * dirxz * (3.0f - 7.0f * dirzz);
+    out[22] = 0.375f * sqrt(5.0f / D3DX_PI) * (dirxx - diryy) * (7.0f * dirzz - 1.0f);
     out[23] = 3.0 * dir->z * out[15];
-    out[24] = 3.0f / 16.0f * sqrt(35.0f / D3DX_PI) * ( dir->x * dir->x * dir->x * dir->x- 6.0f * dir->x * dir->x * dir->y * dir->y + dir->y * dir->y * dir->y * dir->y );
-    if ( order == 5 )
+    out[24] = 3.0f / 16.0f * sqrt(35.0f / D3DX_PI) * (dirxxxx - 6.0f * dirxyxy + diryyyy);
+    if (order == 5)
         return out;
 
-    out[25] = -3.0f/ 32.0f * sqrt(154.0f / D3DX_PI) * dir->y * ( 5.0f * dir->x * dir->x * dir->x * dir->x - 10.0f * dir->x * dir->x * dir->y * dir->y + dir->y * dir->y * dir->y * dir->y );
-    out[26] = 0.75f * sqrt(385.0f / D3DX_PI) * dir->x * dir->y * dir->z * ( dir->x * dir->x - dir->y * dir->y );
-    out[27] = sqrt(770.0f / D3DX_PI) / 32.0f * dir->y * ( 3.0f * dir->x * dir->x - dir->y * dir->y ) * ( 1.0f - 9.0f * dir->z * dir->z );
-    out[28] = sqrt(1155.0f / D3DX_PI) / 4.0f * dir->x * dir->y * dir->z * ( 3.0f * dir->z * dir->z - 1.0f);
-    out[29] = sqrt(165.0f / D3DX_PI) / 16.0f * dir->y * ( 14.0f * dir->z * dir->z - 21.0f * dir->z * dir->z * dir->z * dir->z - 1.0f );
-    out[30] = sqrt(11.0f / D3DX_PI) / 16.0f * dir->z * ( 63.0f * dir->z * dir->z * dir->z * dir->z - 70.0f * dir->z * dir->z + 15.0f );
-    out[31] = sqrt(165.0f / D3DX_PI) / 16.0f * dir->x * ( 14.0f * dir->z * dir->z - 21.0f * dir->z * dir->z * dir->z * dir->z - 1.0f );
-    out[32] = sqrt(1155.0f / D3DX_PI) / 8.0f * dir->z * ( dir->x * dir->x - dir->y * dir->y ) * ( 3.0f * dir->z * dir->z - 1.0f );
-    out[33] = sqrt(770.0f / D3DX_PI) / 32.0f * dir->x * ( dir->x * dir->x - 3.0f * dir->y * dir->y ) * ( 1.0f - 9.0f * dir->z * dir->z );
-    out[34] = 3.0f / 16.0f * sqrt(385.0f / D3DX_PI) * dir->z * ( dir->x * dir->x * dir->x * dir->x - 6.0 * dir->x * dir->x * dir->y * dir->y + dir->y * dir->y * dir->y * dir->y );
-    out[35] = -3.0f/ 32.0f * sqrt(154.0f / D3DX_PI) * dir->x * ( dir->x * dir->x * dir->x * dir->x - 10.0f * dir->x * dir->x * dir->y * dir->y + 5.0f * dir->y * dir->y * dir->y * dir->y );
+    out[25] = -3.0f/ 32.0f * sqrt(154.0f / D3DX_PI) * dir->y * (5.0f * dirxxxx - 10.0f * dirxyxy + diryyyy);
+    out[26] = 0.75f * sqrt(385.0f / D3DX_PI) * dirxy * dir->z * (dirxx - diryy);
+    out[27] = sqrt(770.0f / D3DX_PI) / 32.0f * dir->y * (3.0f * dirxx - diryy) * (1.0f - 9.0f * dirzz);
+    out[28] = sqrt(1155.0f / D3DX_PI) / 4.0f * dirxy * dir->z * (3.0f * dirzz - 1.0f);
+    out[29] = sqrt(165.0f / D3DX_PI) / 16.0f * dir->y * (14.0f * dirzz - 21.0f * dirzzzz - 1.0f);
+    out[30] = sqrt(11.0f / D3DX_PI) / 16.0f * dir->z * (63.0f * dirzzzz - 70.0f * dirzz + 15.0f);
+    out[31] = sqrt(165.0f / D3DX_PI) / 16.0f * dir->x * (14.0f * dirzz - 21.0f * dirzzzz - 1.0f);
+    out[32] = sqrt(1155.0f / D3DX_PI) / 8.0f * dir->z * (dirxx - diryy) * (3.0f * dirzz - 1.0f);
+    out[33] = sqrt(770.0f / D3DX_PI) / 32.0f * dir->x * (dirxx - 3.0f * diryy) * (1.0f - 9.0f * dirzz);
+    out[34] = 3.0f / 16.0f * sqrt(385.0f / D3DX_PI) * dir->z * (dirxxxx - 6.0 * dirxyxy + diryyyy);
+    out[35] = -3.0f/ 32.0f * sqrt(154.0f / D3DX_PI) * dir->x * (dirxxxx - 10.0f * dirxyxy + 5.0f * diryyyy);
 
     return out;
 }




More information about the wine-cvs mailing list