Matteo Bruni : wined3d: Improve wined3d_popcount() implementation.

Alexandre Julliard julliard at wine.codeweavers.com
Wed Jan 27 11:27:20 CST 2016


Module: wine
Branch: master
Commit: 961215b8b87d0dda52f4c8a469f173aed8178a34
URL:    http://source.winehq.org/git/wine.git/?a=commit;h=961215b8b87d0dda52f4c8a469f173aed8178a34

Author: Matteo Bruni <mbruni at codeweavers.com>
Date:   Tue Jan 26 22:51:28 2016 +0100

wined3d: Improve wined3d_popcount() implementation.

Signed-off-by: Matteo Bruni <mbruni at codeweavers.com>
Signed-off-by: Henri Verbeet <hverbeet at codeweavers.com>
Signed-off-by: Alexandre Julliard <julliard at winehq.org>

---

 configure                         | 33 +++++++++++++++++++++++++++++++++
 configure.ac                      |  9 +++++++++
 dlls/wined3d/arb_program_shader.c |  4 ++--
 dlls/wined3d/glsl_shader.c        |  4 ++--
 dlls/wined3d/shader.c             |  4 ++--
 dlls/wined3d/utils.c              | 11 -----------
 dlls/wined3d/wined3d_private.h    | 12 +++++++++++-
 include/config.h.in               |  3 +++
 8 files changed, 62 insertions(+), 18 deletions(-)

diff --git a/configure b/configure
index 6edecf6..c26963d 100755
--- a/configure
+++ b/configure
@@ -16883,6 +16883,39 @@ $as_echo "#define HAVE___BUILTIN_CLZ 1" >>confdefs.h
 
 fi
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5
+$as_echo_n "checking for __builtin_popcount... " >&6; }
+if ${ac_cv_have___builtin_popcount+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+return __builtin_popcount(1)
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_have___builtin_popcount="yes"
+else
+  ac_cv_have___builtin_popcount="no"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have___builtin_popcount" >&5
+$as_echo "$ac_cv_have___builtin_popcount" >&6; }
+if test "$ac_cv_have___builtin_popcount" = "yes"
+then
+
+$as_echo "#define HAVE___BUILTIN_POPCOUNT 1" >>confdefs.h
+
+fi
+
 
 case $host_cpu in
   *i[3456789]86*) { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we need to define __i386__" >&5
diff --git a/configure.ac b/configure.ac
index 853277f..e434fbc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2551,6 +2551,15 @@ then
     AC_DEFINE(HAVE___BUILTIN_CLZ, 1, [Define to 1 if you have the `__builtin_clz' built-in function.])
 fi
 
+dnl Check for __builtin_popcount
+AC_CACHE_CHECK([for __builtin_popcount], ac_cv_have___builtin_popcount,
+               AC_LINK_IFELSE([AC_LANG_PROGRAM(,[[return __builtin_popcount(1)]])],
+               [ac_cv_have___builtin_popcount="yes"], [ac_cv_have___builtin_popcount="no"]))
+if test "$ac_cv_have___builtin_popcount" = "yes"
+then
+    AC_DEFINE(HAVE___BUILTIN_POPCOUNT, 1, [Define to 1 if you have the `__builtin_popcount' built-in function.])
+fi
+
 dnl *** check for the need to define platform-specific symbols
 
 case $host_cpu in
diff --git a/dlls/wined3d/arb_program_shader.c b/dlls/wined3d/arb_program_shader.c
index 5dfa92f..a076cec 100644
--- a/dlls/wined3d/arb_program_shader.c
+++ b/dlls/wined3d/arb_program_shader.c
@@ -798,7 +798,7 @@ static void shader_generate_arb_declarations(const struct wined3d_shader *shader
             DWORD highest_constf = 0, clip_limit;
 
             max_constantsF -= reserved_vs_const(shader_data, reg_maps, gl_info);
-            max_constantsF -= count_bits(reg_maps->integer_constants);
+            max_constantsF -= wined3d_popcount(reg_maps->integer_constants);
             max_constantsF -= gl_info->reserved_arb_constants;
 
             for (i = 0; i < shader->limits->constant_float; ++i)
@@ -819,7 +819,7 @@ static void shader_generate_arb_declarations(const struct wined3d_shader *shader
             else
             {
                 unsigned int mask = ctx->cur_vs_args->clip.boolclip.clipplane_mask;
-                clip_limit = min(count_bits(mask), 4);
+                clip_limit = min(wined3d_popcount(mask), 4);
             }
             *num_clipplanes = min(clip_limit, max_constantsF - highest_constf - 1);
             max_constantsF -= *num_clipplanes;
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c
index ec3c2cb..1eb7e6d 100644
--- a/dlls/wined3d/glsl_shader.c
+++ b/dlls/wined3d/glsl_shader.c
@@ -1685,12 +1685,12 @@ static void shader_generate_glsl_declarations(const struct wined3d_context *cont
                 max_constantsF = gl_info->limits.glsl_vs_float_constants - 3;
                 if (vs_args->clip_enabled)
                     max_constantsF -= gl_info->limits.clipplanes;
-                max_constantsF -= count_bits(reg_maps->integer_constants);
+                max_constantsF -= wined3d_popcount(reg_maps->integer_constants);
                 /* Strictly speaking a bool only uses one scalar, but the nvidia(Linux) compiler doesn't pack them properly,
                  * so each scalar requires a full vec4. We could work around this by packing the booleans ourselves, but
                  * for now take this into account when calculating the number of available constants
                  */
-                max_constantsF -= count_bits(reg_maps->boolean_constants);
+                max_constantsF -= wined3d_popcount(reg_maps->boolean_constants);
                 /* Set by driver quirks in directx.c */
                 max_constantsF -= gl_info->reserved_glsl_constants;
 
diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c
index 5730cef..140665e 100644
--- a/dlls/wined3d/shader.c
+++ b/dlls/wined3d/shader.c
@@ -1138,7 +1138,7 @@ static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const st
     }
     else if (!input_signature->elements && reg_maps->input_registers)
     {
-        unsigned int count = count_bits(reg_maps->input_registers);
+        unsigned int count = wined3d_popcount(reg_maps->input_registers);
         struct wined3d_shader_signature_element *e;
         unsigned int i;
 
@@ -1165,7 +1165,7 @@ static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const st
     }
     else if (reg_maps->output_registers)
     {
-        unsigned int count = count_bits(reg_maps->output_registers);
+        unsigned int count = wined3d_popcount(reg_maps->output_registers);
         struct wined3d_shader_signature_element *e;
 
         if (!(output_signature->elements = HeapAlloc(GetProcessHeap(), 0, sizeof(*output_signature->elements) * count)))
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c
index 5eb1696..28a3216 100644
--- a/dlls/wined3d/utils.c
+++ b/dlls/wined3d/utils.c
@@ -4048,17 +4048,6 @@ void get_fog_start_end(const struct wined3d_context *context, const struct wined
     }
 }
 
-/* This small helper function is used to convert a bitmask into the number of masked bits */
-unsigned int count_bits(unsigned int mask)
-{
-    unsigned int count;
-    for (count = 0; mask; ++count)
-    {
-        mask &= mask - 1;
-    }
-    return count;
-}
-
 /* Note: It's the caller's responsibility to ensure values can be expressed
  * in the requested format. UNORM formats for example can only express values
  * in the range 0.0f -> 1.0f. */
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 5558c70..b23aedb 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -257,6 +257,17 @@ static inline float float_24_to_32(DWORD in)
     }
 }
 
+static inline unsigned int wined3d_popcount(unsigned int x)
+{
+#ifdef HAVE___BUILTIN_POPCOUNT
+    return __builtin_popcount(x);
+#else
+    x -= x >> 1 & 0x55555555;
+    x = (x & 0x33333333) + (x >> 2 & 0x33333333);
+    return ((x + (x >> 4)) & 0x0f0f0f0f) * 0x01010101 >> 24;
+#endif
+}
+
 #define ORM_BACKBUFFER  0
 #define ORM_FBO         1
 
@@ -2930,7 +2941,6 @@ GLenum gl_primitive_type_from_d3d(enum wined3d_primitive_type primitive_type) DE
 /* Math utils */
 void multiply_matrix(struct wined3d_matrix *dest, const struct wined3d_matrix *src1,
         const struct wined3d_matrix *src2) DECLSPEC_HIDDEN;
-unsigned int count_bits(unsigned int mask) DECLSPEC_HIDDEN;
 
 void wined3d_release_dc(HWND window, HDC dc) DECLSPEC_HIDDEN;
 
diff --git a/include/config.h.in b/include/config.h.in
index 805374e..86318c5 100644
--- a/include/config.h.in
+++ b/include/config.h.in
@@ -1311,6 +1311,9 @@
 /* Define to 1 if you have the `__builtin_clz' built-in function. */
 #undef HAVE___BUILTIN_CLZ
 
+/* Define to 1 if you have the `__builtin_popcount' built-in function. */
+#undef HAVE___BUILTIN_POPCOUNT
+
 /* Define to 1 if you have the `__res_getservers' function. */
 #undef HAVE___RES_GETSERVERS
 




More information about the wine-cvs mailing list