[PATCH 1/8] wined3d: Improve wined3d_popcount() implementation.
Matteo Bruni
mbruni at codeweavers.com
Tue Jan 26 15:51:28 CST 2016
Signed-off-by: Matteo Bruni <mbruni at codeweavers.com>
---
configure.ac | 9 +++++++++
dlls/wined3d/arb_program_shader.c | 4 ++--
dlls/wined3d/glsl_shader.c | 4 ++--
dlls/wined3d/shader.c | 4 ++--
dlls/wined3d/utils.c | 11 -----------
dlls/wined3d/wined3d_private.h | 12 +++++++++++-
6 files changed, 26 insertions(+), 18 deletions(-)
diff --git a/configure.ac b/configure.ac
index 853277f..e434fbc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2551,6 +2551,15 @@ then
AC_DEFINE(HAVE___BUILTIN_CLZ, 1, [Define to 1 if you have the `__builtin_clz' built-in function.])
fi
+dnl Check for __builtin_popcount
+AC_CACHE_CHECK([for __builtin_popcount], ac_cv_have___builtin_popcount,
+ AC_LINK_IFELSE([AC_LANG_PROGRAM(,[[return __builtin_popcount(1)]])],
+ [ac_cv_have___builtin_popcount="yes"], [ac_cv_have___builtin_popcount="no"]))
+if test "$ac_cv_have___builtin_popcount" = "yes"
+then
+ AC_DEFINE(HAVE___BUILTIN_POPCOUNT, 1, [Define to 1 if you have the `__builtin_popcount' built-in function.])
+fi
+
dnl *** check for the need to define platform-specific symbols
case $host_cpu in
diff --git a/dlls/wined3d/arb_program_shader.c b/dlls/wined3d/arb_program_shader.c
index 8347802..9cb8816 100644
--- a/dlls/wined3d/arb_program_shader.c
+++ b/dlls/wined3d/arb_program_shader.c
@@ -798,7 +798,7 @@ static void shader_generate_arb_declarations(const struct wined3d_shader *shader
DWORD highest_constf = 0, clip_limit;
max_constantsF -= reserved_vs_const(shader_data, reg_maps, gl_info);
- max_constantsF -= count_bits(reg_maps->integer_constants);
+ max_constantsF -= wined3d_popcount(reg_maps->integer_constants);
max_constantsF -= gl_info->reserved_arb_constants;
for (i = 0; i < shader->limits->constant_float; ++i)
@@ -819,7 +819,7 @@ static void shader_generate_arb_declarations(const struct wined3d_shader *shader
else
{
unsigned int mask = ctx->cur_vs_args->clip.boolclip.clipplane_mask;
- clip_limit = min(count_bits(mask), 4);
+ clip_limit = min(wined3d_popcount(mask), 4);
}
*num_clipplanes = min(clip_limit, max_constantsF - highest_constf - 1);
max_constantsF -= *num_clipplanes;
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c
index 6234699..dfcb877 100644
--- a/dlls/wined3d/glsl_shader.c
+++ b/dlls/wined3d/glsl_shader.c
@@ -1685,12 +1685,12 @@ static void shader_generate_glsl_declarations(const struct wined3d_context *cont
max_constantsF = gl_info->limits.glsl_vs_float_constants - 3;
if (vs_args->clip_enabled)
max_constantsF -= gl_info->limits.clipplanes;
- max_constantsF -= count_bits(reg_maps->integer_constants);
+ max_constantsF -= wined3d_popcount(reg_maps->integer_constants);
/* Strictly speaking a bool only uses one scalar, but the nvidia(Linux) compiler doesn't pack them properly,
* so each scalar requires a full vec4. We could work around this by packing the booleans ourselves, but
* for now take this into account when calculating the number of available constants
*/
- max_constantsF -= count_bits(reg_maps->boolean_constants);
+ max_constantsF -= wined3d_popcount(reg_maps->boolean_constants);
/* Set by driver quirks in directx.c */
max_constantsF -= gl_info->reserved_glsl_constants;
diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c
index f2c3e74..ea79c41 100644
--- a/dlls/wined3d/shader.c
+++ b/dlls/wined3d/shader.c
@@ -1137,7 +1137,7 @@ static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const st
}
else if (!input_signature->elements && reg_maps->input_registers)
{
- unsigned int count = count_bits(reg_maps->input_registers);
+ unsigned int count = wined3d_popcount(reg_maps->input_registers);
struct wined3d_shader_signature_element *e;
unsigned int i;
@@ -1164,7 +1164,7 @@ static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const st
}
else if (reg_maps->output_registers)
{
- unsigned int count = count_bits(reg_maps->output_registers);
+ unsigned int count = wined3d_popcount(reg_maps->output_registers);
struct wined3d_shader_signature_element *e;
if (!(output_signature->elements = HeapAlloc(GetProcessHeap(), 0, sizeof(*output_signature->elements) * count)))
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c
index 5844aa6..6eb4508 100644
--- a/dlls/wined3d/utils.c
+++ b/dlls/wined3d/utils.c
@@ -4049,17 +4049,6 @@ void get_fog_start_end(const struct wined3d_context *context, const struct wined
}
}
-/* This small helper function is used to convert a bitmask into the number of masked bits */
-unsigned int count_bits(unsigned int mask)
-{
- unsigned int count;
- for (count = 0; mask; ++count)
- {
- mask &= mask - 1;
- }
- return count;
-}
-
/* Note: It's the caller's responsibility to ensure values can be expressed
* in the requested format. UNORM formats for example can only express values
* in the range 0.0f -> 1.0f. */
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index ed28507..3a83094 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -257,6 +257,17 @@ static inline float float_24_to_32(DWORD in)
}
}
+static inline unsigned int wined3d_popcount(unsigned int x) /* Returns the number of set bits in x. */
+{
+#ifdef HAVE___BUILTIN_POPCOUNT /* Defined by configure when the __builtin_popcount link test succeeds. */
+ return __builtin_popcount(x);
+#else /* Portable branch-free fallback: parallel (SWAR) bit count. */
+ x -= x >> 1 & 0x55555555; /* Each 2-bit field now holds the bit count of that pair. */
+ x = (x & 0x33333333) + (x >> 2 & 0x33333333); /* Each 4-bit field now holds the count of its nibble. */
+ return ((x + (x >> 4)) & 0x0f0f0f0f) * 0x01010101 >> 24; /* Sum byte counts into bits 24-31; assumes 32-bit int. */
+#endif
+}
+
#define ORM_BACKBUFFER 0
#define ORM_FBO 1
@@ -2932,7 +2943,6 @@ GLenum gl_primitive_type_from_d3d(enum wined3d_primitive_type primitive_type) DE
/* Math utils */
void multiply_matrix(struct wined3d_matrix *dest, const struct wined3d_matrix *src1,
const struct wined3d_matrix *src2) DECLSPEC_HIDDEN;
-unsigned int count_bits(unsigned int mask) DECLSPEC_HIDDEN;
void wined3d_release_dc(HWND window, HDC dc) DECLSPEC_HIDDEN;
--
2.4.10
More information about the wine-patches mailing list