[PATCH vkd3d v2 10/10] vkd3d-shader/hlsl: Lower numeric casts.
Giovanni Mascellani
gmascellani at codeweavers.com
Tue May 10 08:08:38 CDT 2022
Signed-off-by: Giovanni Mascellani <gmascellani at codeweavers.com>
Signed-off-by: Francisco Casas <fcasas at codeweavers.com>
---
Functionality-wise, this should be able to wholly replace lower_broadcasts().
However, it generates less vectorized code, so I don't know what the general
sentiment is: whether to remove lower_broadcasts() immediately, to remove it
only once more vectorized code is generated (or an effective vectorization
pass takes care of it), or to do something else.
---
libs/vkd3d-shader/hlsl.h | 4 +
libs/vkd3d-shader/hlsl.y | 52 +++++-----
libs/vkd3d-shader/hlsl_codegen.c | 97 +++++++++++++++++++
tests/hlsl-duplicate-modifiers.shader_test | 2 +-
tests/hlsl-initializer-matrix.shader_test | 2 +-
...lsl-return-implicit-conversion.shader_test | 10 +-
tests/hlsl-shape.shader_test | 10 +-
tests/matrix-semantics.shader_test | 14 +--
8 files changed, 146 insertions(+), 45 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h
index a142fa48..34c6bfc3 100644
--- a/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d-shader/hlsl.h
@@ -709,6 +709,10 @@ struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsig
const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type);
void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl, bool intrinsic);
+struct hlsl_ir_node *hlsl_add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs,
+ struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc);
+struct hlsl_ir_load *hlsl_add_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_node,
+ struct hlsl_ir_node *offset, struct hlsl_type *data_type, const struct vkd3d_shader_location loc);
bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var);
void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func);
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y
index 7d9c0a2d..ac17f8b1 100644
--- a/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d-shader/hlsl.y
@@ -266,7 +266,7 @@ static bool implicit_compatible_data_types(struct hlsl_type *t1, struct hlsl_typ
return false;
}
-static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs,
+struct hlsl_ir_node *hlsl_add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs,
struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc)
{
struct hlsl_type *src_type = node->data_type;
@@ -508,7 +508,7 @@ static struct hlsl_ir_jump *add_return(struct hlsl_ctx *ctx, struct list *instrs
{
struct hlsl_ir_store *store;
- if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, &loc)))
+ if (!(return_value = hlsl_add_implicit_conversion(ctx, instrs, return_value, return_type, &loc)))
return NULL;
if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value)))
@@ -528,7 +528,7 @@ static struct hlsl_ir_jump *add_return(struct hlsl_ctx *ctx, struct list *instrs
return jump;
}
-static struct hlsl_ir_load *add_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_node,
+struct hlsl_ir_load *hlsl_add_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_node,
struct hlsl_ir_node *offset, struct hlsl_type *data_type, const struct vkd3d_shader_location loc)
{
struct hlsl_ir_node *add = NULL;
@@ -578,7 +578,7 @@ static bool add_record_load(struct hlsl_ctx *ctx, struct list *instrs, struct hl
return false;
list_add_tail(instrs, &c->node.entry);
- return !!add_load(ctx, instrs, record, &c->node, field->type, loc);
+ return !!hlsl_add_load(ctx, instrs, record, &c->node, field->type, loc);
}
static struct hlsl_ir_expr *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs,
@@ -618,7 +618,7 @@ static struct hlsl_ir_node *add_matrix_scalar_load(struct hlsl_ctx *ctx, struct
if (!(add = add_binary_arithmetic_expr(ctx, instrs, HLSL_OP2_ADD, &mul->node, minor, loc)))
return NULL;
- if (!(load = add_load(ctx, instrs, matrix, &add->node, scalar_type, *loc)))
+ if (!(load = hlsl_add_load(ctx, instrs, matrix, &add->node, scalar_type, *loc)))
return NULL;
return &load->node;
@@ -704,7 +704,7 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls
return false;
}
- return !!add_load(ctx, instrs, array, index, data_type, loc);
+ return !!hlsl_add_load(ctx, instrs, array, index, data_type, loc);
}
static struct hlsl_struct_field *get_struct_field(struct list *fields, const char *name)
@@ -1214,7 +1214,7 @@ static struct hlsl_ir_expr *add_unary_logical_expr(struct hlsl_ctx *ctx, struct
bool_type = hlsl_get_numeric_type(ctx, arg->data_type->type, HLSL_TYPE_BOOL,
arg->data_type->dimx, arg->data_type->dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg, bool_type, loc)))
+ if (!(args[0] = hlsl_add_implicit_conversion(ctx, instrs, arg, bool_type, loc)))
return NULL;
return add_expr(ctx, instrs, op, args, bool_type, loc);
@@ -1235,10 +1235,10 @@ static struct hlsl_ir_expr *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str
common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc)))
+ if (!(args[0] = hlsl_add_implicit_conversion(ctx, instrs, arg1, common_type, loc)))
return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc)))
+ if (!(args[1] = hlsl_add_implicit_conversion(ctx, instrs, arg2, common_type, loc)))
return NULL;
return add_expr(ctx, instrs, op, args, common_type, loc);
@@ -1293,10 +1293,10 @@ static struct hlsl_ir_expr *add_binary_comparison_expr(struct hlsl_ctx *ctx, str
common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy);
return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc)))
+ if (!(args[0] = hlsl_add_implicit_conversion(ctx, instrs, arg1, common_type, loc)))
return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc)))
+ if (!(args[1] = hlsl_add_implicit_conversion(ctx, instrs, arg2, common_type, loc)))
return NULL;
return add_expr(ctx, instrs, op, args, return_type, loc);
@@ -1327,10 +1327,10 @@ static struct hlsl_ir_expr *add_binary_logical_expr(struct hlsl_ctx *ctx, struct
common_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc)))
+ if (!(args[0] = hlsl_add_implicit_conversion(ctx, instrs, arg1, common_type, loc)))
return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc)))
+ if (!(args[1] = hlsl_add_implicit_conversion(ctx, instrs, arg2, common_type, loc)))
return NULL;
return add_expr(ctx, instrs, op, args, common_type, loc);
@@ -1370,10 +1370,10 @@ static struct hlsl_ir_expr *add_binary_shift_expr(struct hlsl_ctx *ctx, struct l
return_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy);
integer_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_INT, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, return_type, loc)))
+ if (!(args[0] = hlsl_add_implicit_conversion(ctx, instrs, arg1, return_type, loc)))
return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, integer_type, loc)))
+ if (!(args[1] = hlsl_add_implicit_conversion(ctx, instrs, arg2, integer_type, loc)))
return NULL;
return add_expr(ctx, instrs, op, args, return_type, loc);
@@ -1479,7 +1479,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in
{
writemask = (1 << lhs_type->dimx) - 1;
- if (!(rhs = add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc)))
+ if (!(rhs = hlsl_add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc)))
return NULL;
}
@@ -1598,10 +1598,10 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs,
return;
list_add_tail(instrs, &c->node.entry);
- if (!(load = add_load(ctx, instrs, src, &c->node, src_comp_type, src->loc)))
+ if (!(load = hlsl_add_load(ctx, instrs, src, &c->node, src_comp_type, src->loc)))
return;
- if (!(conv = add_implicit_conversion(ctx, instrs, &load->node, dst_comp_type, &src->loc)))
+ if (!(conv = hlsl_add_implicit_conversion(ctx, instrs, &load->node, dst_comp_type, &src->loc)))
return;
if (!(c = hlsl_new_uint_constant(ctx, dst_reg_offset, &src->loc)))
@@ -1886,7 +1886,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx,
return arg;
type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
- return add_implicit_conversion(ctx, params->instrs, arg, type, loc);
+ return hlsl_add_implicit_conversion(ctx, params->instrs, arg, type, loc);
}
static bool intrinsic_abs(struct hlsl_ctx *ctx,
@@ -1923,10 +1923,10 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx,
cast_type = hlsl_get_vector_type(ctx, base, 3);
- if (!(arg1_cast = add_implicit_conversion(ctx, params->instrs, arg1, cast_type, loc)))
+ if (!(arg1_cast = hlsl_add_implicit_conversion(ctx, params->instrs, arg1, cast_type, loc)))
return false;
- if (!(arg2_cast = add_implicit_conversion(ctx, params->instrs, arg2, cast_type, loc)))
+ if (!(arg2_cast = hlsl_add_implicit_conversion(ctx, params->instrs, arg2, cast_type, loc)))
return false;
if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc)))
@@ -2204,7 +2204,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl
hlsl_fixme(ctx, loc, "Tiled resource status argument.");
/* +1 for the mipmap level */
- if (!(coords = add_implicit_conversion(ctx, instrs, params->args[0],
+ if (!(coords = hlsl_add_implicit_conversion(ctx, instrs, params->args[0],
hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + 1), loc)))
return false;
@@ -2248,13 +2248,13 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl
/* Only HLSL_IR_LOAD can return an object. */
sampler_load = hlsl_ir_load(params->args[0]);
- if (!(coords = add_implicit_conversion(ctx, instrs, params->args[1],
+ if (!(coords = hlsl_add_implicit_conversion(ctx, instrs, params->args[1],
hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc)))
return false;
if (params->args_count == 3)
{
- if (!(offset = add_implicit_conversion(ctx, instrs, params->args[2],
+ if (!(offset = hlsl_add_implicit_conversion(ctx, instrs, params->args[2],
hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim), loc)))
return false;
}
@@ -2329,7 +2329,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl
if (params->args_count == 3 || params->args_count == 4)
{
- if (!(offset = add_implicit_conversion(ctx, instrs, params->args[2],
+ if (!(offset = hlsl_add_implicit_conversion(ctx, instrs, params->args[2],
hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim), loc)))
return false;
}
@@ -2359,7 +2359,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl
/* Only HLSL_IR_LOAD can return an object. */
sampler_load = hlsl_ir_load(params->args[0]);
- if (!(coords = add_implicit_conversion(ctx, instrs, params->args[1],
+ if (!(coords = hlsl_add_implicit_conversion(ctx, instrs, params->args[1],
hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc)))
return false;
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index 5fdcd4dc..6d4b550d 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -273,6 +273,102 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v
return false;
}
+/* Lower a numeric HLSL_OP1_CAST expression that is not scalar-to-scalar into
+ * a sequence of per-component operations.
+ *
+ * For every component of the destination type, a scalar is loaded from the
+ * source node, converted to the destination scalar type, and stored into a
+ * synthetic variable named "<cast-N>". The cast instruction is then replaced
+ * by a load of that variable.
+ *
+ * Returns true if "instr" was replaced. Returns false if "instr" is not an
+ * expression, is not a cast, involves a type whose class is beyond
+ * HLSL_CLASS_LAST_NUMERIC, is a scalar-to-scalar cast, or if one of the
+ * helpers used to build the replacement returns NULL. In the last case,
+ * instructions already inserted before "instr" are left in place.
+ *
+ * "context" is unused. */
+static bool lower_numeric_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
+{
+    struct hlsl_type *src_type, *dst_type = instr->data_type;
+    struct hlsl_ir_node *src, *value;
+    struct vkd3d_string_buffer *name;
+    static unsigned int counter = 0;
+    struct hlsl_ir_load *load;
+    struct hlsl_ir_expr *expr;
+    struct hlsl_ir_var *var;
+    unsigned int dst_idx;
+    bool broadcast;
+
+    if (instr->type != HLSL_IR_EXPR)
+        return false;
+    expr = hlsl_ir_expr(instr);
+    if (expr->op != HLSL_OP1_CAST)
+        return false;
+
+    src = expr->operands[0].node;
+    src_type = src->data_type;
+
+    if (dst_type->type > HLSL_CLASS_LAST_NUMERIC || src_type->type > HLSL_CLASS_LAST_NUMERIC)
+        return false;
+    /* Scalar-to-scalar casts are left alone; they are also what the
+     * per-component conversions below operate on. */
+    if (dst_type->type == HLSL_CLASS_SCALAR && src_type->type == HLSL_CLASS_SCALAR)
+        return false;
+
+    /* A source with a single component is replicated to every destination
+     * component. */
+    broadcast = src_type->dimx == 1 && src_type->dimy == 1;
+    assert(dst_type->dimx * dst_type->dimy <= src_type->dimx * src_type->dimy || broadcast);
+    if (src_type->type == HLSL_CLASS_MATRIX && dst_type->type == HLSL_CLASS_MATRIX && !broadcast)
+    {
+        assert(dst_type->dimx <= src_type->dimx);
+        assert(dst_type->dimy <= src_type->dimy);
+    }
+
+    /* Getting a string buffer may fail; bail out instead of dereferencing
+     * NULL when formatting the variable name. */
+    if (!(name = vkd3d_string_buffer_get(&ctx->string_buffers)))
+        return false;
+    vkd3d_string_buffer_printf(name, "<cast-%u>", counter++);
+    var = hlsl_new_synthetic_var(ctx, name->buffer, dst_type, instr->loc);
+    vkd3d_string_buffer_release(&ctx->string_buffers, name);
+    if (!var)
+        return false;
+
+    for (dst_idx = 0; dst_idx < dst_type->dimx * dst_type->dimy; ++dst_idx)
+    {
+        struct hlsl_type *src_scalar_type, *dst_scalar_type;
+        unsigned int src_idx, src_offset, dst_offset;
+        struct hlsl_ir_store *store;
+        struct hlsl_ir_constant *c;
+
+        if (broadcast)
+        {
+            src_idx = 0;
+        }
+        else
+        {
+            if (src_type->type == HLSL_CLASS_MATRIX && dst_type->type == HLSL_CLASS_MATRIX)
+            {
+                /* Matrix-to-matrix: keep the same (x, y) position, with
+                 * component indices advancing along dimx first in both
+                 * types. */
+                unsigned int x = dst_idx % dst_type->dimx, y = dst_idx / dst_type->dimx;
+
+                src_idx = y * src_type->dimx + x;
+            }
+            else
+            {
+                src_idx = dst_idx;
+            }
+        }
+
+        dst_offset = hlsl_compute_component_offset(ctx, dst_type, dst_idx, &dst_scalar_type);
+        src_offset = hlsl_compute_component_offset(ctx, src_type, src_idx, &src_scalar_type);
+
+        if (!(c = hlsl_new_uint_constant(ctx, src_offset, &src->loc)))
+            return false;
+        list_add_before(&instr->entry, &c->node.entry);
+
+        /* NOTE(review): &instr->entry is passed as the "instrs" list of the
+         * two helpers below; presumably they append at the list tail, which
+         * would place the new nodes right before "instr" -- confirm. */
+        if (!(load = hlsl_add_load(ctx, &instr->entry, src, &c->node, src_scalar_type, src->loc)))
+            return false;
+
+        if (!(value = hlsl_add_implicit_conversion(ctx, &instr->entry, &load->node, dst_scalar_type, &src->loc)))
+            return false;
+
+        if (!(c = hlsl_new_uint_constant(ctx, dst_offset, &src->loc)))
+            return false;
+        list_add_before(&instr->entry, &c->node.entry);
+
+        if (!(store = hlsl_new_store(ctx, var, &c->node, value, 0, src->loc)))
+            return false;
+        list_add_before(&instr->entry, &store->node.entry);
+    }
+
+    /* Read back the fully populated temporary and make it stand in for the
+     * original cast. */
+    if (!(load = hlsl_new_load(ctx, var, NULL, dst_type, instr->loc)))
+        return false;
+    list_add_before(&instr->entry, &load->node.entry);
+    hlsl_replace_node(instr, &load->node);
+
+    return true;
+}
+
+
enum copy_propagation_value_state
{
VALUE_STATE_NOT_WRITTEN = 0,
@@ -1904,6 +2000,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
}
transform_ir(ctx, lower_broadcasts, body, NULL);
+ transform_ir(ctx, lower_numeric_casts, body, NULL);
while (transform_ir(ctx, fold_redundant_casts, body, NULL));
do
{
diff --git a/tests/hlsl-duplicate-modifiers.shader_test b/tests/hlsl-duplicate-modifiers.shader_test
index fcae12da..6491701a 100644
--- a/tests/hlsl-duplicate-modifiers.shader_test
+++ b/tests/hlsl-duplicate-modifiers.shader_test
@@ -7,5 +7,5 @@ float4 main() : sv_target
}
[test]
-todo draw quad
+draw quad
probe all rgba (0.1, 0.2, 0.3, 0.4)
diff --git a/tests/hlsl-initializer-matrix.shader_test b/tests/hlsl-initializer-matrix.shader_test
index ea9de9c0..7e12b0a0 100644
--- a/tests/hlsl-initializer-matrix.shader_test
+++ b/tests/hlsl-initializer-matrix.shader_test
@@ -55,7 +55,7 @@ float4 main() : SV_TARGET
}
[test]
-todo draw quad
+draw quad
probe all rgba (21, 22, 31, 32)
diff --git a/tests/hlsl-return-implicit-conversion.shader_test b/tests/hlsl-return-implicit-conversion.shader_test
index bf99d9cb..4fe8e7eb 100644
--- a/tests/hlsl-return-implicit-conversion.shader_test
+++ b/tests/hlsl-return-implicit-conversion.shader_test
@@ -5,7 +5,7 @@ float4 main() : sv_target
}
[test]
-todo draw quad
+draw quad
probe all rgba (0.4, 0.3, 0.2, 0.1)
[pixel shader]
@@ -15,7 +15,7 @@ float4 main() : sv_target
}
[test]
-todo draw quad
+draw quad
probe all rgba (0.4, 0.3, 0.2, 0.1)
[pixel shader]
@@ -25,7 +25,7 @@ float4 main() : sv_target
}
[test]
-todo draw quad
+draw quad
probe all rgba (0.4, 0.3, 0.2, 0.1)
[pixel shader]
@@ -35,8 +35,8 @@ float4x1 main() : sv_target
}
[test]
-todo draw quad
-probe all rgba (0.4, 0.3, 0.2, 0.1)
+draw quad
+todo probe all rgba (0.4, 0.3, 0.2, 0.1)
[pixel shader]
float3 func()
diff --git a/tests/hlsl-shape.shader_test b/tests/hlsl-shape.shader_test
index 57d59534..65cc322c 100644
--- a/tests/hlsl-shape.shader_test
+++ b/tests/hlsl-shape.shader_test
@@ -211,7 +211,7 @@ float4 main() : sv_target
}
[test]
-todo draw quad
+draw quad
probe all rgba (2.0, 4.0, 6.0, 8.0)
[pixel shader]
@@ -235,7 +235,7 @@ float4 main() : sv_target
}
[test]
-todo draw quad
+draw quad
probe all rgba (2.0, 4.0, 6.0, 8.0)
[pixel shader]
@@ -260,7 +260,7 @@ float4 main() : sv_target
}
[test]
-todo draw quad
+draw quad
probe all rgba (2.0, 4.0, 6.0, 8.0)
[pixel shader]
@@ -309,7 +309,7 @@ float4 main() : sv_target
}
[test]
-todo draw quad
+draw quad
probe all rgba (2.0, 4.0, 0.0, 0.0)
[pixel shader]
@@ -321,7 +321,7 @@ float4 main() : sv_target
}
[test]
-todo draw quad
+draw quad
probe all rgba (2.0, 4.0, 0.0, 0.0)
[pixel shader]
diff --git a/tests/matrix-semantics.shader_test b/tests/matrix-semantics.shader_test
index 7106eb86..acda4a16 100644
--- a/tests/matrix-semantics.shader_test
+++ b/tests/matrix-semantics.shader_test
@@ -5,8 +5,8 @@ float4x1 main() : sv_target
}
[test]
-todo draw quad
-probe all rgba (1.0, 2.0, 3.0, 4.0)
+draw quad
+todo probe all rgba (1.0, 2.0, 3.0, 4.0)
[pixel shader]
row_major float1x4 main() : sv_target
@@ -15,7 +15,7 @@ row_major float1x4 main() : sv_target
}
[test]
-todo draw quad
+draw quad
probe all rgba (1.0, 2.0, 3.0, 4.0)
[require]
@@ -28,7 +28,7 @@ row_major float4x1 main() : sv_target
}
[test]
-todo draw quad
+draw quad
probe all r 1.0
[pixel shader]
@@ -38,7 +38,7 @@ float1x4 main() : sv_target
}
[test]
-todo draw quad
+draw quad
probe all r 1.0
[pixel shader]
@@ -49,7 +49,7 @@ void main(out row_major float1x4 x : sv_target0, out float1x4 y : sv_target1)
}
[test]
-todo draw quad
+draw quad
probe all rgba (1.0, 2.0, 3.0, 4.0)
[pixel shader fail todo]
@@ -67,5 +67,5 @@ void main(out float1x4 x : sv_target0, out float1x4 y : sv_target4)
}
[test]
-todo draw quad
+draw quad
probe all r 1.0
--
2.36.0
More information about the wine-devel
mailing list