[PATCH vkd3d 3/5] vkd3d-shader: Move SM1 code generation to a separate file.

Zebediah Figura zfigura at codeweavers.com
Mon Aug 9 21:56:17 CDT 2021


Signed-off-by: Zebediah Figura <zfigura at codeweavers.com>
---
 Makefile.am                      |   1 +
 libs/vkd3d-shader/hlsl.c         |  76 +++
 libs/vkd3d-shader/hlsl.h         |  13 +
 libs/vkd3d-shader/hlsl_codegen.c | 889 +------------------------------
 libs/vkd3d-shader/hlsl_sm1.c     | 828 ++++++++++++++++++++++++++++
 5 files changed, 922 insertions(+), 885 deletions(-)
 create mode 100644 libs/vkd3d-shader/hlsl_sm1.c

diff --git a/Makefile.am b/Makefile.am
index 07d5af97..9624485b 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -172,6 +172,7 @@ libvkd3d_shader_la_SOURCES = \
 	libs/vkd3d-shader/hlsl.c \
 	libs/vkd3d-shader/hlsl.h \
 	libs/vkd3d-shader/hlsl_codegen.c \
+	libs/vkd3d-shader/hlsl_sm1.c \
 	libs/vkd3d-shader/preproc.h \
 	libs/vkd3d-shader/sm4.h \
 	libs/vkd3d-shader/spirv.c \
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c
index bc593f82..d0be0750 100644
--- a/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d-shader/hlsl.c
@@ -1,4 +1,6 @@
 /*
+ * HLSL utility functions
+ *
  * Copyright 2012 Matteo Bruni for CodeWeavers
  * Copyright 2019-2020 Zebediah Figura for CodeWeavers
  *
@@ -1449,6 +1451,80 @@ void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function
     rb_put(&ctx->functions, func->name, &func->entry);
 }
 
+unsigned int hlsl_map_swizzle(unsigned int swizzle, unsigned int writemask)
+{
+    unsigned int i, ret = 0; /* ret: "swizzle" with its selectors moved to the components set in "writemask". */
+
+    /* Leave replicate swizzles alone; some instructions need them. */
+    if (swizzle == HLSL_SWIZZLE(X, X, X, X)
+            || swizzle == HLSL_SWIZZLE(Y, Y, Y, Y)
+            || swizzle == HLSL_SWIZZLE(Z, Z, Z, Z)
+            || swizzle == HLSL_SWIZZLE(W, W, W, W))
+        return swizzle;
+
+    for (i = 0; i < 4; ++i) /* Walk the four destination components; each selector is 2 bits wide. */
+    {
+        if (writemask & (1 << i))
+        {
+            ret |= (swizzle & 3) << (i * 2); /* The next unconsumed selector goes to component i. */
+            swizzle >>= 2; /* Selectors are consumed from the low bits upwards. */
+        }
+    }
+    return ret; /* Components not in "writemask" keep selector 0. */
+}
+
+unsigned int hlsl_swizzle_from_writemask(unsigned int writemask)
+{
+    static const unsigned int swizzles[16] =
+    { /* Indexed by writemask: set components in ascending order, padded with X; single bits replicate. */
+        0, /* 0x0: empty writemask */
+        HLSL_SWIZZLE(X, X, X, X), /* 0x1: x */
+        HLSL_SWIZZLE(Y, Y, Y, Y), /* 0x2: y */
+        HLSL_SWIZZLE(X, Y, X, X), /* 0x3: xy */
+        HLSL_SWIZZLE(Z, Z, Z, Z), /* 0x4: z */
+        HLSL_SWIZZLE(X, Z, X, X), /* 0x5: xz */
+        HLSL_SWIZZLE(Y, Z, X, X), /* 0x6: yz */
+        HLSL_SWIZZLE(X, Y, Z, X), /* 0x7: xyz */
+        HLSL_SWIZZLE(W, W, W, W), /* 0x8: w */
+        HLSL_SWIZZLE(X, W, X, X), /* 0x9: xw */
+        HLSL_SWIZZLE(Y, W, X, X), /* 0xa: yw */
+        HLSL_SWIZZLE(X, Y, W, X), /* 0xb: xyw */
+        HLSL_SWIZZLE(Z, W, X, X), /* 0xc: zw */
+        HLSL_SWIZZLE(X, Z, W, X), /* 0xd: xzw */
+        HLSL_SWIZZLE(Y, Z, W, X), /* 0xe: yzw */
+        HLSL_SWIZZLE(X, Y, Z, W), /* 0xf: xyzw */
+    };
+
+    return swizzles[writemask & 0xf]; /* Bits above the low four are ignored. */
+}
+
+unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second)
+{
+    unsigned int ret = 0, i, j = 0; /* j counts the set bits of "first" visited so far. */
+
+    for (i = 0; i < 4; ++i)
+    {
+        if (first & (1 << i))
+        {
+            if (second & (1 << j++)) /* Bit j of "second" refers to the j-th set bit of "first". */
+                ret |= (1 << i);
+        }
+    }
+
+    return ret; /* Only bits that are set in "first" can be set here. */
+}
+
+unsigned int hlsl_combine_swizzles(unsigned int first, unsigned int second, unsigned int dim)
+{
+    unsigned int ret = 0, i; /* Components at index >= dim are left as selector 0. */
+    for (i = 0; i < dim; ++i)
+    {
+        unsigned int s = (second >> (i * 2)) & 3; /* Selector that "second" holds for component i... */
+        ret |= ((first >> (s * 2)) & 3) << (i * 2); /* ...resolved through "first": ret[i] = first[second[i]]. */
+    }
+    return ret;
+}
+
 static const struct hlsl_profile_info *get_target_info(const char *target)
 {
     unsigned int i;
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h
index aede3a55..e566ac7d 100644
--- a/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d-shader/hlsl.h
@@ -22,6 +22,7 @@
 
 #include "vkd3d_shader_private.h"
 #include "rbtree.h"
+#include "vkd3d_d3dx9shader.h"
 
 /* The general IR structure is inspired by Mesa GLSL hir, even though the code
  * ends up being quite different in practice. Anyway, here comes the relevant
@@ -679,6 +680,18 @@ unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int
 bool hlsl_type_is_void(const struct hlsl_type *type);
 bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2);
 
+unsigned int hlsl_combine_swizzles(unsigned int first, unsigned int second, unsigned int dim);
+unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second);
+unsigned int hlsl_map_swizzle(unsigned int swizzle, unsigned int writemask);
+unsigned int hlsl_swizzle_from_writemask(unsigned int writemask);
+
+struct hlsl_reg hlsl_reg_from_deref(const struct hlsl_deref *deref, const struct hlsl_type *type);
+
+bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic,
+        bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg);
+bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx);
+int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out);
+
 int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl);
 
 #endif
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index bef9ea84..549636ca 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -20,7 +20,6 @@
 
 #include "hlsl.h"
 #include <stdio.h>
-#include "vkd3d_d3dx9shader.h"
 
 /* Split uniforms into two variables representing the constant and temp
  * registers, and copy the former to the latter, so that writes to uniforms
@@ -970,114 +969,6 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio
     allocate_temp_registers_recurse(ctx, entry_func->body, &liveness);
 }
 
-static bool sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output,
-        D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg)
-{
-    unsigned int i;
-
-    static const struct
-    {
-        const char *semantic;
-        bool output;
-        enum vkd3d_shader_type shader_type;
-        unsigned int major_version;
-        D3DSHADER_PARAM_REGISTER_TYPE type;
-        DWORD offset;
-    }
-    register_table[] =
-    {
-        {"color",       true,  VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT},
-        {"depth",       true,  VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT},
-        {"sv_depth",    true,  VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT},
-        {"sv_target",   true,  VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT},
-        {"color",       false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT},
-        {"texcoord",    false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE},
-
-        {"color",       true,  VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT},
-        {"depth",       true,  VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT},
-        {"sv_depth",    true,  VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT},
-        {"sv_target",   true,  VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT},
-        {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE,    D3DSMO_POSITION},
-        {"vface",       false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE,    D3DSMO_FACE},
-        {"vpos",        false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE,    D3DSMO_POSITION},
-
-        {"color",       true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT},
-        {"fog",         true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT,     D3DSRO_FOG},
-        {"position",    true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT,     D3DSRO_POSITION},
-        {"psize",       true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT,     D3DSRO_POINT_SIZE},
-        {"sv_position", true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT,     D3DSRO_POSITION},
-        {"texcoord",    true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT},
-
-        {"color",       true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT},
-        {"fog",         true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT,     D3DSRO_FOG},
-        {"position",    true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT,     D3DSRO_POSITION},
-        {"psize",       true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT,     D3DSRO_POINT_SIZE},
-        {"sv_position", true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT,     D3DSRO_POSITION},
-        {"texcoord",    true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT},
-    };
-
-    for (i = 0; i < ARRAY_SIZE(register_table); ++i)
-    {
-        if (!ascii_strcasecmp(semantic->name, register_table[i].semantic)
-                && output == register_table[i].output
-                && ctx->profile->type == register_table[i].shader_type
-                && ctx->profile->major_version == register_table[i].major_version)
-        {
-            *type = register_table[i].type;
-            if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT)
-                *reg = register_table[i].offset;
-            else
-                *reg = semantic->index;
-            return true;
-        }
-    }
-
-    return false;
-}
-
-static bool sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx)
-{
-    static const struct
-    {
-        const char *name;
-        D3DDECLUSAGE usage;
-    }
-    semantics[] =
-    {
-        {"binormal",        D3DDECLUSAGE_BINORMAL},
-        {"blendindices",    D3DDECLUSAGE_BLENDINDICES},
-        {"blendweight",     D3DDECLUSAGE_BLENDWEIGHT},
-        {"color",           D3DDECLUSAGE_COLOR},
-        {"depth",           D3DDECLUSAGE_DEPTH},
-        {"fog",             D3DDECLUSAGE_FOG},
-        {"normal",          D3DDECLUSAGE_NORMAL},
-        {"position",        D3DDECLUSAGE_POSITION},
-        {"positiont",       D3DDECLUSAGE_POSITIONT},
-        {"psize",           D3DDECLUSAGE_PSIZE},
-        {"sample",          D3DDECLUSAGE_SAMPLE},
-        {"sv_depth",        D3DDECLUSAGE_DEPTH},
-        {"sv_position",     D3DDECLUSAGE_POSITION},
-        {"sv_target",       D3DDECLUSAGE_COLOR},
-        {"tangent",         D3DDECLUSAGE_TANGENT},
-        {"tessfactor",      D3DDECLUSAGE_TESSFACTOR},
-        {"texcoord",        D3DDECLUSAGE_TEXCOORD},
-    };
-
-    unsigned int i;
-
-    for (i = 0; i < ARRAY_SIZE(semantics); ++i)
-    {
-        if (!ascii_strcasecmp(semantic->name, semantics[i].name))
-        {
-            *usage = semantics[i].usage;
-            *usage_idx = semantic->index;
-            return true;
-        }
-    }
-
-    return false;
-}
-
 static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output)
 {
     assert(var->semantic.name);
@@ -1088,14 +979,14 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var
         uint32_t reg, usage_idx;
         D3DDECLUSAGE usage;
 
-        if (!sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx))
+        if (!hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx))
         {
             hlsl_error(ctx, var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC,
                     "Invalid semantic '%s'.", var->semantic.name);
             return;
         }
 
-        if (sm1_register_from_semantic(ctx, &var->semantic, output, &type, &reg))
+        if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &type, &reg))
         {
             TRACE("%s %s semantic %s[%u] matches predefined register %#x[%u].\n",
                     ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL ? "Pixel" : "Vertex", output ? "output" : "input",
@@ -1213,86 +1104,12 @@ static void allocate_buffers(struct hlsl_ctx *ctx)
     }
 }
 
-static unsigned int map_swizzle(unsigned int swizzle, unsigned int writemask)
-{
-    unsigned int i, ret = 0;
-
-    /* Leave replicate swizzles alone; some instructions need them. */
-    if (swizzle == HLSL_SWIZZLE(X, X, X, X)
-            || swizzle == HLSL_SWIZZLE(Y, Y, Y, Y)
-            || swizzle == HLSL_SWIZZLE(Z, Z, Z, Z)
-            || swizzle == HLSL_SWIZZLE(W, W, W, W))
-        return swizzle;
-
-    for (i = 0; i < 4; ++i)
-    {
-        if (writemask & (1 << i))
-        {
-            ret |= (swizzle & 3) << (i * 2);
-            swizzle >>= 2;
-        }
-    }
-    return ret;
-}
-
-static unsigned int swizzle_from_writemask(unsigned int writemask)
-{
-    static const unsigned int swizzles[16] =
-    {
-        0,
-        HLSL_SWIZZLE(X, X, X, X),
-        HLSL_SWIZZLE(Y, Y, Y, Y),
-        HLSL_SWIZZLE(X, Y, X, X),
-        HLSL_SWIZZLE(Z, Z, Z, Z),
-        HLSL_SWIZZLE(X, Z, X, X),
-        HLSL_SWIZZLE(Y, Z, X, X),
-        HLSL_SWIZZLE(X, Y, Z, X),
-        HLSL_SWIZZLE(W, W, W, W),
-        HLSL_SWIZZLE(X, W, X, X),
-        HLSL_SWIZZLE(Y, W, X, X),
-        HLSL_SWIZZLE(X, Y, W, X),
-        HLSL_SWIZZLE(Z, W, X, X),
-        HLSL_SWIZZLE(X, Z, W, X),
-        HLSL_SWIZZLE(Y, Z, W, X),
-        HLSL_SWIZZLE(X, Y, Z, W),
-    };
-
-    return swizzles[writemask & 0xf];
-}
-
-static unsigned int combine_writemasks(unsigned int first, unsigned int second)
-{
-    unsigned int ret = 0, i, j = 0;
-
-    for (i = 0; i < 4; ++i)
-    {
-        if (first & (1 << i))
-        {
-            if (second & (1 << j++))
-                ret |= (1 << i);
-        }
-    }
-
-    return ret;
-}
-
-static unsigned int combine_swizzles(unsigned int first, unsigned int second, unsigned int dim)
-{
-    unsigned int ret = 0, i;
-    for (i = 0; i < dim; ++i)
-    {
-        unsigned int s = (second >> (i * 2)) & 3;
-        ret |= ((first >> (s * 2)) & 3) << (i * 2);
-    }
-    return ret;
-}
-
 static bool type_is_single_reg(const struct hlsl_type *type)
 {
     return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_VECTOR;
 }
 
-static struct hlsl_reg hlsl_reg_from_deref(const struct hlsl_deref *deref, const struct hlsl_type *type)
+struct hlsl_reg hlsl_reg_from_deref(const struct hlsl_deref *deref, const struct hlsl_type *type)
 {
     struct hlsl_ir_node *offset_node = deref->offset.node;
     const struct hlsl_ir_var *var = deref->var;
@@ -1331,704 +1148,6 @@ static struct hlsl_reg hlsl_reg_from_deref(const struct hlsl_deref *deref, const
     return ret;
 }
 
-static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor)
-{
-    if (type == VKD3D_SHADER_TYPE_VERTEX)
-        return D3DVS_VERSION(major, minor);
-    else
-        return D3DPS_VERSION(major, minor);
-}
-
-static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type)
-{
-    switch (type->type)
-    {
-        case HLSL_CLASS_ARRAY:
-            return sm1_class(type->e.array.type);
-        case HLSL_CLASS_MATRIX:
-            assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
-            if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)
-                return D3DXPC_MATRIX_COLUMNS;
-            else
-                return D3DXPC_MATRIX_ROWS;
-        case HLSL_CLASS_OBJECT:
-            return D3DXPC_OBJECT;
-        case HLSL_CLASS_SCALAR:
-            return D3DXPC_SCALAR;
-        case HLSL_CLASS_STRUCT:
-            return D3DXPC_STRUCT;
-        case HLSL_CLASS_VECTOR:
-            return D3DXPC_VECTOR;
-        default:
-            ERR("Invalid class %#x.\n", type->type);
-            assert(0);
-            return 0;
-    }
-}
-
-static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type)
-{
-    switch (type->base_type)
-    {
-        case HLSL_TYPE_BOOL:
-            return D3DXPT_BOOL;
-        case HLSL_TYPE_FLOAT:
-        case HLSL_TYPE_HALF:
-            return D3DXPT_FLOAT;
-        case HLSL_TYPE_INT:
-        case HLSL_TYPE_UINT:
-            return D3DXPT_INT;
-        case HLSL_TYPE_PIXELSHADER:
-            return D3DXPT_PIXELSHADER;
-        case HLSL_TYPE_SAMPLER:
-            switch (type->sampler_dim)
-            {
-                case HLSL_SAMPLER_DIM_1D:
-                    return D3DXPT_SAMPLER1D;
-                case HLSL_SAMPLER_DIM_2D:
-                    return D3DXPT_SAMPLER2D;
-                case HLSL_SAMPLER_DIM_3D:
-                    return D3DXPT_SAMPLER3D;
-                case HLSL_SAMPLER_DIM_CUBE:
-                    return D3DXPT_SAMPLERCUBE;
-                case HLSL_SAMPLER_DIM_GENERIC:
-                    return D3DXPT_SAMPLER;
-                default:
-                    ERR("Invalid dimension %#x.\n", type->sampler_dim);
-            }
-            break;
-        case HLSL_TYPE_STRING:
-            return D3DXPT_STRING;
-        case HLSL_TYPE_TEXTURE:
-            switch (type->sampler_dim)
-            {
-                case HLSL_SAMPLER_DIM_1D:
-                    return D3DXPT_TEXTURE1D;
-                case HLSL_SAMPLER_DIM_2D:
-                    return D3DXPT_TEXTURE2D;
-                case HLSL_SAMPLER_DIM_3D:
-                    return D3DXPT_TEXTURE3D;
-                case HLSL_SAMPLER_DIM_CUBE:
-                    return D3DXPT_TEXTURECUBE;
-                case HLSL_SAMPLER_DIM_GENERIC:
-                    return D3DXPT_TEXTURE;
-                default:
-                    ERR("Invalid dimension %#x.\n", type->sampler_dim);
-            }
-            break;
-        case HLSL_TYPE_VERTEXSHADER:
-            return D3DXPT_VERTEXSHADER;
-        case HLSL_TYPE_VOID:
-            return D3DXPT_VOID;
-        default:
-            assert(0);
-    }
-    assert(0);
-    return 0;
-}
-
-static const struct hlsl_type *get_array_type(const struct hlsl_type *type)
-{
-    if (type->type == HLSL_CLASS_ARRAY)
-        return get_array_type(type->e.array.type);
-    return type;
-}
-
-static unsigned int get_array_size(const struct hlsl_type *type)
-{
-    if (type->type == HLSL_CLASS_ARRAY)
-        return get_array_size(type->e.array.type) * type->e.array.elements_count;
-    return 1;
-}
-
-static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start)
-{
-    const struct hlsl_type *array_type = get_array_type(type);
-    unsigned int array_size = get_array_size(type);
-    struct hlsl_struct_field *field;
-    unsigned int field_count = 0;
-    size_t fields_offset = 0;
-
-    if (type->bytecode_offset)
-        return;
-
-    if (array_type->type == HLSL_CLASS_STRUCT)
-    {
-        LIST_FOR_EACH_ENTRY(field, array_type->e.elements, struct hlsl_struct_field, entry)
-        {
-            field->name_bytecode_offset = put_string(buffer, field->name);
-            write_sm1_type(buffer, field->type, ctab_start);
-        }
-
-        fields_offset = bytecode_get_size(buffer) - ctab_start;
-
-        LIST_FOR_EACH_ENTRY(field, array_type->e.elements, struct hlsl_struct_field, entry)
-        {
-            put_u32(buffer, field->name_bytecode_offset - ctab_start);
-            put_u32(buffer, field->type->bytecode_offset - ctab_start);
-            ++field_count;
-        }
-    }
-
-    type->bytecode_offset = put_u32(buffer, sm1_class(type) | (sm1_base_type(type) << 16));
-    put_u32(buffer, type->dimy | (type->dimx << 16));
-    put_u32(buffer, array_size | (field_count << 16));
-    put_u32(buffer, fields_offset);
-}
-
-static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort)
-{
-    struct hlsl_ir_var *var;
-
-    list_remove(&to_sort->extern_entry);
-
-    LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry)
-    {
-        if (strcmp(to_sort->name, var->name) < 0)
-        {
-            list_add_before(&var->extern_entry, &to_sort->extern_entry);
-            return;
-        }
-    }
-
-    list_add_tail(sorted, &to_sort->extern_entry);
-}
-
-static void sm1_sort_externs(struct hlsl_ctx *ctx)
-{
-    struct list sorted = LIST_INIT(sorted);
-    struct hlsl_ir_var *var, *next;
-
-    LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
-        sm1_sort_extern(&sorted, var);
-    list_move_tail(&ctx->extern_vars, &sorted);
-}
-
-static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
-        struct hlsl_ir_function_decl *entry_func)
-{
-    size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset;
-    unsigned int uniform_count = 0;
-    struct hlsl_ir_var *var;
-
-    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
-    {
-        if (!var->semantic.name && var->reg.allocated)
-        {
-            ++uniform_count;
-
-            if (var->is_param && var->is_uniform)
-            {
-                struct vkd3d_string_buffer *name;
-
-                if (!(name = hlsl_get_string_buffer(ctx)))
-                {
-                    buffer->status = VKD3D_ERROR_OUT_OF_MEMORY;
-                    return;
-                }
-                vkd3d_string_buffer_printf(name, "$%s", var->name);
-                vkd3d_free((char *)var->name);
-                var->name = hlsl_strdup(ctx, name->buffer);
-                hlsl_release_string_buffer(ctx, name);
-            }
-        }
-    }
-
-    sm1_sort_externs(ctx);
-
-    size_offset = put_u32(buffer, 0);
-    ctab_offset = put_u32(buffer, MAKEFOURCC('C','T','A','B'));
-
-    ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE));
-    creator_offset = put_u32(buffer, 0);
-    put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version));
-    put_u32(buffer, uniform_count);
-    put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */
-    put_u32(buffer, 0); /* FIXME: flags */
-    put_u32(buffer, 0); /* FIXME: target string */
-
-    vars_start = bytecode_get_size(buffer);
-
-    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
-    {
-        if (!var->semantic.name && var->reg.allocated)
-        {
-            put_u32(buffer, 0); /* name */
-            put_u32(buffer, D3DXRS_FLOAT4 | (var->reg.id << 16));
-            put_u32(buffer, var->data_type->reg_size / 4);
-            put_u32(buffer, 0); /* type */
-            put_u32(buffer, 0); /* FIXME: default value */
-        }
-    }
-
-    uniform_count = 0;
-
-    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
-    {
-        if (!var->semantic.name && var->reg.allocated)
-        {
-            size_t var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t));
-            size_t name_offset;
-
-            name_offset = put_string(buffer, var->name);
-            set_u32(buffer, var_offset, name_offset - ctab_start);
-
-            write_sm1_type(buffer, var->data_type, ctab_start);
-            set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start);
-            ++uniform_count;
-        }
-    }
-
-    offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL));
-    set_u32(buffer, creator_offset, offset - ctab_start);
-
-    ctab_end = bytecode_get_size(buffer);
-    set_u32(buffer, size_offset, D3DSIO_COMMENT | (((ctab_end - ctab_offset) / sizeof(uint32_t)) << 16));
-}
-
-static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type)
-{
-    return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK)
-            | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2);
-}
-
-struct sm1_instruction
-{
-    D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
-
-    struct sm1_dst_register
-    {
-        D3DSHADER_PARAM_REGISTER_TYPE type;
-        D3DSHADER_PARAM_DSTMOD_TYPE mod;
-        unsigned int writemask;
-        uint32_t reg;
-    } dst;
-
-    struct sm1_src_register
-    {
-        D3DSHADER_PARAM_REGISTER_TYPE type;
-        D3DSHADER_PARAM_SRCMOD_TYPE mod;
-        unsigned int swizzle;
-        uint32_t reg;
-    } srcs[2];
-    unsigned int src_count;
-
-    unsigned int has_dst;
-};
-
-static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg)
-{
-    assert(reg->writemask);
-    put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg);
-}
-
-static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer,
-        const struct sm1_src_register *reg, unsigned int dst_writemask)
-{
-    unsigned int swizzle = map_swizzle(reg->swizzle, dst_writemask);
-
-    put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (swizzle << 16) | reg->reg);
-}
-
-static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
-        const struct sm1_instruction *instr)
-{
-    uint32_t token = instr->opcode;
-    unsigned int i;
-
-    if (ctx->profile->major_version > 1)
-        token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT;
-    put_u32(buffer, token);
-
-    if (instr->has_dst)
-        write_sm1_dst_register(buffer, &instr->dst);
-
-    for (i = 0; i < instr->src_count; ++i)
-        write_sm1_src_register(buffer, &instr->srcs[i], instr->dst.writemask);
-};
-
-static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
-        D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst,
-        const struct hlsl_reg *src1, const struct hlsl_reg *src2)
-{
-    const struct sm1_instruction instr =
-    {
-        .opcode = opcode,
-
-        .dst.type = D3DSPR_TEMP,
-        .dst.writemask = dst->writemask,
-        .dst.reg = dst->id,
-        .has_dst = 1,
-
-        .srcs[0].type = D3DSPR_TEMP,
-        .srcs[0].swizzle = swizzle_from_writemask(src1->writemask),
-        .srcs[0].reg = src1->id,
-        .srcs[1].type = D3DSPR_TEMP,
-        .srcs[1].swizzle = swizzle_from_writemask(src2->writemask),
-        .srcs[1].reg = src2->id,
-        .src_count = 2,
-    };
-    write_sm1_instruction(ctx, buffer, &instr);
-}
-
-static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
-        D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst,
-        const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod)
-{
-    const struct sm1_instruction instr =
-    {
-        .opcode = opcode,
-
-        .dst.type = D3DSPR_TEMP,
-        .dst.writemask = dst->writemask,
-        .dst.reg = dst->id,
-        .has_dst = 1,
-
-        .srcs[0].type = D3DSPR_TEMP,
-        .srcs[0].swizzle = swizzle_from_writemask(src->writemask),
-        .srcs[0].reg = src->id,
-        .srcs[0].mod = src_mod,
-        .src_count = 1,
-    };
-    write_sm1_instruction(ctx, buffer, &instr);
-}
-
-static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer)
-{
-    unsigned int i, x;
-
-    for (i = 0; i < ctx->constant_defs.count; ++i)
-    {
-        uint32_t token = D3DSIO_DEF;
-        const struct sm1_dst_register reg =
-        {
-            .type = D3DSPR_CONST,
-            .writemask = VKD3DSP_WRITEMASK_ALL,
-            .reg = i,
-        };
-
-        if (ctx->profile->major_version > 1)
-            token |= 5 << D3DSI_INSTLENGTH_SHIFT;
-        put_u32(buffer, token);
-
-        write_sm1_dst_register(buffer, &reg);
-        for (x = 0; x < 4; ++x)
-            put_f32(buffer, ctx->constant_defs.values[i].f[x]);
-    }
-}
-
-static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
-        const struct hlsl_ir_var *var, bool output)
-{
-    struct sm1_dst_register reg = {0};
-    uint32_t token, usage_idx;
-    D3DDECLUSAGE usage;
-    bool ret;
-
-    if (sm1_register_from_semantic(ctx, &var->semantic, output, &reg.type, &reg.reg))
-    {
-        usage = 0;
-        usage_idx = 0;
-    }
-    else
-    {
-        ret = sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx);
-        assert(ret);
-        reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT;
-        reg.reg = var->reg.id;
-    }
-
-    token = D3DSIO_DCL;
-    if (ctx->profile->major_version > 1)
-        token |= 2 << D3DSI_INSTLENGTH_SHIFT;
-    put_u32(buffer, token);
-
-    token = (1u << 31);
-    token |= usage << D3DSP_DCL_USAGE_SHIFT;
-    token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT;
-    put_u32(buffer, token);
-
-    reg.writemask = (1 << var->data_type->dimx) - 1;
-    write_sm1_dst_register(buffer, &reg);
-}
-
-static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer)
-{
-    bool write_in = false, write_out = false;
-    struct hlsl_ir_var *var;
-
-    if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
-        write_in = true;
-    else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3)
-        write_in = write_out = true;
-    else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3)
-        write_in = true;
-
-    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
-    {
-        if (write_in && var->is_input_semantic)
-            write_sm1_semantic_dcl(ctx, buffer, var, false);
-        if (write_out && var->is_output_semantic)
-            write_sm1_semantic_dcl(ctx, buffer, var, true);
-    }
-}
-
-static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
-        const struct hlsl_ir_node *instr)
-{
-    const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr);
-    struct sm1_instruction sm1_instr =
-    {
-        .opcode = D3DSIO_MOV,
-
-        .dst.type = D3DSPR_TEMP,
-        .dst.reg = instr->reg.id,
-        .dst.writemask = instr->reg.writemask,
-        .has_dst = 1,
-
-        .srcs[0].type = D3DSPR_CONST,
-        .srcs[0].reg = constant->reg.id,
-        .srcs[0].swizzle = swizzle_from_writemask(constant->reg.writemask),
-        .src_count = 1,
-    };
-
-    assert(instr->reg.allocated);
-    assert(constant->reg.allocated);
-    write_sm1_instruction(ctx, buffer, &sm1_instr);
-}
-
-static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr)
-{
-    struct hlsl_ir_expr *expr = hlsl_ir_expr(instr);
-    struct hlsl_ir_node *arg1 = expr->operands[0].node;
-    struct hlsl_ir_node *arg2 = expr->operands[1].node;
-    unsigned int i;
-
-    assert(instr->reg.allocated);
-
-    if (instr->data_type->base_type != HLSL_TYPE_FLOAT)
-    {
-        FIXME("Non-float operations need to be lowered.\n");
-        return;
-    }
-
-    switch (expr->op)
-    {
-        case HLSL_IR_BINOP_ADD:
-            write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg);
-            break;
-
-        case HLSL_IR_BINOP_MUL:
-            write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg);
-            break;
-
-        case HLSL_IR_BINOP_SUB:
-            write_sm1_binary_op(ctx, buffer, D3DSIO_SUB, &instr->reg, &arg1->reg, &arg2->reg);
-            break;
-
-        case HLSL_IR_UNOP_NEG:
-            write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG);
-            break;
-
-        case HLSL_IR_UNOP_RCP:
-            for (i = 0; i < instr->data_type->dimx; ++i)
-            {
-                struct hlsl_reg src = arg1->reg, dst = instr->reg;
-
-                src.writemask = combine_writemasks(src.writemask, 1u << i);
-                dst.writemask = combine_writemasks(dst.writemask, 1u << i);
-                write_sm1_unary_op(ctx, buffer, D3DSIO_RCP, &dst, &src, 0);
-            }
-            break;
-
-        default:
-            FIXME("Unhandled op %u.\n", expr->op);
-            break;
-    }
-}
-
-static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr)
-{
-    const struct hlsl_ir_load *load = hlsl_ir_load(instr);
-    const struct hlsl_reg reg = hlsl_reg_from_deref(&load->src, instr->data_type);
-    struct sm1_instruction sm1_instr =
-    {
-        .opcode = D3DSIO_MOV,
-
-        .dst.type = D3DSPR_TEMP,
-        .dst.reg = instr->reg.id,
-        .dst.writemask = instr->reg.writemask,
-        .has_dst = 1,
-
-        .srcs[0].type = D3DSPR_TEMP,
-        .srcs[0].reg = reg.id,
-        .srcs[0].swizzle = swizzle_from_writemask(reg.writemask),
-        .src_count = 1,
-    };
-
-    assert(instr->reg.allocated);
-
-    if (load->src.var->is_uniform)
-    {
-        assert(reg.allocated);
-        sm1_instr.srcs[0].type = D3DSPR_CONST;
-    }
-    else if (load->src.var->is_input_semantic)
-    {
-        if (!sm1_register_from_semantic(ctx, &load->src.var->semantic,
-                false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg))
-        {
-            assert(reg.allocated);
-            sm1_instr.srcs[0].type = D3DSPR_INPUT;
-            sm1_instr.srcs[0].reg = reg.id;
-        }
-        else
-            sm1_instr.srcs[0].swizzle = swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1);
-    }
-
-    write_sm1_instruction(ctx, buffer, &sm1_instr);
-}
-
-static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
-        const struct hlsl_ir_node *instr)
-{
-    const struct hlsl_ir_store *store = hlsl_ir_store(instr);
-    const struct hlsl_ir_node *rhs = store->rhs.node;
-    const struct hlsl_reg reg = hlsl_reg_from_deref(&store->lhs, rhs->data_type);
-    struct sm1_instruction sm1_instr =
-    {
-        .opcode = D3DSIO_MOV,
-
-        .dst.type = D3DSPR_TEMP,
-        .dst.reg = reg.id,
-        .dst.writemask = combine_writemasks(reg.writemask, store->writemask),
-        .has_dst = 1,
-
-        .srcs[0].type = D3DSPR_TEMP,
-        .srcs[0].reg = rhs->reg.id,
-        .srcs[0].swizzle = swizzle_from_writemask(rhs->reg.writemask),
-        .src_count = 1,
-    };
-
-    if (store->lhs.var->data_type->type == HLSL_CLASS_MATRIX)
-    {
-        FIXME("Matrix writemasks need to be lowered.\n");
-        return;
-    }
-
-    if (store->lhs.var->is_output_semantic)
-    {
-        if (!sm1_register_from_semantic(ctx, &store->lhs.var->semantic, true, &sm1_instr.dst.type, &sm1_instr.dst.reg))
-        {
-            assert(reg.allocated);
-            sm1_instr.dst.type = D3DSPR_OUTPUT;
-            sm1_instr.dst.reg = reg.id;
-        }
-        else
-            sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1;
-    }
-    else
-        assert(reg.allocated);
-
-    write_sm1_instruction(ctx, buffer, &sm1_instr);
-}
-
-static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
-        const struct hlsl_ir_node *instr)
-{
-    const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr);
-    const struct hlsl_ir_node *val = swizzle->val.node;
-    struct sm1_instruction sm1_instr =
-    {
-        .opcode = D3DSIO_MOV,
-
-        .dst.type = D3DSPR_TEMP,
-        .dst.reg = instr->reg.id,
-        .dst.writemask = instr->reg.writemask,
-        .has_dst = 1,
-
-        .srcs[0].type = D3DSPR_TEMP,
-        .srcs[0].reg = val->reg.id,
-        .srcs[0].swizzle = combine_swizzles(swizzle_from_writemask(val->reg.writemask),
-                swizzle->swizzle, instr->data_type->dimx),
-        .src_count = 1,
-    };
-
-    assert(instr->reg.allocated);
-    assert(val->reg.allocated);
-    write_sm1_instruction(ctx, buffer, &sm1_instr);
-}
-
-static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
-        const struct hlsl_ir_function_decl *entry_func)
-{
-    const struct hlsl_ir_node *instr;
-
-    LIST_FOR_EACH_ENTRY(instr, entry_func->body, struct hlsl_ir_node, entry)
-    {
-        if (instr->data_type)
-        {
-            if (instr->data_type->type == HLSL_CLASS_MATRIX)
-            {
-                FIXME("Matrix operations need to be lowered.\n");
-                break;
-            }
-
-            assert(instr->data_type->type == HLSL_CLASS_SCALAR || instr->data_type->type == HLSL_CLASS_VECTOR);
-        }
-
-        switch (instr->type)
-        {
-            case HLSL_IR_CONSTANT:
-                write_sm1_constant(ctx, buffer, instr);
-                break;
-
-            case HLSL_IR_EXPR:
-                write_sm1_expr(ctx, buffer, instr);
-                break;
-
-            case HLSL_IR_LOAD:
-                write_sm1_load(ctx, buffer, instr);
-                break;
-
-            case HLSL_IR_STORE:
-                write_sm1_store(ctx, buffer, instr);
-                break;
-
-            case HLSL_IR_SWIZZLE:
-                write_sm1_swizzle(ctx, buffer, instr);
-                break;
-
-            default:
-                FIXME("Unhandled instruction type %s.\n", hlsl_node_type_to_string(instr->type));
-        }
-    }
-}
-
-static int write_sm1_shader(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
-        struct vkd3d_shader_code *out)
-{
-    struct vkd3d_bytecode_buffer buffer = {0};
-    int ret;
-
-    put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version));
-
-    write_sm1_uniforms(ctx, &buffer, entry_func);
-
-    write_sm1_constant_defs(ctx, &buffer);
-    write_sm1_semantic_dcls(ctx, &buffer);
-    write_sm1_instructions(ctx, &buffer, entry_func);
-
-    put_u32(&buffer, D3DSIO_END);
-
-    if (!(ret = buffer.status))
-    {
-        out->code = buffer.data;
-        out->size = buffer.size;
-    }
-    return ret;
-}
-
 int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out)
 {
     struct hlsl_ir_var *var;
@@ -2104,7 +1223,7 @@ int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun
         return ctx->result;
 
     if (ctx->profile->major_version < 4)
-        return write_sm1_shader(ctx, entry_func, out);
+        return hlsl_sm1_write(ctx, entry_func, out);
     else
         return VKD3D_ERROR_NOT_IMPLEMENTED;
 }
diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c
new file mode 100644
index 00000000..6f9df654
--- /dev/null
+++ b/libs/vkd3d-shader/hlsl_sm1.c
@@ -0,0 +1,828 @@
+/*
+ * HLSL code generation for DXBC shader models 1-3
+ *
+ * Copyright 2019-2020 Zebediah Figura for CodeWeavers
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#include "hlsl.h"
+#include <stdio.h>
+
+bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic,
+        bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg)
+{
+    unsigned int i;
+
+    static const struct
+    {
+        const char *semantic;
+        bool output;
+        enum vkd3d_shader_type shader_type;
+        unsigned int major_version;
+        D3DSHADER_PARAM_REGISTER_TYPE type;
+        DWORD offset; /* Only read for D3DSPR_MISCTYPE and D3DSPR_RASTOUT rows. */
+    }
+    register_table[] =
+    {
+        {"color",       true,  VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT},
+        {"depth",       true,  VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT},
+        {"sv_depth",    true,  VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT},
+        {"sv_target",   true,  VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT},
+        {"color",       false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT},
+        {"texcoord",    false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE},
+
+        {"color",       true,  VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT},
+        {"depth",       true,  VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT},
+        {"sv_depth",    true,  VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT},
+        {"sv_target",   true,  VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT},
+        {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE,    D3DSMO_POSITION},
+        {"vface",       false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE,    D3DSMO_FACE},
+        {"vpos",        false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE,    D3DSMO_POSITION},
+
+        {"color",       true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT},
+        {"fog",         true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT,     D3DSRO_FOG},
+        {"position",    true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT,     D3DSRO_POSITION},
+        {"psize",       true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT,     D3DSRO_POINT_SIZE},
+        {"sv_position", true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT,     D3DSRO_POSITION},
+        {"texcoord",    true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT},
+
+        {"color",       true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT},
+        {"fog",         true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT,     D3DSRO_FOG},
+        {"position",    true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT,     D3DSRO_POSITION},
+        {"psize",       true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT,     D3DSRO_POINT_SIZE},
+        {"sv_position", true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT,     D3DSRO_POSITION},
+        {"texcoord",    true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT},
+    };
+
+    for (i = 0; i < ARRAY_SIZE(register_table); ++i) /* First row matching name, direction, type and major version wins. */
+    {
+        if (!ascii_strcasecmp(semantic->name, register_table[i].semantic)
+                && output == register_table[i].output
+                && ctx->profile->type == register_table[i].shader_type
+                && ctx->profile->major_version == register_table[i].major_version)
+        {
+            *type = register_table[i].type;
+            if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT)
+                *reg = register_table[i].offset; /* These two register types take a fixed index from the table. */
+            else
+                *reg = semantic->index; /* Every other type is indexed by the semantic index. */
+            return true;
+        }
+    }
+
+    return false; /* No row matched; *type and *reg are left unwritten. */
+}
+
+bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx)
+{
+    static const struct
+    {
+        const char *name;
+        D3DDECLUSAGE usage;
+    }
+    semantics[] =
+    {
+        {"binormal",        D3DDECLUSAGE_BINORMAL},
+        {"blendindices",    D3DDECLUSAGE_BLENDINDICES},
+        {"blendweight",     D3DDECLUSAGE_BLENDWEIGHT},
+        {"color",           D3DDECLUSAGE_COLOR},
+        {"depth",           D3DDECLUSAGE_DEPTH},
+        {"fog",             D3DDECLUSAGE_FOG},
+        {"normal",          D3DDECLUSAGE_NORMAL},
+        {"position",        D3DDECLUSAGE_POSITION},
+        {"positiont",       D3DDECLUSAGE_POSITIONT},
+        {"psize",           D3DDECLUSAGE_PSIZE},
+        {"sample",          D3DDECLUSAGE_SAMPLE},
+        {"sv_depth",        D3DDECLUSAGE_DEPTH}, /* Same usage as "depth". */
+        {"sv_position",     D3DDECLUSAGE_POSITION}, /* Same usage as "position". */
+        {"sv_target",       D3DDECLUSAGE_COLOR}, /* Same usage as "color". */
+        {"tangent",         D3DDECLUSAGE_TANGENT},
+        {"tessfactor",      D3DDECLUSAGE_TESSFACTOR},
+        {"texcoord",        D3DDECLUSAGE_TEXCOORD},
+    };
+
+    unsigned int i;
+
+    for (i = 0; i < ARRAY_SIZE(semantics); ++i) /* Look the semantic name up in the table above. */
+    {
+        if (!ascii_strcasecmp(semantic->name, semantics[i].name))
+        {
+            *usage = semantics[i].usage;
+            *usage_idx = semantic->index; /* The usage index is always the semantic index. */
+            return true;
+        }
+    }
+
+    return false; /* Name not in the table; *usage and *usage_idx are left unwritten. */
+}
+
+static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor)
+{
+    if (type == VKD3D_SHADER_TYPE_VERTEX)
+        return D3DVS_VERSION(major, minor); /* Vertex shader version token. */
+    else
+        return D3DPS_VERSION(major, minor); /* Every non-vertex type is encoded with the pixel shader macro. */
+}
+
+static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type)
+{
+    switch (type->type) /* Map the HLSL type class onto a D3DXPC_* value. */
+    {
+        case HLSL_CLASS_ARRAY:
+            return sm1_class(type->e.array.type); /* Arrays report the class of their element type. */
+        case HLSL_CLASS_MATRIX:
+            assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); /* A majority modifier is expected to be set. */
+            if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)
+                return D3DXPC_MATRIX_COLUMNS;
+            else
+                return D3DXPC_MATRIX_ROWS; /* Anything not flagged column-major is reported as rows. */
+        case HLSL_CLASS_OBJECT:
+            return D3DXPC_OBJECT;
+        case HLSL_CLASS_SCALAR:
+            return D3DXPC_SCALAR;
+        case HLSL_CLASS_STRUCT:
+            return D3DXPC_STRUCT;
+        case HLSL_CLASS_VECTOR:
+            return D3DXPC_VECTOR;
+        default:
+            ERR("Invalid class %#x.\n", type->type);
+            assert(0);
+            return 0; /* Only reached when assert() is compiled out. */
+    }
+}
+
+static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type)
+{
+    switch (type->base_type) /* Map the HLSL base type onto a D3DXPT_* value. */
+    {
+        case HLSL_TYPE_BOOL:
+            return D3DXPT_BOOL;
+        case HLSL_TYPE_FLOAT:
+        case HLSL_TYPE_HALF:
+            return D3DXPT_FLOAT; /* Half is reported as D3DXPT_FLOAT as well. */
+        case HLSL_TYPE_INT:
+        case HLSL_TYPE_UINT:
+            return D3DXPT_INT; /* Signed and unsigned integers share D3DXPT_INT. */
+        case HLSL_TYPE_PIXELSHADER:
+            return D3DXPT_PIXELSHADER;
+        case HLSL_TYPE_SAMPLER:
+            switch (type->sampler_dim)
+            {
+                case HLSL_SAMPLER_DIM_1D:
+                    return D3DXPT_SAMPLER1D;
+                case HLSL_SAMPLER_DIM_2D:
+                    return D3DXPT_SAMPLER2D;
+                case HLSL_SAMPLER_DIM_3D:
+                    return D3DXPT_SAMPLER3D;
+                case HLSL_SAMPLER_DIM_CUBE:
+                    return D3DXPT_SAMPLERCUBE;
+                case HLSL_SAMPLER_DIM_GENERIC:
+                    return D3DXPT_SAMPLER;
+                default:
+                    ERR("Invalid dimension %#x.\n", type->sampler_dim);
+            }
+            break; /* Only reached for an invalid dimension; continues to the final assert. */
+        case HLSL_TYPE_STRING:
+            return D3DXPT_STRING;
+        case HLSL_TYPE_TEXTURE:
+            switch (type->sampler_dim)
+            {
+                case HLSL_SAMPLER_DIM_1D:
+                    return D3DXPT_TEXTURE1D;
+                case HLSL_SAMPLER_DIM_2D:
+                    return D3DXPT_TEXTURE2D;
+                case HLSL_SAMPLER_DIM_3D:
+                    return D3DXPT_TEXTURE3D;
+                case HLSL_SAMPLER_DIM_CUBE:
+                    return D3DXPT_TEXTURECUBE;
+                case HLSL_SAMPLER_DIM_GENERIC:
+                    return D3DXPT_TEXTURE;
+                default:
+                    ERR("Invalid dimension %#x.\n", type->sampler_dim);
+            }
+            break; /* Only reached for an invalid dimension; continues to the final assert. */
+        case HLSL_TYPE_VERTEXSHADER:
+            return D3DXPT_VERTEXSHADER;
+        case HLSL_TYPE_VOID:
+            return D3DXPT_VOID;
+        default:
+            assert(0);
+    }
+    assert(0);
+    return 0; /* Only reached when assert() is compiled out. */
+}
+
+static const struct hlsl_type *get_array_type(const struct hlsl_type *type)
+{
+    if (type->type == HLSL_CLASS_ARRAY)
+        return get_array_type(type->e.array.type); /* Recurse through nested array levels. */
+    return type; /* First non-array type; a non-array input is returned unchanged. */
+}
+
+static unsigned int get_array_size(const struct hlsl_type *type)
+{
+    if (type->type == HLSL_CLASS_ARRAY)
+        return get_array_size(type->e.array.type) * type->e.array.elements_count; /* Product over all nested levels. */
+    return 1; /* A non-array type counts as one element. */
+}
+
+static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start)
+{
+    const struct hlsl_type *array_type = get_array_type(type);
+    unsigned int array_size = get_array_size(type);
+    struct hlsl_struct_field *field;
+    unsigned int field_count = 0;
+    size_t fields_offset = 0;
+
+    if (type->bytecode_offset)
+        return; /* A nonzero offset is treated as "this type was already written". */
+
+    if (array_type->type == HLSL_CLASS_STRUCT)
+    {
+        LIST_FOR_EACH_ENTRY(field, array_type->e.elements, struct hlsl_struct_field, entry)
+        {
+            field->name_bytecode_offset = put_string(buffer, field->name); /* First pass: field names and types. */
+            write_sm1_type(buffer, field->type, ctab_start);
+        }
+
+        fields_offset = bytecode_get_size(buffer) - ctab_start; /* Start of the field table, relative to ctab_start. */
+
+        LIST_FOR_EACH_ENTRY(field, array_type->e.elements, struct hlsl_struct_field, entry)
+        {
+            put_u32(buffer, field->name_bytecode_offset - ctab_start); /* Second pass: one (name, type) pair per field. */
+            put_u32(buffer, field->type->bytecode_offset - ctab_start);
+            ++field_count;
+        }
+    }
+
+    type->bytecode_offset = put_u32(buffer, sm1_class(type) | (sm1_base_type(type) << 16));
+    put_u32(buffer, type->dimy | (type->dimx << 16)); /* dimy in the low half, dimx in the high half. */
+    put_u32(buffer, array_size | (field_count << 16)); /* Both stay at their defaults (1 and 0) when not applicable. */
+    put_u32(buffer, fields_offset); /* Zero when the type has no struct fields. */
+}
+
+static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort)
+{
+    struct hlsl_ir_var *var;
+
+    list_remove(&to_sort->extern_entry); /* Unlink from whichever list currently holds it. */
+
+    LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry)
+    {
+        if (strcmp(to_sort->name, var->name) < 0) /* Insert ahead of the first entry with a greater name. */
+        {
+            list_add_before(&var->extern_entry, &to_sort->extern_entry);
+            return;
+        }
+    }
+
+    list_add_tail(sorted, &to_sort->extern_entry); /* No greater name found: append. */
+}
+
+static void sm1_sort_externs(struct hlsl_ctx *ctx)
+{
+    struct list sorted = LIST_INIT(sorted);
+    struct hlsl_ir_var *var, *next;
+
+    LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+        sm1_sort_extern(&sorted, var); /* Unlinks var from the list being walked, hence the _SAFE iterator. */
+    list_move_tail(&ctx->extern_vars, &sorted); /* Presumably splices the sorted entries back into ctx->extern_vars. */
+}
+
+static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
+        struct hlsl_ir_function_decl *entry_func) /* NOTE(review): entry_func is not referenced in this function. */
+{
+    size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset;
+    unsigned int uniform_count = 0;
+    struct hlsl_ir_var *var;
+
+    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+    {
+        if (!var->semantic.name && var->reg.allocated) /* Counted: no semantic name and a register was allocated. */
+        {
+            ++uniform_count;
+
+            if (var->is_param && var->is_uniform)
+            {
+                struct vkd3d_string_buffer *name;
+
+                if (!(name = hlsl_get_string_buffer(ctx)))
+                {
+                    buffer->status = VKD3D_ERROR_OUT_OF_MEMORY;
+                    return; /* Nothing has been written to the buffer yet at this point. */
+                }
+                vkd3d_string_buffer_printf(name, "$%s", var->name); /* Uniform parameters get a '$' prefix. */
+                vkd3d_free((char *)var->name);
+                var->name = hlsl_strdup(ctx, name->buffer); /* NOTE(review): result is used unchecked below; confirm it cannot be NULL. */
+                hlsl_release_string_buffer(ctx, name);
+            }
+        }
+    }
+
+    sm1_sort_externs(ctx); /* Orders ctx->extern_vars by name, after the renaming above. */
+
+    size_offset = put_u32(buffer, 0); /* Placeholder; patched at the end with the comment token. */
+    ctab_offset = put_u32(buffer, MAKEFOURCC('C','T','A','B'));
+
+    ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* Offsets written below are relative to this point. */
+    creator_offset = put_u32(buffer, 0); /* Placeholder; patched once the creator string is written. */
+    put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version));
+    put_u32(buffer, uniform_count);
+    put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */
+    put_u32(buffer, 0); /* FIXME: flags */
+    put_u32(buffer, 0); /* FIXME: target string */
+
+    vars_start = bytecode_get_size(buffer);
+
+    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+    {
+        if (!var->semantic.name && var->reg.allocated) /* Same filter as the counting loop. */
+        {
+            put_u32(buffer, 0); /* name */
+            put_u32(buffer, D3DXRS_FLOAT4 | (var->reg.id << 16));
+            put_u32(buffer, var->data_type->reg_size / 4);
+            put_u32(buffer, 0); /* type */
+            put_u32(buffer, 0); /* FIXME: default value */
+        }
+    }
+
+    uniform_count = 0; /* Reused as the index of the entry being patched. */
+
+    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+    {
+        if (!var->semantic.name && var->reg.allocated)
+        {
+            size_t var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); /* Five 32-bit words per entry. */
+            size_t name_offset;
+
+            name_offset = put_string(buffer, var->name);
+            set_u32(buffer, var_offset, name_offset - ctab_start); /* Patch the "name" word. */
+
+            write_sm1_type(buffer, var->data_type, ctab_start);
+            set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start);
+            ++uniform_count;
+        }
+    }
+
+    offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL));
+    set_u32(buffer, creator_offset, offset - ctab_start);
+
+    ctab_end = bytecode_get_size(buffer);
+    set_u32(buffer, size_offset, D3DSIO_COMMENT | (((ctab_end - ctab_offset) / sizeof(uint32_t)) << 16));
+}
+
+static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type)
+{
+    return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK)
+            | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); /* The type is split across two masked fields. */
+}
+
+struct sm1_instruction
+{
+    D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
+
+    struct sm1_dst_register
+    {
+        D3DSHADER_PARAM_REGISTER_TYPE type;
+        D3DSHADER_PARAM_DSTMOD_TYPE mod;
+        unsigned int writemask; /* Also passed to the source writer to remap source swizzles. */
+        uint32_t reg;
+    } dst;
+
+    struct sm1_src_register
+    {
+        D3DSHADER_PARAM_REGISTER_TYPE type;
+        D3DSHADER_PARAM_SRCMOD_TYPE mod;
+        unsigned int swizzle;
+        uint32_t reg;
+    } srcs[2]; /* Room for at most two source operands. */
+    unsigned int src_count; /* Number of srcs[] entries that get written. */
+
+    unsigned int has_dst; /* Nonzero when dst is written. */
+};
+
+static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg)
+{
+    assert(reg->writemask); /* An empty writemask is not expected here. */
+    put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg);
+}
+
+static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer,
+        const struct sm1_src_register *reg, unsigned int dst_writemask)
+{
+    unsigned int swizzle = hlsl_map_swizzle(reg->swizzle, dst_writemask); /* Remap against the destination writemask. */
+
+    put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (swizzle << 16) | reg->reg);
+}
+
+static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
+        const struct sm1_instruction *instr)
+{
+    uint32_t token = instr->opcode; /* Opcode token; the operand tokens follow it. */
+    unsigned int i;
+
+    if (ctx->profile->major_version > 1) /* The operand count is only encoded above major version 1. */
+        token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT;
+    put_u32(buffer, token);
+
+    if (instr->has_dst)
+        write_sm1_dst_register(buffer, &instr->dst);
+
+    for (i = 0; i < instr->src_count; ++i) /* Each source swizzle is remapped against the destination writemask. */
+        write_sm1_src_register(buffer, &instr->srcs[i], instr->dst.writemask);
+}
+
+static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
+        D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst,
+        const struct hlsl_reg *src1, const struct hlsl_reg *src2)
+{
+    const struct sm1_instruction instr =
+    {
+        .opcode = opcode,
+
+        .dst.type = D3DSPR_TEMP, /* Destination and both sources are temporary registers. */
+        .dst.writemask = dst->writemask,
+        .dst.reg = dst->id,
+        .has_dst = 1,
+
+        .srcs[0].type = D3DSPR_TEMP,
+        .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), /* Swizzle derived from the source's writemask. */
+        .srcs[0].reg = src1->id,
+        .srcs[1].type = D3DSPR_TEMP,
+        .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask),
+        .srcs[1].reg = src2->id,
+        .src_count = 2,
+    };
+    write_sm1_instruction(ctx, buffer, &instr);
+}
+
+static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
+        D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst,
+        const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod)
+{
+    const struct sm1_instruction instr =
+    {
+        .opcode = opcode,
+
+        .dst.type = D3DSPR_TEMP, /* Destination and source are temporary registers. */
+        .dst.writemask = dst->writemask,
+        .dst.reg = dst->id,
+        .has_dst = 1,
+
+        .srcs[0].type = D3DSPR_TEMP,
+        .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), /* Swizzle derived from the source's writemask. */
+        .srcs[0].reg = src->id,
+        .srcs[0].mod = src_mod, /* Source modifier chosen by the caller. */
+        .src_count = 1,
+    };
+    write_sm1_instruction(ctx, buffer, &instr);
+}
+
+static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer)
+{
+    unsigned int i, x;
+
+    for (i = 0; i < ctx->constant_defs.count; ++i) /* One DEF instruction per entry in ctx->constant_defs. */
+    {
+        uint32_t token = D3DSIO_DEF;
+        const struct sm1_dst_register reg =
+        {
+            .type = D3DSPR_CONST,
+            .writemask = VKD3DSP_WRITEMASK_ALL,
+            .reg = i, /* The constant register index is the entry's position. */
+        };
+
+        if (ctx->profile->major_version > 1)
+            token |= 5 << D3DSI_INSTLENGTH_SHIFT; /* One destination token plus four float values. */
+        put_u32(buffer, token);
+
+        write_sm1_dst_register(buffer, &reg);
+        for (x = 0; x < 4; ++x)
+            put_f32(buffer, ctx->constant_defs.values[i].f[x]);
+    }
+}
+
+static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
+        const struct hlsl_ir_var *var, bool output)
+{
+    struct sm1_dst_register reg = {0};
+    uint32_t token, usage_idx;
+    D3DDECLUSAGE usage;
+    bool ret;
+
+    if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &reg.type, &reg.reg))
+    {
+        usage = 0; /* A dedicated register was found; both usage fields are written as zero. */
+        usage_idx = 0;
+    }
+    else
+    {
+        ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx);
+        assert(ret); /* NOTE(review): ret is only read here, so it is unused when assert() is compiled out. */
+        reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT;
+        reg.reg = var->reg.id; /* Fall back to the variable's own allocated register id. */
+    }
+
+    token = D3DSIO_DCL;
+    if (ctx->profile->major_version > 1)
+        token |= 2 << D3DSI_INSTLENGTH_SHIFT; /* Usage token plus register token. */
+    put_u32(buffer, token);
+
+    token = (1u << 31);
+    token |= usage << D3DSP_DCL_USAGE_SHIFT;
+    token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT;
+    put_u32(buffer, token);
+
+    reg.writemask = (1 << var->data_type->dimx) - 1; /* Low dimx bits set. */
+    write_sm1_dst_register(buffer, &reg);
+}
+
+static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer)
+{
+    bool write_in = false, write_out = false;
+    struct hlsl_ir_var *var;
+
+    if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
+        write_in = true; /* Pixel shaders: inputs only. */
+    else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3)
+        write_in = write_out = true; /* Vertex shaders at major version 3: inputs and outputs. */
+    else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3)
+        write_in = true; /* Vertex shaders below major version 3: inputs only. */
+
+    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+    {
+        if (write_in && var->is_input_semantic)
+            write_sm1_semantic_dcl(ctx, buffer, var, false);
+        if (write_out && var->is_output_semantic) /* Checked independently of the input test above. */
+            write_sm1_semantic_dcl(ctx, buffer, var, true);
+    }
+}
+
+static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
+        const struct hlsl_ir_node *instr)
+{
+    const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr);
+    struct sm1_instruction sm1_instr =
+    {
+        .opcode = D3DSIO_MOV, /* Copy from the constant's register into the node's temporary. */
+
+        .dst.type = D3DSPR_TEMP,
+        .dst.reg = instr->reg.id,
+        .dst.writemask = instr->reg.writemask,
+        .has_dst = 1,
+
+        .srcs[0].type = D3DSPR_CONST,
+        .srcs[0].reg = constant->reg.id,
+        .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask),
+        .src_count = 1,
+    };
+
+    assert(instr->reg.allocated); /* Both registers are expected to be allocated already. */
+    assert(constant->reg.allocated);
+    write_sm1_instruction(ctx, buffer, &sm1_instr);
+}
+
+static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr)
+{
+    struct hlsl_ir_expr *expr = hlsl_ir_expr(instr);
+    struct hlsl_ir_node *arg1 = expr->operands[0].node;
+    struct hlsl_ir_node *arg2 = expr->operands[1].node; /* Only used by the binary cases below. */
+    unsigned int i;
+
+    assert(instr->reg.allocated);
+
+    if (instr->data_type->base_type != HLSL_TYPE_FLOAT)
+    {
+        FIXME("Non-float operations need to be lowered.\n");
+        return; /* Nothing is emitted for this node. */
+    }
+
+    switch (expr->op)
+    {
+        case HLSL_IR_BINOP_ADD:
+            write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg);
+            break;
+
+        case HLSL_IR_BINOP_MUL:
+            write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg);
+            break;
+
+        case HLSL_IR_BINOP_SUB:
+            write_sm1_binary_op(ctx, buffer, D3DSIO_SUB, &instr->reg, &arg1->reg, &arg2->reg);
+            break;
+
+        case HLSL_IR_UNOP_NEG: /* Emitted as a MOV with the negate source modifier. */
+            write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG);
+            break;
+
+        case HLSL_IR_UNOP_RCP: /* Emitted as one RCP per component of the result type. */
+            for (i = 0; i < instr->data_type->dimx; ++i)
+            {
+                struct hlsl_reg src = arg1->reg, dst = instr->reg;
+
+                src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i);
+                dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i);
+                write_sm1_unary_op(ctx, buffer, D3DSIO_RCP, &dst, &src, 0);
+            }
+            break;
+
+        default:
+            FIXME("Unhandled op %u.\n", expr->op);
+            break;
+    }
+}
+
+static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr)
+{
+    const struct hlsl_ir_load *load = hlsl_ir_load(instr);
+    const struct hlsl_reg reg = hlsl_reg_from_deref(&load->src, instr->data_type);
+    struct sm1_instruction sm1_instr =
+    {
+        .opcode = D3DSIO_MOV,
+
+        .dst.type = D3DSPR_TEMP,
+        .dst.reg = instr->reg.id,
+        .dst.writemask = instr->reg.writemask,
+        .has_dst = 1,
+
+        .srcs[0].type = D3DSPR_TEMP, /* Default; overridden below for uniforms and input semantics. */
+        .srcs[0].reg = reg.id,
+        .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask),
+        .src_count = 1,
+    };
+
+    assert(instr->reg.allocated);
+
+    if (load->src.var->is_uniform)
+    {
+        assert(reg.allocated);
+        sm1_instr.srcs[0].type = D3DSPR_CONST; /* Uniforms are read from constant registers. */
+    }
+    else if (load->src.var->is_input_semantic)
+    {
+        if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic,
+                false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg))
+        {
+            assert(reg.allocated);
+            sm1_instr.srcs[0].type = D3DSPR_INPUT; /* No dedicated register: use the allocated input register. */
+            sm1_instr.srcs[0].reg = reg.id;
+        }
+        else /* Dedicated register: the swizzle covers the variable's first dimx components. */
+            sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1);
+    }
+
+    write_sm1_instruction(ctx, buffer, &sm1_instr);
+}
+
+static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
+        const struct hlsl_ir_node *instr)
+{
+    const struct hlsl_ir_store *store = hlsl_ir_store(instr);
+    const struct hlsl_ir_node *rhs = store->rhs.node;
+    const struct hlsl_reg reg = hlsl_reg_from_deref(&store->lhs, rhs->data_type);
+    struct sm1_instruction sm1_instr =
+    {
+        .opcode = D3DSIO_MOV,
+
+        .dst.type = D3DSPR_TEMP, /* Default; overridden below for output semantics. */
+        .dst.reg = reg.id,
+        .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask),
+        .has_dst = 1,
+
+        .srcs[0].type = D3DSPR_TEMP,
+        .srcs[0].reg = rhs->reg.id,
+        .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask),
+        .src_count = 1,
+    };
+
+    if (store->lhs.var->data_type->type == HLSL_CLASS_MATRIX)
+    {
+        FIXME("Matrix writemasks need to be lowered.\n");
+        return; /* Nothing is emitted for this store. */
+    }
+
+    if (store->lhs.var->is_output_semantic)
+    {
+        if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic,
+                true, &sm1_instr.dst.type, &sm1_instr.dst.reg))
+        {
+            assert(reg.allocated);
+            sm1_instr.dst.type = D3DSPR_OUTPUT; /* No dedicated register: use the allocated output register. */
+            sm1_instr.dst.reg = reg.id;
+        }
+        else /* Dedicated register: the writemask covers the variable's first dimx components. */
+            sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1;
+    }
+    else
+        assert(reg.allocated);
+
+    write_sm1_instruction(ctx, buffer, &sm1_instr);
+}
+
+static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
+        const struct hlsl_ir_node *instr)
+{
+    const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr);
+    const struct hlsl_ir_node *val = swizzle->val.node;
+    struct sm1_instruction sm1_instr =
+    {
+        .opcode = D3DSIO_MOV, /* A swizzle node is emitted as a MOV between temporaries. */
+
+        .dst.type = D3DSPR_TEMP,
+        .dst.reg = instr->reg.id,
+        .dst.writemask = instr->reg.writemask,
+        .has_dst = 1,
+
+        .srcs[0].type = D3DSPR_TEMP,
+        .srcs[0].reg = val->reg.id,
+        .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask),
+                swizzle->swizzle, instr->data_type->dimx), /* Register swizzle combined with the node's own swizzle. */
+        .src_count = 1,
+    };
+
+    assert(instr->reg.allocated); /* Both registers are expected to be allocated already. */
+    assert(val->reg.allocated);
+    write_sm1_instruction(ctx, buffer, &sm1_instr);
+}
+
+/* Walk the body of the entry point and emit SM1 bytecode for each node.
+ *
+ * Typed nodes must be scalars or vectors. The first node with a matrix type
+ * logs a FIXME and stops the walk, so no later node is emitted. Node kinds
+ * without a writer here are reported with a FIXME and skipped. */
+static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
+        const struct hlsl_ir_function_decl *entry_func)
+{
+    const struct hlsl_ir_node *instr;
+
+    LIST_FOR_EACH_ENTRY(instr, entry_func->body, struct hlsl_ir_node, entry)
+    {
+        if (instr->data_type)
+        {
+            if (instr->data_type->type == HLSL_CLASS_MATRIX)
+            {
+                FIXME("Matrix operations need to be lowered.\n");
+                /* Leaves the list walk, not just this iteration. */
+                break;
+            }
+
+            assert(instr->data_type->type == HLSL_CLASS_SCALAR || instr->data_type->type == HLSL_CLASS_VECTOR);
+        }
+
+        /* Dispatch on the node kind. */
+        if (instr->type == HLSL_IR_CONSTANT)
+        {
+            write_sm1_constant(ctx, buffer, instr);
+        }
+        else if (instr->type == HLSL_IR_EXPR)
+        {
+            write_sm1_expr(ctx, buffer, instr);
+        }
+        else if (instr->type == HLSL_IR_LOAD)
+        {
+            write_sm1_load(ctx, buffer, instr);
+        }
+        else if (instr->type == HLSL_IR_STORE)
+        {
+            write_sm1_store(ctx, buffer, instr);
+        }
+        else if (instr->type == HLSL_IR_SWIZZLE)
+        {
+            write_sm1_swizzle(ctx, buffer, instr);
+        }
+        else
+        {
+            FIXME("Unhandled instruction type %s.\n", hlsl_node_type_to_string(instr->type));
+        }
+    }
+}
+
+/* Serialise the compiled entry point as SM1 bytecode.
+ *
+ * The stream is written in this order: version token, uniforms, constant
+ * definitions, semantic declarations, instructions, and a closing
+ * D3DSIO_END token.
+ *
+ * Returns the bytecode buffer's status. When that status is zero, the
+ * buffer's data pointer and size are stored in "out"; otherwise "out" is
+ * left untouched.
+ *
+ * NOTE(review): on a non-zero status nothing here releases buffer.data;
+ * confirm whether the buffer can own memory at that point. */
+int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out)
+{
+    struct vkd3d_bytecode_buffer buffer = {0};
+
+    put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version));
+
+    write_sm1_uniforms(ctx, &buffer, entry_func);
+
+    write_sm1_constant_defs(ctx, &buffer);
+    write_sm1_semantic_dcls(ctx, &buffer);
+    write_sm1_instructions(ctx, &buffer, entry_func);
+
+    put_u32(&buffer, D3DSIO_END);
+
+    if (buffer.status)
+        return buffer.status;
+
+    out->code = buffer.data;
+    out->size = buffer.size;
+    return 0;
+}
-- 
2.32.0




More information about the wine-devel mailing list