From 63f1663fa959d8809d1762d99e9364565ba9b3d8 Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Mon, 18 Oct 2021 20:24:15 -0300
Subject: [PATCH] Fix shader 8-bit and 16-bit STS/STG (#2741)

* Fix 8 and 16-bit STG

* Fix 8 and 16-bit STS

* Shader cache version bump
---
 Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs    |  2 +-
 .../CodeGen/Glsl/Declarations.cs              | 10 +++
 .../AtomicMinMaxS32Shared.glsl                |  4 +-
 .../AtomicMinMaxS32Storage.glsl               |  4 +-
 .../HelperFunctions/HelperFunctionNames.cs    |  5 ++
 .../HelperFunctions/StoreSharedSmallInt.glsl  | 23 +++++++
 .../HelperFunctions/StoreStorageSmallInt.glsl | 23 +++++++
 .../CodeGen/Glsl/Instructions/InstGen.cs      | 12 ++++
 .../Glsl/Instructions/InstGenHelper.cs        |  4 ++
 .../Glsl/Instructions/InstGenMemory.cs        | 64 +++++++++++++++++++
 .../Instructions/InstEmitMemory.cs            | 52 +++++++++------
 .../IntermediateRepresentation/Instruction.cs |  6 ++
 .../Ryujinx.Graphics.Shader.csproj            |  2 +
 .../StructuredIr/HelperFunctionsMask.cs       |  4 +-
 .../StructuredIr/InstructionInfo.cs           |  4 ++
 .../StructuredIr/StructuredProgram.cs         |  8 +++
 .../Translation/EmitterContextInsts.cs        | 20 ++++++
 .../Translation/GlobalMemory.cs               |  4 +-
 .../Optimizations/GlobalToStorage.cs          | 26 ++++++--
 .../Translation/Rewriter.cs                   | 27 ++++++--
 20 files changed, 266 insertions(+), 38 deletions(-)
 create mode 100644 Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl
 create mode 100644 Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl

diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
index c33b35f643..619a8c5f4f 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
@@ -40,7 +40,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <summary>
         /// Version of the codegen (to be changed when codegen or guest format change).
         /// </summary>
-        private const ulong ShaderCodeGenVersion = 2750;
+        private const ulong ShaderCodeGenVersion = 2741;
 
         // Progress reporting helpers
         private volatile int _shaderCount;
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
index 3e2e51b02a..85288afac3 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
@@ -270,6 +270,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
                 AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl");
             }
 
+            if ((info.HelperFunctionsMask & HelperFunctionsMask.StoreSharedSmallInt) != 0)
+            {
+                AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl");
+            }
+
+            if ((info.HelperFunctionsMask & HelperFunctionsMask.StoreStorageSmallInt) != 0)
+            {
+                AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl");
+            }
+
             if ((info.HelperFunctionsMask & HelperFunctionsMask.SwizzleAdd) != 0)
             {
                 AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl");
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl
index 9f8c641dff..82b76bccf2 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl
@@ -5,7 +5,7 @@ int Helper_AtomicMaxS32(int offset, int value)
     {
         oldValue = $SHARED_MEM$[offset];
         newValue = uint(max(int(oldValue), value));
-    } while (atomicCompSwap($SHARED_MEM$[offset], newValue, oldValue) != oldValue);
+    } while (atomicCompSwap($SHARED_MEM$[offset], oldValue, newValue) != oldValue);
     return int(oldValue);
 }
 
@@ -16,6 +16,6 @@ int Helper_AtomicMinS32(int offset, int value)
     {
         oldValue = $SHARED_MEM$[offset];
         newValue = uint(min(int(oldValue), value));
-    } while (atomicCompSwap($SHARED_MEM$[offset], newValue, oldValue) != oldValue);
+    } while (atomicCompSwap($SHARED_MEM$[offset], oldValue, newValue) != oldValue);
     return int(oldValue);
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl
index fc3af6a73e..0862a71bf6 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl
@@ -5,7 +5,7 @@ int Helper_AtomicMaxS32(int index, int offset, int value)
     {
         oldValue = $STORAGE_MEM$[index].data[offset];
         newValue = uint(max(int(oldValue), value));
-    } while (atomicCompSwap($STORAGE_MEM$[index].data[offset], newValue, oldValue) != oldValue);
+    } while (atomicCompSwap($STORAGE_MEM$[index].data[offset], oldValue, newValue) != oldValue);
     return int(oldValue);
 }
 
@@ -16,6 +16,6 @@ int Helper_AtomicMinS32(int index, int offset, int value)
     {
         oldValue = $STORAGE_MEM$[index].data[offset];
         newValue = uint(min(int(oldValue), value));
-    } while (atomicCompSwap($STORAGE_MEM$[index].data[offset], newValue, oldValue) != oldValue);
+    } while (atomicCompSwap($STORAGE_MEM$[index].data[offset], oldValue, newValue) != oldValue);
     return int(oldValue);
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs
index 1ff127bb38..54f35b15aa 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs
@@ -13,5 +13,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
         public static string ShuffleUp   = "Helper_ShuffleUp";
         public static string ShuffleXor  = "Helper_ShuffleXor";
         public static string SwizzleAdd  = "Helper_SwizzleAdd";
+
+        public static string StoreShared16  = "Helper_StoreShared16";
+        public static string StoreShared8   = "Helper_StoreShared8";
+        public static string StoreStorage16 = "Helper_StoreStorage16";
+        public static string StoreStorage8  = "Helper_StoreStorage8";
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl
new file mode 100644
index 0000000000..2f57b5ff64
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl
@@ -0,0 +1,23 @@
+void Helper_StoreShared16(int offset, uint value) // offset is in bytes; the low 16 bits of value are written.
+{
+    int wordIndex = offset >> 2;       // 32-bit element that contains the target halfword
+    int bitShift = (offset & 3) << 3;  // bit position of the target inside that element
+    uint expected, desired;
+    do
+    {
+        expected = $SHARED_MEM$[wordIndex];
+        desired = bitfieldInsert(expected, value, bitShift, 16);
+    } while (atomicCompSwap($SHARED_MEM$[wordIndex], expected, desired) != expected); // retry if the element changed since it was read
+}
+
+void Helper_StoreShared8(int offset, uint value) // offset is in bytes; the low 8 bits of value are written.
+{
+    int wordIndex = offset >> 2;       // 32-bit element that contains the target byte
+    int bitShift = (offset & 3) << 3;  // bit position of the target inside that element
+    uint expected, desired;
+    do
+    {
+        expected = $SHARED_MEM$[wordIndex];
+        desired = bitfieldInsert(expected, value, bitShift, 8);
+    } while (atomicCompSwap($SHARED_MEM$[wordIndex], expected, desired) != expected); // retry if the element changed since it was read
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl
new file mode 100644
index 0000000000..f2253a7969
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl
@@ -0,0 +1,23 @@
+void Helper_StoreStorage16(int index, int offset, uint value) // offset is in bytes; the low 16 bits of value are written.
+{
+    int wordIndex = offset >> 2;       // 32-bit element of data[] that contains the target halfword
+    int bitShift = (offset & 3) << 3;  // bit position of the target inside that element
+    uint expected, desired;
+    do
+    {
+        expected = $STORAGE_MEM$[index].data[wordIndex];
+        desired = bitfieldInsert(expected, value, bitShift, 16);
+    } while (atomicCompSwap($STORAGE_MEM$[index].data[wordIndex], expected, desired) != expected); // retry if the element changed since it was read
+}
+
+void Helper_StoreStorage8(int index, int offset, uint value) // offset is in bytes; the low 8 bits of value are written.
+{
+    int wordIndex = offset >> 2;       // 32-bit element of data[] that contains the target byte
+    int bitShift = (offset & 3) << 3;  // bit position of the target inside that element
+    uint expected, desired;
+    do
+    {
+        expected = $STORAGE_MEM$[index].data[wordIndex];
+        desired = bitfieldInsert(expected, value, bitShift, 8);
+    } while (atomicCompSwap($STORAGE_MEM$[index].data[wordIndex], expected, desired) != expected); // retry if the element changed since it was read
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs
index 4c2582f0ef..4f0591499a 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs
@@ -185,9 +185,21 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
                     case Instruction.StoreShared:
                         return StoreShared(context, operation);
 
+                    case Instruction.StoreShared16:
+                        return StoreShared16(context, operation);
+
+                    case Instruction.StoreShared8:
+                        return StoreShared8(context, operation);
+
                     case Instruction.StoreStorage:
                         return StoreStorage(context, operation);
 
+                    case Instruction.StoreStorage16:
+                        return StoreStorage16(context, operation);
+
+                    case Instruction.StoreStorage8:
+                        return StoreStorage8(context, operation);
+
                     case Instruction.TextureSample:
                         return TextureSample(context, operation);
 
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs
index 022cd746fa..c2d59ff722 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs
@@ -113,7 +113,11 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
             Add(Instruction.StoreAttribute,           InstType.Special);
             Add(Instruction.StoreLocal,               InstType.Special);
             Add(Instruction.StoreShared,              InstType.Special);
+            Add(Instruction.StoreShared16,            InstType.Special);
+            Add(Instruction.StoreShared8,             InstType.Special);
             Add(Instruction.StoreStorage,             InstType.Special);
+            Add(Instruction.StoreStorage16,           InstType.Special);
+            Add(Instruction.StoreStorage8,            InstType.Special);
             Add(Instruction.Subtract,                 InstType.OpBinary,       "-",               2);
             Add(Instruction.SwizzleAdd,               InstType.CallTernary,    HelperFunctionNames.SwizzleAdd);
             Add(Instruction.TextureSample,            InstType.Special);
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs
index 5e46bb46cb..f976ec5ecf 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs
@@ -365,6 +365,34 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
             return $"{arrayName}[{offsetExpr}] = {src}";
         }
 
+        // Builds the GLSL call to the 16-bit shared memory store helper: source 0 is the offset, source 1 the value.
+        public static string StoreShared16(CodeGenContext context, AstOperation operation)
+        {
+            IAstNode offsetNode = operation.GetSource(0);
+            IAstNode valueNode  = operation.GetSource(1);
+
+            string offset = GetSoureExpr(context, offsetNode, GetSrcVarType(operation.Inst, 0));
+            // The helper takes a uint, so the value is reinterpreted as U32 from the type its node produces.
+            VariableType valueType = OperandManager.GetNodeDestType(context, valueNode);
+            string value = TypeConversion.ReinterpretCast(context, valueNode, valueType, VariableType.U32);
+
+            return $"{HelperFunctionNames.StoreShared16}({offset}, {value})";
+        }
+
+        // Builds the GLSL call to the 8-bit shared memory store helper: source 0 is the offset, source 1 the value.
+        public static string StoreShared8(CodeGenContext context, AstOperation operation)
+        {
+            IAstNode offsetNode = operation.GetSource(0);
+            IAstNode valueNode  = operation.GetSource(1);
+
+            string offset = GetSoureExpr(context, offsetNode, GetSrcVarType(operation.Inst, 0));
+            // The helper takes a uint, so the value is reinterpreted as U32 from the type its node produces.
+            VariableType valueType = OperandManager.GetNodeDestType(context, valueNode);
+            string value = TypeConversion.ReinterpretCast(context, valueNode, valueType, VariableType.U32);
+
+            return $"{HelperFunctionNames.StoreShared8}({offset}, {value})";
+        }
+
         public static string StoreStorage(CodeGenContext context, AstOperation operation)
         {
             IAstNode src1 = operation.GetSource(0);
@@ -383,6 +411,42 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
             return $"{sb} = {src}";
         }
 
+        // Builds the GLSL call to the 16-bit storage store helper: sources are buffer index, offset and value.
+        public static string StoreStorage16(CodeGenContext context, AstOperation operation)
+        {
+            IAstNode src1 = operation.GetSource(0);
+            IAstNode src2 = operation.GetSource(1);
+            IAstNode src3 = operation.GetSource(2);
+
+            string indexExpr  = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
+            string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1));
+
+            // The helper takes a uint value, so the source is reinterpreted as U32 from its own result type.
+            VariableType srcType = OperandManager.GetNodeDestType(context, src3);
+            string src = TypeConversion.ReinterpretCast(context, src3, srcType, VariableType.U32);
+
+            // The helper receives index and offset directly, so no buffer accessor string is built here.
+            return $"{HelperFunctionNames.StoreStorage16}({indexExpr}, {offsetExpr}, {src})";
+        }
+
+        // Builds the GLSL call to the 8-bit storage store helper: sources are buffer index, offset and value.
+        public static string StoreStorage8(CodeGenContext context, AstOperation operation)
+        {
+            IAstNode src1 = operation.GetSource(0);
+            IAstNode src2 = operation.GetSource(1);
+            IAstNode src3 = operation.GetSource(2);
+
+            string indexExpr  = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
+            string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1));
+
+            // The helper takes a uint value, so the source is reinterpreted as U32 from its own result type.
+            VariableType srcType = OperandManager.GetNodeDestType(context, src3);
+            string src = TypeConversion.ReinterpretCast(context, src3, srcType, VariableType.U32);
+
+            // The helper receives index and offset directly, so no buffer accessor string is built here.
+            return $"{HelperFunctionNames.StoreStorage8}({indexExpr}, {offsetExpr}, {src})";
+        }
+
         public static string TextureSample(CodeGenContext context, AstOperation operation)
         {
             AstTextureOperation texOp = (AstTextureOperation)operation;
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs
index 78f41e8e4e..ceb76de162 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs
@@ -366,23 +366,33 @@ namespace Ryujinx.Graphics.Shader.Instructions
                 Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
                 Operand elemOffset = context.IAdd(wordOffset, Const(index));
 
-                if (isSmallInt)
+                if (isSmallInt && region == MemoryRegion.Local)
                 {
-                    Operand word = null;
-
-                    switch (region)
-                    {
-                        case MemoryRegion.Local: word = context.LoadLocal(elemOffset); break;
-                        case MemoryRegion.Shared: word = context.LoadShared(elemOffset); break;
-                    }
+                    Operand word = context.LoadLocal(elemOffset);
 
                     value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value);
                 }
 
-                switch (region)
+                if (region == MemoryRegion.Local)
                 {
-                    case MemoryRegion.Local: context.StoreLocal(elemOffset, value); break;
-                    case MemoryRegion.Shared: context.StoreShared(elemOffset, value); break;
+                    context.StoreLocal(elemOffset, value);
+                }
+                else if (region == MemoryRegion.Shared)
+                {
+                    switch (size)
+                    {
+                        case LsSize2.U8:
+                        case LsSize2.S8:
+                            context.StoreShared8(baseOffset, value);
+                            break;
+                        case LsSize2.U16:
+                        case LsSize2.S16:
+                            context.StoreShared16(baseOffset, value);
+                            break;
+                        default:
+                            context.StoreShared(elemOffset, value);
+                            break;
+                    }
                 }
             }
         }
@@ -401,8 +411,6 @@ namespace Ryujinx.Graphics.Shader.Instructions
                 return;
             }
 
-            bool isSmallInt = size < LsSize2.B32;
-
             int count = GetVectorCount((LsSize)size);
 
             (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
@@ -415,14 +423,20 @@ namespace Ryujinx.Graphics.Shader.Instructions
 
                 Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
 
-                if (isSmallInt)
+                Operand addrLowOffset = context.IAdd(addrLow, Const(index * 4));
+
+                if (size == LsSize2.U8 || size == LsSize2.S8)
                 {
-                    Operand word = context.LoadGlobal(addrLow, addrHigh);
-
-                    value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value);
+                    context.StoreGlobal8(addrLowOffset, addrHigh, value);
+                }
+                else if (size == LsSize2.U16 || size == LsSize2.S16)
+                {
+                    context.StoreGlobal16(addrLowOffset, addrHigh, value);
+                }
+                else
+                {
+                    context.StoreGlobal(addrLowOffset, addrHigh, value);
                 }
-
-                context.StoreGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh, value);
             }
         }
 
diff --git a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs
index 03badec956..e04e61a7e8 100644
--- a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs
+++ b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs
@@ -111,9 +111,15 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
         SquareRoot,
         StoreAttribute,
         StoreGlobal,
+        StoreGlobal16,
+        StoreGlobal8,
         StoreLocal,
         StoreShared,
+        StoreShared16,
+        StoreShared8,
         StoreStorage,
+        StoreStorage16,
+        StoreStorage8,
         Subtract,
         SwizzleAdd,
         TextureSample,
diff --git a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj
index 2fa70c265b..d59efc2ef2 100644
--- a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj
+++ b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj
@@ -17,6 +17,8 @@
     <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleDown.glsl" />
     <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleUp.glsl" />
     <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleXor.glsl" />
+    <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\StoreSharedSmallInt.glsl" />
+    <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\StoreStorageSmallInt.glsl" />
     <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\SwizzleAdd.glsl" />
     <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\TexelFetchScale_fp.glsl" />
     <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\TexelFetchScale_cp.glsl" />
diff --git a/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs
index af462a7f10..3dfd025b5b 100644
--- a/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs
+++ b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs
@@ -13,6 +13,8 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
         ShuffleDown            = 1 << 5,
         ShuffleUp              = 1 << 6,
         ShuffleXor             = 1 << 7,
-        SwizzleAdd             = 1 << 8
+        StoreSharedSmallInt    = 1 << 8,
+        StoreStorageSmallInt   = 1 << 9,
+        SwizzleAdd             = 1 << 10
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs b/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs
index 79588778c4..c647f450e2 100644
--- a/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs
+++ b/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs
@@ -120,7 +120,11 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
             Add(Instruction.StoreGlobal,              VariableType.None,   VariableType.S32,    VariableType.S32,    VariableType.U32);
             Add(Instruction.StoreLocal,               VariableType.None,   VariableType.S32,    VariableType.U32);
             Add(Instruction.StoreShared,              VariableType.None,   VariableType.S32,    VariableType.U32);
+            Add(Instruction.StoreShared16,            VariableType.None,   VariableType.S32,    VariableType.U32);
+            Add(Instruction.StoreShared8,             VariableType.None,   VariableType.S32,    VariableType.U32);
             Add(Instruction.StoreStorage,             VariableType.None,   VariableType.S32,    VariableType.S32,    VariableType.U32);
+            Add(Instruction.StoreStorage16,           VariableType.None,   VariableType.S32,    VariableType.S32,    VariableType.U32);
+            Add(Instruction.StoreStorage8,            VariableType.None,   VariableType.S32,    VariableType.S32,    VariableType.U32);
             Add(Instruction.Subtract,                 VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
             Add(Instruction.SwizzleAdd,               VariableType.F32,    VariableType.F32,    VariableType.F32,    VariableType.S32);
             Add(Instruction.TextureSample,            VariableType.F32);
diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
index 4acfa80a65..61cc167a7d 100644
--- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
+++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
@@ -203,6 +203,14 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
                 case Instruction.ShuffleXor:
                     context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleXor;
                     break;
+                case Instruction.StoreShared16:
+                case Instruction.StoreShared8:
+                    context.Info.HelperFunctionsMask |= HelperFunctionsMask.StoreSharedSmallInt;
+                    break;
+                case Instruction.StoreStorage16:
+                case Instruction.StoreStorage8:
+                    context.Info.HelperFunctionsMask |= HelperFunctionsMask.StoreStorageSmallInt;
+                    break;
                 case Instruction.SwizzleAdd:
                     context.Info.HelperFunctionsMask |= HelperFunctionsMask.SwizzleAdd;
                     break;
diff --git a/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
index 113ece99c7..a8fef95b3b 100644
--- a/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
+++ b/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
@@ -627,6 +627,16 @@ namespace Ryujinx.Graphics.Shader.Translation
             return context.Add(Instruction.StoreGlobal, null, a, b, c);
         }
 
+        // Adds a StoreGlobal16 operation; at the STG call site in this patch a/b are the low/high address
+        // parts and c is the value. The null argument is presumably the absent destination - confirm in Add.
+        public static Operand StoreGlobal16(this EmitterContext context, Operand a, Operand b, Operand c)
+            => context.Add(Instruction.StoreGlobal16, null, a, b, c);
+
+        // Adds a StoreGlobal8 operation; at the STG call site in this patch a/b are the low/high address
+        // parts and c is the value. The null argument is presumably the absent destination - confirm in Add.
+        public static Operand StoreGlobal8(this EmitterContext context, Operand a, Operand b, Operand c)
+            => context.Add(Instruction.StoreGlobal8, null, a, b, c);
+
         public static Operand StoreLocal(this EmitterContext context, Operand a, Operand b)
         {
             return context.Add(Instruction.StoreLocal, null, a, b);
@@ -637,6 +647,16 @@ namespace Ryujinx.Graphics.Shader.Translation
             return context.Add(Instruction.StoreShared, null, a, b);
         }
 
+        // Adds a StoreShared16 operation; at the STS call site in this patch a is the offset and b is the
+        // value to store. The null argument is presumably the absent destination - confirm in Add.
+        public static Operand StoreShared16(this EmitterContext context, Operand a, Operand b)
+            => context.Add(Instruction.StoreShared16, null, a, b);
+
+        // Adds a StoreShared8 operation; at the STS call site in this patch a is the offset and b is the
+        // value to store. The null argument is presumably the absent destination - confirm in Add.
+        public static Operand StoreShared8(this EmitterContext context, Operand a, Operand b)
+            => context.Add(Instruction.StoreShared8, null, a, b);
+
         public static Operand UnpackDouble2x32High(this EmitterContext context, Operand a)
         {
             return UnpackDouble2x32(context, a, 1);
diff --git a/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs b/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs
index 75bd9ddff6..1be6386842 100644
--- a/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs
+++ b/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs
@@ -20,7 +20,9 @@ namespace Ryujinx.Graphics.Shader.Translation
         {
             return (inst.IsAtomic() && IsGlobalMr(inst)) ||
                     inst == Instruction.LoadGlobal ||
-                    inst == Instruction.StoreGlobal;
+                    inst == Instruction.StoreGlobal ||
+                    inst == Instruction.StoreGlobal16 ||
+                    inst == Instruction.StoreGlobal8;
         }
 
         private static bool IsGlobalMr(Instruction inst)
diff --git a/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs b/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
index cc57102cb8..1cf43e5d30 100644
--- a/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
+++ b/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
@@ -59,7 +59,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
             Operation operation = (Operation)node.Value;
 
             bool isAtomic = operation.Inst.IsAtomic();
-            bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal;
+            bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
+            bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8;
 
             config.SetUsedStorageBuffer(storageIndex, isWrite);
 
@@ -78,12 +79,18 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
                 node.List.AddBefore(node, andOp);
 
                 Operand byteOffset = Local();
-                Operand wordOffset = Local();
-
-                Operation subOp = new Operation(Instruction.Subtract,      byteOffset, addrLow, baseAddrTrunc);
-                Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));
+                Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc);
 
                 node.List.AddBefore(node, subOp);
+
+                if (isStg16Or8)
+                {
+                    return byteOffset;
+                }
+
+                Operand wordOffset = Local();
+                Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));
+
                 node.List.AddBefore(node, shrOp);
 
                 return wordOffset;
@@ -113,7 +120,14 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
             }
             else
             {
-                storageOp = new Operation(Instruction.StoreStorage, null, sources);
+                Instruction storeInst = operation.Inst switch
+                {
+                    Instruction.StoreGlobal16 => Instruction.StoreStorage16,
+                    Instruction.StoreGlobal8 => Instruction.StoreStorage8,
+                    _ => Instruction.StoreStorage
+                };
+
+                storageOp = new Operation(storeInst, null, sources);
             }
 
             for (int index = 0; index < operation.SourcesCount; index++)
diff --git a/Ryujinx.Graphics.Shader/Translation/Rewriter.cs b/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
index 47428520ad..02a0feda8b 100644
--- a/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
+++ b/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
@@ -49,7 +49,8 @@ namespace Ryujinx.Graphics.Shader.Translation
             Operation operation = (Operation)node.Value;
 
             bool isAtomic = operation.Inst.IsAtomic();
-            bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal;
+            bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
+            bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8;
 
             Operation storageOp;
 
@@ -95,14 +96,21 @@ namespace Ryujinx.Graphics.Shader.Translation
 
             Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
 
-            Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd,    sbBaseAddrLow, alignMask);
-            Operand byteOffset    = PrependOperation(Instruction.Subtract,      addrLow, baseAddrTrunc);
-            Operand wordOffset    = PrependOperation(Instruction.ShiftRightU32, byteOffset, Const(2));
+            Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd, sbBaseAddrLow, alignMask);
+            Operand byteOffset    = PrependOperation(Instruction.Subtract, addrLow, baseAddrTrunc);
 
             Operand[] sources = new Operand[operation.SourcesCount];
 
             sources[0] = sbSlot;
-            sources[1] = wordOffset;
+
+            if (isStg16Or8)
+            {
+                sources[1] = byteOffset;
+            }
+            else
+            {
+                sources[1] = PrependOperation(Instruction.ShiftRightU32, byteOffset, Const(2));
+            }
 
             for (int index = 2; index < operation.SourcesCount; index++)
             {
@@ -121,7 +129,14 @@ namespace Ryujinx.Graphics.Shader.Translation
             }
             else
             {
-                storageOp = new Operation(Instruction.StoreStorage, null, sources);
+                Instruction storeInst = operation.Inst switch
+                {
+                    Instruction.StoreGlobal16 => Instruction.StoreStorage16,
+                    Instruction.StoreGlobal8 => Instruction.StoreStorage8,
+                    _ => Instruction.StoreStorage
+                };
+
+                storageOp = new Operation(storeInst, null, sources);
             }
 
             for (int index = 0; index < operation.SourcesCount; index++)