[llvm] 4b0a76a - [GlobalISel] Fix buildCopyFromRegs for split vectors (#77448)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 16 01:04:24 PST 2024


Author: Pierre van Houtryve
Date: 2024-01-16T10:04:20+01:00
New Revision: 4b0a76a3d7d590ca1ec60e8a703969be6550ba17

URL: https://github.com/llvm/llvm-project/commit/4b0a76a3d7d590ca1ec60e8a703969be6550ba17
DIFF: https://github.com/llvm/llvm-project/commit/4b0a76a3d7d590ca1ec60e8a703969be6550ba17.diff

LOG: [GlobalISel] Fix buildCopyFromRegs for split vectors (#77448)

Fixes #77055

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslate-bf16.ll

Modified: 
    llvm/lib/CodeGen/GlobalISel/CallLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 2953433deff1f0b..ccd9b13d730b60c 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -478,9 +478,43 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
   } else {
     // Vector was split, and elements promoted to a wider type.
     // FIXME: Should handle floating point promotions.
-    LLT BVType = LLT::fixed_vector(LLTy.getNumElements(), PartLLT);
-    auto BV = B.buildBuildVector(BVType, Regs);
-    B.buildTrunc(OrigRegs[0], BV);
+    unsigned NumElts = LLTy.getNumElements();
+    LLT BVType = LLT::fixed_vector(NumElts, PartLLT);
+
+    Register BuildVec;
+    if (NumElts == Regs.size())
+      BuildVec = B.buildBuildVector(BVType, Regs).getReg(0);
+    else {
+      // Vector elements are packed in the inputs.
+      // e.g. we have a <4 x s16> but 2 x s32 in regs.
+      assert(NumElts > Regs.size());
+      LLT SrcEltTy = MRI.getType(Regs[0]);
+
+      LLT OriginalEltTy = MRI.getType(OrigRegs[0]).getElementType();
+
+      // Input registers contain packed elements.
+      // Determine how many elements per reg.
+      assert((SrcEltTy.getSizeInBits() % OriginalEltTy.getSizeInBits()) == 0);
+      unsigned EltPerReg =
+          (SrcEltTy.getSizeInBits() / OriginalEltTy.getSizeInBits());
+
+      SmallVector<Register, 0> BVRegs;
+      BVRegs.reserve(Regs.size() * EltPerReg);
+      for (Register R : Regs) {
+        auto Unmerge = B.buildUnmerge(OriginalEltTy, R);
+        for (unsigned K = 0; K < EltPerReg; ++K)
+          BVRegs.push_back(B.buildAnyExt(PartLLT, Unmerge.getReg(K)).getReg(0));
+      }
+
+      // We may have some more elements in BVRegs, e.g. if we have 2 s32 pieces
+      // for a <3 x s16> vector. We should have less than EltPerReg extra items.
+      if (BVRegs.size() > NumElts) {
+        assert((BVRegs.size() - NumElts) < EltPerReg);
+        BVRegs.truncate(NumElts);
+      }
+      BuildVec = B.buildBuildVector(BVType, BVRegs).getReg(0);
+    }
+    B.buildTrunc(OrigRegs[0], BuildVec);
   }
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslate-bf16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslate-bf16.ll
new file mode 100644
index 000000000000000..3206f8e55f44eb5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslate-bf16.ll
@@ -0,0 +1,376 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -global-isel -stop-after=irtranslator -mtriple=amdgcn -mcpu=gfx900 | FileCheck %s -check-prefixes=GFX9
+
+; tests bf16 argument & return values lowering.
+
+define <3 x bfloat> @v3bf16(<3 x bfloat> %arg0) {
+  ; GFX9-LABEL: name: v3bf16
+  ; GFX9: bb.1 (%ir-block.0):
+  ; GFX9-NEXT:   liveins: $vgpr0, $vgpr1
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GFX9-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GFX9-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32)
+  ; GFX9-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+  ; GFX9-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+  ; GFX9-NEXT:   [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](s32)
+  ; GFX9-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
+  ; GFX9-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16)
+  ; GFX9-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
+  ; GFX9-NEXT:   [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
+  ; GFX9-NEXT:   [[C:%[0-9]+]]:_(s16) = G_FCONSTANT bfloat 0xR0000
+  ; GFX9-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16)
+  ; GFX9-NEXT:   [[SHUF:%[0-9]+]]:_(<3 x s16>) = G_SHUFFLE_VECTOR [[TRUNC]](<3 x s16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2)
+  ; GFX9-NEXT:   [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SHUF]](<3 x s16>)
+  ; GFX9-NEXT:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16)
+  ; GFX9-NEXT:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16)
+  ; GFX9-NEXT:   $vgpr0 = COPY [[ANYEXT4]](s32)
+  ; GFX9-NEXT:   $vgpr1 = COPY [[ANYEXT5]](s32)
+  ; GFX9-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
+  %res = shufflevector <3 x bfloat> %arg0, <3 x bfloat> zeroinitializer, <3 x i32> <i32 3, i32 1, i32 2>
+  ret <3 x bfloat> %res
+}
+
+define <4 x bfloat> @v4bf16(<4 x bfloat> %arg0) {
+  ; GFX9-LABEL: name: v4bf16
+  ; GFX9: bb.1 (%ir-block.0):
+  ; GFX9-NEXT:   liveins: $vgpr0, $vgpr1
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GFX9-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GFX9-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32)
+  ; GFX9-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+  ; GFX9-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+  ; GFX9-NEXT:   [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](s32)
+  ; GFX9-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
+  ; GFX9-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16)
+  ; GFX9-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32)
+  ; GFX9-NEXT:   [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+  ; GFX9-NEXT:   [[C:%[0-9]+]]:_(s16) = G_FCONSTANT bfloat 0xR0000
+  ; GFX9-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
+  ; GFX9-NEXT:   [[SHUF:%[0-9]+]]:_(<4 x s16>) = G_SHUFFLE_VECTOR [[TRUNC]](<4 x s16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2, 0)
+  ; GFX9-NEXT:   [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SHUF]](<4 x s16>)
+  ; GFX9-NEXT:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16)
+  ; GFX9-NEXT:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16)
+  ; GFX9-NEXT:   $vgpr0 = COPY [[ANYEXT4]](s32)
+  ; GFX9-NEXT:   $vgpr1 = COPY [[ANYEXT5]](s32)
+  ; GFX9-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
+  %res = shufflevector <4 x bfloat> %arg0, <4 x bfloat> zeroinitializer, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
+  ret <4 x bfloat> %res
+}
+
+define <5 x bfloat> @v5bf16(<5 x bfloat> %arg0) {
+  ; GFX9-LABEL: name: v5bf16
+  ; GFX9: bb.1 (%ir-block.0):
+  ; GFX9-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GFX9-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GFX9-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GFX9-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32)
+  ; GFX9-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+  ; GFX9-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+  ; GFX9-NEXT:   [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](s32)
+  ; GFX9-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
+  ; GFX9-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16)
+  ; GFX9-NEXT:   [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](s32)
+  ; GFX9-NEXT:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16)
+  ; GFX9-NEXT:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16)
+  ; GFX9-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32)
+  ; GFX9-NEXT:   [[TRUNC:%[0-9]+]]:_(<5 x s16>) = G_TRUNC [[BUILD_VECTOR]](<5 x s32>)
+  ; GFX9-NEXT:   [[C:%[0-9]+]]:_(s16) = G_FCONSTANT bfloat 0xR0000
+  ; GFX9-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<5 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
+  ; GFX9-NEXT:   [[SHUF:%[0-9]+]]:_(<5 x s16>) = G_SHUFFLE_VECTOR [[TRUNC]](<5 x s16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2, 0, 4)
+  ; GFX9-NEXT:   [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SHUF]](<5 x s16>)
+  ; GFX9-NEXT:   [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16)
+  ; GFX9-NEXT:   [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16)
+  ; GFX9-NEXT:   [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16)
+  ; GFX9-NEXT:   $vgpr0 = COPY [[ANYEXT6]](s32)
+  ; GFX9-NEXT:   $vgpr1 = COPY [[ANYEXT7]](s32)
+  ; GFX9-NEXT:   $vgpr2 = COPY [[ANYEXT8]](s32)
+  ; GFX9-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+  %res = shufflevector <5 x bfloat> %arg0, <5 x bfloat> zeroinitializer, <5 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4>
+  ret <5 x bfloat> %res
+}
+
+define <6 x bfloat> @v6bf16(<6 x bfloat> %arg0) {
+  ; GFX9-LABEL: name: v6bf16
+  ; GFX9: bb.1 (%ir-block.0):
+  ; GFX9-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GFX9-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GFX9-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GFX9-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32)
+  ; GFX9-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+  ; GFX9-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+  ; GFX9-NEXT:   [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](s32)
+  ; GFX9-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
+  ; GFX9-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16)
+  ; GFX9-NEXT:   [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](s32)
+  ; GFX9-NEXT:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16)
+  ; GFX9-NEXT:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16)
+  ; GFX9-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32)
+  ; GFX9-NEXT:   [[TRUNC:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[BUILD_VECTOR]](<6 x s32>)
+  ; GFX9-NEXT:   [[C:%[0-9]+]]:_(s16) = G_FCONSTANT bfloat 0xR0000
+  ; GFX9-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
+  ; GFX9-NEXT:   [[SHUF:%[0-9]+]]:_(<6 x s16>) = G_SHUFFLE_VECTOR [[TRUNC]](<6 x s16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2, 0, 4, 5)
+  ; GFX9-NEXT:   [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SHUF]](<6 x s16>)
+  ; GFX9-NEXT:   [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16)
+  ; GFX9-NEXT:   [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16)
+  ; GFX9-NEXT:   [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16)
+  ; GFX9-NEXT:   $vgpr0 = COPY [[ANYEXT6]](s32)
+  ; GFX9-NEXT:   $vgpr1 = COPY [[ANYEXT7]](s32)
+  ; GFX9-NEXT:   $vgpr2 = COPY [[ANYEXT8]](s32)
+  ; GFX9-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+  %res = shufflevector <6 x bfloat> %arg0, <6 x bfloat> zeroinitializer, <6 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5>
+  ret <6 x bfloat> %res
+}
+
+define <7 x bfloat> @v7bf16(<7 x bfloat> %arg0) {
+  ; GFX9-LABEL: name: v7bf16
+  ; GFX9: bb.1 (%ir-block.0):
+  ; GFX9-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GFX9-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GFX9-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GFX9-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GFX9-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32)
+  ; GFX9-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+  ; GFX9-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+  ; GFX9-NEXT:   [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](s32)
+  ; GFX9-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
+  ; GFX9-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16)
+  ; GFX9-NEXT:   [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](s32)
+  ; GFX9-NEXT:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16)
+  ; GFX9-NEXT:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16)
+  ; GFX9-NEXT:   [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY3]](s32)
+  ; GFX9-NEXT:   [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16)
+  ; GFX9-NEXT:   [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16)
+  ; GFX9-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32)
+  ; GFX9-NEXT:   [[TRUNC:%[0-9]+]]:_(<7 x s16>) = G_TRUNC [[BUILD_VECTOR]](<7 x s32>)
+  ; GFX9-NEXT:   [[C:%[0-9]+]]:_(s16) = G_FCONSTANT bfloat 0xR0000
+  ; GFX9-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<7 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
+  ; GFX9-NEXT:   [[SHUF:%[0-9]+]]:_(<7 x s16>) = G_SHUFFLE_VECTOR [[TRUNC]](<7 x s16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2, 0, 4, 5, 6)
+  ; GFX9-NEXT:   [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SHUF]](<7 x s16>)
+  ; GFX9-NEXT:   [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16)
+  ; GFX9-NEXT:   [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s16)
+  ; GFX9-NEXT:   [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s16)
+  ; GFX9-NEXT:   [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s16)
+  ; GFX9-NEXT:   $vgpr0 = COPY [[ANYEXT8]](s32)
+  ; GFX9-NEXT:   $vgpr1 = COPY [[ANYEXT9]](s32)
+  ; GFX9-NEXT:   $vgpr2 = COPY [[ANYEXT10]](s32)
+  ; GFX9-NEXT:   $vgpr3 = COPY [[ANYEXT11]](s32)
+  ; GFX9-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+  %res = shufflevector <7 x bfloat> %arg0, <7 x bfloat> zeroinitializer, <7 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6>
+  ret <7 x bfloat> %res
+}
+
+define <8 x bfloat> @v8bf16(<8 x bfloat> %arg0) {
+  ; GFX9-LABEL: name: v8bf16
+  ; GFX9: bb.1 (%ir-block.0):
+  ; GFX9-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GFX9-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GFX9-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GFX9-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GFX9-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32)
+  ; GFX9-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+  ; GFX9-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+  ; GFX9-NEXT:   [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](s32)
+  ; GFX9-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
+  ; GFX9-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16)
+  ; GFX9-NEXT:   [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](s32)
+  ; GFX9-NEXT:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16)
+  ; GFX9-NEXT:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16)
+  ; GFX9-NEXT:   [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY3]](s32)
+  ; GFX9-NEXT:   [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16)
+  ; GFX9-NEXT:   [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16)
+  ; GFX9-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32)
+  ; GFX9-NEXT:   [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)
+  ; GFX9-NEXT:   [[C:%[0-9]+]]:_(s16) = G_FCONSTANT bfloat 0xR0000
+  ; GFX9-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
+  ; GFX9-NEXT:   [[SHUF:%[0-9]+]]:_(<8 x s16>) = G_SHUFFLE_VECTOR [[TRUNC]](<8 x s16>), [[BUILD_VECTOR1]], shufflemask(3, 1, 2, 0, 4, 5, 6, 7)
+  ; GFX9-NEXT:   [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SHUF]](<8 x s16>)
+  ; GFX9-NEXT:   [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16)
+  ; GFX9-NEXT:   [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s16)
+  ; GFX9-NEXT:   [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s16)
+  ; GFX9-NEXT:   [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s16)
+  ; GFX9-NEXT:   $vgpr0 = COPY [[ANYEXT8]](s32)
+  ; GFX9-NEXT:   $vgpr1 = COPY [[ANYEXT9]](s32)
+  ; GFX9-NEXT:   $vgpr2 = COPY [[ANYEXT10]](s32)
+  ; GFX9-NEXT:   $vgpr3 = COPY [[ANYEXT11]](s32)
+  ; GFX9-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+  %res = shufflevector <8 x bfloat> %arg0, <8 x bfloat> zeroinitializer, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x bfloat> %res
+}
+
+define <16 x bfloat> @v16bf16(<16 x bfloat> %arg0) {
+  ; GFX9-LABEL: name: v16bf16
+  ; GFX9: bb.1 (%ir-block.0):
+  ; GFX9-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GFX9-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GFX9-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GFX9-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GFX9-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GFX9-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GFX9-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GFX9-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GFX9-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32)
+  ; GFX9-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+  ; GFX9-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+  ; GFX9-NEXT:   [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](s32)
+  ; GFX9-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
+  ; GFX9-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16)
+  ; GFX9-NEXT:   [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](s32)
+  ; GFX9-NEXT:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16)
+  ; GFX9-NEXT:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16)
+  ; GFX9-NEXT:   [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY3]](s32)
+  ; GFX9-NEXT:   [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16)
+  ; GFX9-NEXT:   [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16)
+  ; GFX9-NEXT:   [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY4]](s32)
+  ; GFX9-NEXT:   [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16)
+  ; GFX9-NEXT:   [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s16)
+  ; GFX9-NEXT:   [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY5]](s32)
+  ; GFX9-NEXT:   [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s16)
+  ; GFX9-NEXT:   [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s16)
+  ; GFX9-NEXT:   [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY6]](s32)
+  ; GFX9-NEXT:   [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[UV12]](s16)
+  ; GFX9-NEXT:   [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[UV13]](s16)
+  ; GFX9-NEXT:   [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY7]](s32)
+  ; GFX9-NEXT:   [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[UV14]](s16)
+  ; GFX9-NEXT:   [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[UV15]](s16)
+  ; GFX9-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32)
+  ; GFX9-NEXT:   [[TRUNC:%[0-9]+]]:_(<16 x s16>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>)
+  ; GFX9-NEXT:   [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[TRUNC]](<16 x s16>)
+  ; GFX9-NEXT:   [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[UV16]](s16)
+  ; GFX9-NEXT:   [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[UV17]](s16)
+  ; GFX9-NEXT:   [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[UV18]](s16)
+  ; GFX9-NEXT:   [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[UV19]](s16)
+  ; GFX9-NEXT:   [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[UV20]](s16)
+  ; GFX9-NEXT:   [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[UV21]](s16)
+  ; GFX9-NEXT:   [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[UV22]](s16)
+  ; GFX9-NEXT:   [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[UV23]](s16)
+  ; GFX9-NEXT:   $vgpr0 = COPY [[ANYEXT16]](s32)
+  ; GFX9-NEXT:   $vgpr1 = COPY [[ANYEXT17]](s32)
+  ; GFX9-NEXT:   $vgpr2 = COPY [[ANYEXT18]](s32)
+  ; GFX9-NEXT:   $vgpr3 = COPY [[ANYEXT19]](s32)
+  ; GFX9-NEXT:   $vgpr4 = COPY [[ANYEXT20]](s32)
+  ; GFX9-NEXT:   $vgpr5 = COPY [[ANYEXT21]](s32)
+  ; GFX9-NEXT:   $vgpr6 = COPY [[ANYEXT22]](s32)
+  ; GFX9-NEXT:   $vgpr7 = COPY [[ANYEXT23]](s32)
+  ; GFX9-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+  ret <16 x bfloat> %arg0
+}
+
+define <32 x bfloat> @v32bf16(<32 x bfloat> %arg0) {
+  ; GFX9-LABEL: name: v32bf16
+  ; GFX9: bb.1 (%ir-block.0):
+  ; GFX9-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GFX9-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GFX9-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GFX9-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GFX9-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GFX9-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GFX9-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GFX9-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GFX9-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GFX9-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GFX9-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GFX9-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GFX9-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GFX9-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GFX9-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GFX9-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GFX9-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32)
+  ; GFX9-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+  ; GFX9-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+  ; GFX9-NEXT:   [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](s32)
+  ; GFX9-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
+  ; GFX9-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16)
+  ; GFX9-NEXT:   [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](s32)
+  ; GFX9-NEXT:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16)
+  ; GFX9-NEXT:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16)
+  ; GFX9-NEXT:   [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY3]](s32)
+  ; GFX9-NEXT:   [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16)
+  ; GFX9-NEXT:   [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16)
+  ; GFX9-NEXT:   [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY4]](s32)
+  ; GFX9-NEXT:   [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16)
+  ; GFX9-NEXT:   [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s16)
+  ; GFX9-NEXT:   [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY5]](s32)
+  ; GFX9-NEXT:   [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s16)
+  ; GFX9-NEXT:   [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s16)
+  ; GFX9-NEXT:   [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY6]](s32)
+  ; GFX9-NEXT:   [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[UV12]](s16)
+  ; GFX9-NEXT:   [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[UV13]](s16)
+  ; GFX9-NEXT:   [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY7]](s32)
+  ; GFX9-NEXT:   [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[UV14]](s16)
+  ; GFX9-NEXT:   [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[UV15]](s16)
+  ; GFX9-NEXT:   [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY8]](s32)
+  ; GFX9-NEXT:   [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[UV16]](s16)
+  ; GFX9-NEXT:   [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[UV17]](s16)
+  ; GFX9-NEXT:   [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY9]](s32)
+  ; GFX9-NEXT:   [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[UV18]](s16)
+  ; GFX9-NEXT:   [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[UV19]](s16)
+  ; GFX9-NEXT:   [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY10]](s32)
+  ; GFX9-NEXT:   [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[UV20]](s16)
+  ; GFX9-NEXT:   [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[UV21]](s16)
+  ; GFX9-NEXT:   [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY11]](s32)
+  ; GFX9-NEXT:   [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[UV22]](s16)
+  ; GFX9-NEXT:   [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[UV23]](s16)
+  ; GFX9-NEXT:   [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY12]](s32)
+  ; GFX9-NEXT:   [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[UV24]](s16)
+  ; GFX9-NEXT:   [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[UV25]](s16)
+  ; GFX9-NEXT:   [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY13]](s32)
+  ; GFX9-NEXT:   [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[UV26]](s16)
+  ; GFX9-NEXT:   [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[UV27]](s16)
+  ; GFX9-NEXT:   [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY14]](s32)
+  ; GFX9-NEXT:   [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[UV28]](s16)
+  ; GFX9-NEXT:   [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[UV29]](s16)
+  ; GFX9-NEXT:   [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY15]](s32)
+  ; GFX9-NEXT:   [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[UV30]](s16)
+  ; GFX9-NEXT:   [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[UV31]](s16)
+  ; GFX9-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32), [[ANYEXT16]](s32), [[ANYEXT17]](s32), [[ANYEXT18]](s32), [[ANYEXT19]](s32), [[ANYEXT20]](s32), [[ANYEXT21]](s32), [[ANYEXT22]](s32), [[ANYEXT23]](s32), [[ANYEXT24]](s32), [[ANYEXT25]](s32), [[ANYEXT26]](s32), [[ANYEXT27]](s32), [[ANYEXT28]](s32), [[ANYEXT29]](s32), [[ANYEXT30]](s32), [[ANYEXT31]](s32)
+  ; GFX9-NEXT:   [[TRUNC:%[0-9]+]]:_(<32 x s16>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>)
+  ; GFX9-NEXT:   [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[TRUNC]](<32 x s16>)
+  ; GFX9-NEXT:   [[ANYEXT32:%[0-9]+]]:_(s32) = G_ANYEXT [[UV32]](s16)
+  ; GFX9-NEXT:   [[ANYEXT33:%[0-9]+]]:_(s32) = G_ANYEXT [[UV33]](s16)
+  ; GFX9-NEXT:   [[ANYEXT34:%[0-9]+]]:_(s32) = G_ANYEXT [[UV34]](s16)
+  ; GFX9-NEXT:   [[ANYEXT35:%[0-9]+]]:_(s32) = G_ANYEXT [[UV35]](s16)
+  ; GFX9-NEXT:   [[ANYEXT36:%[0-9]+]]:_(s32) = G_ANYEXT [[UV36]](s16)
+  ; GFX9-NEXT:   [[ANYEXT37:%[0-9]+]]:_(s32) = G_ANYEXT [[UV37]](s16)
+  ; GFX9-NEXT:   [[ANYEXT38:%[0-9]+]]:_(s32) = G_ANYEXT [[UV38]](s16)
+  ; GFX9-NEXT:   [[ANYEXT39:%[0-9]+]]:_(s32) = G_ANYEXT [[UV39]](s16)
+  ; GFX9-NEXT:   [[ANYEXT40:%[0-9]+]]:_(s32) = G_ANYEXT [[UV40]](s16)
+  ; GFX9-NEXT:   [[ANYEXT41:%[0-9]+]]:_(s32) = G_ANYEXT [[UV41]](s16)
+  ; GFX9-NEXT:   [[ANYEXT42:%[0-9]+]]:_(s32) = G_ANYEXT [[UV42]](s16)
+  ; GFX9-NEXT:   [[ANYEXT43:%[0-9]+]]:_(s32) = G_ANYEXT [[UV43]](s16)
+  ; GFX9-NEXT:   [[ANYEXT44:%[0-9]+]]:_(s32) = G_ANYEXT [[UV44]](s16)
+  ; GFX9-NEXT:   [[ANYEXT45:%[0-9]+]]:_(s32) = G_ANYEXT [[UV45]](s16)
+  ; GFX9-NEXT:   [[ANYEXT46:%[0-9]+]]:_(s32) = G_ANYEXT [[UV46]](s16)
+  ; GFX9-NEXT:   [[ANYEXT47:%[0-9]+]]:_(s32) = G_ANYEXT [[UV47]](s16)
+  ; GFX9-NEXT:   $vgpr0 = COPY [[ANYEXT32]](s32)
+  ; GFX9-NEXT:   $vgpr1 = COPY [[ANYEXT33]](s32)
+  ; GFX9-NEXT:   $vgpr2 = COPY [[ANYEXT34]](s32)
+  ; GFX9-NEXT:   $vgpr3 = COPY [[ANYEXT35]](s32)
+  ; GFX9-NEXT:   $vgpr4 = COPY [[ANYEXT36]](s32)
+  ; GFX9-NEXT:   $vgpr5 = COPY [[ANYEXT37]](s32)
+  ; GFX9-NEXT:   $vgpr6 = COPY [[ANYEXT38]](s32)
+  ; GFX9-NEXT:   $vgpr7 = COPY [[ANYEXT39]](s32)
+  ; GFX9-NEXT:   $vgpr8 = COPY [[ANYEXT40]](s32)
+  ; GFX9-NEXT:   $vgpr9 = COPY [[ANYEXT41]](s32)
+  ; GFX9-NEXT:   $vgpr10 = COPY [[ANYEXT42]](s32)
+  ; GFX9-NEXT:   $vgpr11 = COPY [[ANYEXT43]](s32)
+  ; GFX9-NEXT:   $vgpr12 = COPY [[ANYEXT44]](s32)
+  ; GFX9-NEXT:   $vgpr13 = COPY [[ANYEXT45]](s32)
+  ; GFX9-NEXT:   $vgpr14 = COPY [[ANYEXT46]](s32)
+  ; GFX9-NEXT:   $vgpr15 = COPY [[ANYEXT47]](s32)
+  ; GFX9-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+  ret <32 x bfloat> %arg0
+}


        


More information about the llvm-commits mailing list