[llvm] dbdffe3 - [AMDGPU] Add 192-bit register classes
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 22 05:10:54 PDT 2020
Author: Jay Foad
Date: 2020-04-22T13:10:37+01:00
New Revision: dbdffe3ee9d6fd4739bef5f03e61f052a95e72ca
URL: https://github.com/llvm/llvm-project/commit/dbdffe3ee9d6fd4739bef5f03e61f052a95e72ca
DIFF: https://github.com/llvm/llvm-project/commit/dbdffe3ee9d6fd4739bef5f03e61f052a95e72ca.diff
LOG: [AMDGPU] Add 192-bit register classes
Differential Revision: https://reviews.llvm.org/D78312
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll
llvm/test/CodeGen/AMDGPU/ipra-regmask.ll
Removed:
llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte-xfail.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/zextload-xfail.ll
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index f1b3cd9b8d20..4e1a7842a1ee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -824,6 +824,16 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
IsSGPR = false;
IsAGPR = true;
Width = 5;
+ } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 6;
+ } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 6;
+ } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 6;
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
"trap handler registers should not be used");
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
index df5b106bea09..dfb13767bfe2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -7,16 +7,16 @@
//===----------------------------------------------------------------------===//
def SGPRRegBank : RegisterBank<"SGPR",
- [SGPR_LO16, SReg_32, SReg_64, SReg_128, SReg_256, SReg_512, SReg_1024]
+ [SGPR_LO16, SReg_32, SReg_64, SReg_128, SReg_160, SReg_192, SReg_256, SReg_512, SReg_1024]
>;
def VGPRRegBank : RegisterBank<"VGPR",
- [VGPR_LO16, VGPR_HI16, VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512, VReg_1024]
+ [VGPR_LO16, VGPR_HI16, VGPR_32, VReg_64, VReg_96, VReg_128, VReg_160, VReg_192, VReg_256, VReg_512, VReg_1024]
>;
// It is helpful to distinguish conditions from ordinary SGPRs.
def VCCRegBank : RegisterBank <"VCC", [SReg_1]>;
def AGPRRegBank : RegisterBank <"AGPR",
- [AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_256, AReg_512, AReg_1024]
+ [AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_192, AReg_256, AReg_512, AReg_1024]
>;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 17ab79ac62c2..09fde4bc741a 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -278,6 +278,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
isRegClass(AMDGPU::VReg_96RegClassID) ||
isRegClass(AMDGPU::VReg_128RegClassID) ||
isRegClass(AMDGPU::VReg_160RegClassID) ||
+ isRegClass(AMDGPU::VReg_192RegClassID) ||
isRegClass(AMDGPU::VReg_256RegClassID) ||
isRegClass(AMDGPU::VReg_512RegClassID) ||
isRegClass(AMDGPU::VReg_1024RegClassID);
@@ -1915,6 +1916,7 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
case 3: return AMDGPU::VReg_96RegClassID;
case 4: return AMDGPU::VReg_128RegClassID;
case 5: return AMDGPU::VReg_160RegClassID;
+ case 6: return AMDGPU::VReg_192RegClassID;
case 8: return AMDGPU::VReg_256RegClassID;
case 16: return AMDGPU::VReg_512RegClassID;
case 32: return AMDGPU::VReg_1024RegClassID;
@@ -1933,7 +1935,10 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
default: return -1;
case 1: return AMDGPU::SGPR_32RegClassID;
case 2: return AMDGPU::SGPR_64RegClassID;
+ case 3: return AMDGPU::SGPR_96RegClassID;
case 4: return AMDGPU::SGPR_128RegClassID;
+ case 5: return AMDGPU::SGPR_160RegClassID;
+ case 6: return AMDGPU::SReg_192RegClassID;
case 8: return AMDGPU::SGPR_256RegClassID;
case 16: return AMDGPU::SGPR_512RegClassID;
}
@@ -1945,6 +1950,7 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
case 3: return AMDGPU::AReg_96RegClassID;
case 4: return AMDGPU::AReg_128RegClassID;
case 5: return AMDGPU::AReg_160RegClassID;
+ case 6: return AMDGPU::AReg_192RegClassID;
case 8: return AMDGPU::AReg_256RegClassID;
case 16: return AMDGPU::AReg_512RegClassID;
case 32: return AMDGPU::AReg_1024RegClassID;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index 125a2f22c0e5..4461d2be13c8 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -422,6 +422,7 @@ SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
MRI.getRegClass(AMDGPU::AReg_96RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_160RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg))
Enc |= 512;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 79d661ffab0b..b32dfb665d79 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10608,6 +10608,9 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 160:
RC = &AMDGPU::SReg_160RegClass;
break;
+ case 192:
+ RC = &AMDGPU::SReg_192RegClass;
+ break;
case 256:
RC = &AMDGPU::SReg_256RegClass;
break;
@@ -10636,6 +10639,9 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 160:
RC = &AMDGPU::VReg_160RegClass;
break;
+ case 192:
+ RC = &AMDGPU::VReg_192RegClass;
+ break;
case 256:
RC = &AMDGPU::VReg_256RegClass;
break;
@@ -10666,6 +10672,9 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 160:
RC = &AMDGPU::AReg_160RegClass;
break;
+ case 192:
+ RC = &AMDGPU::AReg_192RegClass;
+ break;
case 256:
RC = &AMDGPU::AReg_256RegClass;
break;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 84c7ea4224ca..40252f43d033 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1291,6 +1291,8 @@ SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) {
return &AMDGPU::VReg_128RegClass;
case 160:
return &AMDGPU::VReg_160RegClass;
+ case 192:
+ return &AMDGPU::VReg_192RegClass;
case 256:
return &AMDGPU::VReg_256RegClass;
case 512:
@@ -1315,6 +1317,8 @@ SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) {
return &AMDGPU::AReg_128RegClass;
case 160:
return &AMDGPU::AReg_160RegClass;
+ case 192:
+ return &AMDGPU::AReg_192RegClass;
case 256:
return &AMDGPU::AReg_256RegClass;
case 512:
@@ -1341,6 +1345,8 @@ SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
return &AMDGPU::SReg_128RegClass;
case 160:
return &AMDGPU::SReg_160RegClass;
+ case 192:
+ return &AMDGPU::SReg_192RegClass;
case 256:
return &AMDGPU::SReg_256RegClass;
case 512:
@@ -1375,6 +1381,9 @@ SIRegisterInfo::getPhysRegClass(MCRegister Reg) const {
&AMDGPU::VReg_160RegClass,
&AMDGPU::SReg_160RegClass,
&AMDGPU::AReg_160RegClass,
+ &AMDGPU::VReg_192RegClass,
+ &AMDGPU::SReg_192RegClass,
+ &AMDGPU::AReg_192RegClass,
&AMDGPU::VReg_256RegClass,
&AMDGPU::SReg_256RegClass,
&AMDGPU::AReg_256RegClass,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 2f737eaa8912..4f563d80432b 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -59,6 +59,7 @@ class getSubRegs<int size> {
list<SubRegIndex> ret3 = [sub0, sub1, sub2];
list<SubRegIndex> ret4 = [sub0, sub1, sub2, sub3];
list<SubRegIndex> ret5 = [sub0, sub1, sub2, sub3, sub4];
+ list<SubRegIndex> ret6 = [sub0, sub1, sub2, sub3, sub4, sub5];
list<SubRegIndex> ret8 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
list<SubRegIndex> ret16 = [sub0, sub1, sub2, sub3,
sub4, sub5, sub6, sub7,
@@ -77,8 +78,10 @@ class getSubRegs<int size> {
!if(!eq(size, 3), ret3,
!if(!eq(size, 4), ret4,
!if(!eq(size, 5), ret5,
- !if(!eq(size, 8), ret8,
- !if(!eq(size, 16), ret16, ret32))))));
+ !if(!eq(size, 6), ret6,
+ !if(!eq(size, 8), ret8,
+ !if(!eq(size, 16), ret16,
+ ret32)))))));
}
// Generates list of sequential register tuple names.
@@ -359,6 +362,9 @@ def SGPR_128Regs : SIRegisterTuples<getSubRegs<4>.ret, SGPR_32, 105, 4, 4, "s">;
// SGPR 160-bit registers. No operations use these, but for symmetry with 160-bit VGPRs.
def SGPR_160Regs : SIRegisterTuples<getSubRegs<5>.ret, SGPR_32, 105, 4, 5, "s">;
+// SGPR 192-bit registers
+def SGPR_192Regs : SIRegisterTuples<getSubRegs<6>.ret, SGPR_32, 105, 4, 6, "s">;
+
// SGPR 256-bit registers
def SGPR_256Regs : SIRegisterTuples<getSubRegs<8>.ret, SGPR_32, 105, 4, 8, "s">;
@@ -505,6 +511,9 @@ def VGPR_128 : SIRegisterTuples<getSubRegs<4>.ret, VGPR_32, 255, 1, 4, "v">;
// VGPR 160-bit registers
def VGPR_160 : SIRegisterTuples<getSubRegs<5>.ret, VGPR_32, 255, 1, 5, "v">;
+// VGPR 192-bit registers
+def VGPR_192 : SIRegisterTuples<getSubRegs<6>.ret, VGPR_32, 255, 1, 6, "v">;
+
// VGPR 256-bit registers
def VGPR_256 : SIRegisterTuples<getSubRegs<8>.ret, VGPR_32, 255, 1, 8, "v">;
@@ -534,6 +543,9 @@ def AGPR_128 : SIRegisterTuples<getSubRegs<4>.ret, AGPR_32, 255, 1, 4, "a">;
// AGPR 160-bit registers
def AGPR_160 : SIRegisterTuples<getSubRegs<5>.ret, AGPR_32, 255, 1, 5, "a">;
+// AGPR 192-bit registers
+def AGPR_192 : SIRegisterTuples<getSubRegs<6>.ret, AGPR_32, 255, 1, 6, "a">;
+
// AGPR 256-bit registers
def AGPR_256 : SIRegisterTuples<getSubRegs<8>.ret, AGPR_32, 255, 1, 8, "a">;
@@ -686,10 +698,15 @@ def SReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
let AllocationPriority = 16;
}
-def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32, v4i64], 32, (add SGPR_256Regs)> {
+def SReg_192 : RegisterClass<"AMDGPU", [untyped], 32, (add SGPR_192Regs)> {
+ let Size = 192;
let AllocationPriority = 17;
}
+def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32, v4i64], 32, (add SGPR_256Regs)> {
+ let AllocationPriority = 18;
+}
+
def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> {
let isAllocatable = 0;
}
@@ -698,12 +715,12 @@ def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32, v4i64], 32,
(add SGPR_256, TTMP_256)> {
// Requires 4 s_mov_b64 to copy
let CopyCost = 4;
- let AllocationPriority = 17;
+ let AllocationPriority = 18;
}
def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
(add SGPR_512Regs)> {
- let AllocationPriority = 18;
+ let AllocationPriority = 19;
}
def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
@@ -715,7 +732,7 @@ def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
(add SGPR_512, TTMP_512)> {
// Requires 8 s_mov_b64 to copy
let CopyCost = 8;
- let AllocationPriority = 18;
+ let AllocationPriority = 19;
}
def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
@@ -725,13 +742,13 @@ def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 3
def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
(add SGPR_1024Regs)> {
- let AllocationPriority = 19;
+ let AllocationPriority = 20;
}
def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
(add SGPR_1024)> {
let CopyCost = 16;
- let AllocationPriority = 19;
+ let AllocationPriority = 20;
}
// Register class for all vector registers (VGPRs + Interploation Registers)
@@ -750,6 +767,7 @@ def VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4],
def VReg_96 : VRegClass<3, [v3i32, v3f32], (add VGPR_96)>;
def VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, i128], (add VGPR_128)>;
def VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>;
+def VReg_192 : VRegClass<6, [untyped], (add VGPR_192)>;
def VReg_256 : VRegClass<8, [v8i32, v8f32], (add VGPR_256)>;
def VReg_512 : VRegClass<16, [v16i32, v16f32], (add VGPR_512)>;
def VReg_1024 : VRegClass<32, [v32i32, v32f32], (add VGPR_1024)>;
@@ -765,6 +783,7 @@ def AReg_64 : ARegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16],
def AReg_96 : ARegClass<3, [v3i32, v3f32], (add AGPR_96)>;
def AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64], (add AGPR_128)>;
def AReg_160 : ARegClass<5, [v5i32, v5f32], (add AGPR_160)>;
+def AReg_192 : ARegClass<6, [untyped], (add AGPR_192)>;
def AReg_256 : ARegClass<8, [v8i32, v8f32], (add AGPR_256)>;
def AReg_512 : ARegClass<16, [v16i32, v16f32], (add AGPR_512)>;
def AReg_1024 : ARegClass<32, [v32i32, v32f32], (add AGPR_1024)>;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 33d8aab00af8..983870032efb 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1116,6 +1116,10 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::VReg_160RegClassID:
case AMDGPU::AReg_160RegClassID:
return 160;
+ case AMDGPU::SReg_192RegClassID:
+ case AMDGPU::VReg_192RegClassID:
+ case AMDGPU::AReg_192RegClassID:
+ return 192;
case AMDGPU::SReg_256RegClassID:
case AMDGPU::VReg_256RegClassID:
case AMDGPU::AReg_256RegClassID:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte-xfail.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte-xfail.ll
deleted file mode 100644
index 95ddf2045648..000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte-xfail.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: not --crash llc -global-isel -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s
-; RUN: not --crash llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s
-
-define <3 x float> @v_uitofp_v3i8_to_v3f32(i32 %arg0) nounwind {
- %trunc = trunc i32 %arg0 to i24
- %val = bitcast i24 %trunc to <3 x i8>
- %cvt = uitofp <3 x i8> %val to <3 x float>
- ret <3 x float> %cvt
-}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
index 4bfc7e3e6744..09e2d1ad0826 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
@@ -191,6 +191,40 @@ define <2 x float> @v_uitofp_v2i8_to_v2f32(i16 %arg0) nounwind {
ret <2 x float> %cvt
}
+define <3 x float> @v_uitofp_v3i8_to_v3f32(i32 %arg0) nounwind {
+; SI-LABEL: v_uitofp_v3i8_to_v3f32:
+; SI: ; %bb.0:
+; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT: v_and_b32_e32 v1, 0xffff, v0
+; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; SI-NEXT: s_movk_i32 s4, 0xff
+; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v1
+; SI-NEXT: v_and_b32_e32 v0, s4, v0
+; SI-NEXT: v_and_b32_e32 v1, s4, v1
+; SI-NEXT: v_and_b32_e32 v2, s4, v2
+; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
+; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1
+; SI-NEXT: v_cvt_f32_ubyte0_e32 v2, v2
+; SI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: v_uitofp_v3i8_to_v3f32:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: s_movk_i32 s4, 0xff
+; VI-NEXT: v_mov_b32_e32 v2, s4
+; VI-NEXT: v_and_b32_sdwa v1, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+; VI-NEXT: v_cvt_f32_ubyte0_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
+; VI-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_cvt_f32_ubyte0_e32 v2, v0
+; VI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1
+; VI-NEXT: v_mov_b32_e32 v0, v3
+; VI-NEXT: s_setpc_b64 s[30:31]
+ %trunc = trunc i32 %arg0 to i24
+ %val = bitcast i24 %trunc to <3 x i8>
+ %cvt = uitofp <3 x i8> %val to <3 x float>
+ ret <3 x float> %cvt
+}
+
define <4 x float> @v_uitofp_v4i8_to_v4f32(i32 %arg0) nounwind {
; SI-LABEL: v_uitofp_v4i8_to_v4f32:
; SI: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir
index 35d4e8e647e7..938b7af5b972 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir
@@ -3,9 +3,7 @@
# RUN: FileCheck -check-prefix=ERR %s < %t
# ERR-NOT: remark:
-# ERR: remark: <unknown>:0:0: cannot select: %3:sgpr(<12 x s16>) = G_CONCAT_VECTORS %0:sgpr(<4 x s16>), %1:sgpr(<4 x s16>), %2:sgpr(<4 x s16>) (in function: test_concat_vectors_s_v12s16_s_v4s16_s_v4s16_s_v4s16)
-# ERR-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(<12 x s16>) = G_CONCAT_VECTORS %0:vgpr(<4 x s16>), %1:vgpr(<4 x s16>), %2:vgpr(<4 x s16>) (in function: test_concat_vectors_v_v12s16_v_v4s16_v_v4s16_v_v4s16)
-# ERR-NEXT: remark: <unknown>:0:0: cannot select: %2:sgpr(<6 x s64>) = G_CONCAT_VECTORS %0:sgpr(<3 x s64>), %1:sgpr(<3 x s64>) (in function: test_concat_vectors_s_v6s64_s_v3s64_s_v3s64)
+# ERR: remark: <unknown>:0:0: cannot select: %2:sgpr(<6 x s64>) = G_CONCAT_VECTORS %0:sgpr(<3 x s64>), %1:sgpr(<3 x s64>) (in function: test_concat_vectors_s_v6s64_s_v3s64_s_v3s64)
# ERR-NOT: remark:
---
@@ -282,11 +280,11 @@ body: |
liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
; GCN-LABEL: name: test_concat_vectors_s_v12s16_s_v4s16_s_v4s16_s_v4s16
- ; GCN: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
- ; GCN: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3
- ; GCN: [[COPY2:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr4_sgpr5
- ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s16>) = G_CONCAT_VECTORS [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s16>)
- ; GCN: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<12 x s16>)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
+ ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
+ ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_192 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5
+ ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
%1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3
%2:sgpr(<4 x s16>) = COPY $sgpr4_sgpr5
@@ -304,11 +302,11 @@ body: |
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GCN-LABEL: name: test_concat_vectors_v_v12s16_v_v4s16_v_v4s16_v_v4s16
- ; GCN: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
- ; GCN: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
- ; GCN: [[COPY2:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr4_vgpr5
- ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s16>) = G_CONCAT_VECTORS [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s16>)
- ; GCN: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<12 x s16>)
+ ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GCN: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GCN: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_192 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5
+ ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
%2:vgpr(<4 x s16>) = COPY $vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir
index 207814ccd8c8..153b2860177e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir
@@ -1,12 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' -o - %s 2> %t | FileCheck -check-prefix=GCN %s
-# RUN: FileCheck -check-prefix=ERR %s < %t
-
-
-# ERR-NOT: remark:
-# ERR: remark: <unknown>:0:0: cannot select: %3:sgpr(s192) = G_MERGE_VALUES %0:sgpr(s64), %1:sgpr(s64), %2:sgpr(s64) (in function: test_merge_values_s_s192_s_s64_s_s64_s_s64)
-# ERR-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s192) = G_MERGE_VALUES %0:vgpr(s64), %1:vgpr(s64), %2:vgpr(s64) (in function: test_merge_values_v_s192_v_s64_v_s64_v_s64)
-# ERR-NOT: remark:
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' -o - %s | FileCheck -check-prefix=GCN %s
---
name: test_merge_values_v_s64_v_s32_v_s32
@@ -346,11 +339,11 @@ body: |
; GCN-LABEL: name: test_merge_values_s_s192_s_s64_s_s64_s_s64
; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
- ; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
- ; GCN: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
- ; GCN: [[COPY2:%[0-9]+]]:sgpr(s64) = COPY $sgpr4_sgpr5
- ; GCN: [[MV:%[0-9]+]]:sgpr(s192) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64)
- ; GCN: S_ENDPGM 0, implicit [[MV]](s192)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
+ ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
+ ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_192 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5
+ ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = COPY $sgpr2_sgpr3
%2:sgpr(s64) = COPY $sgpr4_sgpr5
@@ -370,11 +363,11 @@ body: |
; GCN-LABEL: name: test_merge_values_v_s192_v_s64_v_s64_v_s64
; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
- ; GCN: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
- ; GCN: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
- ; GCN: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr4_vgpr5
- ; GCN: [[MV:%[0-9]+]]:vgpr(s192) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64)
- ; GCN: S_ENDPGM 0, implicit [[MV]](s192)
+ ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GCN: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GCN: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_192 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5
+ ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = COPY $vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir
index f96dd5129e02..901e04a9c979 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir
@@ -1,10 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -o - %s 2> %t | FileCheck -check-prefix=GCN %s
-# RUN: FileCheck -check-prefix=ERR %s < %t
-
-# ERR-NOT: remark:
-# ERR: remark: <unknown>:0:0: cannot select: %1:sgpr(s64), %2:sgpr(s64), %3:sgpr(s64) = G_UNMERGE_VALUES %0:sgpr(s192) (in function: test_unmerge_values_s_s64_s_s64_s64_s_s192)
-# ERR-NOT: remark:
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -o - %s | FileCheck -check-prefix=GCN %s
---
name: test_unmerge_values_v_s32_v_s32_v_s64
@@ -185,9 +180,11 @@ body: |
; GCN-LABEL: name: test_unmerge_values_s_s64_s_s64_s64_s_s192
; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN: [[DEF:%[0-9]+]]:sgpr(s192) = G_IMPLICIT_DEF
- ; GCN: [[UV:%[0-9]+]]:sgpr(s64), [[UV1:%[0-9]+]]:sgpr(s64), [[UV2:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[DEF]](s192)
- ; GCN: S_ENDPGM 0, implicit [[UV]](s64), implicit [[UV1]](s64), implicit [[UV2]](s64)
+ ; GCN: [[DEF:%[0-9]+]]:sreg_192 = IMPLICIT_DEF
+ ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub0_sub1
+ ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub2_sub3
+ ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub4_sub5
+ ; GCN: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]]
%0:sgpr(s192) = G_IMPLICIT_DEF
%1:sgpr(s64), %2:sgpr(s64), %3:sgpr(s64) = G_UNMERGE_VALUES %0
S_ENDPGM 0, implicit %1, implicit %2, implicit %3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload-xfail.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload-xfail.ll
deleted file mode 100644
index dc5f9e4a29f2..000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload-xfail.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: not --crash llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s
-; RUN: not --crash llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s
-; RUN: not --crash llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s
-
-define i96 @zextload_global_i32_to_i96(i32 addrspace(1)* %ptr) {
- %load = load i32, i32 addrspace(1)* %ptr
- %ext = zext i32 %load to i96
- ret i96 %ext
-}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll
index df52912c7013..f853a87e97d8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll
@@ -134,6 +134,50 @@ define i64 @zextload_global_i32_to_i64(i32 addrspace(1)* %ptr) {
ret i64 %ext
}
+define i96 @zextload_global_i32_to_i96(i32 addrspace(1)* %ptr) {
+; GFX9-LABEL: zextload_global_i32_to_i96:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v3, v1
+; GFX9-NEXT: v_mov_b32_e32 v2, v0
+; GFX9-NEXT: global_load_dword v0, v[2:3], off
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: zextload_global_i32_to_i96:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v3, v1
+; GFX8-NEXT: v_mov_b32_e32 v2, v0
+; GFX8-NEXT: flat_load_dword v0, v[2:3]
+; GFX8-NEXT: v_mov_b32_e32 v2, 0
+; GFX8-NEXT: v_mov_b32_e32 v1, 0
+; GFX8-NEXT: v_mov_b32_e32 v3, 0
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-LABEL: zextload_global_i32_to_i96:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_mov_b32_e32 v3, v1
+; GFX6-NEXT: v_mov_b32_e32 v2, v0
+; GFX6-NEXT: s_mov_b32 s6, 0
+; GFX6-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-NEXT: buffer_load_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX6-NEXT: v_mov_b32_e32 v2, 0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0
+; GFX6-NEXT: v_mov_b32_e32 v3, 0
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+ %load = load i32, i32 addrspace(1)* %ptr
+ %ext = zext i32 %load to i96
+ ret i96 %ext
+}
+
define i128 @zextload_global_i32_to_i128(i32 addrspace(1)* %ptr) {
; GFX9-LABEL: zextload_global_i32_to_i128:
; GFX9: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll b/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll
index f1132fc773d4..d4084e40fc8f 100644
--- a/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll
@@ -1,19 +1,19 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -enable-ipra -print-regusage -o /dev/null 2>&1 < %s | FileCheck %s
; Make sure the expected regmask is generated for sub/superregisters.
-; CHECK-DAG: csr Clobbered Registers: $vgpr0 $vgpr0_hi16 $vgpr0_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr0_vgpr1 $vgpr0_vgpr1_vgpr2 {{$}}
+; CHECK-DAG: csr Clobbered Registers: $vgpr0 $vgpr0_hi16 $vgpr0_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr0_vgpr1 $vgpr0_vgpr1_vgpr2 {{$}}
define void @csr() #0 {
call void asm sideeffect "", "~{v0},~{v36},~{v37}"() #0
ret void
}
-; CHECK-DAG: subregs_for_super Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}
+; CHECK-DAG: subregs_for_super Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}
define void @subregs_for_super() #0 {
call void asm sideeffect "", "~{v0},~{v1}"() #0
ret void
}
-; CHECK-DAG: Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}
+; CHECK-DAG: clobbered_reg_with_sub Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}
define void @clobbered_reg_with_sub() #0 {
call void asm sideeffect "", "~{v[0:1]}"() #0
ret void
More information about the llvm-commits
mailing list