[llvm] e97aa56 - AMDGPU/GlobalISel: Don't assert in LegalizerInfo constructor
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 26 20:03:03 PDT 2020
Author: Matt Arsenault
Date: 2020-07-26T23:01:28-04:00
New Revision: e97aa5609fa53827c8c09a0fc79075dc834f292d
URL: https://github.com/llvm/llvm-project/commit/e97aa5609fa53827c8c09a0fc79075dc834f292d
DIFF: https://github.com/llvm/llvm-project/commit/e97aa5609fa53827c8c09a0fc79075dc834f292d.diff
LOG: AMDGPU/GlobalISel: Don't assert in LegalizerInfo constructor
We don't really need these asserts. The LegalizerInfo is also
overly-aggressivly constructed, even when not in use. It needs to not
assert on dummy targets that have manually specified, unrelated
features.
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 673c5fc1e840..f1962db35bc0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -423,7 +423,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0);
if (ST.hasVOP3PInsts()) {
- assert(ST.hasIntClamp() && "all targets with VOP3P should support clamp");
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalFor({S32, S16, V2S16})
.clampScalar(0, S16, S32)
@@ -445,8 +444,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.widenScalarToNextPow2(0, 32); // FIXME: min should be 16
- assert(ST.hasIntClamp() && "all targets with 16-bit should support clamp");
-
// Technically the saturating operations require clamp bit support, but this
// was introduced at the same time as 16-bit operations.
getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll
new file mode 100644
index 000000000000..81f6b8e71254
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -stop-after=legalizer -o - %s | FileCheck %s
+
+; Make sure legalizer info doesn't assert on dummy targets
+
+define i16 @vop3p_add_i16(i16 %arg0) #0 {
+ ; CHECK-LABEL: name: vop3p_add_i16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]]
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16)
+ ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %add = add i16 %arg0, %arg0
+ ret i16 %add
+}
+
+define <2 x i16> @vop3p_add_v2i16(<2 x i16> %arg0) #0 {
+ ; CHECK-LABEL: name: vop3p_add_v2i16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]]
+ ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]]
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16)
+ ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16)
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
+ ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %add = add <2 x i16> %arg0, %arg0
+ ret <2 x i16> %add
+}
+
+define i16 @halfinsts_add_i16(i16 %arg0) #1 {
+ ; CHECK-LABEL: name: halfinsts_add_i16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY2]]
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
+ ; CHECK: $vgpr0 = COPY [[COPY3]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ %add = add i16 %arg0, %arg0
+ ret i16 %add
+}
+
+define <2 x i16> @halfinsts_add_v2i16(<2 x i16> %arg0) #1 {
+ ; CHECK-LABEL: name: halfinsts_add_v2i16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY3]], [[COPY4]]
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY5]], [[COPY6]]
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32)
+ ; CHECK: $vgpr0 = COPY [[COPY7]](s32)
+ ; CHECK: $vgpr1 = COPY [[COPY8]](s32)
+ ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+ ; CHECK: S_SETPC_B64_return [[COPY9]], implicit $vgpr0, implicit $vgpr1
+ %add = add <2 x i16> %arg0, %arg0
+ ret <2 x i16> %add
+}
+
+attributes #0 = { "target-features"="+vop3p" }
+attributes #0 = { "target-features"="+16-bit-insts" }
More information about the llvm-commits
mailing list