[llvm] 3c9083f - Fix i1 vector reduction miscompilation
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 20 15:27:10 PDT 2023
Author: Sp00ph
Date: 2023-04-20T15:26:49-07:00
New Revision: 3c9083f6757cbaf6f8d6c601586d99a11faf642e
URL: https://github.com/llvm/llvm-project/commit/3c9083f6757cbaf6f8d6c601586d99a11faf642e
DIFF: https://github.com/llvm/llvm-project/commit/3c9083f6757cbaf6f8d6c601586d99a11faf642e.diff
LOG: Fix i1 vector reduction miscompilation
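Previously, `vecreduce_{and,or} vNi1` could be miscompiled: the legalizer first decided to `any_ext` the operand (which is correct for `vecreduce_{and,or}`) and then switched the opcode to `vecreduce_u{min,max}` (for which `any_ext` is incorrect, because the unspecified high bits of each element take part in the unsigned comparison). This patch changes the `vecreduce_u{min,max}` path so that it no longer uses `any_ext`; the operand is instead extended according to the target's boolean contents: `zero_ext` for zero-or-one, `sign_ext` for zero-or-negative-one, and either `sign_ext` or `zero_ext` when the boolean contents are undefined.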
Issue: https://github.com/llvm/llvm-project/issues/62211
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D148672
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll
llvm/test/CodeGen/AArch64/reduce-and.ll
llvm/test/CodeGen/AArch64/reduce-or.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll
llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index a5bf863055a48..4dab62e348233 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -20,6 +20,7 @@
#include "LegalizeTypes.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
@@ -2296,16 +2297,44 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
// An i1 vecreduce_or is equivalent to vecreduce_umax, use that instead if
// vecreduce_or is not legal
else if (Opcode == ISD::VECREDUCE_OR && OrigEltVT == MVT::i1 &&
- !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_OR, InVT) &&
- TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMAX, InVT))
+ !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_OR, InVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMAX, InVT)) {
Opcode = ISD::VECREDUCE_UMAX;
+ // Can't use promoteTargetBoolean here because we still need
+ // to either sign_ext or zero_ext in the undefined case.
+ switch (TLI.getBooleanContents(InVT)) {
+ case TargetLoweringBase::UndefinedBooleanContent:
+ Op = SExtOrZExtPromotedInteger(N->getOperand(0));
+ break;
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ Op = ZExtPromotedInteger(N->getOperand(0));
+ break;
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ Op = SExtPromotedInteger(N->getOperand(0));
+ break;
+ }
+ }
// An i1 vecreduce_and is equivalent to vecreduce_umin, use that instead if
// vecreduce_and is not legal
else if (Opcode == ISD::VECREDUCE_AND && OrigEltVT == MVT::i1 &&
- !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_AND, InVT) &&
- TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMIN, InVT))
+ !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_AND, InVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMIN, InVT)) {
Opcode = ISD::VECREDUCE_UMIN;
+ // Can't use promoteTargetBoolean here because we still need
+ // to either sign_ext or zero_ext in the undefined case.
+ switch (TLI.getBooleanContents(InVT)) {
+ case TargetLoweringBase::UndefinedBooleanContent:
+ Op = SExtOrZExtPromotedInteger(N->getOperand(0));
+ break;
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ Op = ZExtPromotedInteger(N->getOperand(0));
+ break;
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ Op = SExtPromotedInteger(N->getOperand(0));
+ break;
+ }
+ }
if (ResVT.bitsGE(EltVT))
return DAG.getNode(Opcode, SDLoc(N), ResVT, Op);
diff --git a/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll b/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll
index 53aea45af6d11..278a36d1a66fb 100644
--- a/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll
+++ b/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll
@@ -8,7 +8,7 @@ define i1 @unordered_floating_point_compare_on_v8f32(<8 x float> %a_vec) {
; CHECK-LABEL: unordered_floating_point_compare_on_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0
-; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: mvn v0.16b, v0.16b
@@ -27,7 +27,7 @@ define i1 @unordered_floating_point_compare_on_v16f32(<16 x float> %a_vec) {
; CHECK-LABEL: unordered_floating_point_compare_on_v16f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmgt v3.4s, v3.4s, #0.0
-; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: fcmgt v2.4s, v2.4s, #0.0
; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0
; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0
@@ -49,7 +49,7 @@ define i1 @unordered_floating_point_compare_on_v32f32(<32 x float> %a_vec) {
; CHECK-LABEL: unordered_floating_point_compare_on_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmgt v3.4s, v3.4s, #0.0
-; CHECK-NEXT: mov w9, #1
+; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: fcmgt v2.4s, v2.4s, #0.0
; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0
; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0
diff --git a/llvm/test/CodeGen/AArch64/reduce-and.ll b/llvm/test/CodeGen/AArch64/reduce-and.ll
index 5adecbe5a9f25..66eb3080f14ea 100644
--- a/llvm/test/CodeGen/AArch64/reduce-and.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-and.ll
@@ -20,6 +20,8 @@ define i1 @test_redand_v1i1(<1 x i1> %a) {
define i1 @test_redand_v2i1(<2 x i1> %a) {
; CHECK-LABEL: test_redand_v2i1:
; CHECK: // %bb.0:
+; CHECK-NEXT: shl v0.2s, v0.2s, #31
+; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
; CHECK-NEXT: uminp v0.2s, v0.2s, v0.2s
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
@@ -41,6 +43,8 @@ define i1 @test_redand_v2i1(<2 x i1> %a) {
define i1 @test_redand_v4i1(<4 x i1> %a) {
; CHECK-LABEL: test_redand_v4i1:
; CHECK: // %bb.0:
+; CHECK-NEXT: shl v0.4h, v0.4h, #15
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
; CHECK-NEXT: uminv h0, v0.4h
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
@@ -68,6 +72,8 @@ define i1 @test_redand_v4i1(<4 x i1> %a) {
define i1 @test_redand_v8i1(<8 x i1> %a) {
; CHECK-LABEL: test_redand_v8i1:
; CHECK: // %bb.0:
+; CHECK-NEXT: shl v0.8b, v0.8b, #7
+; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
; CHECK-NEXT: uminv b0, v0.8b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
@@ -107,6 +113,8 @@ define i1 @test_redand_v8i1(<8 x i1> %a) {
define i1 @test_redand_v16i1(<16 x i1> %a) {
; CHECK-LABEL: test_redand_v16i1:
; CHECK: // %bb.0:
+; CHECK-NEXT: shl v0.16b, v0.16b, #7
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: uminv b0, v0.16b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
@@ -169,6 +177,8 @@ define i1 @test_redand_v16i1(<16 x i1> %a) {
define <16 x i1> @test_redand_ins_v16i1(<16 x i1> %a) {
; CHECK-LABEL: test_redand_ins_v16i1:
; CHECK: // %bb.0:
+; CHECK-NEXT: shl v0.16b, v0.16b, #7
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: uminv b0, v0.16b
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/reduce-or.ll b/llvm/test/CodeGen/AArch64/reduce-or.ll
index dee681d5226b1..a44dcee3978ba 100644
--- a/llvm/test/CodeGen/AArch64/reduce-or.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-or.ll
@@ -20,6 +20,8 @@ define i1 @test_redor_v1i1(<1 x i1> %a) {
define i1 @test_redor_v2i1(<2 x i1> %a) {
; CHECK-LABEL: test_redor_v2i1:
; CHECK: // %bb.0:
+; CHECK-NEXT: shl v0.2s, v0.2s, #31
+; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
; CHECK-NEXT: umaxp v0.2s, v0.2s, v0.2s
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
@@ -41,6 +43,8 @@ define i1 @test_redor_v2i1(<2 x i1> %a) {
define i1 @test_redor_v4i1(<4 x i1> %a) {
; CHECK-LABEL: test_redor_v4i1:
; CHECK: // %bb.0:
+; CHECK-NEXT: shl v0.4h, v0.4h, #15
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
; CHECK-NEXT: umaxv h0, v0.4h
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
@@ -68,6 +72,8 @@ define i1 @test_redor_v4i1(<4 x i1> %a) {
define i1 @test_redor_v8i1(<8 x i1> %a) {
; CHECK-LABEL: test_redor_v8i1:
; CHECK: // %bb.0:
+; CHECK-NEXT: shl v0.8b, v0.8b, #7
+; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
; CHECK-NEXT: umaxv b0, v0.8b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
@@ -107,6 +113,8 @@ define i1 @test_redor_v8i1(<8 x i1> %a) {
define i1 @test_redor_v16i1(<16 x i1> %a) {
; CHECK-LABEL: test_redor_v16i1:
; CHECK: // %bb.0:
+; CHECK-NEXT: shl v0.16b, v0.16b, #7
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: umaxv b0, v0.16b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
@@ -169,6 +177,8 @@ define i1 @test_redor_v16i1(<16 x i1> %a) {
define <16 x i1> @test_redor_ins_v16i1(<16 x i1> %a) {
; CHECK-LABEL: test_redor_ins_v16i1:
; CHECK: // %bb.0:
+; CHECK-NEXT: shl v0.16b, v0.16b, #7
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: umaxv b0, v0.16b
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll
index 614d4ffefc2cd..3c0407a82938d 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll
@@ -4,7 +4,7 @@
define i1 @ptest_v16i1_256bit_min_sve(ptr %a, ptr %b) vscale_range(2, 0) {
; CHECK-LABEL: ptest_v16i1_256bit_min_sve:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #8
+; CHECK-NEXT: mov x8, #8 // =0x8
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
index d7a63af7614c4..e97ace9c38588 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
@@ -96,7 +96,7 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
define i8 @test_v9i8(<9 x i8> %a) nounwind {
; CHECK-LABEL: test_v9i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-1
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: umov w14, v0.b[6]
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: mov v1.b[9], w8
@@ -140,6 +140,8 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind {
define i1 @test_v4i1(<4 x i1> %a) nounwind {
; CHECK-LABEL: test_v4i1:
; CHECK: // %bb.0:
+; CHECK-NEXT: shl v0.4h, v0.4h, #15
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
; CHECK-NEXT: uminv h0, v0.4h
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
index a14f12a407376..555ebe66a4de1 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
@@ -138,6 +138,8 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind {
define i1 @test_v4i1(<4 x i1> %a) nounwind {
; CHECK-LABEL: test_v4i1:
; CHECK: // %bb.0:
+; CHECK-NEXT: shl v0.4h, v0.4h, #15
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
; CHECK-NEXT: umaxv h0, v0.4h
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
More information about the llvm-commits mailing list