[llvm] 3c9083f - Fix i1 vector reduction miscompilation

Thu Apr 20 15:27:10 PDT 2023

Author: Sp00ph
Date: 2023-04-20T15:26:49-07:00
New Revision: 3c9083f6757cbaf6f8d6c601586d99a11faf642e

URL: https://github.com/llvm/llvm-project/commit/3c9083f6757cbaf6f8d6c601586d99a11faf642e
DIFF: https://github.com/llvm/llvm-project/commit/3c9083f6757cbaf6f8d6c601586d99a11faf642e.diff

LOG: Fix i1 vector reduction miscompilation

Previously, `vecreduce_{and,or} vNi1` could lead to miscompilations because the legalizer first decides to `any_ext` the operand (which is correct for `vecreduce_{and,or}`) and then decides to use `vecreduce_u{min,max}` instead (for which `any_ext` is incorrect). This patch changes it so the `vecreduce_u{min,max}` operations use `sign_ext` instead of `any_ext`.

Issue: https://github.com/llvm/llvm-project/issues/62211

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D148672

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
    llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll
    llvm/test/CodeGen/AArch64/reduce-and.ll
    llvm/test/CodeGen/AArch64/reduce-or.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll
    llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
    llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index a5bf863055a48..4dab62e348233 100644

--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -20,6 +20,7 @@
 #include "LegalizeTypes.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
@@ -2296,16 +2297,44 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
   // An i1 vecreduce_or is equivalent to vecreduce_umax, use that instead if
   // vecreduce_or is not legal
   else if (Opcode == ISD::VECREDUCE_OR && OrigEltVT == MVT::i1 &&
-      !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_OR, InVT) &&
-      TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMAX, InVT))
+           !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_OR, InVT) &&
+           TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMAX, InVT)) {
     Opcode = ISD::VECREDUCE_UMAX;
+    // Can't use promoteTargetBoolean here because we still need
+    // to either sign_ext or zero_ext in the undefined case.
+    switch (TLI.getBooleanContents(InVT)) {
+    case TargetLoweringBase::UndefinedBooleanContent:
+      Op = SExtOrZExtPromotedInteger(N->getOperand(0));
+      break;
+    case TargetLoweringBase::ZeroOrOneBooleanContent:
+      Op = ZExtPromotedInteger(N->getOperand(0));
+      break;
+    case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+      Op = SExtPromotedInteger(N->getOperand(0));
+      break;
+    }
+  }
 
   // An i1 vecreduce_and is equivalent to vecreduce_umin, use that instead if
   // vecreduce_and is not legal
   else if (Opcode == ISD::VECREDUCE_AND && OrigEltVT == MVT::i1 &&
-      !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_AND, InVT) &&
-      TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMIN, InVT))
+           !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_AND, InVT) &&
+           TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMIN, InVT)) {
     Opcode = ISD::VECREDUCE_UMIN;
+    // Can't use promoteTargetBoolean here because we still need
+    // to either sign_ext or zero_ext in the undefined case.
+    switch (TLI.getBooleanContents(InVT)) {
+    case TargetLoweringBase::UndefinedBooleanContent:
+      Op = SExtOrZExtPromotedInteger(N->getOperand(0));
+      break;
+    case TargetLoweringBase::ZeroOrOneBooleanContent:
+      Op = ZExtPromotedInteger(N->getOperand(0));
+      break;
+    case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+      Op = SExtPromotedInteger(N->getOperand(0));
+      break;
+    }
+  }
 
   if (ResVT.bitsGE(EltVT))
     return DAG.getNode(Opcode, SDLoc(N), ResVT, Op);

diff  --git a/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll b/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll
index 53aea45af6d11..278a36d1a66fb 100644
--- a/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll
+++ b/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll
@@ -8,7 +8,7 @@ define i1 @unordered_floating_point_compare_on_v8f32(<8 x float> %a_vec) {
 ; CHECK-LABEL: unordered_floating_point_compare_on_v8f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcmgt v1.4s, v1.4s, #0.0
-; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    fcmgt v0.4s, v0.4s, #0.0
 ; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    mvn v0.16b, v0.16b
@@ -27,7 +27,7 @@ define i1 @unordered_floating_point_compare_on_v16f32(<16 x float> %a_vec) {
 ; CHECK-LABEL: unordered_floating_point_compare_on_v16f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcmgt v3.4s, v3.4s, #0.0
-; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    fcmgt v2.4s, v2.4s, #0.0
 ; CHECK-NEXT:    fcmgt v1.4s, v1.4s, #0.0
 ; CHECK-NEXT:    fcmgt v0.4s, v0.4s, #0.0
@@ -49,7 +49,7 @@ define i1 @unordered_floating_point_compare_on_v32f32(<32 x float> %a_vec) {
 ; CHECK-LABEL: unordered_floating_point_compare_on_v32f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcmgt v3.4s, v3.4s, #0.0
-; CHECK-NEXT:    mov w9, #1
+; CHECK-NEXT:    mov w9, #1 // =0x1
 ; CHECK-NEXT:    fcmgt v2.4s, v2.4s, #0.0
 ; CHECK-NEXT:    fcmgt v1.4s, v1.4s, #0.0
 ; CHECK-NEXT:    fcmgt v0.4s, v0.4s, #0.0

diff  --git a/llvm/test/CodeGen/AArch64/reduce-and.ll b/llvm/test/CodeGen/AArch64/reduce-and.ll
index 5adecbe5a9f25..66eb3080f14ea 100644
--- a/llvm/test/CodeGen/AArch64/reduce-and.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-and.ll
@@ -20,6 +20,8 @@ define i1 @test_redand_v1i1(<1 x i1> %a) {
 define i1 @test_redand_v2i1(<2 x i1> %a) {
 ; CHECK-LABEL: test_redand_v2i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.2s, v0.2s, #31
+; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
 ; CHECK-NEXT:    uminp v0.2s, v0.2s, v0.2s
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
@@ -41,6 +43,8 @@ define i1 @test_redand_v2i1(<2 x i1> %a) {
 define i1 @test_redand_v4i1(<4 x i1> %a) {
 ; CHECK-LABEL: test_redand_v4i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.4h, v0.4h, #15
+; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
 ; CHECK-NEXT:    uminv h0, v0.4h
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
@@ -68,6 +72,8 @@ define i1 @test_redand_v4i1(<4 x i1> %a) {
 define i1 @test_redand_v8i1(<8 x i1> %a) {
 ; CHECK-LABEL: test_redand_v8i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.8b, v0.8b, #7
+; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
 ; CHECK-NEXT:    uminv b0, v0.8b
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
@@ -107,6 +113,8 @@ define i1 @test_redand_v8i1(<8 x i1> %a) {
 define i1 @test_redand_v16i1(<16 x i1> %a) {
 ; CHECK-LABEL: test_redand_v16i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.16b, v0.16b, #7
+; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
 ; CHECK-NEXT:    uminv b0, v0.16b
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
@@ -169,6 +177,8 @@ define i1 @test_redand_v16i1(<16 x i1> %a) {
 define <16 x i1> @test_redand_ins_v16i1(<16 x i1> %a) {
 ; CHECK-LABEL: test_redand_ins_v16i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.16b, v0.16b, #7
+; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
 ; CHECK-NEXT:    uminv b0, v0.16b
 ; CHECK-NEXT:    ret
 ;

diff  --git a/llvm/test/CodeGen/AArch64/reduce-or.ll b/llvm/test/CodeGen/AArch64/reduce-or.ll
index dee681d5226b1..a44dcee3978ba 100644
--- a/llvm/test/CodeGen/AArch64/reduce-or.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-or.ll
@@ -20,6 +20,8 @@ define i1 @test_redor_v1i1(<1 x i1> %a) {
 define i1 @test_redor_v2i1(<2 x i1> %a) {
 ; CHECK-LABEL: test_redor_v2i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.2s, v0.2s, #31
+; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
 ; CHECK-NEXT:    umaxp v0.2s, v0.2s, v0.2s
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
@@ -41,6 +43,8 @@ define i1 @test_redor_v2i1(<2 x i1> %a) {
 define i1 @test_redor_v4i1(<4 x i1> %a) {
 ; CHECK-LABEL: test_redor_v4i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.4h, v0.4h, #15
+; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
 ; CHECK-NEXT:    umaxv h0, v0.4h
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
@@ -68,6 +72,8 @@ define i1 @test_redor_v4i1(<4 x i1> %a) {
 define i1 @test_redor_v8i1(<8 x i1> %a) {
 ; CHECK-LABEL: test_redor_v8i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.8b, v0.8b, #7
+; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
 ; CHECK-NEXT:    umaxv b0, v0.8b
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
@@ -107,6 +113,8 @@ define i1 @test_redor_v8i1(<8 x i1> %a) {
 define i1 @test_redor_v16i1(<16 x i1> %a) {
 ; CHECK-LABEL: test_redor_v16i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.16b, v0.16b, #7
+; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
 ; CHECK-NEXT:    umaxv b0, v0.16b
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
@@ -169,6 +177,8 @@ define i1 @test_redor_v16i1(<16 x i1> %a) {
 define <16 x i1> @test_redor_ins_v16i1(<16 x i1> %a) {
 ; CHECK-LABEL: test_redor_ins_v16i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.16b, v0.16b, #7
+; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
 ; CHECK-NEXT:    umaxv b0, v0.16b
 ; CHECK-NEXT:    ret
 ;

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll
index 614d4ffefc2cd..3c0407a82938d 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll
@@ -4,7 +4,7 @@
 define i1 @ptest_v16i1_256bit_min_sve(ptr %a, ptr %b) vscale_range(2, 0) {
 ; CHECK-LABEL: ptest_v16i1_256bit_min_sve:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #8
+; CHECK-NEXT:    mov x8, #8 // =0x8
 ; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
index d7a63af7614c4..e97ace9c38588 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
@@ -96,7 +96,7 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
 define i8 @test_v9i8(<9 x i8> %a) nounwind {
 ; CHECK-LABEL: test_v9i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-1
+; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    umov w14, v0.b[6]
 ; CHECK-NEXT:    mov v1.16b, v0.16b
 ; CHECK-NEXT:    mov v1.b[9], w8
@@ -140,6 +140,8 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind {
 define i1 @test_v4i1(<4 x i1> %a) nounwind {
 ; CHECK-LABEL: test_v4i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.4h, v0.4h, #15
+; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
 ; CHECK-NEXT:    uminv h0, v0.4h
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
index a14f12a407376..555ebe66a4de1 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
@@ -138,6 +138,8 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind {
 define i1 @test_v4i1(<4 x i1> %a) nounwind {
 ; CHECK-LABEL: test_v4i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.4h, v0.4h, #15
+; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
 ; CHECK-NEXT:    umaxv h0, v0.4h
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1