[llvm] fee2953 - [ARM] Fix for undef elements from demanded elements (#70504)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 2 07:28:45 PDT 2023


Author: David Green
Date: 2023-11-02T14:28:40Z
New Revision: fee2953f23bd8a8a71e574e6a8db08033778d3a4

URL: https://github.com/llvm/llvm-project/commit/fee2953f23bd8a8a71e574e6a8db08033778d3a4
DIFF: https://github.com/llvm/llvm-project/commit/fee2953f23bd8a8a71e574e6a8db08033778d3a4.diff

LOG: [ARM] Fix for undef elements from demanded elements (#70504)

I think this is right: the undef bits should be the undef bits from the
passthrough (operand 0), with the top or bottom lanes (whichever the
instruction writes) cleared, because those lanes come from the second
argument (operand 1). We don't yet attempt to look for undef elements in
the second operand, but this should fix the bug where all elements were
marked as undef and the instruction was optimized away.

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
    llvm/test/Transforms/InstCombine/ARM/mve-narrow.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 1dee7a3ccb6d8d9..ab0a8f78b156302 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -272,8 +272,8 @@ std::optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
                                        : APInt::getHighBitsSet(2, 1));
     SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
     // The other lanes will be defined from the inserted elements.
-    UndefElts &= APInt::getSplat(NumElts, !IsTop ? APInt::getLowBitsSet(2, 1)
-                                                 : APInt::getHighBitsSet(2, 1));
+    UndefElts &= APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
+                                                : APInt::getHighBitsSet(2, 1));
     return std::nullopt;
   };
 

diff  --git a/llvm/test/Transforms/InstCombine/ARM/mve-narrow.ll b/llvm/test/Transforms/InstCombine/ARM/mve-narrow.ll
index 20babc29d535ec2..3fbc852dba9af08 100644
--- a/llvm/test/Transforms/InstCombine/ARM/mve-narrow.ll
+++ b/llvm/test/Transforms/InstCombine/ARM/mve-narrow.ll
@@ -243,7 +243,10 @@ define <8 x half> @test_cvtnp_v8i16_bt(<8 x half> %a, <8 x half> %b, <4 x float>
 
 define <4 x i32> @test_vshrn_const(<8 x i16> %a) {
 ; CHECK-LABEL: @test_vshrn_const(
-; CHECK-NEXT:    ret <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[Y:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> poison, <4 x i32> <i32 512, i32 0, i32 0, i32 0>, i32 3, i32 0, i32 0, i32 0, i32 0, i32 1)
+; CHECK-NEXT:    [[Z:%.*]] = shufflevector <8 x i16> [[Y]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[ZA:%.*]] = zext <4 x i16> [[Z]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[ZA]]
 ;
   %y = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> <i32 512, i32 0, i32 0, i32 0>, i32 3, i32 0, i32 0, i32 0, i32 0, i32 1)
   %z = shufflevector <8 x i16> %y, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -254,7 +257,12 @@ define <4 x i32> @test_vshrn_const(<8 x i16> %a) {
 define zeroext i16 @test_undef_bits() {
 ; CHECK-LABEL: @test_undef_bits(
 ; CHECK-NEXT:  e:
-; CHECK-NEXT:    ret i16 0
+; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> poison, <4 x i32> <i32 256, i32 0, i32 0, i32 0>, i32 8, i32 1, i32 1, i32 1, i32 0, i32 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x i16>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i64 0
+; CHECK-NEXT:    ret i16 [[TMP4]]
 ;
 e:
   %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> zeroinitializer, <4 x i32> <i32 256, i32 0, i32 0, i32 0>, i32 8, i32 1, i32 1, i32 1, i32 0, i32 1)


        


More information about the llvm-commits mailing list