[llvm] [ARM] Fix for undef elements from demanded elements (PR #70504)

via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 27 13:29:47 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-arm

Author: David Green (davemgreen)

<details>
<summary>Changes</summary>

I believe this is correct: the undef elements of the result should be the undef elements of the passthrough (operand 0), with the top or bottom lanes cleared, because those lanes are written from the second argument (operand 1). We do not yet attempt to look for undef elements in the second operand, but this change should fix the bug where all elements were marked as undef and the instruction was optimized away.

---
Full diff: https://github.com/llvm/llvm-project/pull/70504.diff


2 Files Affected:

- (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp (+2-2) 
- (modified) llvm/test/Transforms/InstCombine/ARM/mve-narrow.ll (+10-2) 


``````````diff
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index e0d112c4a7eddb5..4262606250558a2 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -272,8 +272,8 @@ std::optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
                                        : APInt::getHighBitsSet(2, 1));
     SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
     // The other lanes will be defined from the inserted elements.
-    UndefElts &= APInt::getSplat(NumElts, !IsTop ? APInt::getLowBitsSet(2, 1)
-                                                 : APInt::getHighBitsSet(2, 1));
+    UndefElts &= APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
+                                                : APInt::getHighBitsSet(2, 1));
     return std::nullopt;
   };
 
diff --git a/llvm/test/Transforms/InstCombine/ARM/mve-narrow.ll b/llvm/test/Transforms/InstCombine/ARM/mve-narrow.ll
index 20babc29d535ec2..3fbc852dba9af08 100644
--- a/llvm/test/Transforms/InstCombine/ARM/mve-narrow.ll
+++ b/llvm/test/Transforms/InstCombine/ARM/mve-narrow.ll
@@ -243,7 +243,10 @@ define <8 x half> @test_cvtnp_v8i16_bt(<8 x half> %a, <8 x half> %b, <4 x float>
 
 define <4 x i32> @test_vshrn_const(<8 x i16> %a) {
 ; CHECK-LABEL: @test_vshrn_const(
-; CHECK-NEXT:    ret <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[Y:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> poison, <4 x i32> <i32 512, i32 0, i32 0, i32 0>, i32 3, i32 0, i32 0, i32 0, i32 0, i32 1)
+; CHECK-NEXT:    [[Z:%.*]] = shufflevector <8 x i16> [[Y]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[ZA:%.*]] = zext <4 x i16> [[Z]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[ZA]]
 ;
   %y = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> <i32 512, i32 0, i32 0, i32 0>, i32 3, i32 0, i32 0, i32 0, i32 0, i32 1)
   %z = shufflevector <8 x i16> %y, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -254,7 +257,12 @@ define <4 x i32> @test_vshrn_const(<8 x i16> %a) {
 define zeroext i16 @test_undef_bits() {
 ; CHECK-LABEL: @test_undef_bits(
 ; CHECK-NEXT:  e:
-; CHECK-NEXT:    ret i16 0
+; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> poison, <4 x i32> <i32 256, i32 0, i32 0, i32 0>, i32 8, i32 1, i32 1, i32 1, i32 0, i32 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x i16>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i64 0
+; CHECK-NEXT:    ret i16 [[TMP4]]
 ;
 e:
   %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> zeroinitializer, <4 x i32> <i32 256, i32 0, i32 0, i32 0>, i32 8, i32 1, i32 1, i32 1, i32 0, i32 1)

``````````

</details>


https://github.com/llvm/llvm-project/pull/70504


More information about the llvm-commits mailing list