[llvm] [ARM] Fix for undef elements from demanded elements (PR #70504)

David Green via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 27 13:28:35 PDT 2023


https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/70504

I believe the correct behaviour is that the undef elements are the undef elements of the passthrough (operand 0), with the top/bottom lanes cleared, because those lanes are written from the second argument (operand 1). We do not yet attempt to look for undef elements in the second operand, but this should fix the bug where all elements were marked as undef and the instruction was optimized away.

From 4f5400a21687ac54765e585ec2fde54fd668e606 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 27 Oct 2023 21:26:59 +0100
Subject: [PATCH] [ARM] Fix for undef elements from demanded elements

I think this is right, that the undef bits should be the undef bits from the
passthrough (operand 0), with the top/bottom lanes cleared, as they come from
the second arg (operand 1). We don't yet attempt to look for undef elements in
the second operand, but this should fix the bug with all elements being marked
as undef and the instruction being optimized away.
---
 llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp     |  4 ++--
 llvm/test/Transforms/InstCombine/ARM/mve-narrow.ll | 12 ++++++++++--
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index e0d112c4a7eddb5..4262606250558a2 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -272,8 +272,8 @@ std::optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
                                        : APInt::getHighBitsSet(2, 1));
     SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
     // The other lanes will be defined from the inserted elements.
-    UndefElts &= APInt::getSplat(NumElts, !IsTop ? APInt::getLowBitsSet(2, 1)
-                                                 : APInt::getHighBitsSet(2, 1));
+    UndefElts &= APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
+                                                : APInt::getHighBitsSet(2, 1));
     return std::nullopt;
   };
 
diff --git a/llvm/test/Transforms/InstCombine/ARM/mve-narrow.ll b/llvm/test/Transforms/InstCombine/ARM/mve-narrow.ll
index 20babc29d535ec2..3fbc852dba9af08 100644
--- a/llvm/test/Transforms/InstCombine/ARM/mve-narrow.ll
+++ b/llvm/test/Transforms/InstCombine/ARM/mve-narrow.ll
@@ -243,7 +243,10 @@ define <8 x half> @test_cvtnp_v8i16_bt(<8 x half> %a, <8 x half> %b, <4 x float>
 
 define <4 x i32> @test_vshrn_const(<8 x i16> %a) {
 ; CHECK-LABEL: @test_vshrn_const(
-; CHECK-NEXT:    ret <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[Y:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> poison, <4 x i32> <i32 512, i32 0, i32 0, i32 0>, i32 3, i32 0, i32 0, i32 0, i32 0, i32 1)
+; CHECK-NEXT:    [[Z:%.*]] = shufflevector <8 x i16> [[Y]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[ZA:%.*]] = zext <4 x i16> [[Z]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[ZA]]
 ;
   %y = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> <i32 512, i32 0, i32 0, i32 0>, i32 3, i32 0, i32 0, i32 0, i32 0, i32 1)
   %z = shufflevector <8 x i16> %y, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -254,7 +257,12 @@ define <4 x i32> @test_vshrn_const(<8 x i16> %a) {
 define zeroext i16 @test_undef_bits() {
 ; CHECK-LABEL: @test_undef_bits(
 ; CHECK-NEXT:  e:
-; CHECK-NEXT:    ret i16 0
+; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> poison, <4 x i32> <i32 256, i32 0, i32 0, i32 0>, i32 8, i32 1, i32 1, i32 1, i32 0, i32 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x i16>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i64 0
+; CHECK-NEXT:    ret i16 [[TMP4]]
 ;
 e:
   %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> zeroinitializer, <4 x i32> <i32 256, i32 0, i32 0, i32 0>, i32 8, i32 1, i32 1, i32 1, i32 0, i32 1)



More information about the llvm-commits mailing list