[llvm-branch-commits] [llvm] 7923d71 - [ARM] PREDICATE_CAST demanded bits
David Green via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Dec 1 02:44:42 PST 2020
Author: David Green
Date: 2020-12-01T10:32:24Z
New Revision: 7923d71b4a7a88f97c8a3efe1eb1473a4b2f5bf3
URL: https://github.com/llvm/llvm-project/commit/7923d71b4a7a88f97c8a3efe1eb1473a4b2f5bf3
DIFF: https://github.com/llvm/llvm-project/commit/7923d71b4a7a88f97c8a3efe1eb1473a4b2f5bf3.diff
LOG: [ARM] PREDICATE_CAST demanded bits
The PREDICATE_CAST node is used to model moves between MVE predicate
registers and GPRs, and eventually becomes a VMSR p0, rn. When moving to
a predicate, only the bottom 16 bits of the source register are
demanded. This adds a simple fold for that, allowing it to potentially
remove instructions like uxth.
Differential Revision: https://reviews.llvm.org/D92213
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
llvm/test/CodeGen/Thumb2/mve-pred-constfold.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index c94b9e64632f..0426a560805a 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -13844,6 +13844,13 @@ PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
}
+ // Only the bottom 16 bits of the source register are used.
+ if (Op.getValueType() == MVT::i32) {
+ APInt DemandedMask = APInt::getLowBitsSet(32, 16);
+ const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
+ return SDValue(N, 0);
+ }
return SDValue();
}
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
index fff9ad871027..c7e553fa3510 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
@@ -139,10 +139,9 @@ define arm_aapcs_vfpcc <16 x i8> @bitcast_to_v16i1(i16 %b, <16 x i8> %a) {
; CHECK-LE-NEXT: mov r4, sp
; CHECK-LE-NEXT: bfc r4, #0, #4
; CHECK-LE-NEXT: mov sp, r4
-; CHECK-LE-NEXT: uxth r0, r0
; CHECK-LE-NEXT: sub.w r4, r7, #8
-; CHECK-LE-NEXT: vmov.i32 q1, #0x0
; CHECK-LE-NEXT: vmsr p0, r0
+; CHECK-LE-NEXT: vmov.i32 q1, #0x0
; CHECK-LE-NEXT: vpsel q0, q0, q1
; CHECK-LE-NEXT: mov sp, r4
; CHECK-LE-NEXT: pop {r4, r6, r7, pc}
@@ -160,7 +159,6 @@ define arm_aapcs_vfpcc <16 x i8> @bitcast_to_v16i1(i16 %b, <16 x i8> %a) {
; CHECK-BE-NEXT: mov sp, r4
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: uxth r0, r0
; CHECK-BE-NEXT: sub.w r4, r7, #8
; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: vmsr p0, r0
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-constfold.ll b/llvm/test/CodeGen/Thumb2/mve-pred-constfold.ll
index afad0077bbe7..17f57743c301 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-constfold.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-constfold.ll
@@ -51,10 +51,8 @@ define arm_aapcs_vfpcc void @const(<8 x i16> %acc0, <8 x i16> %acc1, i32* nocapt
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r6, r7, lr}
; CHECK-NEXT: push {r4, r6, r7, lr}
-; CHECK-NEXT: uxth r2, r1
+; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: mvns r1, r1
-; CHECK-NEXT: vmsr p0, r2
-; CHECK-NEXT: uxth r1, r1
; CHECK-NEXT: vpstt
; CHECK-NEXT: vaddvt.s16 r12, q1
; CHECK-NEXT: vaddvt.s16 r2, q0
@@ -92,7 +90,6 @@ define arm_aapcs_vfpcc <4 x i32> @xorvpnot_i32(<4 x i32> %acc0, i16 signext %p0)
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvns r0, r0
; CHECK-NEXT: vmov.i32 q1, #0x0
-; CHECK-NEXT: uxth r0, r0
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
@@ -109,7 +106,6 @@ define arm_aapcs_vfpcc <8 x i16> @xorvpnot_i16(<8 x i16> %acc0, i16 signext %p0)
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvns r0, r0
; CHECK-NEXT: vmov.i32 q1, #0x0
-; CHECK-NEXT: uxth r0, r0
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
@@ -126,7 +122,6 @@ define arm_aapcs_vfpcc <16 x i8> @xorvpnot_i8(<16 x i8> %acc0, i16 signext %p0)
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvns r0, r0
; CHECK-NEXT: vmov.i32 q1, #0x0
-; CHECK-NEXT: uxth r0, r0
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
More information about the llvm-branch-commits
mailing list