[llvm] 96dcd8c - [LoongArch] Optimize bitwise and with immediates
Ben Shi via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 2 21:11:11 PDT 2023
Author: Ben Shi
Date: 2023-04-03T12:10:07+08:00
New Revision: 96dcd8cb9446492132dd3467ed2d3c84ecba5b25
URL: https://github.com/llvm/llvm-project/commit/96dcd8cb9446492132dd3467ed2d3c84ecba5b25
DIFF: https://github.com/llvm/llvm-project/commit/96dcd8cb9446492132dd3467ed2d3c84ecba5b25.diff
LOG: [LoongArch] Optimize bitwise and with immediates
Optimize bitfield extractions that keep the field at its original
bit position, from 'lu12i + addi + and' to 'bstrpick + slli'.
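For instance, in the and_i32_0xfff0 test updated below, the LA32 code
for 'and %a, 0xfff0' previously had to materialize the mask:

    lu12i.w $a1, 15
    ori     $a1, $a1, 4080
    and     $a0, $a0, $a1

and now becomes a bit-field pick shifted back to its original position:

    bstrpick.w $a0, $a0, 15, 4
    slli.w     $a0, $a0, 4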
Reviewed By: xen0n, SixWeining
Differential Revision: https://reviews.llvm.org/D147368
Added:
Modified:
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/test/CodeGen/LoongArch/alloca.ll
llvm/test/CodeGen/LoongArch/ir-instruction/and.ll
llvm/test/CodeGen/LoongArch/shrinkwrap.ll
llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 4cf943ea5d1c..7f62a12e2b9d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1372,16 +1372,39 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
     if (CN->getZExtValue() <= 0xfff)
       return SDValue();
 
-    // Return if the mask doesn't start at position 0.
-    if (SMIdx)
+    // Return if the MSB exceeds the width of the value type.
+    if (SMIdx + SMLen > ValTy.getSizeInBits())
       return SDValue();
 
-    lsb = 0;
+    if (SMIdx > 0) {
+      // Omit if the constant has more than 2 uses. This is a conservative
+      // decision. Whether it is a win depends on the HW microarchitecture.
+      // However, it should always be better for 1 and 2 uses.
+      if (CN->use_size() > 2)
+        return SDValue();
+      // Return if the constant can be composed by a single LU12I.W.
+      if ((CN->getZExtValue() & 0xfff) == 0)
+        return SDValue();
+      // Return if the constant can be composed by a single ADDI with
+      // the zero register.
+      if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
+        return SDValue();
+    }
+
+    lsb = SMIdx;
     NewOperand = FirstOperand;
   }
+
   msb = lsb + SMLen - 1;
-  return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
-                     DAG.getConstant(msb, DL, GRLenVT),
+  SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
+                            DAG.getConstant(msb, DL, GRLenVT),
+                            DAG.getConstant(lsb, DL, GRLenVT));
+  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
+    return NR0;
+  // Try to optimize to
+  //   bstrpick $Rd, $Rs, msb, lsb
+  //   slli     $Rd, $Rd, lsb
+  return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
                      DAG.getConstant(lsb, DL, GRLenVT));
 }
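To make the decomposition concrete, here is a minimal standalone C++
sketch of the shifted-mask check and the resulting BSTRPICK/SLLI
operands. The function name decomposeShiftedMask is invented for
illustration; the in-tree code derives SMIdx/SMLen via SelectionDAG
helpers rather than this routine:

    #include <cstdint>
    #include <cstdio>

    // Illustrative model only: the real combine obtains SMIdx/SMLen from
    // SelectionDAG utilities instead of this hypothetical helper.
    static bool decomposeShiftedMask(uint64_t Mask, unsigned &Lsb, unsigned &Msb) {
      if (Mask == 0)
        return false;
      Lsb = __builtin_ctzll(Mask);        // SMIdx: index of the lowest set bit.
      uint64_t Run = Mask >> Lsb;
      if ((Run & (Run + 1)) != 0)         // The set bits must form one run.
        return false;
      unsigned SMLen = 64 - __builtin_clzll(Run); // SMLen: length of the run.
      Msb = Lsb + SMLen - 1;
      return true;
    }

    int main() {
      unsigned Lsb, Msb;
      if (decomposeShiftedMask(0xfff0, Lsb, Msb))
        // Prints: bstrpick rd, rs, 15, 4 ; slli rd, rd, 4
        printf("bstrpick rd, rs, %u, %u ; slli rd, rd, %u\n", Msb, Lsb, Lsb);
      return 0;
    }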
diff --git a/llvm/test/CodeGen/LoongArch/alloca.ll b/llvm/test/CodeGen/LoongArch/alloca.ll
index ca6508efd94e..22473098e8fb 100644
--- a/llvm/test/CodeGen/LoongArch/alloca.ll
+++ b/llvm/test/CodeGen/LoongArch/alloca.ll
@@ -34,11 +34,10 @@ define void @simple_alloca(i32 %n) nounwind {
; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
; LA64-NEXT: addi.d $fp, $sp, 16
-; LA64-NEXT: addi.w $a1, $zero, -16
-; LA64-NEXT: lu32i.d $a1, 1
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
; LA64-NEXT: addi.d $a0, $a0, 15
-; LA64-NEXT: and $a0, $a0, $a1
+; LA64-NEXT: bstrpick.d $a0, $a0, 32, 4
+; LA64-NEXT: slli.d $a0, $a0, 4
; LA64-NEXT: sub.d $a0, $sp, $a0
; LA64-NEXT: move $sp, $a0
; LA64-NEXT: bl %plt(notdead)
@@ -85,12 +84,11 @@ define void @scoped_alloca(i32 %n) nounwind {
; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
; LA64-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT: addi.d $fp, $sp, 32
-; LA64-NEXT: addi.w $a1, $zero, -16
-; LA64-NEXT: lu32i.d $a1, 1
+; LA64-NEXT: move $s0, $sp
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
; LA64-NEXT: addi.d $a0, $a0, 15
-; LA64-NEXT: and $a0, $a0, $a1
-; LA64-NEXT: move $s0, $sp
+; LA64-NEXT: bstrpick.d $a0, $a0, 32, 4
+; LA64-NEXT: slli.d $a0, $a0, 4
; LA64-NEXT: sub.d $a0, $sp, $a0
; LA64-NEXT: move $sp, $a0
; LA64-NEXT: bl %plt(notdead)
@@ -154,11 +152,10 @@ define void @alloca_callframe(i32 %n) nounwind {
; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
; LA64-NEXT: addi.d $fp, $sp, 16
-; LA64-NEXT: addi.w $a1, $zero, -16
-; LA64-NEXT: lu32i.d $a1, 1
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
; LA64-NEXT: addi.d $a0, $a0, 15
-; LA64-NEXT: and $a0, $a0, $a1
+; LA64-NEXT: bstrpick.d $a0, $a0, 32, 4
+; LA64-NEXT: slli.d $a0, $a0, 4
; LA64-NEXT: sub.d $a0, $sp, $a0
; LA64-NEXT: move $sp, $a0
; LA64-NEXT: addi.d $sp, $sp, -32
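The mask replaced in these alloca tests is 0x1_ffff_fff0. Assuming the
usual LA64 immediate semantics (addi.w sign-extends its result;
lu32i.d overwrites bits 51:32 and sign-extends the immediate into bits
63:52), the old sequence built it as:

    addi.w  $a1, $zero, -16   # $a1 = 0xffff_ffff_ffff_fff0
    lu32i.d $a1, 1            # $a1 = 0x0000_0001_ffff_fff0

That constant is a shifted mask covering bits 32:4, so the combine
emits bstrpick.d with msb = 32, lsb = 4 followed by slli.d by 4, as
seen above.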
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll
index ea6051faf20e..a038ddc450a0 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll
@@ -269,16 +269,14 @@ entry:
define signext i32 @and_i32_0xfff0(i32 %a) {
; LA32-LABEL: and_i32_0xfff0:
; LA32: # %bb.0:
-; LA32-NEXT: lu12i.w $a1, 15
-; LA32-NEXT: ori $a1, $a1, 4080
-; LA32-NEXT: and $a0, $a0, $a1
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 4
+; LA32-NEXT: slli.w $a0, $a0, 4
; LA32-NEXT: ret
;
; LA64-LABEL: and_i32_0xfff0:
; LA64: # %bb.0:
-; LA64-NEXT: lu12i.w $a1, 15
-; LA64-NEXT: ori $a1, $a1, 4080
-; LA64-NEXT: and $a0, $a0, $a1
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 4
+; LA64-NEXT: slli.d $a0, $a0, 4
; LA64-NEXT: ret
%b = and i32 %a, 65520
ret i32 %b
@@ -287,19 +285,19 @@ define signext i32 @and_i32_0xfff0(i32 %a) {
define signext i32 @and_i32_0xfff0_twice(i32 %a, i32 %b) {
; LA32-LABEL: and_i32_0xfff0_twice:
; LA32: # %bb.0:
-; LA32-NEXT: lu12i.w $a2, 15
-; LA32-NEXT: ori $a2, $a2, 4080
-; LA32-NEXT: and $a1, $a1, $a2
-; LA32-NEXT: and $a0, $a0, $a2
+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 4
+; LA32-NEXT: slli.w $a1, $a1, 4
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 4
+; LA32-NEXT: slli.w $a0, $a0, 4
; LA32-NEXT: sub.w $a0, $a0, $a1
; LA32-NEXT: ret
;
; LA64-LABEL: and_i32_0xfff0_twice:
; LA64: # %bb.0:
-; LA64-NEXT: lu12i.w $a2, 15
-; LA64-NEXT: ori $a2, $a2, 4080
-; LA64-NEXT: and $a1, $a1, $a2
-; LA64-NEXT: and $a0, $a0, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 4
+; LA64-NEXT: slli.d $a1, $a1, 4
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 4
+; LA64-NEXT: slli.d $a0, $a0, 4
; LA64-NEXT: sub.d $a0, $a0, $a1
; LA64-NEXT: ret
%c = and i32 %a, 65520
@@ -311,17 +309,15 @@ define signext i32 @and_i32_0xfff0_twice(i32 %a, i32 %b) {
define i64 @and_i64_0xfff0(i64 %a) {
; LA32-LABEL: and_i64_0xfff0:
; LA32: # %bb.0:
-; LA32-NEXT: lu12i.w $a1, 15
-; LA32-NEXT: ori $a1, $a1, 4080
-; LA32-NEXT: and $a0, $a0, $a1
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 4
+; LA32-NEXT: slli.w $a0, $a0, 4
; LA32-NEXT: move $a1, $zero
; LA32-NEXT: ret
;
; LA64-LABEL: and_i64_0xfff0:
; LA64: # %bb.0:
-; LA64-NEXT: lu12i.w $a1, 15
-; LA64-NEXT: ori $a1, $a1, 4080
-; LA64-NEXT: and $a0, $a0, $a1
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 4
+; LA64-NEXT: slli.d $a0, $a0, 4
; LA64-NEXT: ret
%b = and i64 %a, 65520
ret i64 %b
@@ -330,21 +326,21 @@ define i64 @and_i64_0xfff0(i64 %a) {
define i64 @and_i64_0xfff0_twice(i64 %a, i64 %b) {
; LA32-LABEL: and_i64_0xfff0_twice:
; LA32: # %bb.0:
-; LA32-NEXT: lu12i.w $a1, 15
-; LA32-NEXT: ori $a1, $a1, 4080
-; LA32-NEXT: and $a2, $a2, $a1
-; LA32-NEXT: and $a1, $a0, $a1
-; LA32-NEXT: sub.w $a0, $a1, $a2
-; LA32-NEXT: sltu $a1, $a1, $a2
+; LA32-NEXT: bstrpick.w $a1, $a2, 15, 4
+; LA32-NEXT: slli.w $a1, $a1, 4
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 4
+; LA32-NEXT: slli.w $a2, $a0, 4
+; LA32-NEXT: sub.w $a0, $a2, $a1
+; LA32-NEXT: sltu $a1, $a2, $a1
; LA32-NEXT: sub.w $a1, $zero, $a1
; LA32-NEXT: ret
;
; LA64-LABEL: and_i64_0xfff0_twice:
; LA64: # %bb.0:
-; LA64-NEXT: lu12i.w $a2, 15
-; LA64-NEXT: ori $a2, $a2, 4080
-; LA64-NEXT: and $a1, $a1, $a2
-; LA64-NEXT: and $a0, $a0, $a2
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 4
+; LA64-NEXT: slli.d $a1, $a1, 4
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 4
+; LA64-NEXT: slli.d $a0, $a0, 4
; LA64-NEXT: sub.d $a0, $a0, $a1
; LA64-NEXT: ret
%c = and i64 %a, 65520
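Note that the *_twice tests exercise the use_size() cap added in the
combine: the 0xfff0 constant has exactly two uses, so the transform
still fires. The instruction count comes out even (two instructions to
materialize the mask plus two and's, versus two bstrpick/slli pairs),
but the register that previously kept the mask live across both uses
is freed.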
diff --git a/llvm/test/CodeGen/LoongArch/shrinkwrap.ll b/llvm/test/CodeGen/LoongArch/shrinkwrap.ll
index 083806417e87..35f7c8c4773b 100644
--- a/llvm/test/CodeGen/LoongArch/shrinkwrap.ll
+++ b/llvm/test/CodeGen/LoongArch/shrinkwrap.ll
@@ -60,11 +60,10 @@ define void @conditional_alloca(i32 %n) nounwind {
; NOSHRINKW-NEXT: b .LBB1_1
; NOSHRINKW-NEXT: .LBB1_1: # %if.then
; NOSHRINKW-NEXT: ld.d $a0, $fp, -24 # 8-byte Folded Reload
-; NOSHRINKW-NEXT: addi.w $a1, $zero, -16
-; NOSHRINKW-NEXT: lu32i.d $a1, 1
; NOSHRINKW-NEXT: bstrpick.d $a0, $a0, 31, 0
; NOSHRINKW-NEXT: addi.d $a0, $a0, 15
-; NOSHRINKW-NEXT: and $a1, $a0, $a1
+; NOSHRINKW-NEXT: bstrpick.d $a0, $a0, 32, 4
+; NOSHRINKW-NEXT: slli.d $a1, $a0, 4
; NOSHRINKW-NEXT: move $a0, $sp
; NOSHRINKW-NEXT: sub.d $a0, $a0, $a1
; NOSHRINKW-NEXT: move $sp, $a0
@@ -87,10 +86,9 @@ define void @conditional_alloca(i32 %n) nounwind {
; SHRINKW-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; SHRINKW-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
; SHRINKW-NEXT: addi.d $fp, $sp, 16
-; SHRINKW-NEXT: addi.w $a1, $zero, -16
-; SHRINKW-NEXT: lu32i.d $a1, 1
; SHRINKW-NEXT: addi.d $a0, $a0, 15
-; SHRINKW-NEXT: and $a0, $a0, $a1
+; SHRINKW-NEXT: bstrpick.d $a0, $a0, 32, 4
+; SHRINKW-NEXT: slli.d $a0, $a0, 4
; SHRINKW-NEXT: sub.d $a0, $sp, $a0
; SHRINKW-NEXT: move $sp, $a0
; SHRINKW-NEXT: bl %plt(notdead)
diff --git a/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll b/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll
index 667a7aff8b9b..e149f3748d4a 100644
--- a/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll
+++ b/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll
@@ -51,11 +51,10 @@ define void @caller(i32 %n) {
; LA64-NEXT: srli.d $a1, $sp, 6
; LA64-NEXT: slli.d $sp, $a1, 6
; LA64-NEXT: move $s8, $sp
-; LA64-NEXT: addi.w $a1, $zero, -16
-; LA64-NEXT: lu32i.d $a1, 1
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
; LA64-NEXT: addi.d $a0, $a0, 15
-; LA64-NEXT: and $a0, $a0, $a1
+; LA64-NEXT: bstrpick.d $a0, $a0, 32, 4
+; LA64-NEXT: slli.d $a0, $a0, 4
; LA64-NEXT: sub.d $a0, $sp, $a0
; LA64-NEXT: move $sp, $a0
; LA64-NEXT: addi.d $a1, $s8, 0