[llvm] [DAG][X86]added shrd in combineor for bzhiq+shlq+or (PR #125734)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 26 10:46:59 PDT 2025
https://github.com/shalini-nik updated https://github.com/llvm/llvm-project/pull/125734
>From 625e962bc4ee99755e26713c4cae84fc1263d353 Mon Sep 17 00:00:00 2001
From: shalininikhil <shalininikhil22 at gmail.com>
Date: Tue, 4 Feb 2025 17:56:47 +0000
Subject: [PATCH 01/10] [DAG][X86]added shrd in combineor for bzhiq+shlq+or
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 29 +++++++++++++++++++
.../X86/shrdq-to-insert-into-bitfield.ll | 18 ++++++++++++
2 files changed, 47 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a956074e50d86..395c0f5504d1b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51887,6 +51887,35 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
}
}
+ if (N0.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SHL){
+ SDValue SHL = (N0.getOpcode() == ISD::SHL) ? N0 : N1;
+ SDValue OtherOp = (N0.getOpcode() == ISD::SHL) ? N1 : N0;
+
+ if (OtherOp.getOpcode() == ISD::AND) {
+ SDValue andop = OtherOp;
+
+ if(andop.getOperand(0).getOpcode()==ISD::Constant||andop.getOperand(1).getOpcode()==ISD::Constant){
+
+ SDValue constOp = andop.getOperand(0).getOpcode()==ISD::Constant ? andop.getOperand(0): andop.getOperand(1);
+ SDValue valueOp = andop.getOperand(0).getOpcode()==ISD::Constant ? andop.getOperand(1): andop.getOperand(0);
+ auto *ConstRHS = dyn_cast<ConstantSDNode>(constOp);
+ uint64_t maskValue = ConstRHS->getZExtValue();
+ auto *ConstSHL = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
+ uint64_t shiftValue = ConstSHL->getZExtValue();
+
+ if((((uint64_t)1<<shiftValue)-1)==maskValue){
+ unsigned numbits = SHL.getScalarValueSizeInBits();
+ unsigned newshift=numbits-shiftValue;
+
+ SDValue newSHL = DAG.getNode(ISD::SHL,dl,VT,valueOp,DAG.getConstant(newshift, dl, MVT::i8));
+ SDValue R = DAG.getNode(ISD::FSHR,dl,VT,
+ SHL.getOperand(0),newSHL,DAG.getConstant(newshift, dl, MVT::i8));
+ return R;
+ }
+ }
+ }
+ }
+
if (SDValue SetCC = combineAndOrForCcmpCtest(N, DAG, DCI, Subtarget))
return SetCC;
diff --git a/llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll b/llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll
new file mode 100644
index 0000000000000..cc205ee145d88
--- /dev/null
+++ b/llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 < %s | FileCheck %s
+
+define dso_local i64 @updateTop10Bits(i64 noundef %A, i64 noundef %B) local_unnamed_addr #0 {
+; CHECK-LABEL: updateTop10Bits:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: shlq $10, %rax
+; CHECK-NEXT: shrdq $10, %rsi, %rax
+; CHECK-NEXT: retq
+entry:
+ %and = and i64 %A, 18014398509481983
+ %shl = shl i64 %B, 54
+ %or = or disjoint i64 %shl, %and
+ ret i64 %or
+}
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" }
\ No newline at end of file
>From c4b87398da054139412e2e69037e8a5b5d7df39f Mon Sep 17 00:00:00 2001
From: shalininikhil <shalininikhil22 at gmail.com>
Date: Wed, 5 Feb 2025 12:16:54 +0000
Subject: [PATCH 02/10] using sd_match to match this pattern
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 39 ++++++++-----------------
1 file changed, 12 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 395c0f5504d1b..c4624f9e1a5d5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51887,33 +51887,18 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
}
}
- if (N0.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SHL){
- SDValue SHL = (N0.getOpcode() == ISD::SHL) ? N0 : N1;
- SDValue OtherOp = (N0.getOpcode() == ISD::SHL) ? N1 : N0;
-
- if (OtherOp.getOpcode() == ISD::AND) {
- SDValue andop = OtherOp;
-
- if(andop.getOperand(0).getOpcode()==ISD::Constant||andop.getOperand(1).getOpcode()==ISD::Constant){
-
- SDValue constOp = andop.getOperand(0).getOpcode()==ISD::Constant ? andop.getOperand(0): andop.getOperand(1);
- SDValue valueOp = andop.getOperand(0).getOpcode()==ISD::Constant ? andop.getOperand(1): andop.getOperand(0);
- auto *ConstRHS = dyn_cast<ConstantSDNode>(constOp);
- uint64_t maskValue = ConstRHS->getZExtValue();
- auto *ConstSHL = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
- uint64_t shiftValue = ConstSHL->getZExtValue();
-
- if((((uint64_t)1<<shiftValue)-1)==maskValue){
- unsigned numbits = SHL.getScalarValueSizeInBits();
- unsigned newshift=numbits-shiftValue;
-
- SDValue newSHL = DAG.getNode(ISD::SHL,dl,VT,valueOp,DAG.getConstant(newshift, dl, MVT::i8));
- SDValue R = DAG.getNode(ISD::FSHR,dl,VT,
- SHL.getOperand(0),newSHL,DAG.getConstant(newshift, dl, MVT::i8));
- return R;
- }
- }
- }
+ using namespace llvm::SDPatternMatch;
+ APInt MaskConst,ShlConst;
+ SDValue A, B;
+ if(sd_match(N,m_Or(m_Shl(m_Value(B),m_ConstInt(ShlConst)),m_And(m_Value(A),m_ConstInt(MaskConst))))){
+ uint64_t shiftValue = ShlConst.getZExtValue();
+ if(MaskConst.isMask(shiftValue)){
+ unsigned numbits = B.getScalarValueSizeInBits();
+ unsigned newshift=numbits-shiftValue;
+ SDValue newSHL = DAG.getNode(ISD::SHL,dl,VT,A,DAG.getConstant(newshift, dl, MVT::i8));
+ SDValue R = DAG.getNode(ISD::FSHR,dl,VT,B,newSHL,DAG.getConstant(newshift, dl, MVT::i8));
+ return R;
+ }
}
if (SDValue SetCC = combineAndOrForCcmpCtest(N, DAG, DCI, Subtarget))
>From 6d3f1145c1c1b9c05feb9e94d03f85b6b96242e0 Mon Sep 17 00:00:00 2001
From: shalini-nik <shalininikhil22 at gmail.com>
Date: Thu, 13 Feb 2025 13:00:37 +0530
Subject: [PATCH 03/10] formatted the code with clang-format and replaced both
getConstant with getShiftAmountConstant
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c4624f9e1a5d5..c283974dd91de 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51891,12 +51891,14 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
APInt MaskConst,ShlConst;
SDValue A, B;
if(sd_match(N,m_Or(m_Shl(m_Value(B),m_ConstInt(ShlConst)),m_And(m_Value(A),m_ConstInt(MaskConst))))){
- uint64_t shiftValue = ShlConst.getZExtValue();
- if(MaskConst.isMask(shiftValue)){
- unsigned numbits = B.getScalarValueSizeInBits();
- unsigned newshift=numbits-shiftValue;
- SDValue newSHL = DAG.getNode(ISD::SHL,dl,VT,A,DAG.getConstant(newshift, dl, MVT::i8));
- SDValue R = DAG.getNode(ISD::FSHR,dl,VT,B,newSHL,DAG.getConstant(newshift, dl, MVT::i8));
+ uint64_t ShiftValue = ShlConst.getZExtValue();
+ if (MaskConst.isMask(ShiftValue)) {
+ unsigned NumBits = B.getScalarValueSizeInBits();
+ unsigned NewShift = NumBits - ShiftValue;
+ SDValue NewSHL = DAG.getNode(
+ ISD::SHL, dl, VT, A, DAG.getShiftAmountConstant(NewShift, VT, dl));
+ SDValue R = DAG.getNode(ISD::FSHR, dl, VT, B, NewSHL,
+ DAG.getShiftAmountConstant(NewShift, VT, dl));
return R;
}
}
>From d8b8f1212b5342345c678d4c155670bdbda4645b Mon Sep 17 00:00:00 2001
From: shalini-nik <shalininikhil22 at gmail.com>
Date: Fri, 14 Feb 2025 12:42:11 +0530
Subject: [PATCH 04/10] [X86] adding checks for slow-shld and fixing the
clang-format warnings
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 31 ++++++++++++++-----------
1 file changed, 17 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c283974dd91de..6dc7216af450a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51887,22 +51887,25 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
}
}
- using namespace llvm::SDPatternMatch;
- APInt MaskConst,ShlConst;
- SDValue A, B;
- if(sd_match(N,m_Or(m_Shl(m_Value(B),m_ConstInt(ShlConst)),m_And(m_Value(A),m_ConstInt(MaskConst))))){
- uint64_t ShiftValue = ShlConst.getZExtValue();
- if (MaskConst.isMask(ShiftValue)) {
- unsigned NumBits = B.getScalarValueSizeInBits();
- unsigned NewShift = NumBits - ShiftValue;
- SDValue NewSHL = DAG.getNode(
- ISD::SHL, dl, VT, A, DAG.getShiftAmountConstant(NewShift, VT, dl));
- SDValue R = DAG.getNode(ISD::FSHR, dl, VT, B, NewSHL,
- DAG.getShiftAmountConstant(NewShift, VT, dl));
- return R;
+ if (!Subtarget.isSHLDSlow()) {
+ using namespace llvm::SDPatternMatch;
+ APInt MaskConst, ShlConst;
+ SDValue A, B;
+ if (sd_match(N, m_Or(m_Shl(m_Value(B), m_ConstInt(ShlConst)),
+ m_And(m_Value(A), m_ConstInt(MaskConst))))) {
+ uint64_t ShiftValue = ShlConst.getZExtValue();
+ if (MaskConst.isMask(ShiftValue)) {
+ unsigned NumBits = B.getScalarValueSizeInBits();
+ unsigned NewShift = NumBits - ShiftValue;
+ SDValue NewSHL = DAG.getNode(
+ ISD::SHL, dl, VT, A, DAG.getShiftAmountConstant(NewShift, VT, dl));
+ SDValue R = DAG.getNode(ISD::FSHR, dl, VT, B, NewSHL,
+ DAG.getShiftAmountConstant(NewShift, VT, dl));
+ return R;
+ }
}
}
-
+
if (SDValue SetCC = combineAndOrForCcmpCtest(N, DAG, DCI, Subtarget))
return SetCC;
>From c4d62a195fa93f228e940a8614634ccf77d85839 Mon Sep 17 00:00:00 2001
From: shalini-nik <shalininikhil22 at gmail.com>
Date: Fri, 14 Feb 2025 12:43:10 +0530
Subject: [PATCH 05/10] [X86] updating test cases
---
llvm/test/CodeGen/X86/insert-bitfield.ll | 25 +++++++++++++++++++
.../X86/shrdq-to-insert-into-bitfield.ll | 18 -------------
2 files changed, 25 insertions(+), 18 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/insert-bitfield.ll
delete mode 100644 llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll
diff --git a/llvm/test/CodeGen/X86/insert-bitfield.ll b/llvm/test/CodeGen/X86/insert-bitfield.ll
new file mode 100644
index 0000000000000..828744604f084
--- /dev/null
+++ b/llvm/test/CodeGen/X86/insert-bitfield.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=FAST
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+slow-shld | FileCheck %s --check-prefixes=SLOW
+
+define i64 @updateTop10Bits(i64 %A, i64 %B) {
+; FAST-LABEL: updateTop10Bits:
+; FAST: # %bb.0:
+; FAST-NEXT: movq %rdi, %rax
+; FAST-NEXT: shlq $10, %rax
+; FAST-NEXT: shrdq $10, %rsi, %rax
+; FAST-NEXT: retq
+;
+; SLOW-LABEL: updateTop10Bits:
+; SLOW: # %bb.0:
+; SLOW-NEXT: movabsq $18014398509481983, %rax
+; SLOW-NEXT: andq %rdi, %rax
+; SLOW-NEXT: shlq $54, %rsi
+; SLOW-NEXT: orq %rsi, %rax
+; SLOW-NEXT: retq
+entry:
+ %and = and i64 %A, 18014398509481983
+ %shl = shl i64 %B, 54
+ %or = or disjoint i64 %shl, %and
+ ret i64 %or
+}
diff --git a/llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll b/llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll
deleted file mode 100644
index cc205ee145d88..0000000000000
--- a/llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -O3 < %s | FileCheck %s
-
-define dso_local i64 @updateTop10Bits(i64 noundef %A, i64 noundef %B) local_unnamed_addr #0 {
-; CHECK-LABEL: updateTop10Bits:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: shlq $10, %rax
-; CHECK-NEXT: shrdq $10, %rsi, %rax
-; CHECK-NEXT: retq
-entry:
- %and = and i64 %A, 18014398509481983
- %shl = shl i64 %B, 54
- %or = or disjoint i64 %shl, %and
- ret i64 %or
-}
-
-attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" }
\ No newline at end of file
>From de68b5fbb1f2fdbd92ea026c3aa185acd41f0912 Mon Sep 17 00:00:00 2001
From: shalini-nik <shalininikhil22 at gmail.com>
Date: Wed, 19 Mar 2025 07:01:03 +0530
Subject: [PATCH 06/10] [X86] added checks for operands of and & shl
instructions
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 20 +++++++++++++++++++-
1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6dc7216af450a..c9a74ade25ace 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51894,7 +51894,8 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (sd_match(N, m_Or(m_Shl(m_Value(B), m_ConstInt(ShlConst)),
m_And(m_Value(A), m_ConstInt(MaskConst))))) {
uint64_t ShiftValue = ShlConst.getZExtValue();
- if (MaskConst.isMask(ShiftValue)) {
+ if (MaskConst.isMask(ShiftValue) && (A.getOpcode() == ISD::CopyFromReg) &&
+ (B.getOpcode() == ISD::CopyFromReg)) {
unsigned NumBits = B.getScalarValueSizeInBits();
unsigned NewShift = NumBits - ShiftValue;
SDValue NewSHL = DAG.getNode(
@@ -51903,6 +51904,23 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
DAG.getShiftAmountConstant(NewShift, VT, dl));
return R;
}
+ if (MaskConst.isMask(ShiftValue) &&
+ (A.getOpcode() == ISD::TRUNCATE &&
+ A.getOperand(0).getOpcode() == ISD::CopyFromReg) &&
+ (B.getOpcode() == ISD::TRUNCATE &&
+ B.getOperand(0).getOpcode() == ISD::CopyFromReg)) {
+ unsigned NumBits = B.getScalarValueSizeInBits();
+ unsigned NewShift = NumBits - ShiftValue;
+ if (ShiftValue > 4 && ShiftValue != 8 && ShiftValue != 16 &&
+ ShiftValue != 32 && ShiftValue != 64) {
+ SDValue NewSHL =
+ DAG.getNode(ISD::SHL, dl, VT, A,
+ DAG.getShiftAmountConstant(NewShift, VT, dl));
+ SDValue R = DAG.getNode(ISD::FSHR, dl, VT, B, NewSHL,
+ DAG.getShiftAmountConstant(NewShift, VT, dl));
+ return R;
+ }
+ }
}
}
>From 7ffbcd63815a43fe84ac71c4ba54f8d961e4d546 Mon Sep 17 00:00:00 2001
From: shalini-nik <shalininikhil22 at gmail.com>
Date: Wed, 19 Mar 2025 07:13:41 +0530
Subject: [PATCH 07/10] [X86] added 32,16,8 bit test cases
---
llvm/test/CodeGen/X86/insert-bitfield.ll | 97 ++++++++++++++++++++----
1 file changed, 81 insertions(+), 16 deletions(-)
diff --git a/llvm/test/CodeGen/X86/insert-bitfield.ll b/llvm/test/CodeGen/X86/insert-bitfield.ll
index 828744604f084..2d8825dab2974 100644
--- a/llvm/test/CodeGen/X86/insert-bitfield.ll
+++ b/llvm/test/CodeGen/X86/insert-bitfield.ll
@@ -1,25 +1,90 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=FAST
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+slow-shld | FileCheck %s --check-prefixes=SLOW
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64,X64-FAST
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+slow-shld | FileCheck %s --check-prefixes=X64,X64-SLOW
-define i64 @updateTop10Bits(i64 %A, i64 %B) {
-; FAST-LABEL: updateTop10Bits:
-; FAST: # %bb.0:
-; FAST-NEXT: movq %rdi, %rax
-; FAST-NEXT: shlq $10, %rax
-; FAST-NEXT: shrdq $10, %rsi, %rax
-; FAST-NEXT: retq
+define i64 @updateTop10Bits_64bits(i64 %A, i64 %B) {
+; X64-FAST-LABEL: updateTop10Bits_64bits:
+; X64-FAST: # %bb.0:
+; X64-FAST-NEXT: movq %rdi, %rax
+; X64-FAST-NEXT: shlq $10, %rax
+; X64-FAST-NEXT: shrdq $10, %rsi, %rax
+; X64-FAST-NEXT: retq
;
-; SLOW-LABEL: updateTop10Bits:
-; SLOW: # %bb.0:
-; SLOW-NEXT: movabsq $18014398509481983, %rax
-; SLOW-NEXT: andq %rdi, %rax
-; SLOW-NEXT: shlq $54, %rsi
-; SLOW-NEXT: orq %rsi, %rax
-; SLOW-NEXT: retq
+; X64-SLOW-LABEL: updateTop10Bits_64bits:
+; X64-SLOW: # %bb.0:
+; X64-SLOW-NEXT: movabsq $18014398509481983, %rax
+; X64-SLOW-NEXT: andq %rdi, %rax
+; X64-SLOW-NEXT: shlq $54, %rsi
+; X64-SLOW-NEXT: orq %rsi, %rax
+; X64-SLOW-NEXT: retq
entry:
%and = and i64 %A, 18014398509481983
%shl = shl i64 %B, 54
%or = or disjoint i64 %shl, %and
ret i64 %or
}
+
+define i32 @updateTop10Bits_32bits(i32 %A, i32 %B) {
+; X64-FAST-LABEL: updateTop10Bits_32bits:
+; X64-FAST: # %bb.0: # %entry
+; X64-FAST-NEXT: movl %edi, %eax
+; X64-FAST-NEXT: shll $10, %eax
+; X64-FAST-NEXT: shrdl $10, %esi, %eax
+; X64-FAST-NEXT: retq
+;
+; X64-SLOW-LABEL: updateTop10Bits_32bits:
+; X64-SLOW: # %bb.0: # %entry
+; X64-SLOW-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-SLOW-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-SLOW-NEXT: andl $4194303, %edi # imm = 0x3FFFFF
+; X64-SLOW-NEXT: shll $22, %esi
+; X64-SLOW-NEXT: leal (%rsi,%rdi), %eax
+; X64-SLOW-NEXT: retq
+entry:
+ %and = and i32 %A, 4194303
+ %shl = shl i32 %B, 22
+ %or = or disjoint i32 %shl, %and
+ ret i32 %or
+}
+
+define i16 @updateTop10Bits_16bits(i16 %A, i16 %B) {
+; X64-FAST-LABEL: updateTop10Bits_16bits:
+; X64-FAST: # %bb.0: # %entry
+; X64-FAST-NEXT: movl %edi, %eax
+; X64-FAST-NEXT: shll $10, %eax
+; X64-FAST-NEXT: shrdw $10, %si, %ax
+; X64-FAST-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-FAST-NEXT: retq
+;
+; X64-SLOW-LABEL: updateTop10Bits_16bits:
+; X64-SLOW: # %bb.0: # %entry
+; X64-SLOW-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-SLOW-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-SLOW-NEXT: andl $63, %edi
+; X64-SLOW-NEXT: shll $6, %esi
+; X64-SLOW-NEXT: leal (%rsi,%rdi), %eax
+; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-SLOW-NEXT: retq
+entry:
+ %and = and i16 %A, 63
+ %shl = shl i16 %B, 6
+ %or = or disjoint i16 %shl, %and
+ ret i16 %or
+}
+
+define i8 @updateTop3Bits_8bits(i8 %A, i8 %B) {
+; X64-LABEL: updateTop3Bits_8bits:
+; X64: # %bb.0: # %entry
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andb $7, %dil
+; X64-NEXT: leal (,%rsi,8), %eax
+; X64-NEXT: orb %dil, %al
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+
+entry:
+ %and = and i8 %A, 7
+ %shl = shl i8 %B, 3
+ %or = or disjoint i8 %shl, %and
+ ret i8 %or
+}
\ No newline at end of file
>From e014f9bc2d847ff41ee47f1374a30ce5de7e256e Mon Sep 17 00:00:00 2001
From: shalini-nik <shalininikhil22 at gmail.com>
Date: Thu, 20 Mar 2025 00:00:39 +0530
Subject: [PATCH 08/10] [X86] adding missed checks
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c9a74ade25ace..c05417d636d92 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51898,11 +51898,15 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
(B.getOpcode() == ISD::CopyFromReg)) {
unsigned NumBits = B.getScalarValueSizeInBits();
unsigned NewShift = NumBits - ShiftValue;
- SDValue NewSHL = DAG.getNode(
- ISD::SHL, dl, VT, A, DAG.getShiftAmountConstant(NewShift, VT, dl));
- SDValue R = DAG.getNode(ISD::FSHR, dl, VT, B, NewSHL,
- DAG.getShiftAmountConstant(NewShift, VT, dl));
- return R;
+ if (ShiftValue > 4 && ShiftValue != 8 && ShiftValue != 16 &&
+ ShiftValue != 32 && ShiftValue != 64) {
+ SDValue NewSHL =
+ DAG.getNode(ISD::SHL, dl, VT, A,
+ DAG.getShiftAmountConstant(NewShift, VT, dl));
+ SDValue R = DAG.getNode(ISD::FSHR, dl, VT, B, NewSHL,
+ DAG.getShiftAmountConstant(NewShift, VT, dl));
+ return R;
+ }
}
if (MaskConst.isMask(ShiftValue) &&
(A.getOpcode() == ISD::TRUNCATE &&
>From d7591521b8283d70f89cb3ddb51e7a8d62da6b12 Mon Sep 17 00:00:00 2001
From: shalini-nik <shalininikhil22 at gmail.com>
Date: Thu, 20 Mar 2025 20:03:17 +0530
Subject: [PATCH 09/10] [X86] removed 2 spaces and entry: from test cases
---
llvm/test/CodeGen/X86/insert-bitfield.ll | 23 +++++++++--------------
1 file changed, 9 insertions(+), 14 deletions(-)
diff --git a/llvm/test/CodeGen/X86/insert-bitfield.ll b/llvm/test/CodeGen/X86/insert-bitfield.ll
index 2d8825dab2974..0f8a589d7eb4d 100644
--- a/llvm/test/CodeGen/X86/insert-bitfield.ll
+++ b/llvm/test/CodeGen/X86/insert-bitfield.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64,X64-FAST
; RUN: llc < %s -mtriple=x86_64-- -mattr=+slow-shld | FileCheck %s --check-prefixes=X64,X64-SLOW
-define i64 @updateTop10Bits_64bits(i64 %A, i64 %B) {
+define i64 @updateTop10Bits_64bits(i64 %A, i64 %B) {
; X64-FAST-LABEL: updateTop10Bits_64bits:
; X64-FAST: # %bb.0:
; X64-FAST-NEXT: movq %rdi, %rax
@@ -17,39 +17,37 @@ define i64 @updateTop10Bits_64bits(i64 %A, i64 %B) {
; X64-SLOW-NEXT: shlq $54, %rsi
; X64-SLOW-NEXT: orq %rsi, %rax
; X64-SLOW-NEXT: retq
-entry:
%and = and i64 %A, 18014398509481983
%shl = shl i64 %B, 54
%or = or disjoint i64 %shl, %and
ret i64 %or
}
-define i32 @updateTop10Bits_32bits(i32 %A, i32 %B) {
+define i32 @updateTop10Bits_32bits(i32 %A, i32 %B) {
; X64-FAST-LABEL: updateTop10Bits_32bits:
-; X64-FAST: # %bb.0: # %entry
+; X64-FAST: # %bb.0:
; X64-FAST-NEXT: movl %edi, %eax
; X64-FAST-NEXT: shll $10, %eax
; X64-FAST-NEXT: shrdl $10, %esi, %eax
; X64-FAST-NEXT: retq
;
; X64-SLOW-LABEL: updateTop10Bits_32bits:
-; X64-SLOW: # %bb.0: # %entry
+; X64-SLOW: # %bb.0:
; X64-SLOW-NEXT: # kill: def $esi killed $esi def $rsi
; X64-SLOW-NEXT: # kill: def $edi killed $edi def $rdi
; X64-SLOW-NEXT: andl $4194303, %edi # imm = 0x3FFFFF
; X64-SLOW-NEXT: shll $22, %esi
; X64-SLOW-NEXT: leal (%rsi,%rdi), %eax
; X64-SLOW-NEXT: retq
-entry:
%and = and i32 %A, 4194303
%shl = shl i32 %B, 22
%or = or disjoint i32 %shl, %and
ret i32 %or
}
-define i16 @updateTop10Bits_16bits(i16 %A, i16 %B) {
+define i16 @updateTop10Bits_16bits(i16 %A, i16 %B) {
; X64-FAST-LABEL: updateTop10Bits_16bits:
-; X64-FAST: # %bb.0: # %entry
+; X64-FAST: # %bb.0:
; X64-FAST-NEXT: movl %edi, %eax
; X64-FAST-NEXT: shll $10, %eax
; X64-FAST-NEXT: shrdw $10, %si, %ax
@@ -57,7 +55,7 @@ define i16 @updateTop10Bits_16bits(i16 %A, i16 %B) {
; X64-FAST-NEXT: retq
;
; X64-SLOW-LABEL: updateTop10Bits_16bits:
-; X64-SLOW: # %bb.0: # %entry
+; X64-SLOW: # %bb.0:
; X64-SLOW-NEXT: # kill: def $esi killed $esi def $rsi
; X64-SLOW-NEXT: # kill: def $edi killed $edi def $rdi
; X64-SLOW-NEXT: andl $63, %edi
@@ -65,24 +63,21 @@ define i16 @updateTop10Bits_16bits(i16 %A, i16 %B) {
; X64-SLOW-NEXT: leal (%rsi,%rdi), %eax
; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
; X64-SLOW-NEXT: retq
-entry:
%and = and i16 %A, 63
%shl = shl i16 %B, 6
%or = or disjoint i16 %shl, %and
ret i16 %or
}
-define i8 @updateTop3Bits_8bits(i8 %A, i8 %B) {
+define i8 @updateTop3Bits_8bits(i8 %A, i8 %B) {
; X64-LABEL: updateTop3Bits_8bits:
-; X64: # %bb.0: # %entry
+; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: andb $7, %dil
; X64-NEXT: leal (,%rsi,8), %eax
; X64-NEXT: orb %dil, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
-
-entry:
%and = and i8 %A, 7
%shl = shl i8 %B, 3
%or = or disjoint i8 %shl, %and
>From bc588fddfd193c2cd0ae4768d8179813233059b3 Mon Sep 17 00:00:00 2001
From: shalini-nik <shalininikhil22 at gmail.com>
Date: Wed, 26 Mar 2025 23:16:27 +0530
Subject: [PATCH 10/10] [X86] adding comments for the path
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c05417d636d92..e97517b25ccbf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51887,6 +51887,11 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
}
}
+ // Match pattern to identify a shift-left combined with
+ // a bitwise AND
+ // (B << ShiftConst) | (A & MaskConst)
+ //
+ // only if the SHLD instruction is not slow on this subtarget.
if (!Subtarget.isSHLDSlow()) {
using namespace llvm::SDPatternMatch;
APInt MaskConst, ShlConst;
@@ -51894,10 +51899,14 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (sd_match(N, m_Or(m_Shl(m_Value(B), m_ConstInt(ShlConst)),
m_And(m_Value(A), m_ConstInt(MaskConst))))) {
uint64_t ShiftValue = ShlConst.getZExtValue();
+ // Check that the mask is exactly the low ShiftValue bits and that both
+ // inputs are CopyFromReg nodes
if (MaskConst.isMask(ShiftValue) && (A.getOpcode() == ISD::CopyFromReg) &&
(B.getOpcode() == ISD::CopyFromReg)) {
unsigned NumBits = B.getScalarValueSizeInBits();
unsigned NewShift = NumBits - ShiftValue;
+ // `LEA` is preferred over `SHL` for some shift amounts, so skip shift
+ // amounts of 4 or less and the amounts 8, 16, 32 and 64
if (ShiftValue > 4 && ShiftValue != 8 && ShiftValue != 16 &&
ShiftValue != 32 && ShiftValue != 64) {
SDValue NewSHL =
@@ -51908,6 +51917,8 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
return R;
}
}
+
+ // Handle the case where A and B are truncated values from registers
if (MaskConst.isMask(ShiftValue) &&
(A.getOpcode() == ISD::TRUNCATE &&
A.getOperand(0).getOpcode() == ISD::CopyFromReg) &&
More information about the llvm-commits
mailing list