[llvm] [DAG][X86] added shrd in combineOr for bzhiq+shlq+or (PR #125734)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 19 11:31:04 PDT 2025


https://github.com/shalini-nik updated https://github.com/llvm/llvm-project/pull/125734

>From 625e962bc4ee99755e26713c4cae84fc1263d353 Mon Sep 17 00:00:00 2001
From: shalininikhil <shalininikhil22 at gmail.com>
Date: Tue, 4 Feb 2025 17:56:47 +0000
Subject: [PATCH 1/8] [DAG][X86] added shrd in combineOr for bzhiq+shlq+or

---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 29 +++++++++++++++++++
 .../X86/shrdq-to-insert-into-bitfield.ll      | 18 ++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a956074e50d86..395c0f5504d1b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51887,6 +51887,35 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  if (N0.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SHL){
+    SDValue SHL = (N0.getOpcode() == ISD::SHL) ? N0 : N1;
+    SDValue OtherOp = (N0.getOpcode() == ISD::SHL) ? N1 : N0;
+    
+    if (OtherOp.getOpcode() == ISD::AND) {
+      SDValue andop = OtherOp;
+      
+      if(andop.getOperand(0).getOpcode()==ISD::Constant||andop.getOperand(1).getOpcode()==ISD::Constant){
+              
+              SDValue constOp = andop.getOperand(0).getOpcode()==ISD::Constant ? andop.getOperand(0): andop.getOperand(1);
+              SDValue valueOp = andop.getOperand(0).getOpcode()==ISD::Constant ? andop.getOperand(1): andop.getOperand(0);
+              auto *ConstRHS = dyn_cast<ConstantSDNode>(constOp);
+              uint64_t maskValue = ConstRHS->getZExtValue();
+              auto *ConstSHL = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
+              uint64_t shiftValue = ConstSHL->getZExtValue();
+              
+              if((((uint64_t)1<<shiftValue)-1)==maskValue){
+                      unsigned numbits = SHL.getScalarValueSizeInBits();
+                      unsigned newshift=numbits-shiftValue;
+                      
+                      SDValue newSHL = DAG.getNode(ISD::SHL,dl,VT,valueOp,DAG.getConstant(newshift, dl, MVT::i8));
+                      SDValue R = DAG.getNode(ISD::FSHR,dl,VT,
+                                    SHL.getOperand(0),newSHL,DAG.getConstant(newshift, dl, MVT::i8));
+                      return R;
+            }
+          }
+      }
+  }
+  
   if (SDValue SetCC = combineAndOrForCcmpCtest(N, DAG, DCI, Subtarget))
     return SetCC;
 
diff --git a/llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll b/llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll
new file mode 100644
index 0000000000000..cc205ee145d88
--- /dev/null
+++ b/llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 < %s | FileCheck %s
+
+define dso_local i64 @updateTop10Bits(i64 noundef %A, i64 noundef %B) local_unnamed_addr #0 {
+; CHECK-LABEL: updateTop10Bits:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq	%rdi, %rax
+; CHECK-NEXT:    shlq	$10, %rax
+; CHECK-NEXT:    shrdq	$10, %rsi, %rax
+; CHECK-NEXT:    retq
+entry:
+  %and = and i64 %A, 18014398509481983
+  %shl = shl i64 %B, 54
+  %or = or disjoint i64 %shl, %and
+  ret i64 %or
+}
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" }
\ No newline at end of file

>From c4b87398da054139412e2e69037e8a5b5d7df39f Mon Sep 17 00:00:00 2001
From: shalininikhil <shalininikhil22 at gmail.com>
Date: Wed, 5 Feb 2025 12:16:54 +0000
Subject: [PATCH 2/8] using sd_match to match this pattern

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 39 ++++++++-----------------
 1 file changed, 12 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 395c0f5504d1b..c4624f9e1a5d5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51887,33 +51887,18 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  if (N0.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SHL){
-    SDValue SHL = (N0.getOpcode() == ISD::SHL) ? N0 : N1;
-    SDValue OtherOp = (N0.getOpcode() == ISD::SHL) ? N1 : N0;
-    
-    if (OtherOp.getOpcode() == ISD::AND) {
-      SDValue andop = OtherOp;
-      
-      if(andop.getOperand(0).getOpcode()==ISD::Constant||andop.getOperand(1).getOpcode()==ISD::Constant){
-              
-              SDValue constOp = andop.getOperand(0).getOpcode()==ISD::Constant ? andop.getOperand(0): andop.getOperand(1);
-              SDValue valueOp = andop.getOperand(0).getOpcode()==ISD::Constant ? andop.getOperand(1): andop.getOperand(0);
-              auto *ConstRHS = dyn_cast<ConstantSDNode>(constOp);
-              uint64_t maskValue = ConstRHS->getZExtValue();
-              auto *ConstSHL = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
-              uint64_t shiftValue = ConstSHL->getZExtValue();
-              
-              if((((uint64_t)1<<shiftValue)-1)==maskValue){
-                      unsigned numbits = SHL.getScalarValueSizeInBits();
-                      unsigned newshift=numbits-shiftValue;
-                      
-                      SDValue newSHL = DAG.getNode(ISD::SHL,dl,VT,valueOp,DAG.getConstant(newshift, dl, MVT::i8));
-                      SDValue R = DAG.getNode(ISD::FSHR,dl,VT,
-                                    SHL.getOperand(0),newSHL,DAG.getConstant(newshift, dl, MVT::i8));
-                      return R;
-            }
-          }
-      }
+  using namespace llvm::SDPatternMatch;
+  APInt MaskConst,ShlConst;
+  SDValue A, B;
+  if(sd_match(N,m_Or(m_Shl(m_Value(B),m_ConstInt(ShlConst)),m_And(m_Value(A),m_ConstInt(MaskConst))))){
+    uint64_t shiftValue = ShlConst.getZExtValue();
+    if(MaskConst.isMask(shiftValue)){
+      unsigned numbits = B.getScalarValueSizeInBits();
+      unsigned newshift=numbits-shiftValue;
+      SDValue newSHL = DAG.getNode(ISD::SHL,dl,VT,A,DAG.getConstant(newshift, dl, MVT::i8));
+      SDValue R = DAG.getNode(ISD::FSHR,dl,VT,B,newSHL,DAG.getConstant(newshift, dl, MVT::i8));
+      return R;
+    }
   }
   
   if (SDValue SetCC = combineAndOrForCcmpCtest(N, DAG, DCI, Subtarget))

>From 6d3f1145c1c1b9c05feb9e94d03f85b6b96242e0 Mon Sep 17 00:00:00 2001
From: shalini-nik <shalininikhil22 at gmail.com>
Date: Thu, 13 Feb 2025 13:00:37 +0530
Subject: [PATCH 3/8] formatted the code with clang-format and replaced both
 getConstant with getShiftAmountConstant

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c4624f9e1a5d5..c283974dd91de 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51891,12 +51891,14 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
   APInt MaskConst,ShlConst;
   SDValue A, B;
   if(sd_match(N,m_Or(m_Shl(m_Value(B),m_ConstInt(ShlConst)),m_And(m_Value(A),m_ConstInt(MaskConst))))){
-    uint64_t shiftValue = ShlConst.getZExtValue();
-    if(MaskConst.isMask(shiftValue)){
-      unsigned numbits = B.getScalarValueSizeInBits();
-      unsigned newshift=numbits-shiftValue;
-      SDValue newSHL = DAG.getNode(ISD::SHL,dl,VT,A,DAG.getConstant(newshift, dl, MVT::i8));
-      SDValue R = DAG.getNode(ISD::FSHR,dl,VT,B,newSHL,DAG.getConstant(newshift, dl, MVT::i8));
+    uint64_t ShiftValue = ShlConst.getZExtValue();
+    if (MaskConst.isMask(ShiftValue)) {
+      unsigned NumBits = B.getScalarValueSizeInBits();
+      unsigned NewShift = NumBits - ShiftValue;
+      SDValue NewSHL = DAG.getNode(
+          ISD::SHL, dl, VT, A, DAG.getShiftAmountConstant(NewShift, VT, dl));
+      SDValue R = DAG.getNode(ISD::FSHR, dl, VT, B, NewSHL,
+                              DAG.getShiftAmountConstant(NewShift, VT, dl));
       return R;
     }
   }

>From d8b8f1212b5342345c678d4c155670bdbda4645b Mon Sep 17 00:00:00 2001
From: shalini-nik <shalininikhil22 at gmail.com>
Date: Fri, 14 Feb 2025 12:42:11 +0530
Subject: [PATCH 4/8] [X86] adding checks for slow-shld and fixing the
 clang-format warnings

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 31 ++++++++++++++-----------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c283974dd91de..6dc7216af450a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51887,22 +51887,25 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  using namespace llvm::SDPatternMatch;
-  APInt MaskConst,ShlConst;
-  SDValue A, B;
-  if(sd_match(N,m_Or(m_Shl(m_Value(B),m_ConstInt(ShlConst)),m_And(m_Value(A),m_ConstInt(MaskConst))))){
-    uint64_t ShiftValue = ShlConst.getZExtValue();
-    if (MaskConst.isMask(ShiftValue)) {
-      unsigned NumBits = B.getScalarValueSizeInBits();
-      unsigned NewShift = NumBits - ShiftValue;
-      SDValue NewSHL = DAG.getNode(
-          ISD::SHL, dl, VT, A, DAG.getShiftAmountConstant(NewShift, VT, dl));
-      SDValue R = DAG.getNode(ISD::FSHR, dl, VT, B, NewSHL,
-                              DAG.getShiftAmountConstant(NewShift, VT, dl));
-      return R;
+  if (!Subtarget.isSHLDSlow()) {
+    using namespace llvm::SDPatternMatch;
+    APInt MaskConst, ShlConst;
+    SDValue A, B;
+    if (sd_match(N, m_Or(m_Shl(m_Value(B), m_ConstInt(ShlConst)),
+                         m_And(m_Value(A), m_ConstInt(MaskConst))))) {
+      uint64_t ShiftValue = ShlConst.getZExtValue();
+      if (MaskConst.isMask(ShiftValue)) {
+        unsigned NumBits = B.getScalarValueSizeInBits();
+        unsigned NewShift = NumBits - ShiftValue;
+        SDValue NewSHL = DAG.getNode(
+            ISD::SHL, dl, VT, A, DAG.getShiftAmountConstant(NewShift, VT, dl));
+        SDValue R = DAG.getNode(ISD::FSHR, dl, VT, B, NewSHL,
+                                DAG.getShiftAmountConstant(NewShift, VT, dl));
+        return R;
+      }
     }
   }
-  
+
   if (SDValue SetCC = combineAndOrForCcmpCtest(N, DAG, DCI, Subtarget))
     return SetCC;
 

>From c4d62a195fa93f228e940a8614634ccf77d85839 Mon Sep 17 00:00:00 2001
From: shalini-nik <shalininikhil22 at gmail.com>
Date: Fri, 14 Feb 2025 12:43:10 +0530
Subject: [PATCH 5/8] [X86] updating test cases

---
 llvm/test/CodeGen/X86/insert-bitfield.ll      | 25 +++++++++++++++++++
 .../X86/shrdq-to-insert-into-bitfield.ll      | 18 -------------
 2 files changed, 25 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/insert-bitfield.ll
 delete mode 100644 llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll

diff --git a/llvm/test/CodeGen/X86/insert-bitfield.ll b/llvm/test/CodeGen/X86/insert-bitfield.ll
new file mode 100644
index 0000000000000..828744604f084
--- /dev/null
+++ b/llvm/test/CodeGen/X86/insert-bitfield.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=FAST
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+slow-shld | FileCheck %s --check-prefixes=SLOW
+
+define  i64 @updateTop10Bits(i64  %A, i64  %B) {
+; FAST-LABEL: updateTop10Bits:
+; FAST:       # %bb.0:
+; FAST-NEXT:    movq	%rdi, %rax
+; FAST-NEXT:    shlq	$10, %rax
+; FAST-NEXT:    shrdq	$10, %rsi, %rax
+; FAST-NEXT:    retq
+;
+; SLOW-LABEL: updateTop10Bits:
+; SLOW:       # %bb.0:
+; SLOW-NEXT:    movabsq $18014398509481983, %rax
+; SLOW-NEXT:    andq    %rdi, %rax
+; SLOW-NEXT:    shlq    $54, %rsi
+; SLOW-NEXT:    orq     %rsi, %rax
+; SLOW-NEXT:    retq
+entry:
+  %and = and i64 %A, 18014398509481983
+  %shl = shl i64 %B, 54
+  %or = or disjoint i64 %shl, %and
+  ret i64 %or
+}
diff --git a/llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll b/llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll
deleted file mode 100644
index cc205ee145d88..0000000000000
--- a/llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -O3 < %s | FileCheck %s
-
-define dso_local i64 @updateTop10Bits(i64 noundef %A, i64 noundef %B) local_unnamed_addr #0 {
-; CHECK-LABEL: updateTop10Bits:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq	%rdi, %rax
-; CHECK-NEXT:    shlq	$10, %rax
-; CHECK-NEXT:    shrdq	$10, %rsi, %rax
-; CHECK-NEXT:    retq
-entry:
-  %and = and i64 %A, 18014398509481983
-  %shl = shl i64 %B, 54
-  %or = or disjoint i64 %shl, %and
-  ret i64 %or
-}
-
-attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" }
\ No newline at end of file

>From de68b5fbb1f2fdbd92ea026c3aa185acd41f0912 Mon Sep 17 00:00:00 2001
From: shalini-nik <shalininikhil22 at gmail.com>
Date: Wed, 19 Mar 2025 07:01:03 +0530
Subject: [PATCH 6/8] [X86] added checks for operands of and & shl instructions

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6dc7216af450a..c9a74ade25ace 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51894,7 +51894,8 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
     if (sd_match(N, m_Or(m_Shl(m_Value(B), m_ConstInt(ShlConst)),
                          m_And(m_Value(A), m_ConstInt(MaskConst))))) {
       uint64_t ShiftValue = ShlConst.getZExtValue();
-      if (MaskConst.isMask(ShiftValue)) {
+      if (MaskConst.isMask(ShiftValue) && (A.getOpcode() == ISD::CopyFromReg) &&
+          (B.getOpcode() == ISD::CopyFromReg)) {
         unsigned NumBits = B.getScalarValueSizeInBits();
         unsigned NewShift = NumBits - ShiftValue;
         SDValue NewSHL = DAG.getNode(
@@ -51903,6 +51904,23 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
                                 DAG.getShiftAmountConstant(NewShift, VT, dl));
         return R;
       }
+      if (MaskConst.isMask(ShiftValue) &&
+          (A.getOpcode() == ISD::TRUNCATE &&
+           A.getOperand(0).getOpcode() == ISD::CopyFromReg) &&
+          (B.getOpcode() == ISD::TRUNCATE &&
+           B.getOperand(0).getOpcode() == ISD::CopyFromReg)) {
+        unsigned NumBits = B.getScalarValueSizeInBits();
+        unsigned NewShift = NumBits - ShiftValue;
+        if (ShiftValue > 4 && ShiftValue != 8 && ShiftValue != 16 &&
+            ShiftValue != 32 && ShiftValue != 64) {
+          SDValue NewSHL =
+              DAG.getNode(ISD::SHL, dl, VT, A,
+                          DAG.getShiftAmountConstant(NewShift, VT, dl));
+          SDValue R = DAG.getNode(ISD::FSHR, dl, VT, B, NewSHL,
+                                  DAG.getShiftAmountConstant(NewShift, VT, dl));
+          return R;
+        }
+      }
     }
   }
 

>From 7ffbcd63815a43fe84ac71c4ba54f8d961e4d546 Mon Sep 17 00:00:00 2001
From: shalini-nik <shalininikhil22 at gmail.com>
Date: Wed, 19 Mar 2025 07:13:41 +0530
Subject: [PATCH 7/8] [X86] added 32-, 16-, and 8-bit test cases

---
 llvm/test/CodeGen/X86/insert-bitfield.ll | 97 ++++++++++++++++++++----
 1 file changed, 81 insertions(+), 16 deletions(-)

diff --git a/llvm/test/CodeGen/X86/insert-bitfield.ll b/llvm/test/CodeGen/X86/insert-bitfield.ll
index 828744604f084..2d8825dab2974 100644
--- a/llvm/test/CodeGen/X86/insert-bitfield.ll
+++ b/llvm/test/CodeGen/X86/insert-bitfield.ll
@@ -1,25 +1,90 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=FAST
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+slow-shld | FileCheck %s --check-prefixes=SLOW
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64,X64-FAST
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+slow-shld | FileCheck %s --check-prefixes=X64,X64-SLOW
 
-define  i64 @updateTop10Bits(i64  %A, i64  %B) {
-; FAST-LABEL: updateTop10Bits:
-; FAST:       # %bb.0:
-; FAST-NEXT:    movq	%rdi, %rax
-; FAST-NEXT:    shlq	$10, %rax
-; FAST-NEXT:    shrdq	$10, %rsi, %rax
-; FAST-NEXT:    retq
+define  i64 @updateTop10Bits_64bits(i64  %A, i64  %B) {
+; X64-FAST-LABEL: updateTop10Bits_64bits:
+; X64-FAST:         # %bb.0:
+; X64-FAST-NEXT:    movq	%rdi, %rax
+; X64-FAST-NEXT:    shlq	$10, %rax
+; X64-FAST-NEXT:    shrdq	$10, %rsi, %rax
+; X64-FAST-NEXT:    retq
 ;
-; SLOW-LABEL: updateTop10Bits:
-; SLOW:       # %bb.0:
-; SLOW-NEXT:    movabsq $18014398509481983, %rax
-; SLOW-NEXT:    andq    %rdi, %rax
-; SLOW-NEXT:    shlq    $54, %rsi
-; SLOW-NEXT:    orq     %rsi, %rax
-; SLOW-NEXT:    retq
+; X64-SLOW-LABEL: updateTop10Bits_64bits:
+; X64-SLOW:         # %bb.0:
+; X64-SLOW-NEXT:    movabsq $18014398509481983, %rax
+; X64-SLOW-NEXT:    andq    %rdi, %rax
+; X64-SLOW-NEXT:    shlq    $54, %rsi
+; X64-SLOW-NEXT:    orq     %rsi, %rax
+; X64-SLOW-NEXT:    retq
 entry:
   %and = and i64 %A, 18014398509481983
   %shl = shl i64 %B, 54
   %or = or disjoint i64 %shl, %and
   ret i64 %or
 }
+
+define  i32 @updateTop10Bits_32bits(i32  %A, i32  %B) {
+; X64-FAST-LABEL: updateTop10Bits_32bits:
+; X64-FAST:         # %bb.0:    # %entry
+; X64-FAST-NEXT:    movl	%edi, %eax
+; X64-FAST-NEXT:    shll	$10, %eax
+; X64-FAST-NEXT:    shrdl	$10, %esi, %eax
+; X64-FAST-NEXT:    retq
+;
+; X64-SLOW-LABEL: updateTop10Bits_32bits:
+; X64-SLOW:         # %bb.0:    # %entry
+; X64-SLOW-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-SLOW-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-SLOW-NEXT:    andl	$4194303, %edi    # imm = 0x3FFFFF
+; X64-SLOW-NEXT:    shll	$22, %esi
+; X64-SLOW-NEXT:    leal	(%rsi,%rdi), %eax
+; X64-SLOW-NEXT:    retq
+entry:
+  %and = and i32 %A, 4194303
+  %shl = shl i32 %B, 22
+  %or = or disjoint i32 %shl, %and
+  ret i32 %or
+}
+
+define  i16 @updateTop10Bits_16bits(i16  %A, i16  %B) {
+; X64-FAST-LABEL: updateTop10Bits_16bits:
+; X64-FAST:         # %bb.0:    # %entry
+; X64-FAST-NEXT:    movl	%edi, %eax
+; X64-FAST-NEXT:    shll	$10, %eax
+; X64-FAST-NEXT:    shrdw	$10, %si, %ax
+; X64-FAST-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-FAST-NEXT:    retq
+;
+; X64-SLOW-LABEL: updateTop10Bits_16bits:
+; X64-SLOW:         # %bb.0:    # %entry
+; X64-SLOW-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-SLOW-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-SLOW-NEXT:    andl	$63, %edi
+; X64-SLOW-NEXT:    shll	$6, %esi
+; X64-SLOW-NEXT:    leal	(%rsi,%rdi), %eax
+; X64-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-SLOW-NEXT:    retq
+entry:
+  %and = and i16 %A, 63
+  %shl = shl i16 %B, 6
+  %or = or disjoint i16 %shl, %and
+  ret i16 %or
+}
+
+define  i8 @updateTop3Bits_8bits(i8  %A, i8  %B) {
+; X64-LABEL: updateTop3Bits_8bits:                   
+; X64:         # %bb.0:    # %entry
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:	  andb	$7, %dil
+; X64-NEXT:	  leal    (,%rsi,8), %eax
+; X64-NEXT:	  orb     %dil, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:	retq
+
+entry:
+  %and = and i8 %A, 7
+  %shl = shl i8 %B, 3
+  %or = or disjoint i8 %shl, %and
+  ret i8 %or
+}
\ No newline at end of file

>From e014f9bc2d847ff41ee47f1374a30ce5de7e256e Mon Sep 17 00:00:00 2001
From: shalini-nik <shalininikhil22 at gmail.com>
Date: Thu, 20 Mar 2025 00:00:39 +0530
Subject: [PATCH 8/8] [X86] adding missed checks

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c9a74ade25ace..c05417d636d92 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51898,11 +51898,15 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
           (B.getOpcode() == ISD::CopyFromReg)) {
         unsigned NumBits = B.getScalarValueSizeInBits();
         unsigned NewShift = NumBits - ShiftValue;
-        SDValue NewSHL = DAG.getNode(
-            ISD::SHL, dl, VT, A, DAG.getShiftAmountConstant(NewShift, VT, dl));
-        SDValue R = DAG.getNode(ISD::FSHR, dl, VT, B, NewSHL,
-                                DAG.getShiftAmountConstant(NewShift, VT, dl));
-        return R;
+        if (ShiftValue > 4 && ShiftValue != 8 && ShiftValue != 16 &&
+            ShiftValue != 32 && ShiftValue != 64) {
+          SDValue NewSHL =
+              DAG.getNode(ISD::SHL, dl, VT, A,
+                          DAG.getShiftAmountConstant(NewShift, VT, dl));
+          SDValue R = DAG.getNode(ISD::FSHR, dl, VT, B, NewSHL,
+                                  DAG.getShiftAmountConstant(NewShift, VT, dl));
+          return R;
+        }
       }
       if (MaskConst.isMask(ShiftValue) &&
           (A.getOpcode() == ISD::TRUNCATE &&



More information about the llvm-commits mailing list