[llvm-branch-commits] [llvm] f5f4825 - [X86] Fix `(shift X, (xor Y, N-1))` -> `(shift X, (not Y))` by properly inserting `not Y` into DAG. [#61038]

Tom Stellard via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Mar 3 23:54:00 PST 2023


Author: Noah Goldstein
Date: 2023-03-03T23:53:12-08:00
New Revision: f5f4825b189ccb016e01a9cd10e2a06cd8f2a89a

URL: https://github.com/llvm/llvm-project/commit/f5f4825b189ccb016e01a9cd10e2a06cd8f2a89a
DIFF: https://github.com/llvm/llvm-project/commit/f5f4825b189ccb016e01a9cd10e2a06cd8f2a89a.diff

LOG: [X86] Fix `(shift X, (xor Y, N-1))` -> `(shift X, (not Y))` by properly inserting `not Y` into DAG. [#61038]

Previously, the `-1` in `not Y` (`xor Y, -1`) was not inserted into the
DAG. Not inserting `-1` as a DAG node shows up as a bug when doing
`(xor (shl 1, A), B)` -> `(btc A, B)`. `btc` requires `B` (dst) to be
a register.

Differential Revision: https://reviews.llvm.org/D144984

(cherry picked from commit 4a23031fac1a58f6cf05d07f915633930532eafa)

Added: 
    llvm/test/CodeGen/X86/pr61038.ll

Modified: 
    llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index d69e2c3ed4930..181de6abb2c5f 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -4020,7 +4020,10 @@ bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
 
       EVT OpVT = ShiftAmt.getValueType();
 
-      NewShiftAmt = CurDAG->getNOT(DL, Add0C == nullptr ? Add0 : Add1, OpVT);
+      SDValue AllOnes = CurDAG->getAllOnesConstant(DL, OpVT);
+      NewShiftAmt = CurDAG->getNode(ISD::XOR, DL, OpVT,
+                                    Add0C == nullptr ? Add0 : Add1, AllOnes);
+      insertDAGNode(*CurDAG, OrigShiftAmt, AllOnes);
       insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
       // If we are shifting by N-X where N == 0 mod Size, then just shift by
       // -X to generate a NEG instead of a SUB of a constant.

diff  --git a/llvm/test/CodeGen/X86/pr61038.ll b/llvm/test/CodeGen/X86/pr61038.ll
new file mode 100644
index 0000000000000..2db78234b3be3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr61038.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 | FileCheck %s --check-prefix=CHECK-BMI2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefix=CHECK-BMI2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=CHECK-BMI
+
+declare i32 @llvm.cttz.i32(i32, i1 immarg)
+define void @test_61038(ptr %tmp_buffer) {
+; CHECK-BMI2-LABEL: test_61038:
+; CHECK-BMI2:       # %bb.0: # %entry
+; CHECK-BMI2-NEXT:    tzcntl %eax, %eax
+; CHECK-BMI2-NEXT:    movabsq $8589934591, %rcx # imm = 0x1FFFFFFFF
+; CHECK-BMI2-NEXT:    movq $-1, %rdx
+; CHECK-BMI2-NEXT:    btcq %rax, %rdx
+; CHECK-BMI2-NEXT:    shrxq %rdx, %rcx, %rdx
+; CHECK-BMI2-NEXT:    btcq %rax, %rcx
+; CHECK-BMI2-NEXT:    xorl %eax, %eax
+; CHECK-BMI2-NEXT:    cmpq $64, %rcx
+; CHECK-BMI2-NEXT:    cmovael %eax, %edx
+; CHECK-BMI2-NEXT:    movl %edx, (%rdi)
+; CHECK-BMI2-NEXT:    retq
+;
+; CHECK-BMI-LABEL: test_61038:
+; CHECK-BMI:       # %bb.0: # %entry
+; CHECK-BMI-NEXT:    tzcntl %eax, %eax
+; CHECK-BMI-NEXT:    movabsq $8589934591, %rdx # imm = 0x1FFFFFFFF
+; CHECK-BMI-NEXT:    movq %rdx, %rsi
+; CHECK-BMI-NEXT:    btcq %rax, %rsi
+; CHECK-BMI-NEXT:    movq $-1, %rcx
+; CHECK-BMI-NEXT:    btcq %rax, %rcx
+; CHECK-BMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-BMI-NEXT:    shrq %cl, %rdx
+; CHECK-BMI-NEXT:    xorl %eax, %eax
+; CHECK-BMI-NEXT:    cmpq $64, %rsi
+; CHECK-BMI-NEXT:    cmovael %eax, %edx
+; CHECK-BMI-NEXT:    movl %edx, (%rdi)
+; CHECK-BMI-NEXT:    retq
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 poison, i1 false)
+  %1 = zext i32 %0 to i64
+  %2 = shl nuw nsw i64 1, %1
+  %3 = xor i64 %2, 8589934591
+  %4 = icmp ugt i64 %3, 63
+  %x11.op.i = lshr i64 8589934591, %3
+  %5 = trunc i64 %x11.op.i to i32
+  %6 = select i1 %4, i32 0, i32 %5
+  store i32 %6, ptr %tmp_buffer, align 4
+  ret void
+}


        


More information about the llvm-branch-commits mailing list