[llvm] [x86] Optimize urem with a constant divisor to use multiply-by-reciprocal (PR #92669)

Shivam Gupta via llvm-commits llvm-commits at lists.llvm.org
Sat May 18 12:19:12 PDT 2024


https://github.com/xgupta created https://github.com/llvm/llvm-project/pull/92669

The testcase provided by Henning Thielemann implements a simple random number generator based on linear congruences.
This needs a division, and LLVM chooses to call __umoddi3, which is very slow. Since the denominator is a constant, the operation can instead be expanded into a multiply-by-reciprocal sequence.

Fix #6769

>From 8731d0127ee546b53ab7ac1a78de13fb9ededc69 Mon Sep 17 00:00:00 2001
From: Shivam Gupta <shivam98.tkg at gmail.com>
Date: Sun, 19 May 2024 00:42:10 +0530
Subject: [PATCH] [x86] Optimize urem with a constant divisor to use
 multiply-by-reciprocal

Fix #6769
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 26 +++++++++++++++++++
 llvm/test/CodeGen/X86/pr6769.ll               | 20 ++++++++++++++
 2 files changed, 46 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/pr6769.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2b181cd3ab1db..e6a5370e0fdef 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5019,6 +5019,32 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
     }
   }
 
+  // Optimization for urem with a constant divisor
+  if (!isSigned && isa<ConstantSDNode>(N1)) {
+    uint64_t M = cast<ConstantSDNode>(N1)->getZExtValue();
+    uint64_t R = (1ULL << 63) / M + 1;
+
+    SDValue Reciprocal = DAG.getConstant(R, DL, MVT::i64);
+    SDValue N0Ext = DAG.getZExtOrTrunc(N0, DL, MVT::i64);
+
+    // Multiply by reciprocal
+    SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, N0Ext, Reciprocal);
+
+    // Right shift by 63 to get the quotient
+    SDValue ShiftAmount = DAG.getConstant(63, DL, MVT::i64);
+    SDValue Quotient = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, ShiftAmount);
+
+    // Multiply quotient by M to get the product
+    SDValue Modulus = DAG.getConstant(M, DL, MVT::i64);
+    SDValue Product = DAG.getNode(ISD::MUL, DL, MVT::i64, Quotient, Modulus);
+
+    // Subtract product from the original dividend to get the remainder
+    SDValue Remainder = DAG.getNode(ISD::SUB, DL, MVT::i64, N0Ext, Product);
+
+    // Truncate the result to the original type
+    return DAG.getNode(ISD::TRUNCATE, DL, VT, Remainder);
+  }
+
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
 
   // If X/C can be simplified by the division-by-constant logic, lower
diff --git a/llvm/test/CodeGen/X86/pr6769.ll b/llvm/test/CodeGen/X86/pr6769.ll
new file mode 100644
index 0000000000000..328bbf0f594c7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr6769.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+define i32 @_rnd(i32 %a0) {
+; CHECK-LABEL: _rnd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    imull $40692, %edi, %eax # imm = 0x9EF4
+; CHECK-NEXT:    movabsq $174770829514140, %rdx # imm = 0x9EF40135D59C
+; CHECK-NEXT:    imulq %rcx, %rdx
+; CHECK-NEXT:    shrq $63, %rdx
+; CHECK-NEXT:    imull $2147483399, %edx, %ecx # imm = 0x7FFFFF07
+; CHECK-NEXT:    subl %ecx, %eax
+; CHECK-NEXT:    retq
+  %x = zext i32 %a0 to i64
+  %y = mul i64 40692, %x
+  %z = urem i64 %y, 2147483399
+  %r = trunc i64 %z to i32
+  ret i32 %r
+}



More information about the llvm-commits mailing list