[PATCH] D54770: [x86] try to lower multiply i8 with constant to LEA

Tue Nov 20 13:00:49 PST 2018

spatel created this revision.
spatel added reviewers: craig.topper, RKSimon, lebedev.ri.
Herald added a subscriber: mcrosier.

This is an attempt to avoid the regressions seen when https://reviews.llvm.org/D54640 is extended to include add ops, so it is another step towards solving PR32023:
https://bugs.llvm.org/show_bug.cgi?id=32023

LEA formation is scattered across combines, DAG-to-DAG, and machine passes such as the "Two-Address instruction pass", so I'm not sure whether this is the best solution.

The idea here is to rely on the existing DAG-to-DAG logic for 32-bit LEA formation, so no edits are needed there. If this is the right approach, I'd extend the patch to handle more constants; 3/5/9 are just the most obvious winners for size/speed regardless of microarchitecture (uarch). GCC forms LEA for 8-bit multiplies much more aggressively.


https://reviews.llvm.org/D54770

Files:
  lib/Target/X86/X86ISelLowering.cpp
  test/CodeGen/X86/mul-constant-i8.ll


Index: test/CodeGen/X86/mul-constant-i8.ll
===================================================================

--- test/CodeGen/X86/mul-constant-i8.ll
+++ test/CodeGen/X86/mul-constant-i8.ll
@@ -25,10 +25,9 @@
 define i8 @test_mul_by_3(i8 %x) {
 ; X64-LABEL: test_mul_by_3:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movb $3, %cl
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rdi,2), %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
-; X64-NEXT:    mulb %cl
 ; X64-NEXT:    retq
   %m = mul i8 %x, 3
   ret i8 %m
@@ -48,10 +47,9 @@
 define i8 @test_mul_by_5(i8 %x) {
 ; X64-LABEL: test_mul_by_5:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movb $5, %cl
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rdi,4), %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
-; X64-NEXT:    mulb %cl
 ; X64-NEXT:    retq
   %m = mul i8 %x, 5
   ret i8 %m
@@ -95,10 +93,9 @@
 define i8 @test_mul_by_9(i8 %x) {
 ; X64-LABEL: test_mul_by_9:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movb $9, %cl
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rdi,8), %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
-; X64-NEXT:    mulb %cl
 ; X64-NEXT:    retq
   %m = mul i8 %x, 9
   ret i8 %m
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -195,6 +195,9 @@
       setOperationAction(ISD::ABS          , MVT::i64  , Custom);
   }
 
+  // 8-bit shl/multiply might be better off as LEA depending on the operands.
+  setOperationAction(ISD::MUL, MVT::i8, Custom);
+
   // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
   // operation.
   setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
@@ -23410,7 +23413,6 @@
                         SelectionDAG &DAG) {
   SDLoc dl(Op);
   MVT VT = Op.getSimpleValueType();
-
   if (VT.getScalarType() == MVT::i1)
     return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1));
 
@@ -23420,6 +23422,19 @@
 
   SDValue A = Op.getOperand(0);
   SDValue B = Op.getOperand(1);
+  if (VT == MVT::i8) {
+    if (isa<ConstantSDNode>(B)) {
+      uint64_t MulC = Op.getConstantOperandVal(1);
+      if (MulC == 3 || MulC == 5 || MulC == 9) {
+        // Extend/truncate an 8-bit multiply to 32-bit to allow LEA formation.
+        SDValue ZextOp0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, A);
+        SDValue NewMul = DAG.getNode(ISD::MUL, dl, MVT::i32, ZextOp0,
+                                     DAG.getConstant(MulC, dl, MVT::i32));
+        return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, NewMul);
+      }
+    }
+    return Op;
+  }
 
   // Lower v16i8/v32i8/v64i8 mul as sign-extension to v8i16/v16i16/v32i16
   // vector pairs, multiply and truncate.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D54770.174824.patch
Type: text/x-patch
Size: 2981 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20181120/1c71f222/attachment.bin>