[llvm] r274066 - [SystemZ] Use NILL instruction instead of NILF where possible

Zhan Jun Liau via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 28 14:03:19 PDT 2016


Author: zhanjunl
Date: Tue Jun 28 16:03:19 2016
New Revision: 274066

URL: http://llvm.org/viewvc/llvm-project?rev=274066&view=rev
Log:
[SystemZ] Use NILL instruction instead of NILF where possible

Summary: SystemZ shift instructions only use the last 6 bits of the shift
amount. When the result of an AND operation is used as a shift amount, this
means that we can use the NILL instruction (which operates on the last 16 bits)
rather than NILF (which operates on the last 32 bits) for a 16-bit savings in
instruction size.

Reviewers: uweigand

Subscribers: llvm-commits

Author: colpell
Committing on behalf of Elliot.

Differential Revision: http://reviews.llvm.org/D21686

Added:
    llvm/trunk/test/CodeGen/SystemZ/rot-01.ll
    llvm/trunk/test/CodeGen/SystemZ/shift-11.ll
Modified:
    llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td

Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td?rev=274066&r1=274065&r2=274066&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td Tue Jun 28 16:03:19 2016
@@ -1022,7 +1022,7 @@ let Defs = [CC] in {
   // ANDs of memory.
   let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
     defm N  : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
-    def  NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; 
+    def  NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
   }
 
   // AND to memory
@@ -1685,6 +1685,37 @@ def : Pat<(sra (shl (i64 (anyext (i32 (z
 def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
                           (XGR GR64:$y, (NGR GR64:$y, GR64:$x))>;
 
+// Shift/rotate instructions only use the last 6 bits of the second operand
+// register, so we can safely use NILL (16 fewer bits than NILF) to only AND the
+// last 16 bits.
+// Complexity is added so that we match this before we match NILF on the AND
+// operation alone.
+let AddedComplexity = 4 in {
+  def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+            (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)),
+            (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+            (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+            (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)),
+            (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+            (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)),
+            (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+
+  def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)),
+            (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+}
+
 // Peepholes for turning scalar operations into block operations.
 defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence,
                       XCSequence, 1>;

Added: llvm/trunk/test/CodeGen/SystemZ/rot-01.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/rot-01.ll?rev=274066&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/rot-01.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/rot-01.ll Tue Jun 28 16:03:19 2016
@@ -0,0 +1,35 @@
+; Test shortening of NILF to NILL when the result is used as a rotate amount.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test 32-bit rotate.
+define i32 @f1(i32 %val, i32 %amt) {
+; CHECK-LABEL: f1:
+; CHECK: nill %r3, 31
+; CHECK: rll %r2, %r2, 0(%r3)
+  %mod = urem i32 %amt, 32
+
+  %inv = sub i32 32, %mod
+  %parta = shl i32 %val, %mod
+  %partb = lshr i32 %val, %inv
+
+  %rotl = or i32 %parta, %partb
+
+  ret i32 %rotl
+}
+
+; Test 64-bit rotate.
+define i64 @f2(i64 %val, i64 %amt) {
+; CHECK-LABEL: f2:
+; CHECK: nill %r3, 63
+; CHECK: rllg %r2, %r2, 0(%r3)
+  %mod = urem i64 %amt, 64
+
+  %inv = sub i64 64, %mod
+  %parta = shl i64 %val, %mod
+  %partb = lshr i64 %val, %inv
+
+  %rotl = or i64 %parta, %partb
+
+  ret i64 %rotl
+}

Added: llvm/trunk/test/CodeGen/SystemZ/shift-11.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/shift-11.ll?rev=274066&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/shift-11.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/shift-11.ll Tue Jun 28 16:03:19 2016
@@ -0,0 +1,63 @@
+; Test shortening of NILF to NILL when the result is used as a shift amount.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test logical shift right.
+define i32 @f1(i32 %a, i32 %sh) {
+; CHECK-LABEL: f1:
+; CHECK: nill %r3, 31
+; CHECK: srl %r2, 0(%r3)
+  %and = and i32 %sh, 31
+  %shift = lshr i32 %a, %and
+  ret i32 %shift
+}
+
+; Test arithmetic shift right.
+define i32 @f2(i32 %a, i32 %sh) {
+; CHECK-LABEL: f2:
+; CHECK: nill %r3, 31
+; CHECK: sra %r2, 0(%r3)
+  %and = and i32 %sh, 31
+  %shift = ashr i32 %a, %and
+  ret i32 %shift
+}
+
+; Test shift left.
+define i32 @f3(i32 %a, i32 %sh) {
+; CHECK-LABEL: f3:
+; CHECK: nill %r3, 31
+; CHECK: sll %r2, 0(%r3)
+  %and = and i32 %sh, 31
+  %shift = shl i32 %a, %and
+  ret i32 %shift
+}
+
+; Test 64-bit logical shift right.
+define i64 @f4(i64 %a, i64 %sh) {
+; CHECK-LABEL: f4:
+; CHECK: nill %r3, 31
+; CHECK: srlg %r2, %r2, 0(%r3)
+  %and = and i64 %sh, 31
+  %shift = lshr i64 %a, %and
+  ret i64 %shift
+}
+
+; Test 64-bit arithmetic shift right.
+define i64 @f5(i64 %a, i64 %sh) {
+; CHECK-LABEL: f5:
+; CHECK: nill %r3, 31
+; CHECK: srag %r2, %r2, 0(%r3)
+  %and = and i64 %sh, 31
+  %shift = ashr i64 %a, %and
+  ret i64 %shift
+}
+
+; Test 64-bit shift left.
+define i64 @f6(i64 %a, i64 %sh) {
+; CHECK-LABEL: f6:
+; CHECK: nill %r3, 31
+; CHECK: sllg %r2, %r2, 0(%r3)
+  %and = and i64 %sh, 31
+  %shift = shl i64 %a, %and
+  ret i64 %shift
+}




More information about the llvm-commits mailing list