[PATCH] D76055: [SystemZ] Improve foldMemoryOperandImpl().

Thu Mar 12 03:46:56 PDT 2020

jonpa created this revision.
jonpa added a reviewer: uweigand.
Herald added subscribers: llvm-commits, hiraditya.
Herald added a project: LLVM.

A spilled load of an immediate can use MVHI/MVGHI instead.
A compare of a spilled register against an immediate can use CHSI/CGHSI.

  On SPEC 17:                          trunk <> patched
  
  chsi           :                53231                59356    +6125
  lt             :                19324                14060    -5264
  cghsi          :                29368                34598    +5230
  ltg            :               166914               161949    -4965
  mvhi           :                29323                33923    +4600
  lhi            :               262083               257623    -4460
  st             :               181993               177559    -4434
  mvghi          :                54650                58599    +3949
  stg            :               409267               405380    -3887
  lghi           :               467915               464077    -3838
  l              :               231431               230640     -791
  jlh            :               178461               178961     +500
  lg             :              1093362              1092985     -377
  cijlh          :                83235                82875     -360
  je             :               340896               341233     +337
  cije           :               111150               110969     -181
  jl             :                52685                52808     +123
  chi            :                60634                60530     -104
  cijl           :                13434                13358      -76

Since LT/LTG and LHI/LGHI use a register write and an extra instruction, while CHSI/CGHSI and MVHI/MVGHI do not, this should be a general improvement. I didn't see any big change in spilling/reloading, though (in fact, there is a very slight increase in the number of instructions, which is probably related to later optimizations).

These are the remaining improvements I could see while looking at imagick. They seem to improve it by perhaps another percent or so.


https://reviews.llvm.org/D76055

Files:
  llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
  llvm/test/CodeGen/SystemZ/foldmemop-imm.ll


Index: llvm/test/CodeGen/SystemZ/foldmemop-imm.ll
===================================================================

--- /dev/null
+++ llvm/test/CodeGen/SystemZ/foldmemop-imm.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -O3 -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -O3 -mcpu=z14 | FileCheck %s
+;
+; Test folding of spilled immediate loads and compares.
+
+define i32 @fun0(i32 *%src, i32 %arg) nounwind {
+; CHECK-LABEL: fun0:
+; CHECK: 	mvhi	164(%r15), 0            # 4-byte Folded Spill
+; CHECK:	mvc	164(4,%r15), 0(%r2)     # 4-byte Folded Spill
+; CHECK-LABEL: .LBB0_2:
+; CHECK:	chsi	164(%r15), 2            # 4-byte Folded Reload
+
+entry:
+  %cmp  = icmp eq i32 %arg, 0
+  br i1 %cmp, label %cond, label %exit
+
+cond:
+  %val0 = load i32, i32 *%src
+  call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
+  br label %exit
+
+exit:
+  %tmp0 = phi i32 [0, %entry], [%val0, %cond]
+  call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
+  %cmp0 = icmp ne i32 %tmp0, 2
+  %zxt0 = zext i1 %cmp0 to i32
+  %and0 = and i32 %arg, %zxt0
+
+  ret i32 %and0
+}
+
+define i64 @fun1(i64 *%src, i64 %arg) nounwind {
+; CHECK-LABEL: fun1:
+; CHECK: 	mvghi	168(%r15), 0            # 8-byte Folded Spill
+; CHECK:	mvc	168(8,%r15), 0(%r2)     # 8-byte Folded Spill
+; CHECK-LABEL: .LBB1_2:
+; CHECK:	cghsi	168(%r15), 2            # 8-byte Folded Reload
+entry:
+  %cmp  = icmp eq i64 %arg, 0
+  br i1 %cmp, label %cond, label %exit
+
+cond:
+  %val0 = load i64, i64 *%src
+  call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
+  br label %exit
+
+exit:
+  %tmp0 = phi i64 [0, %entry], [%val0, %cond]
+  call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
+  %cmp0 = icmp ne i64 %tmp0, 2
+  %zxt0 = zext i1 %cmp0 to i64
+  %and0 = and i64 %arg, %zxt0
+
+  ret i64 %and0
+}
Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1078,6 +1078,23 @@
     return BuiltMI;
   }
 
+  unsigned MemImmOpc = 0;
+  switch (Opcode) {
+  case SystemZ::LHIMux:
+  case SystemZ::LHI:    MemImmOpc = SystemZ::MVHI;  break;
+  case SystemZ::LGHI:   MemImmOpc = SystemZ::MVGHI; break;
+  case SystemZ::CHIMux:
+  case SystemZ::CHI:    MemImmOpc = SystemZ::CHSI;  break;
+  case SystemZ::CGHI:   MemImmOpc = SystemZ::CGHSI; break;
+  default: break;
+  }
+  if (MemImmOpc)
+    return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
+                   get(MemImmOpc))
+               .addFrameIndex(FrameIndex)
+               .addImm(0)
+               .addImm(MI.getOperand(1).getImm());
+
   if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) {
     bool Op0IsGPR = (Opcode == SystemZ::LGDR);
     bool Op1IsGPR = (Opcode == SystemZ::LDGR);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D76055.249880.patch
Type: text/x-patch
Size: 3214 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200312/0bf5d214/attachment-0001.bin>