[PATCH] merge consecutive loads that are offset from a base address (PR21771)

Sanjay Patel spatel at rotateright.com
Fri Dec 12 09:06:38 PST 2014


Hi hfinkel, andreadb, RKSimon,

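SelectionDAG::isConsecutiveLoad() was not detecting consecutive loads when the first load was itself at a constant offset from a base address. This patch recognizes that pattern: when both loads are addressed as the same base plus a constant, it subtracts the first load's offset from the second load's offset and compares the difference to the expected distance (Dist * Bytes) to decide whether the loads are consecutive.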

The codegen for the new test case improves from:
   vmovsd	32(%rdi), %xmm0
   vmovsd	48(%rdi), %xmm1 
   vmovhpd	56(%rdi), %xmm1, %xmm1
   vmovhpd	40(%rdi), %xmm0, %xmm0
   vinsertf128	$1, %xmm1, %ymm0, %ymm0

To:
   vmovups	32(%rdi), %ymm0

An existing test case is also improved from:
   vmovsd	(%rdi), %xmm0
   vmovsd	16(%rdi), %xmm1
   vmovsd	24(%rdi), %xmm2
   vunpcklpd	%xmm2, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm2[0]
   vmovhpd	8(%rdi), %xmm1, %xmm3

To:
   vmovsd	(%rdi), %xmm0
   vmovsd	16(%rdi), %xmm1
   vmovhpd	24(%rdi), %xmm0, %xmm0
   vmovhpd	8(%rdi), %xmm1, %xmm1


This patch fixes PR21771 ( http://llvm.org/bugs/show_bug.cgi?id=21771 ).

http://reviews.llvm.org/D6642

Files:
  lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  test/CodeGen/X86/chain_order.ll
  test/CodeGen/X86/vec_loadsingles.ll

Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6553,11 +6553,24 @@
     return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
   }
 
-  // Handle X+C
-  if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
-      cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
-    return true;
-
+  // Handle X + C.
+  if (isBaseWithConstantOffset(Loc)) {
+    int LocOffset = cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
+    if (Loc.getOperand(0) == BaseLoc) {
+      // If the base location is a simple address with no offset itself, then
+      // the second load's first add operand should be the base address.
+      if (LocOffset == Dist * (signed)Bytes)
+        return true;
+    } else if (isBaseWithConstantOffset(BaseLoc)) {
+      // The base location itself has an offset, so subtract that value from the
+      // second load's offset before comparing to distance * size.
+      int BOffset = cast<ConstantSDNode>(BaseLoc.getOperand(1))->getSExtValue();
+      if (Loc.getOperand(0) == BaseLoc.getOperand(0)) {
+        if ((LocOffset - BOffset) == Dist * (signed)Bytes)
+          return true;
+      }
+    }
+  }
   const GlobalValue *GV1 = nullptr;
   const GlobalValue *GV2 = nullptr;
   int64_t Offset1 = 0;
Index: test/CodeGen/X86/chain_order.ll
===================================================================
--- test/CodeGen/X86/chain_order.ll
+++ test/CodeGen/X86/chain_order.ll
@@ -3,7 +3,7 @@
 ;CHECK-LABEL: cftx020:
 ;CHECK: vmovsd  (%rdi), %xmm{{.*}}
 ;CHECK: vmovsd  16(%rdi), %xmm{{.*}}
-;CHECK: vmovsd  24(%rdi), %xmm{{.*}}
+;CHECK: vmovhpd  24(%rdi), %xmm{{.*}}
 ;CHECK: vmovhpd  8(%rdi), %xmm{{.*}}
 ;CHECK: vmovupd %xmm{{.*}}, (%rdi)
 ;CHECK: vmovupd %xmm{{.*}}, 16(%rdi)
Index: test/CodeGen/X86/vec_loadsingles.ll
===================================================================
--- test/CodeGen/X86/vec_loadsingles.ll
+++ test/CodeGen/X86/vec_loadsingles.ll
@@ -89,7 +89,7 @@
 ; FAST32-NEXT: retq
 
 ; SLOW32: vmovups
-; SLOW32: vinsertf128
+; SLOW32-NEXT: vinsertf128
 ; SLOW32-NEXT: retq
 }
 
@@ -112,7 +112,34 @@
 ; FAST32-NEXT: retq
 
 ; SLOW32: vmovups
-; SLOW32: vinsertf128
+; SLOW32-NEXT: vinsertf128
+; SLOW32-NEXT: retq
+}
+
+; PR21771 ( http://llvm.org/bugs/show_bug.cgi?id=21771 ) 
+; Recognize and combine consecutive loads even when the
+; first of the combined loads is offset from the base address.
+define <4 x double> @merge_4_doubles_offset(double* %ptr) {
+  %arrayidx4 = getelementptr inbounds double* %ptr, i64 4
+  %arrayidx5 = getelementptr inbounds double* %ptr, i64 5
+  %arrayidx6 = getelementptr inbounds double* %ptr, i64 6
+  %arrayidx7 = getelementptr inbounds double* %ptr, i64 7
+  %e = load double* %arrayidx4, align 8
+  %f = load double* %arrayidx5, align 8
+  %g = load double* %arrayidx6, align 8
+  %h = load double* %arrayidx7, align 8
+  %vecinit4 = insertelement <4 x double> undef, double %e, i32 0
+  %vecinit5 = insertelement <4 x double> %vecinit4, double %f, i32 1
+  %vecinit6 = insertelement <4 x double> %vecinit5, double %g, i32 2
+  %vecinit7 = insertelement <4 x double> %vecinit6, double %h, i32 3
+  ret <4 x double> %vecinit7
+
+; ALL-LABEL: merge_4_doubles_offset
+; FAST32: vmovups
+; FAST32-NEXT: retq
+
+; SLOW32: vmovups
+; SLOW32-NEXT: vinsertf128
 ; SLOW32-NEXT: retq
 }

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D6642.17237.patch
Type: text/x-patch
Size: 3505 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20141212/e8654cd1/attachment.bin>


More information about the llvm-commits mailing list