[llvm-branch-commits] [llvm] b3cf704 - [CodeGen] Fix the computation of the alignment of split stores.

Hans Wennborg via llvm-branch-commits <llvm-branch-commits at lists.llvm.org>
Wed Feb 12 06:29:59 PST 2020


Author: Hans Wennborg
Date: 2020-02-12T15:29:31+01:00
New Revision: b3cf70427eb1e97d9b89ba6e9298c280c8a32c74

URL: https://github.com/llvm/llvm-project/commit/b3cf70427eb1e97d9b89ba6e9298c280c8a32c74
DIFF: https://github.com/llvm/llvm-project/commit/b3cf70427eb1e97d9b89ba6e9298c280c8a32c74.diff

LOG: [CodeGen] Fix the computation of the alignment of split stores.

By Clement Courbet!

Backported from rG15488ff24b4a
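
For readers skimming the diff: the old code always gave the upper half of the
split store SI.getAlignment() / 2. That ignores endianness (on big-endian
targets the upper half is the one written at offset 0) and can even produce an
alignment of 0, which the IR treats as "use the ABI alignment", when the
original store was only align 1. The fix instead clamps only the half written
at a nonzero offset, to an alignment compatible with both the original
alignment and that byte offset. Below is a minimal standalone sketch of the
clamping, assuming power-of-two alignments; commonAlignment here is a
simplified stand-in for the LLVM helper of the same name, not the library code:

    // split_align_sketch.cpp -- illustrative only, not part of the commit.
    #include <cassert>
    #include <cstdint>

    // Largest power of two dividing both A and B (OrigAlign nonzero), i.e.
    // the role played by llvm::MinAlign in the real implementation.
    static uint64_t minAlign(uint64_t A, uint64_t B) {
      return uint64_t(1) << __builtin_ctzll(A | B);
    }

    // Alignment known for a store placed OffsetBytes past a pointer that is
    // OrigAlign-aligned.
    static uint64_t commonAlignment(uint64_t OrigAlign, uint64_t OffsetBytes) {
      return OffsetBytes == 0 ? OrigAlign : minAlign(OrigAlign, OffsetBytes);
    }

    int main() {
      // An i64 store split into two i32 halves: the second half lands
      // HalfValBitSize / 8 == 4 bytes past the original pointer.
      assert(commonAlignment(8, 0) == 8); // offset-0 half keeps align 8
      assert(commonAlignment(8, 4) == 4); // offset-4 half is clamped to 4
      assert(commonAlignment(2, 4) == 2); // under-aligned store keeps align 2
      assert(commonAlignment(1, 4) == 1); // align 1 stays 1, never becomes 0
      return 0;
    }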

Added: 
    llvm/test/Transforms/CodeGenPrepare/PowerPC/split-store-alignment.ll
    llvm/test/Transforms/CodeGenPrepare/X86/split-store-alignment.ll

Modified: 
    llvm/lib/CodeGen/CodeGenPrepare.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 003db39fe5f9..7d77664fbf69 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -6857,12 +6857,20 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
     Value *Addr = Builder.CreateBitCast(
         SI.getOperand(1),
         SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
-    if ((IsLE && Upper) || (!IsLE && !Upper))
+    const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
+    if (IsOffsetStore)
       Addr = Builder.CreateGEP(
           SplitStoreType, Addr,
           ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
+    MaybeAlign Alignment(SI.getAlignment());
+    if (IsOffsetStore && Alignment) {
+      // When splitting the store in half, naturally one half will retain the
+      // alignment of the original wider store, regardless of whether it was
+      // over-aligned or not, while the other will require adjustment.
+      Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
+    }
     Builder.CreateAlignedStore(
-        V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment());
+        V, Addr, Alignment.hasValue() ? Alignment.getValue().value() : 0);
   };
 
   CreateSplitStore(LValue, false);
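
The effect of the change on the i64 -> 2 x i32 splits exercised by the new
tests below (the second half lands 4 bytes in, so its alignment becomes
commonAlignment(OrigAlign, 4), which for these power-of-two alignments is
simply the smaller of the two):

    original store   offset-0 half   offset-4 half
    align 1          align 1         align 1
    align 2          align 2         align 2
    align 8          align 8         align 4

Which half is the offset-4 store depends on endianness: on little-endian
targets it is the upper half of the value, on big-endian targets the lower
half, which is why the BE and LE check lines differ only in which store
carries the GEP.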

diff  --git a/llvm/test/Transforms/CodeGenPrepare/PowerPC/split-store-alignment.ll b/llvm/test/Transforms/CodeGenPrepare/PowerPC/split-store-alignment.ll
new file mode 100644
index 000000000000..5bc7d3a3b3b4
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/PowerPC/split-store-alignment.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -codegenprepare -mtriple=powerpc64-unknown-linux-gnu -data-layout="E-m:e-i64:64-n32:64" -force-split-store < %s  | FileCheck --check-prefixes=ALL,BE %s
+; RUN: opt -S -codegenprepare -mtriple=powerpc64le-unknown-linux-gnu -data-layout="e-m:e-i64:64-n32:64" -force-split-store < %s  | FileCheck --check-prefixes=ALL,LE %s
+
+define void @split_store_align1(float %x, i64* %p) {
+; BE-LABEL: @split_store_align1(
+; BE-NEXT:    [[B:%.*]] = bitcast float [[X:%.*]] to i32
+; BE-NEXT:    [[Z:%.*]] = zext i32 0 to i64
+; BE-NEXT:    [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
+; BE-NEXT:    [[Z2:%.*]] = zext i32 [[B]] to i64
+; BE-NEXT:    [[O:%.*]] = or i64 [[S]], [[Z2]]
+; BE-NEXT:    [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
+; BE-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 1
+; BE-NEXT:    store i32 [[B]], i32* [[TMP2]], align 1
+; BE-NEXT:    [[TMP3:%.*]] = bitcast i64* [[P]] to i32*
+; BE-NEXT:    store i32 0, i32* [[TMP3]], align 1
+; BE-NEXT:    ret void
+;
+; LE-LABEL: @split_store_align1(
+; LE-NEXT:    [[B:%.*]] = bitcast float [[X:%.*]] to i32
+; LE-NEXT:    [[Z:%.*]] = zext i32 0 to i64
+; LE-NEXT:    [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
+; LE-NEXT:    [[Z2:%.*]] = zext i32 [[B]] to i64
+; LE-NEXT:    [[O:%.*]] = or i64 [[S]], [[Z2]]
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
+; LE-NEXT:    store i32 [[B]], i32* [[TMP1]], align 1
+; LE-NEXT:    [[TMP2:%.*]] = bitcast i64* [[P]] to i32*
+; LE-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 1
+; LE-NEXT:    store i32 0, i32* [[TMP3]], align 1
+; LE-NEXT:    ret void
+;
+  %b = bitcast float %x to i32
+  %z = zext i32 0 to i64
+  %s = shl nuw nsw i64 %z, 32
+  %z2 = zext i32 %b to i64
+  %o = or i64 %s, %z2
+  store i64 %o, i64* %p, align 1
+  ret void
+}
+
+define void @split_store_align2(float %x, i64* %p) {
+; BE-LABEL: @split_store_align2(
+; BE-NEXT:    [[B:%.*]] = bitcast float [[X:%.*]] to i32
+; BE-NEXT:    [[Z:%.*]] = zext i32 0 to i64
+; BE-NEXT:    [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
+; BE-NEXT:    [[Z2:%.*]] = zext i32 [[B]] to i64
+; BE-NEXT:    [[O:%.*]] = or i64 [[S]], [[Z2]]
+; BE-NEXT:    [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
+; BE-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 1
+; BE-NEXT:    store i32 [[B]], i32* [[TMP2]], align 2
+; BE-NEXT:    [[TMP3:%.*]] = bitcast i64* [[P]] to i32*
+; BE-NEXT:    store i32 0, i32* [[TMP3]], align 2
+; BE-NEXT:    ret void
+;
+; LE-LABEL: @split_store_align2(
+; LE-NEXT:    [[B:%.*]] = bitcast float [[X:%.*]] to i32
+; LE-NEXT:    [[Z:%.*]] = zext i32 0 to i64
+; LE-NEXT:    [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
+; LE-NEXT:    [[Z2:%.*]] = zext i32 [[B]] to i64
+; LE-NEXT:    [[O:%.*]] = or i64 [[S]], [[Z2]]
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
+; LE-NEXT:    store i32 [[B]], i32* [[TMP1]], align 2
+; LE-NEXT:    [[TMP2:%.*]] = bitcast i64* [[P]] to i32*
+; LE-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 1
+; LE-NEXT:    store i32 0, i32* [[TMP3]], align 2
+; LE-NEXT:    ret void
+;
+  %b = bitcast float %x to i32
+  %z = zext i32 0 to i64
+  %s = shl nuw nsw i64 %z, 32
+  %z2 = zext i32 %b to i64
+  %o = or i64 %s, %z2
+  store i64 %o, i64* %p, align 2
+  ret void
+}
+
+define void @split_store_align8(float %x, i64* %p) {
+; BE-LABEL: @split_store_align8(
+; BE-NEXT:    [[B:%.*]] = bitcast float [[X:%.*]] to i32
+; BE-NEXT:    [[Z:%.*]] = zext i32 0 to i64
+; BE-NEXT:    [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
+; BE-NEXT:    [[Z2:%.*]] = zext i32 [[B]] to i64
+; BE-NEXT:    [[O:%.*]] = or i64 [[S]], [[Z2]]
+; BE-NEXT:    [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
+; BE-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 1
+; BE-NEXT:    store i32 [[B]], i32* [[TMP2]], align 4
+; BE-NEXT:    [[TMP3:%.*]] = bitcast i64* [[P]] to i32*
+; BE-NEXT:    store i32 0, i32* [[TMP3]], align 8
+; BE-NEXT:    ret void
+;
+; LE-LABEL: @split_store_align8(
+; LE-NEXT:    [[B:%.*]] = bitcast float [[X:%.*]] to i32
+; LE-NEXT:    [[Z:%.*]] = zext i32 0 to i64
+; LE-NEXT:    [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
+; LE-NEXT:    [[Z2:%.*]] = zext i32 [[B]] to i64
+; LE-NEXT:    [[O:%.*]] = or i64 [[S]], [[Z2]]
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
+; LE-NEXT:    store i32 [[B]], i32* [[TMP1]], align 8
+; LE-NEXT:    [[TMP2:%.*]] = bitcast i64* [[P]] to i32*
+; LE-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 1
+; LE-NEXT:    store i32 0, i32* [[TMP3]], align 4
+; LE-NEXT:    ret void
+;
+  %b = bitcast float %x to i32
+  %z = zext i32 0 to i64
+  %s = shl nuw nsw i64 %z, 32
+  %z2 = zext i32 %b to i64
+  %o = or i64 %s, %z2
+  store i64 %o, i64* %p, align 8
+  ret void
+}

diff  --git a/llvm/test/Transforms/CodeGenPrepare/X86/split-store-alignment.ll b/llvm/test/Transforms/CodeGenPrepare/X86/split-store-alignment.ll
new file mode 100644
index 000000000000..7eb8cb8ebfe1
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/split-store-alignment.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -codegenprepare -mtriple=x86_64-unknown-unknown -force-split-store -S < %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-w64-windows-gnu"
+
+define void @split_store_align1(float %x, i64* %p) {
+; CHECK-LABEL: @split_store_align1(
+; CHECK-NEXT:    [[B:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT:    [[Z:%.*]] = zext i32 0 to i64
+; CHECK-NEXT:    [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
+; CHECK-NEXT:    [[Z2:%.*]] = zext i32 [[B]] to i64
+; CHECK-NEXT:    [[O:%.*]] = or i64 [[S]], [[Z2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
+; CHECK-NEXT:    store i32 [[B]], i32* [[TMP1]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[P]] to i32*
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 1
+; CHECK-NEXT:    store i32 0, i32* [[TMP3]], align 1
+; CHECK-NEXT:    ret void
+;
+  %b = bitcast float %x to i32
+  %z = zext i32 0 to i64
+  %s = shl nuw nsw i64 %z, 32
+  %z2 = zext i32 %b to i64
+  %o = or i64 %s, %z2
+  store i64 %o, i64* %p, align 1
+  ret void
+}
+
+define void @split_store_align2(float %x, i64* %p) {
+; CHECK-LABEL: @split_store_align2(
+; CHECK-NEXT:    [[B:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT:    [[Z:%.*]] = zext i32 0 to i64
+; CHECK-NEXT:    [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
+; CHECK-NEXT:    [[Z2:%.*]] = zext i32 [[B]] to i64
+; CHECK-NEXT:    [[O:%.*]] = or i64 [[S]], [[Z2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
+; CHECK-NEXT:    store i32 [[B]], i32* [[TMP1]], align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[P]] to i32*
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 1
+; CHECK-NEXT:    store i32 0, i32* [[TMP3]], align 2
+; CHECK-NEXT:    ret void
+;
+  %b = bitcast float %x to i32
+  %z = zext i32 0 to i64
+  %s = shl nuw nsw i64 %z, 32
+  %z2 = zext i32 %b to i64
+  %o = or i64 %s, %z2
+  store i64 %o, i64* %p, align 2
+  ret void
+}
+
+define void @split_store_align8(float %x, i64* %p) {
+; CHECK-LABEL: @split_store_align8(
+; CHECK-NEXT:    [[B:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT:    [[Z:%.*]] = zext i32 0 to i64
+; CHECK-NEXT:    [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
+; CHECK-NEXT:    [[Z2:%.*]] = zext i32 [[B]] to i64
+; CHECK-NEXT:    [[O:%.*]] = or i64 [[S]], [[Z2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
+; CHECK-NEXT:    store i32 [[B]], i32* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[P]] to i32*
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 1
+; CHECK-NEXT:    store i32 0, i32* [[TMP3]], align 4
+; CHECK-NEXT:    ret void
+;
+  %b = bitcast float %x to i32
+  %z = zext i32 0 to i64
+  %s = shl nuw nsw i64 %z, 32
+  %z2 = zext i32 %b to i64
+  %o = or i64 %s, %z2
+  store i64 %o, i64* %p, align 8
+  ret void
+}
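
A hedged usage note (the build directory name and paths are assumptions about
a typical LLVM checkout, not part of the commit): the RUN line above can be
reproduced by hand, and the NOTE header indicates the CHECK lines were
generated with utils/update_test_checks.py, so they can be regenerated the
same way after further changes to CodeGenPrepare:

    # Reproduce the X86 RUN line manually.
    build/bin/opt -codegenprepare -mtriple=x86_64-unknown-unknown -force-split-store -S \
        < llvm/test/Transforms/CodeGenPrepare/X86/split-store-alignment.ll \
      | build/bin/FileCheck llvm/test/Transforms/CodeGenPrepare/X86/split-store-alignment.ll

    # Regenerate the autogenerated CHECK lines.
    llvm/utils/update_test_checks.py --opt-binary=build/bin/opt \
        llvm/test/Transforms/CodeGenPrepare/X86/split-store-alignment.ll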

More information about the llvm-branch-commits mailing list