[llvm] e70ae0f - DAG/GlobalISel: Fix broken/redundant setting of MODereferenceable

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 13 17:30:36 PST 2023


Author: Matt Arsenault
Date: 2023-01-13T20:30:30-05:00
New Revision: e70ae0f46bd5553f0702c5d1e30c5dd6f45a8c01

URL: https://github.com/llvm/llvm-project/commit/e70ae0f46bd5553f0702c5d1e30c5dd6f45a8c01
DIFF: https://github.com/llvm/llvm-project/commit/e70ae0f46bd5553f0702c5d1e30c5dd6f45a8c01.diff

LOG: DAG/GlobalISel: Fix broken/redundant setting of MODereferenceable

This was incorrectly setting dereferenceable on unaligned
operands. getLoadMemOperandFlags did the dereferenceability check
without considering alignment, and then both paths (SelectionDAG and
GlobalISel) went on to check isDereferenceableAndAlignedPointer
separately. Make getLoadMemOperandFlags check
isDereferenceableAndAlignedPointer itself, and remove the redundant
second call in each caller.
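
To illustrate the observable difference, here is a minimal IR sketch
mirroring the updated tests (the function names are illustrative, not
part of the patch):

    ; Pointer known dereferenceable and sufficiently aligned: the load
    ; is still lowered with a `dereferenceable` memory operand.
    define i32 @known_align(ptr dereferenceable(4) align(4) %p) {
      %v = load i32, ptr %p, align 4
      ret i32 %v
    }

    ; Same dereferenceable size but no known pointer alignment: the
    ; 4-byte-aligned load no longer gets the dereferenceable flag.
    define i32 @unknown_align(ptr dereferenceable(4) %p) {
      %v = load i32, ptr %p, align 4
      ret i32 %v
    }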

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/TargetLowering.h
    llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/lib/CodeGen/TargetLoweringBase.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll
    llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-objects.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
    llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
    llvm/test/CodeGen/AMDGPU/indirect-call.ll
    llvm/test/CodeGen/AMDGPU/kernel-args.ll
    llvm/test/CodeGen/AMDGPU/legalize-fp-load-invariant.ll
    llvm/test/CodeGen/WebAssembly/reg-stackify.ll
    llvm/test/CodeGen/X86/fold-sext-trunc.ll
    llvm/test/CodeGen/X86/hoist-invariant-load.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index f6179f9dd7241..92c58e0a767d4 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -62,6 +62,7 @@
 
 namespace llvm {
 
+class AssumptionCache;
 class CCState;
 class CCValAssign;
 class Constant;
@@ -424,8 +425,10 @@ class TargetLoweringBase {
     return MachineMemOperand::MONone;
   }
 
-  MachineMemOperand::Flags getLoadMemOperandFlags(const LoadInst &LI,
-                                                  const DataLayout &DL) const;
+  MachineMemOperand::Flags
+  getLoadMemOperandFlags(const LoadInst &LI, const DataLayout &DL,
+                         AssumptionCache *AC = nullptr,
+                         const TargetLibraryInfo *LibInfo = nullptr) const;
   MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI,
                                                    const DataLayout &DL) const;
   MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI,

diff  --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 31b5c7cfd2f70..8cb343ee87185 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1306,7 +1306,8 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
   }
 
   auto &TLI = *MF->getSubtarget().getTargetLowering();
-  MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL);
+  MachineMemOperand::Flags Flags =
+      TLI.getLoadMemOperandFlags(LI, *DL, AC, LibInfo);
   if (AA && !(Flags & MachineMemOperand::MOInvariant)) {
     if (AA->pointsToConstantMemory(
             MemoryLocation(Ptr, LocationSize::precise(StoreSize), AAInfo))) {
@@ -1314,12 +1315,6 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
     }
   }
 
-  if (!(Flags & MachineMemOperand::MODereferenceable)) {
-    if (isDereferenceableAndAlignedPointer(Ptr, LI.getType(), LI.getAlign(),
-                                           *DL, &LI, AC, nullptr, LibInfo))
-      Flags |= MachineMemOperand::MODereferenceable;
-  }
-
   const MDNode *Ranges =
       Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr;
   for (unsigned i = 0; i < Regs.size(); ++i) {

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e261b65b96bbe..96bf1943444d3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4134,7 +4134,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
   const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
   bool isVolatile = I.isVolatile();
   MachineMemOperand::Flags MMOFlags =
-      TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
+      TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
 
   SDValue Root;
   bool ConstantMemory = false;
@@ -4157,10 +4157,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
     Root = DAG.getRoot();
   }
 
-  if (isDereferenceableAndAlignedPointer(SV, Ty, Alignment, DAG.getDataLayout(),
-                                         &I, AC, nullptr, LibInfo))
-    MMOFlags |= MachineMemOperand::MODereferenceable;
-
   SDLoc dl = getCurSDLoc();
 
   if (isVolatile)
@@ -4731,7 +4727,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
       I.getAlign().value() < MemVT.getSizeInBits() / 8)
     report_fatal_error("Cannot generate unaligned atomic load");
 
-  auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
+  auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
 
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
       MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),

diff  --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index c7b1546af06eb..da8b87babc2dd 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -2246,9 +2246,9 @@ void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const {
   MF.getRegInfo().freezeReservedRegs(MF);
 }
 
-MachineMemOperand::Flags
-TargetLoweringBase::getLoadMemOperandFlags(const LoadInst &LI,
-                                           const DataLayout &DL) const {
+MachineMemOperand::Flags TargetLoweringBase::getLoadMemOperandFlags(
+    const LoadInst &LI, const DataLayout &DL, AssumptionCache *AC,
+    const TargetLibraryInfo *LibInfo) const {
   MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad;
   if (LI.isVolatile())
     Flags |= MachineMemOperand::MOVolatile;
@@ -2259,7 +2259,9 @@ TargetLoweringBase::getLoadMemOperandFlags(const LoadInst &LI,
   if (LI.hasMetadata(LLVMContext::MD_invariant_load))
     Flags |= MachineMemOperand::MOInvariant;
 
-  if (isDereferenceablePointer(LI.getPointerOperand(), LI.getType(), DL))
+  if (isDereferenceableAndAlignedPointer(LI.getPointerOperand(), LI.getType(),
+                                         LI.getAlign(), DL, &LI, AC,
+                                         /*DT=*/nullptr, LibInfo))
     Flags |= MachineMemOperand::MODereferenceable;
 
   Flags |= getTargetMMOFlags(LI);

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll
index f29930b54ca95..e955b52e1643b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll
@@ -33,7 +33,7 @@ define i32 @load_dereferenceable(ptr dereferenceable(4) %ptr) {
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (dereferenceable load (s32) from %ir.ptr)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.ptr)
   ; CHECK-NEXT:   $w0 = COPY [[LOAD]](s32)
   ; CHECK-NEXT:   RET_ReallyLR implicit $w0
   %load = load i32, ptr %ptr, align 4
@@ -46,7 +46,7 @@ define i32 @load_dereferenceable_invariant(ptr dereferenceable(4) %ptr) {
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (dereferenceable invariant load (s32) from %ir.ptr)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (invariant load (s32) from %ir.ptr)
   ; CHECK-NEXT:   $w0 = COPY [[LOAD]](s32)
   ; CHECK-NEXT:   RET_ReallyLR implicit $w0
   %load = load i32, ptr %ptr, align 4, !invariant.load !0

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-objects.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-objects.ll
index 78b30b61574b5..61735e3ae8f1d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-objects.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-objects.ll
@@ -6,23 +6,33 @@
 
 ; CHECK-LABEL: name: stack_passed_i64
 ; CHECK: fixedStack:
-; CHECK:  - { id: 0, type: default, offset: 8, size: 8, alignment: 8, stack-id: default,
+; CHECK:  - { id: 0, type: default, offset: 16, size: 8, alignment: 16, stack-id: default,
 ; CHECK-NEXT:      isImmutable: false, isAliased: false,
-; CHECK:  - { id: 1, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
+; CHECK:  - { id: 1, type: default, offset: 8, size: 8, alignment: 8, stack-id: default,
+; CHECK-NEXT:      isImmutable: false, isAliased: false,
+; CHECK:  - { id: 2, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
 ; CHECK-NEXT: isImmutable: true, isAliased: false,
 define void @stack_passed_i64(i64 %arg, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6,
-                              i64 %arg7, i64 %arg8, ptr byval(i64) %arg9) {
+                              i64 %arg7, i64 %arg8, ptr byval(i64) %arg9, ptr byval(i64) align(8) %arg10) {
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64)  from %fixed-stack.1, align 16)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
+  ; CHECK:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64)  from %fixed-stack.2, align 16)
+  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
   ; CHECK:   [[COPY8:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX1]](p0)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY8]](p0) :: (dereferenceable load (s64)  from %ir.arg9)
-  ; CHECK:   [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD1]], [[LOAD]]
-  ; CHECK:   G_STORE [[ADD]](s64), [[COPY8]](p0) :: (volatile store (s64) into %ir.arg9)
-  ; CHECK:   RET_ReallyLR
-  %load = load i64, ptr %arg9
-  %add = add i64 %load, %arg8
-  store volatile i64 %add, ptr %arg9
+  ; CHECK:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK:   [[COPY9:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX2]](p0)
+  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY8]](p0) :: (load (s64) from %ir.arg9)
+  ; CHECK:   [[ADD0:%[0-9]+]]:_(s64) = G_ADD [[LOAD1]], [[LOAD]]
+  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[COPY9]](p0) :: (dereferenceable load (s64) from %ir.arg10)
+  ; CHECK:   [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[ADD0]], [[LOAD2]]
+  ; CHECK:   G_STORE [[ADD1]](s64), [[COPY8]](p0) :: (volatile store (s64) into %ir.arg9)
+
+
+; CHECK:   RET_ReallyLR
+  %load0 = load i64, ptr %arg9
+  %add0 = add i64 %load0, %arg8
+  %load1 = load i64, ptr %arg10
+  %add1 = add i64 %add0, %load1
+  store volatile i64 %add1, ptr %arg9
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
index 34860d46d832e..10931c04807a7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
@@ -1659,10 +1659,10 @@ define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %
   ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (load (s8) from %ir.arg0, align 4, addrspace 5)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
   ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from %ir.arg0 + 4, addrspace 5)
   ; CHECK-NEXT:   G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
@@ -1685,13 +1685,13 @@ define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile load (s8) from %ir.arg0, align 4, addrspace 5)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
   ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5)
-  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile dereferenceable load (s8) from %ir.arg1, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile load (s32) from %ir.arg0 + 4, addrspace 5)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile load (s8) from %ir.arg1, align 4, addrspace 5)
   ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](s32)
-  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load (s32) from %ir.arg1 + 4, addrspace 5)
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile load (s32) from %ir.arg1 + 4, addrspace 5)
   ; CHECK-NEXT:   G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
@@ -1717,8 +1717,8 @@ define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, pt
   ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5)
-  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s64) from %ir.arg1, addrspace 5)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32) from %ir.arg0, addrspace 5)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (load (s64) from %ir.arg1, addrspace 5)
   ; CHECK-NEXT:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   G_STORE [[LOAD1]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   SI_RETURN

diff  --git a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
index 3de705a947be7..9867d1208f257 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s
 
-define amdgpu_cs void @mmo_offsets0(ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615) %arg0, i32 %arg1) {
+define amdgpu_cs void @mmo_offsets0(ptr addrspace(6) inreg noalias align(16) dereferenceable(18446744073709551615) %arg0, i32 %arg1) {
   ; GCN-LABEL: name: mmo_offsets0
   ; GCN: bb.0.bb.0:
   ; GCN-NEXT:   liveins: $sgpr0, $vgpr0

diff  --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
index 5040e5348aa14..274d5db944435 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -182,9 +182,9 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
 ; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GISEL-NEXT:    s_add_u32 s8, s8, 8
 ; GISEL-NEXT:    s_addc_u32 s9, s9, 0
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
 ; GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
 ; GISEL-NEXT:    s_mov_b32 s14, s16
@@ -373,12 +373,12 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
 ; GISEL-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
 ; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GISEL-NEXT:    s_add_u32 s8, s8, 8
 ; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL-NEXT:    s_add_u32 s8, s8, 8
 ; GISEL-NEXT:    s_addc_u32 s9, s9, 0
-; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
-; GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
+; GISEL-NEXT:    v_or_b32_e32 v31, v0, v2
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GISEL-NEXT:    s_mov_b32 s14, s16
 ; GISEL-NEXT:    s_waitcnt lgkmcnt(0)

diff  --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
index 5c35c107982cc..812a1c2486872 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -6129,7 +6129,7 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocaptu
   ret void
 }
 
-define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) %in.byref, i32 %after.offset) {
+define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) {
 ; SI-LABEL: byref_natural_align_constant_v16i32_arg:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x19

diff  --git a/llvm/test/CodeGen/AMDGPU/legalize-fp-load-invariant.ll b/llvm/test/CodeGen/AMDGPU/legalize-fp-load-invariant.ll
index e0de5430a23e6..5dc79d13d7896 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-fp-load-invariant.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-fp-load-invariant.ll
@@ -4,7 +4,14 @@
 ; and dereferenceable flags.
 
 ; GCN: BUFFER_LOAD_USHORT{{.*}} :: (dereferenceable invariant load (s16) from %ir.ptr, addrspace 4)
-define half @legalize_f16_load(ptr addrspace(4) dereferenceable(4) %ptr) {
+define half @legalize_f16_load_align2(ptr addrspace(4) dereferenceable(4) align(2) %ptr) {
+  %load = load half, ptr addrspace(4) %ptr, !invariant.load !0
+  %add = fadd half %load, 1.0
+  ret half %add
+}
+
+; GCN: BUFFER_LOAD_USHORT{{.*}} :: (invariant load (s16) from %ir.ptr, addrspace 4)
+define half @legalize_f16_load_align1(ptr addrspace(4) dereferenceable(4) align(1) %ptr) {
   %load = load half, ptr addrspace(4) %ptr, !invariant.load !0
   %add = fadd half %load, 1.0
   ret half %add

diff  --git a/llvm/test/CodeGen/WebAssembly/reg-stackify.ll b/llvm/test/CodeGen/WebAssembly/reg-stackify.ll
index e3b49c3faa11a..28f167ec65afb 100644
--- a/llvm/test/CodeGen/WebAssembly/reg-stackify.ll
+++ b/llvm/test/CodeGen/WebAssembly/reg-stackify.ll
@@ -26,7 +26,7 @@ define i32 @no0(ptr %p, ptr %q) {
 ; CHECK: return $1{{$}}
 ; NOREGS-LABEL: no1:
 ; NOREGS: return{{$}}
-define i32 @no1(ptr %p, ptr dereferenceable(4) %q) {
+define i32 @no1(ptr %p, ptr dereferenceable(4) align(4) %q) {
   %t = load volatile i32, ptr %q, !invariant.load !0
   store volatile i32 0, ptr %p
   ret i32 %t
@@ -38,7 +38,7 @@ define i32 @no1(ptr %p, ptr dereferenceable(4) %q) {
 ; CHECK: return $pop{{[0-9]+}}{{$}}
 ; NOREGS-LABEL: yes0:
 ; NOREGS: return{{$}}
-define i32 @yes0(ptr %p, ptr dereferenceable(4) %q) {
+define i32 @yes0(ptr %p, ptr dereferenceable(4) align(4) %q) {
   %t = load i32, ptr %q, !invariant.load !0
   store i32 0, ptr %p
   ret i32 %t
@@ -559,7 +559,7 @@ define i32 @no_stackify_store_past_load(i32 %a, ptr %p1, ptr %p2) {
 ; NOREGS: call callee
 ; NOREGS: i32.load 0
 ; NOREGS: return
-define i32 @store_past_invar_load(i32 %a, ptr %p1, ptr dereferenceable(4) %p2) {
+define i32 @store_past_invar_load(i32 %a, ptr %p1, ptr dereferenceable(4) align(4) %p2) {
   store i32 %a, ptr %p1
   %b = load i32, ptr %p2, !invariant.load !0
   call i32 @callee(i32 %a)

diff  --git a/llvm/test/CodeGen/X86/fold-sext-trunc.ll b/llvm/test/CodeGen/X86/fold-sext-trunc.ll
index 8ee2e113b6494..b498e5c77ad9c 100644
--- a/llvm/test/CodeGen/X86/fold-sext-trunc.ll
+++ b/llvm/test/CodeGen/X86/fold-sext-trunc.ll
@@ -5,7 +5,7 @@
 %0 = type { i64 }
 %struct.S1 = type { i16, i32 }
 
-@g_10 = external dso_local global %struct.S1
+@g_10 = external dso_local global %struct.S1, align 8
 
 declare void @func_28(i64, i64)
 

diff  --git a/llvm/test/CodeGen/X86/hoist-invariant-load.ll b/llvm/test/CodeGen/X86/hoist-invariant-load.ll
index 8687b64b7f593..ee23c922174d6 100644
--- a/llvm/test/CodeGen/X86/hoist-invariant-load.ll
+++ b/llvm/test/CodeGen/X86/hoist-invariant-load.ll
@@ -212,7 +212,7 @@ for.end:                                          ; preds = %for.body
 
 declare ptr @objc_msgSend(ptr, ptr, ...) nonlazybind
 
-define void @test_multi_def(ptr dereferenceable(8) %x1,
+define void @test_multi_def(ptr dereferenceable(8) align(8) %x1,
 ; CHECK-LABEL: test_multi_def:
 ; CHECK:       ## %bb.0: ## %entry
 ; CHECK-NEXT:    movq %rdx, %rax
@@ -233,7 +233,7 @@ define void @test_multi_def(ptr dereferenceable(8) %x1,
 ; CHECK-NEXT:    jl LBB4_2
 ; CHECK-NEXT:  ## %bb.3: ## %exit
 ; CHECK-NEXT:    retq
-                            ptr dereferenceable(8) %x2,
+                            ptr dereferenceable(8) align(8) %x2,
                             ptr %y, i64 %count) nounwind nofree nosync {
 entry:
   br label %for.body
@@ -260,7 +260,7 @@ exit:
   ret void
 }
 
-define void @test_div_def(ptr dereferenceable(8) %x1,
+define void @test_div_def(ptr dereferenceable(8) align(8) %x1,
 ; CHECK-LABEL: test_div_def:
 ; CHECK:       ## %bb.0: ## %entry
 ; CHECK-NEXT:    movq %rdx, %r8
@@ -281,7 +281,7 @@ define void @test_div_def(ptr dereferenceable(8) %x1,
 ; CHECK-NEXT:    jl LBB5_2
 ; CHECK-NEXT:  ## %bb.3: ## %exit
 ; CHECK-NEXT:    retq
-                          ptr dereferenceable(8) %x2,
+                          ptr dereferenceable(8) align(8) %x2,
                           ptr %y, i32 %count) nounwind nofree nosync {
 entry:
   br label %for.body

