[llvm] 0332d10 - GlobalISel: remove assert that memcpy Src and Dst addrspace must be identical
Jameson Nash via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 24 17:24:06 PST 2021
Author: Jameson Nash
Date: 2021-11-24T20:23:05-05:00
New Revision: 0332d105b9ad7f1f0ffca7e78b71de8b3a48f158
URL: https://github.com/llvm/llvm-project/commit/0332d105b9ad7f1f0ffca7e78b71de8b3a48f158
DIFF: https://github.com/llvm/llvm-project/commit/0332d105b9ad7f1f0ffca7e78b71de8b3a48f158.diff
LOG: GlobalISel: remove assert that memcpy Src and Dst addrspace must be identical
The LangRef does not require these two pointer arguments to have the same type; the source and destination may be in different address spaces.
Differential Revision: https://reviews.llvm.org/D93154
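For context, the case this change legalizes is a memcpy/memmove whose source
and destination pointers live in two different address spaces, which is
exactly what the new tests below exercise. A minimal sketch in IR (the
function name is illustrative; the address-space numbers 1 and 2 are taken
from the tests):

  declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture writeonly,
                                          i8 addrspace(2)* nocapture readonly,
                                          i64, i1 immarg)

  define void @cross_as_copy(i8 addrspace(1)* %dst, i8 addrspace(2)* %src) {
  entry:
    ; %dst is p1 and %src is p2: valid per the LangRef, but lowerMemcpy used
    ; to derive a single PtrTy from Src and reuse it for the Dst G_PTR_ADDs.
    call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %dst,
                                         i8 addrspace(2)* align 4 %src,
                                         i64 72, i1 false)
    ret void
  }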
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir
llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir
Removed:
################################################################################
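The fix is mechanical: rather than caching one PtrTy taken from Src, each
G_PTR_ADD is now built with the type of the pointer operand it offsets.
Condensed from the CHECK lines of the new tests (virtual register names
abbreviated), one load/store pair of the expansion now looks like:

  %off:_(s64)  = G_CONSTANT i64 16
  %sptr:_(p2)  = G_PTR_ADD %src, %off(s64)   ; offset in the source addrspace
  %val:_(s128) = G_LOAD %sptr(p2) :: (load (s128), addrspace 2)
  %dptr:_(p1)  = G_PTR_ADD %dst, %off(s64)   ; offset in the dest addrspace
  G_STORE %val(s128), %dptr(p1) :: (store (s128), addrspace 1)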
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index c74bec7dfc0d3..a775f2e69a3e7 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7782,7 +7782,6 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
   // of that value loaded. This can result in a sequence of loads and stores
   // mixed types, depending on what the target specifies as good types to use.
   unsigned CurrOffset = 0;
-  LLT PtrTy = MRI.getType(Src);
   unsigned Size = KnownLen;
   for (auto CopyTy : MemOps) {
     // Issuing an unaligned load / store pair that overlaps with the previous
@@ -7800,15 +7799,19 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
     Register LoadPtr = Src;
     Register Offset;
     if (CurrOffset != 0) {
-      Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
+      LLT SrcTy = MRI.getType(Src);
+      Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
                    .getReg(0);
-      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
+      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
     }
     auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
 
     // Create the store.
-    Register StorePtr =
-        CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+    Register StorePtr = Dst;
+    if (CurrOffset != 0) {
+      LLT DstTy = MRI.getType(Dst);
+      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
+    }
     MIB.buildStore(LdVal, StorePtr, *StoreMMO);
     CurrOffset += CopyTy.getSizeInBytes();
     Size -= CopyTy.getSizeInBytes();
@@ -7885,7 +7888,6 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
   // Apart from that, this loop is pretty much doing the same thing as the
   // memcpy codegen function.
   unsigned CurrOffset = 0;
-  LLT PtrTy = MRI.getType(Src);
   SmallVector<Register, 16> LoadVals;
   for (auto CopyTy : MemOps) {
     // Construct MMO for the load.
@@ -7895,9 +7897,10 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
     // Create the load.
     Register LoadPtr = Src;
     if (CurrOffset != 0) {
+      LLT SrcTy = MRI.getType(Src);
       auto Offset =
-          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
-      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
+          MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
+      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
     }
     LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
     CurrOffset += CopyTy.getSizeInBytes();
@@ -7912,9 +7915,10 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
 
     Register StorePtr = Dst;
     if (CurrOffset != 0) {
+      LLT DstTy = MRI.getType(Dst);
       auto Offset =
-          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
-      StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+          MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
+      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
     }
     MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
     CurrOffset += CopyTy.getSizeInBytes();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir
index 4ffcbdfa7e7ec..e1cc1ab92a520 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir
@@ -13,6 +13,7 @@
   }
 
   declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #1
+  declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(2)* nocapture readonly, i64, i1 immarg) #1
 
   define void @test_memcpy2_const(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #0 {
   entry:
@@ -46,6 +47,15 @@
     ret void
   }
 
+  define void @test_memcpy_addrspace(i32 addrspace(1)* nocapture %dst, i32 addrspace(2)* nocapture readonly %src) local_unnamed_addr #0 {
+  entry:
+    %0 = bitcast i32 addrspace(1)* %dst to i8 addrspace(1)*
+    %1 = bitcast i32 addrspace(2)* %src to i8 addrspace(2)*
+    tail call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %0, i8 addrspace(2)* align 4 %1, i64 72, i1 false)
+    ret void
+  }
+
+
   attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cyclone" "target-features"="+aes,+crypto,+fp-armv8,+neon,+sha2,+zcm,+zcz" "unsafe-fp-math"="false" "use-soft-float"="false" }
   attributes #1 = { argmemonly nounwind }
   attributes #2 = { optsize }
@@ -267,3 +277,51 @@ body: |
     RET_ReallyLR
 
 ...
+---
+name:            test_memcpy_addrspace
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+machineFunctionInfo: {}
+body:             |
+  bb.1.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: test_memcpy_addrspace
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p2) = COPY $x1
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p2) :: (load (s128) from %ir.1, align 4, addrspace 2)
+    ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p1) :: (store (s128) into %ir.0, align 4, addrspace 1)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p2) :: (load (s128) from %ir.1 + 16, align 4, addrspace 2)
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p1) :: (store (s128) into %ir.0 + 16, align 4, addrspace 1)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p2) :: (load (s128) from %ir.1 + 32, align 4, addrspace 2)
+    ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p1) :: (store (s128) into %ir.0 + 32, align 4, addrspace 1)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+    ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p2) :: (load (s128) from %ir.1 + 48, align 4, addrspace 2)
+    ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p1) :: (store (s128) into %ir.0 + 48, align 4, addrspace 1)
+    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C3]](s64)
+    ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p2) :: (load (s64) from %ir.1 + 64, align 4, addrspace 2)
+    ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CHECK: G_STORE [[LOAD4]](s64), [[PTR_ADD7]](p1) :: (store (s64) into %ir.0 + 64, align 4, addrspace 1)
+    ; CHECK: RET_ReallyLR
+    %0:_(p1) = COPY $x0
+    %1:_(p2) = COPY $x1
+    %2:_(s64) = G_CONSTANT i64 72
+    G_MEMCPY %0(p1), %1(p2), %2(s64), 1 :: (store (s8) into %ir.0, align 4, addrspace 1), (load (s8) from %ir.1, align 4, addrspace 2)
+    RET_ReallyLR
+
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir
index 1f931221b2a2c..c7bf9c28926ca 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir
@@ -13,6 +13,7 @@
   }
 
   declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1 immarg) #1
+  declare void @llvm.memmove.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture readonly, i64, i1 immarg) #1
 
   define void @test_memmove2_const(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #0 {
   entry:
@@ -38,6 +39,14 @@
     ret void
   }
 
+  define void @test_memmove_addrspace(i32 addrspace(1)* nocapture %dst, i32 addrspace(2)* nocapture readonly %src) local_unnamed_addr #0 {
+  entry:
+    %0 = bitcast i32 addrspace(1)* %dst to i8 addrspace(1)*
+    %1 = bitcast i32 addrspace(2)* %src to i8 addrspace(2)*
+    tail call void @llvm.memmove.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %0, i8 addrspace(2)* align 4 %1, i64 8, i1 false)
+    ret void
+  }
+
   attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cyclone" "target-features"="+aes,+crypto,+fp-armv8,+neon,+sha2,+zcm,+zcz" "unsafe-fp-math"="false" "use-soft-float"="false" }
   attributes #1 = { argmemonly nounwind }
 
@@ -160,3 +169,38 @@ body: |
     RET_ReallyLR
 
 ...
+---
+name:            test_memmove_addrspace
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: test_memmove_addrspace
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p2) = COPY $x1
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p2) :: (load (s128) from %ir.1, align 4, addrspace 2)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p2) :: (load (s128) from %ir.1 + 16, align 4, addrspace 2)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p2) :: (load (s128) from %ir.1 + 32, align 4, addrspace 2)
+    ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p1) :: (store (s128) into %ir.0, align 4, addrspace 1)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD2]](p1) :: (store (s128) into %ir.0 + 16, align 4, addrspace 1)
+    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p1) :: (store (s128) into %ir.0 + 32, align 4, addrspace 1)
+    ; CHECK: RET_ReallyLR
+    %0:_(p1) = COPY $x0
+    %1:_(p2) = COPY $x1
+    %2:_(s64) = G_CONSTANT i64 48
+    G_MEMMOVE %0(p1), %1(p2), %2(s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4)
+    RET_ReallyLR
+
+
+...