[PATCH] D66575: Better way to fix misaligned mov instruction

Thu Aug 22 01:12:29 PDT 2019

cjld created this revision.
cjld added reviewers: lebedev.ri, spatel, jdoerfert.
cjld added a project: LLVM.
Herald added subscribers: llvm-commits, hiraditya.

Better way to fix misaligned mov instructions.

The alignment-from-assumptions pass doesn't generate aligned mov instructions; an example is shown below:

  // b.cc
  #include <cstddef>
  #include <stdint.h>
  
  typedef long long index;
  
  extern "C" index g_tid;
  extern "C" index g_num;
  
  
  void add3(float* __restrict__ a, float* __restrict__ b, float* __restrict__ c) {
      index n = 64*1024;
      index m = 16*1024;
      index k = 4*1024;
      index tid = g_tid;
      index num = g_num;
      __builtin_assume_aligned(a, 32);
      __builtin_assume_aligned(b, 32);
      __builtin_assume_aligned(c, 32);
      for (index i0=tid*k; i0<m; i0+=num*k)
          for (index i1=0; i1<n*m; i1+=m)
              for (index i2=0; i2<k; i2++)
                  c[i1+i0+i2] = b[i0+i2] + a[i1+i0+i2];
  }

Compile with `clang ./b.cc -Ofast -march=native -std=c++14 -S -o b.s` (on an Intel i7-7500U),
which yields:

  // b.s
  ......
  	vmovaps	-224(%rdi,%rbx,4), %ymm0
  	vmovups	-192(%rdi,%rbx,4), %ymm1
  	vmovups	-160(%rdi,%rbx,4), %ymm2
  	vmovups	-128(%rdi,%rbx,4), %ymm3
  	vaddps	-224(%rsi,%rbx,4), %ymm0, %ymm0
  	vaddps	-192(%rsi,%rbx,4), %ymm1, %ymm1
  	vaddps	-160(%rsi,%rbx,4), %ymm2, %ymm2
  	vaddps	-128(%rsi,%rbx,4), %ymm3, %ymm3
  	vmovaps	%ymm0, -224(%rdx,%rbx,4)
  	vmovups	%ymm1, -192(%rdx,%rbx,4)
  	vmovups	%ymm2, -160(%rdx,%rbx,4)
  	vmovups	%ymm3, -128(%rdx,%rbx,4)
  ......

Expected output:

  // b.s
  ......
  	vmovaps	-224(%rdi,%rbx,4), %ymm0
  	vmovaps	-192(%rdi,%rbx,4), %ymm1
  	vmovaps	-160(%rdi,%rbx,4), %ymm2
  	vmovaps	-128(%rdi,%rbx,4), %ymm3
  	vaddps	-224(%rsi,%rbx,4), %ymm0, %ymm0
  	vaddps	-192(%rsi,%rbx,4), %ymm1, %ymm1
  	vaddps	-160(%rsi,%rbx,4), %ymm2, %ymm2
  	vaddps	-128(%rsi,%rbx,4), %ymm3, %ymm3
  	vmovaps	%ymm0, -224(%rdx,%rbx,4)
  	vmovaps	%ymm1, -192(%rdx,%rbx,4)
  	vmovaps	%ymm2, -160(%rdx,%rbx,4)
  	vmovaps	%ymm3, -128(%rdx,%rbx,4)
  ......

This is because the alignment-from-assumptions pass is using the wrong function to calculate the alignment.


Repository:
  rL LLVM

https://reviews.llvm.org/D66575

Files:
  llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp


Index: llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
===================================================================

--- llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -93,9 +93,7 @@
                                     const SCEV *AlignSCEV,
                                     ScalarEvolution *SE) {
   // DiffUnits = Diff % int64_t(Alignment)
-  const SCEV *DiffAlignDiv = SE->getUDivExpr(DiffSCEV, AlignSCEV);
-  const SCEV *DiffAlign = SE->getMulExpr(DiffAlignDiv, AlignSCEV);
-  const SCEV *DiffUnitsSCEV = SE->getMinusSCEV(DiffAlign, DiffSCEV);
+  const SCEV *DiffUnitsSCEV = SE->getURemExpr(DiffSCEV, AlignSCEV);
 
   LLVM_DEBUG(dbgs() << "\talignment relative to " << *AlignSCEV << " is "
                     << *DiffUnitsSCEV << " (diff: " << *DiffSCEV << ")\n");


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D66575.216556.patch
Type: text/x-patch
Size: 862 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190822/5e76d542/attachment.bin>