[PATCH] D129734: [InstCombine] Canonicalize GEP of GEP by swapping constant-indexed GEP to the front

Wed Aug 31 18:22:09 PDT 2022

huangjd added a comment.
Herald added a subscriber: pcwang-thead.

Consider the following practical example:

  struct Vec {
      float  x, y, z;
  };

  float f1(Vec vecs[], size_t n) {
      float sum = 0;
      for (size_t i = 0; i < n; i++) {
          float g = 0;
          g += vecs[i].x * vecs[i].x;
          g += vecs[i].y * vecs[i].y;
          g += vecs[i].z * vecs[i].z;
          sum += sqrtf(g);
      }
      return sum;
  }

With optimizations enabled and without my patch, LLVM generates the following IR:

  define dso_local noundef float @_Z2f1P3Vecm(ptr nocapture noundef readonly %0, i64 noundef %1) local_unnamed_addr #0 !dbg !361 {
    %3 = icmp eq i64 %1, 0, !dbg !385
    br i1 %3, label %4, label %6, !dbg !386

  4: ; preds = %6, %2
    %5 = phi float [ 0.000000e+00, %2 ], [ %19, %6 ], !dbg !383
    ret float %5, !dbg !387

  6: ; preds = %2, %6
    %7 = phi float [ %19, %6 ], [ 0.000000e+00, %2 ]
    %8 = phi i64 [ %20, %6 ], [ 0, %2 ]
    %9 = getelementptr inbounds %struct.Vec, ptr %0, i64 %8, !dbg !389
    %10 = load float, ptr %9, align 4, !dbg !390, !tbaa !391
    %11 = tail call float @llvm.fmuladd.f32(float %10, float %10, float 0.000000e+00), !dbg !396
    %12 = getelementptr inbounds %struct.Vec, ptr %0, i64 %8, i32 1, !dbg !397
    %13 = load float, ptr %12, align 4, !dbg !397, !tbaa !398
    %14 = tail call float @llvm.fmuladd.f32(float %13, float %13, float %11), !dbg !399
    %15 = getelementptr inbounds %struct.Vec, ptr %0, i64 %8, i32 2, !dbg !400
    %16 = load float, ptr %15, align 4, !dbg !400, !tbaa !401
    %17 = tail call float @llvm.fmuladd.f32(float %16, float %16, float %14), !dbg !402
    %18 = tail call float @llvm.sqrt.f32(float %17), !dbg !403
    %19 = fadd float %7, %18, !dbg !404
    %20 = add nuw i64 %8, 1, !dbg !405
    %21 = icmp eq i64 %20, %1, !dbg !385
    br i1 %21, label %4, label %6, !dbg !386, !llvm.loop !406
  }

GEPs of GEPs are merged in a very early pass, before common subexpression elimination, and I actually couldn't write any C++ code that would make LLVM generate a GEP of a GEP where the second one has a constant index.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D129734/new/

https://reviews.llvm.org/D129734