[PATCH] D153862: Fix the ComplexDeinterleaving bug when handling mixed reductions.

Tue Jun 27 05:38:56 PDT 2023

igor.kirillov created this revision.
Herald added subscribers: mgabka, hiraditya.
Herald added a project: All.
igor.kirillov requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Add a missing check so that ComplexDeinterleaving only analyzes a reduction
when its Real and Imaginary instructions have the same type. Candidate pairs
with mismatched types are now skipped, and identifyNode asserts the invariant.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D153862

Files:
  llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
  llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll


Index: llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
===================================================================

--- llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
+++ llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
@@ -310,7 +310,56 @@
   ret %"class.std::complex" %.fca.0.1.insert
 }
 
+; An integer reduction and a floating-point complex reduction in the same loop:
+;   complex<double> *a = ...;
+;   int *s = ...;
+;
+;   for (int i = 0; i < N; ++i) {
+;     sum += a[i];
+;     int_sum += s[i];
+;   }
+;
+define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalias nocapture noundef readnone %c, [2 x double] %d.coerce, ptr nocapture noundef readonly %s, ptr nocapture noundef writeonly %outs) local_unnamed_addr #0 {
+entry:
+  %0 = tail call i64 @llvm.vscale.i64()
+  %1 = shl nuw nsw i64 %0, 1
+  %n.mod.vf = urem i64 100, %1
+  %n.vec = sub nuw nsw i64 100, %n.mod.vf
+  %2 = tail call i64 @llvm.vscale.i64()
+  %3 = shl nuw nsw i64 %2, 1
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %vec.phi = phi <vscale x 2 x i32> [ zeroinitializer, %entry ], [ %5, %vector.body ]
+  %vec.phi13 = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %9, %vector.body ]
+  %vec.phi14 = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %10, %vector.body ]
+  %4 = getelementptr inbounds i32, ptr %s, i64 %index
+  %wide.load = load <vscale x 2 x i32>, ptr %4, align 4
+  %5 = add <vscale x 2 x i32> %wide.load, %vec.phi
+  %6 = getelementptr inbounds %"class.std::complex", ptr %a, i64 %index
+  %wide.vec = load <vscale x 4 x double>, ptr %6, align 8
+  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.vec)
+  %7 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
+  %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
+  %9 = fadd fast <vscale x 2 x double> %7, %vec.phi13
+  %10 = fadd fast <vscale x 2 x double> %8, %vec.phi14
+  %index.next = add nuw i64 %index, %3
+  %11 = icmp eq i64 %index.next, %n.vec
+  br i1 %11, label %middle.block, label %vector.body
+
+middle.block:                                     ; preds = %vector.body
+  %12 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %10)
+  %13 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %9)
+  %14 = tail call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %5)
+  store i32 %14, ptr %outs, align 4
+  %.fca.0.0.insert = insertvalue %"class.std::complex" poison, double %12, 0, 0
+  %.fca.0.1.insert = insertvalue %"class.std::complex" %.fca.0.0.insert, double %13, 0, 1
+  ret %"class.std::complex" %.fca.0.1.insert
+}
+
 
 declare i64 @llvm.vscale.i64()
 declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
 declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)
+declare i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32>)
Index: llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
===================================================================
--- llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
+++ llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -882,6 +882,8 @@
 ComplexDeinterleavingGraph::NodePtr
 ComplexDeinterleavingGraph::identifyNode(Instruction *Real, Instruction *Imag) {
   LLVM_DEBUG(dbgs() << "identifyNode on " << *Real << " / " << *Imag << "\n");
+  assert(Real->getType() == Imag->getType() &&
+         "Real and imaginary parts should not have different types");
   if (NodePtr CN = getContainingComposite(Real, Imag)) {
     LLVM_DEBUG(dbgs() << " - Folding to existing node\n");
     return CN;
@@ -1463,6 +1465,8 @@
 
       auto *Real = OperationInstruction[i];
       auto *Imag = OperationInstruction[j];
+      if (Real->getType() != Imag->getType())
+        continue;
 
       RealPHI = ReductionInfo[Real].first;
       ImagPHI = ReductionInfo[Imag].first;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D153862.534932.patch
Type: text/x-patch
Size: 4278 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230627/56b64a2d/attachment.bin>