[PATCH] D127213: [ARM][ParallelDSP] Fix self reference bug
Sam Parker via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 9 02:11:33 PDT 2022
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG447c411fef6e: [ARM][ParallelDSP] Fix self reference bug (authored by samparker).
Changed prior to commit:
https://reviews.llvm.org/D127213?vs=434798&id=435455#toc
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D127213/new/
https://reviews.llvm.org/D127213
Files:
llvm/lib/Target/ARM/ARMParallelDSP.cpp
llvm/test/CodeGen/ARM/ParallelDSP/self-ref-bug.ll
Index: llvm/test/CodeGen/ARM/ParallelDSP/self-ref-bug.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/ParallelDSP/self-ref-bug.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=thumbv7-unknown-linux-gnueabihf < %s -arm-parallel-dsp -verify -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define i32 @test(ptr %b) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 2
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
+; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
+; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 3
+; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX1]], align 2
+; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP8]], i32 [[TMP1]], i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
+; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
+; CHECK-NEXT: [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
+; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP7]] to i32
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], [[TMP3]]
+; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 1
+; CHECK-NEXT: [[TMP15:%.*]] = load i16, ptr [[INCDEC_PTR]], align 2
+; CHECK-NEXT: [[CONV4:%.*]] = sext i16 [[TMP15]] to i32
+; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 4
+; CHECK-NEXT: [[TMP16:%.*]] = load i16, ptr [[ARRAYIDX5]], align 2
+; CHECK-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32
+; CHECK-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP14]], [[TMP6]]
+; CHECK-NEXT: [[ADD9:%.*]] = add nsw i32 [[MUL7]], [[MUL]]
+; CHECK-NEXT: [[MUL13:%.*]] = mul nsw i32 0, 0
+; CHECK-NEXT: [[ADD15:%.*]] = add nsw i32 [[MUL13]], [[TMP10]]
+; CHECK-NEXT: ret i32 [[ADD15]]
+;
+entry:
+ %0 = load i16, ptr %b, align 2
+ %conv = sext i16 %0 to i32
+ %arrayidx1 = getelementptr inbounds i16, ptr %b, i32 3
+ %1 = load i16, ptr %arrayidx1, align 2
+ %conv2 = sext i16 %1 to i32
+ %mul = mul nsw i32 %conv2, %conv
+ %incdec.ptr = getelementptr inbounds i16, ptr %b, i32 1
+ %2 = load i16, ptr %incdec.ptr, align 2
+ %conv4 = sext i16 %2 to i32
+ %arrayidx5 = getelementptr inbounds i16, ptr %b, i32 4
+ %3 = load i16, ptr %arrayidx5, align 2
+ %conv6 = sext i16 %3 to i32
+ %mul7 = mul nsw i32 %conv6, %conv4
+ %add9 = add nsw i32 %mul7, %mul
+ %mul13 = mul nsw i32 0, 0
+ %add15 = add nsw i32 %mul13, %add9
+ ret i32 %add15
+}
Index: llvm/lib/Target/ARM/ARMParallelDSP.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMParallelDSP.cpp
+++ llvm/lib/Target/ARM/ARMParallelDSP.cpp
@@ -459,6 +459,10 @@
if (ValidLHS && ValidRHS)
return true;
+ // Ensure we don't add the root as the incoming accumulator.
+ if (R.getRoot() == I)
+ return false;
+
return R.InsertAcc(I);
}
case Instruction::Mul: {
@@ -535,6 +539,7 @@
InsertParallelMACs(R);
Changed = true;
AllAdds.insert(R.getAdds().begin(), R.getAdds().end());
+ LLVM_DEBUG(dbgs() << "BB after inserting parallel MACs:\n" << BB);
}
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D127213.435455.patch
Type: text/x-patch
Size: 3804 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220609/15f0bb3e/attachment.bin>
More information about the llvm-commits
mailing list