[llvm] ce5b132 - [Matrix] Fix miscompile for NT matmul if the transpose has other use

Thu Jul 22 10:47:10 PDT 2021

Author: Adam Nemet
Date: 2021-07-22T10:45:56-07:00
New Revision: ce5b1320a7055befa118b86fb7d132721ab68c7e

URL: https://github.com/llvm/llvm-project/commit/ce5b1320a7055befa118b86fb7d132721ab68c7e
DIFF: https://github.com/llvm/llvm-project/commit/ce5b1320a7055befa118b86fb7d132721ab68c7e.diff

LOG: [Matrix] Fix miscompile for NT matmul if the transpose has other use

We should only add the fake lowering entry for the matrix remark if the
transpose is not lowered on its own.  `MapVector::insert` is used to insert
the entry during proper lowering which does not overwrite the fake entry in
the map.

We actually had test coverage for this but the reference output code was
wrong; it was storing undef rather than the transposed column.

Also add an assert that would have caught this.

Differential Revision: https://reviews.llvm.org/D106457

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-right-transpose.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index dde918bd0d02d..8a5bc44a5012f 100644

--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -1140,7 +1140,8 @@ class LowerMatrixIntrinsics {
   /// deletion.
   void finalizeLowering(Instruction *Inst, MatrixTy Matrix,
                         IRBuilder<> &Builder) {
-    Inst2ColumnMatrix.insert(std::make_pair(Inst, Matrix));
+    auto inserted = Inst2ColumnMatrix.insert(std::make_pair(Inst, Matrix));
+    assert(inserted.second && "multiple matrix lowering mapping");
 
     ToRemove.push_back(Inst);
     Value *Flattened = nullptr;
@@ -1540,11 +1541,11 @@ class LowerMatrixIntrinsics {
       if (Transpose->hasOneUse()) {
         FusedInsts.insert(cast<Instruction>(Transpose));
         ToRemove.push_back(cast<Instruction>(Transpose));
+        // TODO: add a fake entry for the folded instruction so that this is
+        // included in the expression in the remark.
+        Inst2ColumnMatrix[Transpose] = MatrixTy(M, C, EltType);
       }
       finalizeLowering(MatMul, Result, Builder);
-      // TODO: add a fake entry for the folded instruction so that this is
-      // included in the expression in the remark.
-      Inst2ColumnMatrix[Transpose] = MatrixTy(M, C, EltType);
       return;
     }
 

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-right-transpose.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-right-transpose.ll
index 2c19dd0aac17a..32d7c2abbe25d 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-right-transpose.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-right-transpose.ll
@@ -91,10 +91,10 @@ define <4 x double> @multiply_right_transpose_2x3x2(<6 x double> %a, <6 x double
 ; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <3 x double> [[TMP9]], double [[TMP10]], i64 2
 ; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <6 x double>* [[P:%.*]] to double*
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast double* [[TMP12]] to <3 x double>*
-; CHECK-NEXT:    store <3 x double> undef, <3 x double>* [[VEC_CAST]], align 16
+; CHECK-NEXT:    store <3 x double> [[TMP5]], <3 x double>* [[VEC_CAST]], align 16
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP12]], i64 3
 ; CHECK-NEXT:    [[VEC_CAST42:%.*]] = bitcast double* [[VEC_GEP]] to <3 x double>*
-; CHECK-NEXT:    store <3 x double> undef, <3 x double>* [[VEC_CAST42]], align 8
+; CHECK-NEXT:    store <3 x double> [[TMP11]], <3 x double>* [[VEC_CAST42]], align 8
 ; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x double> [[A:%.*]], <6 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x double> [[A]], <6 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <6 x double> [[A]], <6 x double> poison, <2 x i32> <i32 4, i32 5>