[clang] 63851a7 - [Matrix] Implement += and -= for MatrixType.

Mon Mar 8 01:32:25 PST 2021

Author: Saurabh Jha
Date: 2021-03-08T09:32:11Z
New Revision: 63851a701eac1918777535c8e1df2e97c7a01966

URL: https://github.com/llvm/llvm-project/commit/63851a701eac1918777535c8e1df2e97c7a01966
DIFF: https://github.com/llvm/llvm-project/commit/63851a701eac1918777535c8e1df2e97c7a01966.diff

LOG: [Matrix] Implement += and -= for MatrixType.

Make sure CompLHSTy is set correctly for += and -= and matrix type
operands.

Bugzilla ticket is here https://bugs.llvm.org/show_bug.cgi?id=46164

Patch by Saurabh Jha <saurabh.jhaa at gmail.com>

Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D98075

Added: 
    

Modified: 
    clang/lib/Sema/SemaExpr.cpp
    clang/test/CodeGen/matrix-type-operators.c
    clang/test/Sema/matrix-type-operators.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 5896a7c0bb7b..6a56743ee382 100644

--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -10528,7 +10528,11 @@ QualType Sema::CheckAdditionOperands(ExprResult &LHS, ExprResult &RHS,
 
   if (LHS.get()->getType()->isConstantMatrixType() ||
       RHS.get()->getType()->isConstantMatrixType()) {
-    return CheckMatrixElementwiseOperands(LHS, RHS, Loc, CompLHSTy);
+    QualType compType =
+        CheckMatrixElementwiseOperands(LHS, RHS, Loc, CompLHSTy);
+    if (CompLHSTy)
+      *CompLHSTy = compType;
+    return compType;
   }
 
   QualType compType = UsualArithmeticConversions(
@@ -10628,7 +10632,11 @@ QualType Sema::CheckSubtractionOperands(ExprResult &LHS, ExprResult &RHS,
 
   if (LHS.get()->getType()->isConstantMatrixType() ||
       RHS.get()->getType()->isConstantMatrixType()) {
-    return CheckMatrixElementwiseOperands(LHS, RHS, Loc, CompLHSTy);
+    QualType compType =
+        CheckMatrixElementwiseOperands(LHS, RHS, Loc, CompLHSTy);
+    if (CompLHSTy)
+      *CompLHSTy = compType;
+    return compType;
   }
 
   QualType compType = UsualArithmeticConversions(

diff  --git a/clang/test/CodeGen/matrix-type-operators.c b/clang/test/CodeGen/matrix-type-operators.c
index 8b234b1e761d..3af1662c534f 100644
--- a/clang/test/CodeGen/matrix-type-operators.c
+++ b/clang/test/CodeGen/matrix-type-operators.c
@@ -17,6 +17,26 @@ void add_matrix_matrix_double(dx5x5_t a, dx5x5_t b, dx5x5_t c) {
   a = b + c;
 }
 
+void add_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
+  // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_double(<25 x double> %a, <25 x double> %b)
+  // CHECK:       [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
+  // CHECK-NEXT:  [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
+  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[A]], [[B]]
+  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
+
+  a += b;
+}
+
+void subtract_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) {
+  // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_double(<25 x double> %a, <25 x double> %b)
+  // CHECK:       [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
+  // CHECK-NEXT:  [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
+  // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[A]], [[B]]
+  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
+
+  a -= b;
+}
+
 void add_matrix_matrix_float(fx2x3_t a, fx2x3_t b, fx2x3_t c) {
   // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_float(<6 x float> %a, <6 x float> %b, <6 x float> %c)
   // CHECK:       [[B:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
@@ -27,6 +47,26 @@ void add_matrix_matrix_float(fx2x3_t a, fx2x3_t b, fx2x3_t c) {
   a = b + c;
 }
 
+void add_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
+  // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_float(<6 x float> %a, <6 x float> %b)
+  // CHECK:       [[B:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
+  // CHECK-NEXT:  [[A:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
+  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[A]], [[B]]
+  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
+
+  a += b;
+}
+
+void subtract_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) {
+  // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_float(<6 x float> %a, <6 x float> %b)
+  // CHECK:       [[B:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
+  // CHECK-NEXT:  [[A:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
+  // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[A]], [[B]]
+  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
+
+  a -= b;
+}
+
 void add_matrix_scalar_double_float(dx5x5_t a, float vf) {
   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_float(<25 x double> %a, float %vf)
   // CHECK:       [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
@@ -40,6 +80,32 @@ void add_matrix_scalar_double_float(dx5x5_t a, float vf) {
   a = a + vf;
 }
 
+void add_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
+  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_float(<25 x double> %a, float %vf)
+  // CHECK:  [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
+  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
+  // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
+  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
+
+  a += vf;
+}
+
+void subtract_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
+  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_float(<25 x double> %a, float %vf)
+  // CHECK:  [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
+  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
+  // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
+  // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
+
+  a -= vf;
+}
+
 void add_matrix_scalar_double_double(dx5x5_t a, double vd) {
   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_double(<25 x double> %a, double %vd)
   // CHECK:       [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
@@ -52,6 +118,28 @@ void add_matrix_scalar_double_double(dx5x5_t a, double vd) {
   a = a + vd;
 }
 
+void add_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
+  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_double(<25 x double> %a, double %vd)
+  // CHECK:       [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
+  // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
+  // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
+  // store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
+  a += vd;
+}
+
+void subtract_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
+  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_double(<25 x double> %a, double %vd)
+  // CHECK:       [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
+  // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
+  // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
+  // store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
+  a -= vd;
+}
+
 void add_matrix_scalar_float_float(fx2x3_t b, float vf) {
   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_float(<6 x float> %b, float %vf)
   // CHECK:       [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
@@ -64,6 +152,28 @@ void add_matrix_scalar_float_float(fx2x3_t b, float vf) {
   b = b + vf;
 }
 
+void add_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
+  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_float(<6 x float> %b, float %vf)
+  // CHECK:       [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
+  // CHECK-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* %0, align 4
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
+  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
+  b += vf;
+}
+
+void subtract_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
+  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_float(<6 x float> %b, float %vf)
+  // CHECK:       [[SCALAR:%.*]] = load float, float* %vf.addr, align 4
+  // CHECK-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* %0, align 4
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
+  // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
+  b -= vf;
+}
+
 void add_matrix_scalar_float_double(fx2x3_t b, double vd) {
   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_double(<6 x float> %b, double %vd)
   // CHECK:       [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
@@ -77,6 +187,30 @@ void add_matrix_scalar_float_double(fx2x3_t b, double vd) {
   b = b + vd;
 }
 
+void add_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
+  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_double(<6 x float> %b, double %vd)
+  // CHECK:       [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
+  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
+  // CHECK-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
+  // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
+  b += vd;
+}
+
+void subtract_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
+  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_double(<6 x float> %b, double %vd)
+  // CHECK:       [[SCALAR:%.*]] = load double, double* %vd.addr, align 8
+  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
+  // CHECK-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
+  // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
+  b -= vd;
+}
+
 // Integer matrix/scalar additions
 
 void add_matrix_matrix_int(ix9x3_t a, ix9x3_t b, ix9x3_t c) {
@@ -88,6 +222,24 @@ void add_matrix_matrix_int(ix9x3_t a, ix9x3_t b, ix9x3_t c) {
   a = b + c;
 }
 
+void add_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
+  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_int(<27 x i32> %a, <27 x i32> %b)
+  // CHECK:       [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
+  // CHECK:       [[A:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
+  // CHECK:       [[RES:%.*]] = add <27 x i32> [[A]], [[B]]
+  // CHECK:       store <27 x i32> [[RES]], <27 x i32>* {{.*}}, align 4
+  a += b;
+}
+
+void subtract_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) {
+  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_int(<27 x i32> %a, <27 x i32> %b)
+  // CHECK:       [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
+  // CHECK:       [[A:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
+  // CHECK:       [[RES:%.*]] = sub <27 x i32> [[A]], [[B]]
+  // CHECK:       store <27 x i32> [[RES]], <27 x i32>* {{.*}}, align 4
+  a -= b;
+}
+
 void add_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b, ullx4x2_t c) {
   // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_unsigned_long_long(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c)
   // CHECK:       [[B:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
@@ -98,6 +250,26 @@ void add_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b, ullx4x2_t c)
   a = b + c;
 }
 
+void add_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
+  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_unsigned_long_long(<8 x i64> %a, <8 x i64> %b)
+  // CHECK:       [[B:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
+  // CHECK-NEXT:  [[A:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
+  // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[A]], [[B]]
+  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
+
+  a += b;
+}
+
+void subtract_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) {
+  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_unsigned_long_long(<8 x i64> %a, <8 x i64> %b)
+  // CHECK:       [[B:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
+  // CHECK-NEXT:  [[A:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
+  // CHECK-NEXT:  [[RES:%.*]] = sub <8 x i64> [[A]], [[B]]
+  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
+
+  a -= b;
+}
+
 void add_matrix_scalar_int_short(ix9x3_t a, short vs) {
   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_short(<27 x i32> %a, i16 signext %vs)
   // CHECK:        [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
@@ -111,6 +283,32 @@ void add_matrix_scalar_int_short(ix9x3_t a, short vs) {
   a = a + vs;
 }
 
+void add_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
+  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_short(<27 x i32> %a, i16 signext %vs)
+  // CHECK:       [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
+  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
+  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
+  // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
+
+  a += vs;
+}
+
+void subtract_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
+  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_short(<27 x i32> %a, i16 signext %vs)
+  // CHECK:       [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
+  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
+  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
+  // CHECK-NEXT:  [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
+
+  a -= vs;
+}
+
 void add_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_long_int(<27 x i32> %a, i64 %vli)
   // CHECK:        [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
@@ -124,6 +322,32 @@ void add_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
   a = a + vli;
 }
 
+void add_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
+  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_long_int(<27 x i32> %a, i64 %vli)
+  // CHECK:       [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
+  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 %1 to i32
+  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
+  // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
+
+  a += vli;
+}
+
+void subtract_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
+  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_long_int(<27 x i32> %a, i64 %vli)
+  // CHECK:       [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
+  // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 %1 to i32
+  // CHECK-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
+  // CHECK-NEXT:  [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
+
+  a -= vli;
+}
+
 void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_unsigned_long_long(<27 x i32> %a, i64 %vulli)
   // CHECK:        [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
@@ -137,6 +361,32 @@ void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int
   a = a + vulli;
 }
 
+void add_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
+  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> %a, i64 %vulli)
+  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
+  // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
+  // CHECK-NEXT:   [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4
+  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
+  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
+  // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
+
+  a += vulli;
+}
+
+void subtract_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) {
+  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> %a, i64 %vulli)
+  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
+  // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
+  // CHECK-NEXT:   [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4
+  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
+  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
+  // CHECK-NEXT:   [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
+
+  a -= vulli;
+}
+
 void add_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_short(<8 x i64> %b, i16 signext %vs)
   // CHECK:         [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
@@ -150,6 +400,32 @@ void add_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
   b = vs + b;
 }
 
+void add_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
+  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_short(<8 x i64> %b, i16 signext %vs)
+  // CHECK:       [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
+  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
+  // CHECK-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
+  // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
+
+  b += vs;
+}
+
+void subtract_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
+  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_short(<8 x i64> %b, i16 signext %vs)
+  // CHECK:       [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2
+  // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
+  // CHECK-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
+  // CHECK-NEXT:  [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
+
+  b -= vs;
+}
+
 void add_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_int(<8 x i64> %b, i64 %vli)
   // CHECK:         [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
@@ -162,6 +438,30 @@ void add_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
   b = vli + b;
 }
 
+void add_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
+  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_int(<8 x i64> %b, i64 %vli)
+  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
+  // CHECK-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
+  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
+  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
+  // CHECK-NEXT:   [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
+
+  b += vli;
+}
+
+void subtract_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
+  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_int(<8 x i64> %b, i64 %vli)
+  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8
+  // CHECK-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8
+  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
+  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
+  // CHECK-NEXT:   [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
+
+  b -= vli;
+}
+
 void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
   // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_unsigned_long_long
   // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
@@ -173,6 +473,30 @@ void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned lo
   b = vulli + b;
 }
 
+void add_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
+  // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_unsigned_long_long
+  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
+  // CHECK-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
+  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
+  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
+  // CHECK-NEXT:   [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
+
+  b += vulli;
+}
+
+void subtract_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) {
+  // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_unsigned_long_long
+  // CHECK:        [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8
+  // CHECK-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8
+  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
+  // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
+  // CHECK-NEXT:   [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
+  // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
+
+  b -= vulli;
+}
+
 // Tests for matrix multiplication.
 
 void multiply_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
@@ -189,6 +513,16 @@ void multiply_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
   a = b * c;
 }
 
+void multiply_compound_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
+  // CHECK-LABEL: @multiply_compound_matrix_matrix_double(
+  // CHECK:        [[C:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
+  // CHECK-NEXT:   [[B:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
+  // CHECK-NEXT:   [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
+  // CHECK-NEXT:   store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
+  // CHECK-NEXT:   ret void
+  b *= c;
+}
+
 typedef int ix3x9_t __attribute__((matrix_type(3, 9)));
 typedef int ix9x9_t __attribute__((matrix_type(9, 9)));
 // CHECK-LABEL: @multiply_matrix_matrix_int(
@@ -218,6 +552,20 @@ void multiply_double_matrix_scalar_float(dx5x5_t a, float s) {
   a = a * s;
 }
 
+// CHECK-LABEL: @multiply_compound_double_matrix_scalar_float
+// CHECK:         [[S:%.*]] = load float, float* %s.addr, align 4
+// CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
+// CHECK-NEXT:    [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
+// CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
+// CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
+// CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
+// CHECK-NEXT:    ret void
+//
+void multiply_compound_double_matrix_scalar_float(dx5x5_t a, float s) {
+  a *= s;
+}
+
 // CHECK-LABEL: @multiply_double_matrix_scalar_double(
 // CHECK:         [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
 // CHECK-NEXT:    [[S:%.*]] = load double, double* %s.addr, align 8
@@ -231,6 +579,18 @@ void multiply_double_matrix_scalar_double(dx5x5_t a, double s) {
   a = a * s;
 }
 
+// CHECK-LABEL: @multiply_compound_double_matrix_scalar_double(
+// CHECK:         [[S:%.*]] = load double, double* %s.addr, align 8
+// CHECK-NEXT:    [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
+// CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
+// CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
+// CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
+// CHECK-NEXT:    ret void
+void multiply_compound_double_matrix_scalar_double(dx5x5_t a, double s) {
+  a *= s;
+}
+
 // CHECK-LABEL: @multiply_float_matrix_scalar_double(
 // CHECK:         [[S:%.*]] = load double, double* %s.addr, align 8
 // CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
@@ -245,6 +605,19 @@ void multiply_float_matrix_scalar_double(fx2x3_t b, double s) {
   b = s * b;
 }
 
+// CHECK-LABEL: @multiply_compound_float_matrix_scalar_double(
+// CHECK:         [[S:%.*]] = load double, double* %s.addr, align 8
+// CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
+// CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
+// CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
+// CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], [[VECSPLAT]]
+// store <6 x float> %3, <6 x float>* %0, align 4
+// ret void
+void multiply_compound_float_matrix_scalar_double(fx2x3_t b, double s) {
+  b *= s;
+}
+
 // CHECK-LABEL: @multiply_int_matrix_scalar_short(
 // CHECK:         [[S:%.*]] = load i16, i16* %s.addr, align 2
 // CHECK-NEXT:    [[S_EXT:%.*]] = sext i16 [[S]] to i32
@@ -259,6 +632,20 @@ void multiply_int_matrix_scalar_short(ix9x3_t b, short s) {
   b = s * b;
 }
 
+// CHECK-LABEL: @multiply_compound_int_matrix_scalar_short(
+// CHECK:        [[S:%.*]] = load i16, i16* %s.addr, align 2
+// CHECK-NEXT:   [[S_EXT:%.*]] = sext i16 [[S]] to i32
+// CHECK-NEXT:   [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
+// CHECK-NEXT:   [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
+// CHECK-NEXT:   [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
+// CHECK-NEXT:   [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
+// CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
+// CHECK-NEXT:   ret void
+//
+void multiply_compound_int_matrix_scalar_short(ix9x3_t b, short s) {
+  b *= s;
+}
+
 // CHECK-LABEL: @multiply_int_matrix_scalar_ull(
 // CHECK:         [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
 // CHECK-NEXT:    [[S:%.*]] = load i64, i64* %s.addr, align 8
@@ -273,6 +660,20 @@ void multiply_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
   b = b * s;
 }
 
+void multiply_compound_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
+  // CHECK-LABEL: @multiply_compound_int_matrix_scalar_ull(
+  // CHECK:         [[S:%.*]] = load i64, i64* %s.addr, align 8
+  // CHECK-NEXT:    [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
+  // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
+  // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
+  // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
+  // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
+  // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
+  // CHECK-NEXT:    ret void
+
+  b *= s;
+}
+
 // CHECK-LABEL: @multiply_float_matrix_constant(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [6 x float], align 4
@@ -287,6 +688,19 @@ void multiply_float_matrix_constant(fx2x3_t a) {
   a = a * 2.5;
 }
 
+// CHECK-LABEL: @multiply_compound_float_matrix_constant(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [6 x float], align 4
+// CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [6 x float]* [[A_ADDR]] to <6 x float>*
+// CHECK-NEXT:    store <6 x float> [[A:%.*]], <6 x float>* [[MAT_ADDR]], align 4
+// CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR]], align 4
+// CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
+// CHECK-NEXT:    store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
+// CHECK-NEXT:    ret void
+void multiply_compound_float_matrix_constant(fx2x3_t a) {
+  a *= 2.5;
+}
+
 // CHECK-LABEL: @multiply_int_matrix_constant(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [27 x i32], align 4
@@ -301,6 +715,20 @@ void multiply_int_matrix_constant(ix9x3_t a) {
   a = 5 * a;
 }
 
+// CHECK-LABEL: @multiply_compound_int_matrix_constant(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [27 x i32], align 4
+// CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [27 x i32]* [[A_ADDR]] to <27 x i32>*
+// CHECK-NEXT:    store <27 x i32> [[A:%.*]], <27 x i32>* [[MAT_ADDR]], align 4
+// CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR]], align 4
+// CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+// CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
+// CHECK-NEXT:    ret void
+//
+void multiply_compound_int_matrix_constant(ix9x3_t a) {
+  a *= 5;
+}
+
 // Tests for the matrix type operators.
 
 typedef double dx5x5_t __attribute__((matrix_type(5, 5)));

diff  --git a/clang/test/Sema/matrix-type-operators.c b/clang/test/Sema/matrix-type-operators.c
index 397fc3513557..fc857854af35 100644
--- a/clang/test/Sema/matrix-type-operators.c
+++ b/clang/test/Sema/matrix-type-operators.c
@@ -8,6 +8,9 @@ void add(sx10x10_t a, sx5x10_t b, sx10x5_t c) {
   a = b + c;
   // expected-error at -1 {{invalid operands to binary expression ('sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') and 'sx10x5_t' (aka 'float __attribute__((matrix_type(10, 5)))'))}}
 
+  b += c;
+  // expected-error at -1 {{invalid operands to binary expression ('sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') and 'sx10x5_t' (aka 'float __attribute__((matrix_type(10, 5)))'))}}
+
   a = b + b; // expected-error {{assigning to 'sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))') from incompatible type 'sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))')}}
 
   a = 10 + b;
@@ -16,12 +19,19 @@ void add(sx10x10_t a, sx5x10_t b, sx10x5_t c) {
   a = b + &c;
   // expected-error at -1 {{invalid operands to binary expression ('sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') and 'sx10x5_t *' (aka 'float  __attribute__((matrix_type(10, 5)))*'))}}
   // expected-error at -2 {{casting 'sx10x5_t *' (aka 'float  __attribute__((matrix_type(10, 5)))*') to incompatible type 'float'}}
+
+  b += &c;
+  // expected-error at -1 {{invalid operands to binary expression ('sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') and 'sx10x5_t *' (aka 'float  __attribute__((matrix_type(10, 5)))*'))}}
+  // expected-error at -2 {{casting 'sx10x5_t *' (aka 'float  __attribute__((matrix_type(10, 5)))*') to incompatible type 'float'}}
 }
 
 void sub(sx10x10_t a, sx5x10_t b, sx10x5_t c) {
   a = b - c;
   // expected-error at -1 {{invalid operands to binary expression ('sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') and 'sx10x5_t' (aka 'float __attribute__((matrix_type(10, 5)))'))}}
 
+  b -= c;
+  // expected-error at -1 {{invalid operands to binary expression ('sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') and 'sx10x5_t' (aka 'float __attribute__((matrix_type(10, 5)))'))}}
+
   a = b - b; // expected-error {{assigning to 'sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))') from incompatible type 'sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))')}}
 
   a = 10 - b;
@@ -30,6 +40,10 @@ void sub(sx10x10_t a, sx5x10_t b, sx10x5_t c) {
   a = b - &c;
   // expected-error at -1 {{invalid operands to binary expression ('sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') and 'sx10x5_t *' (aka 'float  __attribute__((matrix_type(10, 5)))*'))}}
   // expected-error at -2 {{casting 'sx10x5_t *' (aka 'float  __attribute__((matrix_type(10, 5)))*') to incompatible type 'float'}}
+
+  b -= &c;
+  // expected-error at -1 {{invalid operands to binary expression ('sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') and 'sx10x5_t *' (aka 'float  __attribute__((matrix_type(10, 5)))*'))}}
+  // expected-error at -2 {{casting 'sx10x5_t *' (aka 'float  __attribute__((matrix_type(10, 5)))*') to incompatible type 'float'}}
 }
 
 typedef int ix10x5_t __attribute__((matrix_type(10, 5)));
@@ -39,12 +53,16 @@ void matrix_matrix_multiply(sx10x10_t a, sx5x10_t b, ix10x5_t c, ix10x10_t d, fl
   // Check dimension mismatches.
   a = a * b;
   // expected-error at -1 {{invalid operands to binary expression ('sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))') and 'sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))'))}}
+  a *= b;
+  // expected-error at -1 {{invalid operands to binary expression ('sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))') and 'sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))'))}}
   b = a * a;
   // expected-error at -1 {{assigning to 'sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') from incompatible type 'float __attribute__((matrix_type(10, 10)))'}}
 
   // Check element type mismatches.
   a = b * c;
   // expected-error at -1 {{invalid operands to binary expression ('sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') and 'ix10x5_t' (aka 'int __attribute__((matrix_type(10, 5)))'))}}
+  b *= c;
+  // expected-error at -1 {{invalid operands to binary expression ('sx5x10_t' (aka 'float __attribute__((matrix_type(5, 10)))') and 'ix10x5_t' (aka 'int __attribute__((matrix_type(10, 5)))'))}}
   d = a * a;
   // expected-error at -1 {{assigning to 'ix10x10_t' (aka 'int __attribute__((matrix_type(10, 10)))') from incompatible type 'float __attribute__((matrix_type(10, 10)))'}}
 
@@ -62,9 +80,15 @@ void mat_scalar_multiply(sx10x10_t a, sx5x10_t b, float sf, char *p) {
   a = a * p;
   // expected-error at -1 {{casting 'char *' to incompatible type 'float'}}
   // expected-error at -2 {{invalid operands to binary expression ('sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))') and 'char *')}}
+  a *= p;
+  // expected-error at -1 {{casting 'char *' to incompatible type 'float'}}
+  // expected-error at -2 {{invalid operands to binary expression ('sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))') and 'char *')}}
   a = p * a;
   // expected-error at -1 {{casting 'char *' to incompatible type 'float'}}
   // expected-error at -2 {{invalid operands to binary expression ('char *' and 'sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))'))}}
+  p *= a;
+  // expected-error at -1 {{casting 'char *' to incompatible type 'float'}}
+  // expected-error at -2 {{invalid operands to binary expression ('char *' and 'sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))'))}}
 
   sf = a * sf;
   // expected-error at -1 {{assigning to 'float' from incompatible type 'sx10x10_t' (aka 'float __attribute__((matrix_type(10, 10)))')}}