[llvm] cb0c034 - [PowerPC] Fix issue where vsrq is given incorrect shift vector

Wed Jan 6 03:56:39 PST 2021

Author: Stefan Pintilie
Date: 2021-01-06T05:56:09-06:00
New Revision: cb0c034edc98b32691ea25b70fc3cc2e9d6d2a86

URL: https://github.com/llvm/llvm-project/commit/cb0c034edc98b32691ea25b70fc3cc2e9d6d2a86
DIFF: https://github.com/llvm/llvm-project/commit/cb0c034edc98b32691ea25b70fc3cc2e9d6d2a86.diff

LOG: [PowerPC] Fix issue where vsrq is given incorrect shift vector

The new Power10 instruction vsrq was being given the wrong shift vector.
The original code assumed that the shift amount would be found in bits 121 to 127.
This is not correct. The shift amount is found in bits 57 to 63.
This can be fixed by swapping the first and second doublewords.

Reviewed By: nemanjai, #powerpc

Differential Revision: https://reviews.llvm.org/D94113

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCInstrPrefix.td
    llvm/test/CodeGen/PowerPC/p10-vector-shift.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index 2f29811b20d8..b6e9562dd0f6 100644

--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2521,18 +2521,6 @@ let Predicates = [IsISA3_1] in {
             (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>;
   def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
             (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
-  def : Pat<(v1i128 (shl v1i128:$VRA, v1i128:$VRB)),
-            (v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
-  def : Pat<(v1i128 (PPCshl v1i128:$VRA, v1i128:$VRB)),
-            (v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
-  def : Pat<(v1i128 (srl v1i128:$VRA, v1i128:$VRB)),
-            (v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
-  def : Pat<(v1i128 (PPCsrl v1i128:$VRA, v1i128:$VRB)),
-            (v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
-  def : Pat<(v1i128 (sra v1i128:$VRA, v1i128:$VRB)),
-            (v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
-  def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)),
-            (v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
 
   def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),
              (v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;
@@ -2570,6 +2558,35 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in {
             (STXVRDX $src, xoaddr:$dst)>;
  }
 
+// FIXME: The swap is overkill when the shift amount is a constant.
+// We should just fix the constant in the DAG.
+let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in {
+  def : Pat<(v1i128 (shl v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSLQ v1i128:$VRA,
+                     (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                               (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+  def : Pat<(v1i128 (PPCshl v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSLQ v1i128:$VRA,
+                     (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                               (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+  def : Pat<(v1i128 (srl v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSRQ v1i128:$VRA,
+                     (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                               (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+  def : Pat<(v1i128 (PPCsrl v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSRQ v1i128:$VRA,
+                     (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                               (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+  def : Pat<(v1i128 (sra v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSRAQ v1i128:$VRA,
+                     (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                               (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+  def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSRAQ v1i128:$VRA,
+                     (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                               (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+}
+
 class xxevalPattern <dag pattern, bits<8> imm> :
   Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {}
 

diff  --git a/llvm/test/CodeGen/PowerPC/p10-vector-shift.ll b/llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
index 5055c02786ac..f02f263c0b43 100644
--- a/llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
@@ -13,6 +13,7 @@
 define dso_local <1 x i128> @test_vec_vslq(<1 x i128> %a, <1 x i128> %b) {
 ; CHECK-LABEL: test_vec_vslq:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd v3, v3
 ; CHECK-NEXT:    vslq v2, v2, v3
 ; CHECK-NEXT:    blr
 entry:
@@ -24,6 +25,7 @@ entry:
 define dso_local <1 x i128> @test_vec_vsrq(<1 x i128> %a, <1 x i128> %b) {
 ; CHECK-LABEL: test_vec_vsrq:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd v3, v3
 ; CHECK-NEXT:    vsrq v2, v2, v3
 ; CHECK-NEXT:    blr
 entry:
@@ -35,6 +37,7 @@ entry:
 define dso_local <1 x i128> @test_vec_vsraq(<1 x i128> %a, <1 x i128> %b) {
 ; CHECK-LABEL: test_vec_vsraq:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd v3, v3
 ; CHECK-NEXT:    vsraq v2, v2, v3
 ; CHECK-NEXT:    blr
 entry:
@@ -46,6 +49,7 @@ entry:
 define dso_local <1 x i128> @test_vec_vslq2(<1 x i128> %a, <1 x i128> %b) {
 ; CHECK-LABEL: test_vec_vslq2:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd v3, v3
 ; CHECK-NEXT:    vslq v2, v2, v3
 ; CHECK-NEXT:    blr
 entry:
@@ -56,6 +60,7 @@ entry:
 define dso_local <1 x i128> @test_vec_vsrq2(<1 x i128> %a, <1 x i128> %b) {
 ; CHECK-LABEL: test_vec_vsrq2:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd v3, v3
 ; CHECK-NEXT:    vsrq v2, v2, v3
 ; CHECK-NEXT:    blr
 entry:
@@ -66,6 +71,7 @@ entry:
 define dso_local <1 x i128> @test_vec_vsraq2(<1 x i128> %a, <1 x i128> %b) {
 ; CHECK-LABEL: test_vec_vsraq2:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd v3, v3
 ; CHECK-NEXT:    vsraq v2, v2, v3
 ; CHECK-NEXT:    blr
 entry: