[llvm] r314251 - [AArch64][Falkor] Fix correctness bug in falkor prefetcher fix pass and correct some opcode tag computations.

Geoff Berry via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 26 14:40:41 PDT 2017


Author: gberry
Date: Tue Sep 26 14:40:41 2017
New Revision: 314251

URL: http://llvm.org/viewvc/llvm-project?rev=314251&view=rev
Log:
[AArch64][Falkor] Fix correctness bug in falkor prefetcher fix pass and correct some opcode tag computations.

Summary:
This addresses a correctness bug for LD[1234]*_POST opcodes that have
the prefetcher fix applied to them: after the increment, the base
register was not being written back from the temporary register, so it
appeared never to be incremented.

Also, fix some opcode tag computations based on updated HW details
to get better tag avoidance and thus better prefetcher performance.

Reviewers: mcrosier

Subscribers: aemerson, rengolin, javed.absar, kristof.beyls

Differential Revision: https://reviews.llvm.org/D38256

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
    llvm/trunk/test/CodeGen/AArch64/falkor-hwpf-fix.mir

Modified: llvm/trunk/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp?rev=314251&r1=314250&r2=314251&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp Tue Sep 26 14:40:41 2017
@@ -240,27 +240,27 @@ static Optional<LoadInfo> getLoadInfo(co
   default:
     return None;
 
+  case AArch64::LD1i64:
+  case AArch64::LD2i64:
+    DestRegIdx = 0;
+    BaseRegIdx = 3;
+    OffsetIdx = -1;
+    IsPrePost = false;
+    break;
+
   case AArch64::LD1i8:
   case AArch64::LD1i16:
   case AArch64::LD1i32:
-  case AArch64::LD1i64:
   case AArch64::LD2i8:
   case AArch64::LD2i16:
   case AArch64::LD2i32:
-  case AArch64::LD2i64:
   case AArch64::LD3i8:
   case AArch64::LD3i16:
   case AArch64::LD3i32:
+  case AArch64::LD3i64:
   case AArch64::LD4i8:
   case AArch64::LD4i16:
   case AArch64::LD4i32:
-    DestRegIdx = 0;
-    BaseRegIdx = 3;
-    OffsetIdx = -1;
-    IsPrePost = false;
-    break;
-
-  case AArch64::LD3i64:
   case AArch64::LD4i64:
     DestRegIdx = -1;
     BaseRegIdx = 3;
@@ -284,23 +284,16 @@ static Optional<LoadInfo> getLoadInfo(co
   case AArch64::LD1Rv4s:
   case AArch64::LD1Rv8h:
   case AArch64::LD1Rv16b:
-  case AArch64::LD1Twov1d:
-  case AArch64::LD1Twov2s:
-  case AArch64::LD1Twov4h:
-  case AArch64::LD1Twov8b:
-  case AArch64::LD2Twov2s:
-  case AArch64::LD2Twov4s:
-  case AArch64::LD2Twov8b:
-  case AArch64::LD2Rv1d:
-  case AArch64::LD2Rv2s:
-  case AArch64::LD2Rv4s:
-  case AArch64::LD2Rv8b:
     DestRegIdx = 0;
     BaseRegIdx = 1;
     OffsetIdx = -1;
     IsPrePost = false;
     break;
 
+  case AArch64::LD1Twov1d:
+  case AArch64::LD1Twov2s:
+  case AArch64::LD1Twov4h:
+  case AArch64::LD1Twov8b:
   case AArch64::LD1Twov2d:
   case AArch64::LD1Twov4s:
   case AArch64::LD1Twov8h:
@@ -321,10 +314,17 @@ static Optional<LoadInfo> getLoadInfo(co
   case AArch64::LD1Fourv4s:
   case AArch64::LD1Fourv8h:
   case AArch64::LD1Fourv16b:
+  case AArch64::LD2Twov2s:
+  case AArch64::LD2Twov4s:
+  case AArch64::LD2Twov8b:
   case AArch64::LD2Twov2d:
   case AArch64::LD2Twov4h:
   case AArch64::LD2Twov8h:
   case AArch64::LD2Twov16b:
+  case AArch64::LD2Rv1d:
+  case AArch64::LD2Rv2s:
+  case AArch64::LD2Rv4s:
+  case AArch64::LD2Rv8b:
   case AArch64::LD2Rv2d:
   case AArch64::LD2Rv4h:
   case AArch64::LD2Rv8h:
@@ -365,32 +365,32 @@ static Optional<LoadInfo> getLoadInfo(co
     IsPrePost = false;
     break;
 
+  case AArch64::LD1i64_POST:
+  case AArch64::LD2i64_POST:
+    DestRegIdx = 1;
+    BaseRegIdx = 4;
+    OffsetIdx = 5;
+    IsPrePost = true;
+    break;
+
   case AArch64::LD1i8_POST:
   case AArch64::LD1i16_POST:
   case AArch64::LD1i32_POST:
-  case AArch64::LD1i64_POST:
   case AArch64::LD2i8_POST:
   case AArch64::LD2i16_POST:
   case AArch64::LD2i32_POST:
-  case AArch64::LD2i64_POST:
   case AArch64::LD3i8_POST:
   case AArch64::LD3i16_POST:
   case AArch64::LD3i32_POST:
+  case AArch64::LD3i64_POST:
   case AArch64::LD4i8_POST:
   case AArch64::LD4i16_POST:
   case AArch64::LD4i32_POST:
-    DestRegIdx = 1;
-    BaseRegIdx = 4;
-    OffsetIdx = 5;
-    IsPrePost = false;
-    break;
-
-  case AArch64::LD3i64_POST:
   case AArch64::LD4i64_POST:
     DestRegIdx = -1;
     BaseRegIdx = 4;
     OffsetIdx = 5;
-    IsPrePost = false;
+    IsPrePost = true;
     break;
 
   case AArch64::LD1Onev1d_POST:
@@ -409,23 +409,16 @@ static Optional<LoadInfo> getLoadInfo(co
   case AArch64::LD1Rv4s_POST:
   case AArch64::LD1Rv8h_POST:
   case AArch64::LD1Rv16b_POST:
-  case AArch64::LD1Twov1d_POST:
-  case AArch64::LD1Twov2s_POST:
-  case AArch64::LD1Twov4h_POST:
-  case AArch64::LD1Twov8b_POST:
-  case AArch64::LD2Twov2s_POST:
-  case AArch64::LD2Twov4s_POST:
-  case AArch64::LD2Twov8b_POST:
-  case AArch64::LD2Rv1d_POST:
-  case AArch64::LD2Rv2s_POST:
-  case AArch64::LD2Rv4s_POST:
-  case AArch64::LD2Rv8b_POST:
     DestRegIdx = 1;
     BaseRegIdx = 2;
     OffsetIdx = 3;
-    IsPrePost = false;
+    IsPrePost = true;
     break;
 
+  case AArch64::LD1Twov1d_POST:
+  case AArch64::LD1Twov2s_POST:
+  case AArch64::LD1Twov4h_POST:
+  case AArch64::LD1Twov8b_POST:
   case AArch64::LD1Twov2d_POST:
   case AArch64::LD1Twov4s_POST:
   case AArch64::LD1Twov8h_POST:
@@ -446,10 +439,17 @@ static Optional<LoadInfo> getLoadInfo(co
   case AArch64::LD1Fourv4s_POST:
   case AArch64::LD1Fourv8h_POST:
   case AArch64::LD1Fourv16b_POST:
+  case AArch64::LD2Twov2s_POST:
+  case AArch64::LD2Twov4s_POST:
+  case AArch64::LD2Twov8b_POST:
   case AArch64::LD2Twov2d_POST:
   case AArch64::LD2Twov4h_POST:
   case AArch64::LD2Twov8h_POST:
   case AArch64::LD2Twov16b_POST:
+  case AArch64::LD2Rv1d_POST:
+  case AArch64::LD2Rv2s_POST:
+  case AArch64::LD2Rv4s_POST:
+  case AArch64::LD2Rv8b_POST:
   case AArch64::LD2Rv2d_POST:
   case AArch64::LD2Rv4h_POST:
   case AArch64::LD2Rv8h_POST:
@@ -487,7 +487,7 @@ static Optional<LoadInfo> getLoadInfo(co
     DestRegIdx = -1;
     BaseRegIdx = 2;
     OffsetIdx = 3;
-    IsPrePost = false;
+    IsPrePost = true;
     break;
 
   case AArch64::LDRBBroW:
@@ -592,16 +592,19 @@ static Optional<LoadInfo> getLoadInfo(co
     IsPrePost = true;
     break;
 
+  case AArch64::LDNPDi:
+  case AArch64::LDNPQi:
+  case AArch64::LDNPSi:
   case AArch64::LDPQi:
+  case AArch64::LDPDi:
+  case AArch64::LDPSi:
     DestRegIdx = -1;
     BaseRegIdx = 2;
     OffsetIdx = 3;
     IsPrePost = false;
     break;
 
-  case AArch64::LDPDi:
   case AArch64::LDPSWi:
-  case AArch64::LDPSi:
   case AArch64::LDPWi:
   case AArch64::LDPXi:
     DestRegIdx = 0;
@@ -612,18 +615,18 @@ static Optional<LoadInfo> getLoadInfo(co
 
   case AArch64::LDPQpost:
   case AArch64::LDPQpre:
+  case AArch64::LDPDpost:
+  case AArch64::LDPDpre:
+  case AArch64::LDPSpost:
+  case AArch64::LDPSpre:
     DestRegIdx = -1;
     BaseRegIdx = 3;
     OffsetIdx = 4;
     IsPrePost = true;
     break;
 
-  case AArch64::LDPDpost:
-  case AArch64::LDPDpre:
   case AArch64::LDPSWpost:
   case AArch64::LDPSWpre:
-  case AArch64::LDPSpost:
-  case AArch64::LDPSpre:
   case AArch64::LDPWpost:
   case AArch64::LDPWpre:
   case AArch64::LDPXpost:

Modified: llvm/trunk/test/CodeGen/AArch64/falkor-hwpf-fix.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/falkor-hwpf-fix.mir?rev=314251&r1=314250&r2=314251&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/falkor-hwpf-fix.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/falkor-hwpf-fix.mir Tue Sep 26 14:40:41 2017
@@ -1,12 +1,7 @@
 # RUN: llc -mtriple=aarch64-linux-gnu -mcpu=falkor -run-pass falkor-hwpf-fix-late -o - %s | FileCheck %s
---- |
-  @g = external global i32
-
-  define void @hwpf1() { ret void }
-  define void @hwpf2() { ret void }
-...
 ---
-# Verify that the tag collision between the loads is resolved.
+# Verify that the tag collision between the loads is resolved for various load opcodes.
+
 # CHECK-LABEL: name: hwpf1
 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
 # CHECK: LDRWui %[[BASE]], 0
@@ -17,7 +12,7 @@ body: |
   bb.0:
     liveins: %w0, %x1
 
-    %w2 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4 from @g)
+    %w2 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4)
     %w2 = LDRWui %x1, 1
 
     %w0 = SUBWri %w0, 1, 0
@@ -28,23 +23,283 @@ body: |
     RET_ReallyLR
 ...
 ---
-# Verify that the tag collision between the loads is resolved and written back for post increment addressing.
 # CHECK-LABEL: name: hwpf2
 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1i64 %q2, 0, %[[BASE]]
+# CHECK: LDRWui %x1, 0
+name:            hwpf2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: %w0, %x1, %q2
+
+    %q2 = LD1i64 %q2, 0, %x1 :: ("aarch64-strided-access" load 4)
+    %w2 = LDRWui %x1, 0
+
+    %w0 = SUBWri %w0, 1, 0
+    %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+    Bcc 9, %bb.0, implicit %nzcv
+
+  bb.1:
+    RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf3
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1i8 %q2, 0, %[[BASE]]
+# CHECK: LDRWui %x1, 0
+name:            hwpf3
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: %w0, %x1, %q2
+
+    %q2 = LD1i8 %q2, 0, %x1 :: ("aarch64-strided-access" load 4)
+    %w0 = LDRWui %x1, 0
+
+    %w0 = SUBWri %w0, 1, 0
+    %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+    Bcc 9, %bb.0, implicit %nzcv
+
+  bb.1:
+    RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf4
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1Onev1d %[[BASE]]
+# CHECK: LDRWui %x1, 0
+name:            hwpf4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: %w0, %x1
+
+    %d2 = LD1Onev1d %x1 :: ("aarch64-strided-access" load 4)
+    %w2 = LDRWui %x1, 0
+
+    %w0 = SUBWri %w0, 1, 0
+    %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+    Bcc 9, %bb.0, implicit %nzcv
+
+  bb.1:
+    RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf5
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1Twov1d %[[BASE]]
+# CHECK: LDRWui %x1, 0
+name:            hwpf5
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: %w0, %x1
+
+    %d2_d3 = LD1Twov1d %x1 :: ("aarch64-strided-access" load 4)
+    %w0 = LDRWui %x1, 0
+
+    %w0 = SUBWri %w0, 1, 0
+    %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+    Bcc 9, %bb.0, implicit %nzcv
+
+  bb.1:
+    RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf6
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LDPQi %[[BASE]]
+# CHECK: LDRWui %x1, 3
+name:            hwpf6
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: %w0, %x1
+
+    %q2, %q3 = LDPQi %x1, 3 :: ("aarch64-strided-access" load 4)
+    %w0 = LDRWui %x1, 3
+
+    %w0 = SUBWri %w0, 1, 0
+    %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+    Bcc 9, %bb.0, implicit %nzcv
+
+  bb.1:
+    RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf7
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LDPXi %[[BASE]]
+# CHECK: LDRWui %x1, 2
+name:            hwpf7
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: %w0, %x1
+
+    %x2, %x3 = LDPXi %x1, 3 :: ("aarch64-strided-access" load 4)
+    %w2 = LDRWui %x1, 2
+
+    %w0 = SUBWri %w0, 1, 0
+    %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+    Bcc 9, %bb.0, implicit %nzcv
+
+  bb.1:
+    RET_ReallyLR
+...
+---
+# Verify that the tag collision between the loads is resolved and written back
+# for post increment addressing for various load opcodes.
+
+# CHECK-LABEL: name: hwpfinc1
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
 # CHECK: LDRWpost %[[BASE]], 0
 # CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
 # CHECK: LDRWui %x1, 1
-name:            hwpf2
+name:            hwpfinc1
 tracksRegLiveness: true
 body: |
   bb.0:
     liveins: %w0, %x1
 
-    %x1, %w2 = LDRWpost %x1, 0 :: ("aarch64-strided-access" load 4 from @g)
+    %x1, %w2 = LDRWpost %x1, 0 :: ("aarch64-strided-access" load 4)
     %w2 = LDRWui %x1, 1
 
     %w0 = SUBWri %w0, 1, 0
     %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+    Bcc 9, %bb.0, implicit %nzcv
+
+  bb.1:
+    RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc2
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1i64_POST %q2, 0, %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x1, 1
+name:            hwpfinc2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: %w0, %x1, %q2
+
+    %x1, %q2 = LD1i64_POST %q2, 0, %x1, %x1 :: ("aarch64-strided-access" load 4)
+    %w2 = LDRWui %x1, 132
+
+    %w0 = SUBWri %w0, 1, 0
+    %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+    Bcc 9, %bb.0, implicit %nzcv
+
+  bb.1:
+    RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc3
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1i8_POST %q2, 0, %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x1, 132
+name:            hwpfinc3
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: %w0, %x1, %q2
+
+    %x1, %q2 = LD1i8_POST %q2, 0, %x1, %x1 :: ("aarch64-strided-access" load 4)
+    %w0 = LDRWui %x1, 132
+
+    %w0 = SUBWri %w0, 1, 0
+    %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+    Bcc 9, %bb.0, implicit %nzcv
+
+  bb.1:
+    RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc4
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1Rv1d_POST %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x1, 252
+name:            hwpfinc4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: %w0, %x1, %q2
+
+    %x1, %d2 = LD1Rv1d_POST %x1, %xzr :: ("aarch64-strided-access" load 4)
+    %w2 = LDRWui %x1, 252
+
+    %w0 = SUBWri %w0, 1, 0
+    %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+    Bcc 9, %bb.0, implicit %nzcv
+
+  bb.1:
+    RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc5
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD3Threev2s_POST %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWroX %x17, %x0
+name:            hwpfinc5
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: %w0, %x1, %x17, %q2
+
+    %x1, %d2_d3_d4 = LD3Threev2s_POST %x1, %x0 :: ("aarch64-strided-access" load 4)
+    %w0 = LDRWroX %x17, %x0, 0, 0
+
+    %w0 = SUBWri %w0, 1, 0
+    %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+    Bcc 9, %bb.0, implicit %nzcv
+
+  bb.1:
+    RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc6
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LDPDpost %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x17, 2
+name:            hwpfinc6
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: %w0, %x1, %x17, %q2
+
+    %x1, %d2, %d3 = LDPDpost %x1, 3 :: ("aarch64-strided-access" load 4)
+    %w16 = LDRWui %x17, 2
+
+    %w0 = SUBWri %w0, 1, 0
+    %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+    Bcc 9, %bb.0, implicit %nzcv
+
+  bb.1:
+    RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc7
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LDPXpost %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x17, 2
+name:            hwpfinc7
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: %w0, %x1, %x17, %q2
+
+    %x1, %x2, %x3 = LDPXpost %x1, 3 :: ("aarch64-strided-access" load 4)
+    %w18 = LDRWui %x17, 2
+
+    %w0 = SUBWri %w0, 1, 0
+    %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
     Bcc 9, %bb.0, implicit %nzcv
 
   bb.1:




More information about the llvm-commits mailing list