[llvm] r259134 - [AArch64] Fix i64 nontemporal high-half extraction.

Ahmed Bougacha via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 28 17:08:42 PST 2016


Author: ab
Date: Thu Jan 28 19:08:41 2016
New Revision: 259134

URL: http://llvm.org/viewvc/llvm-project?rev=259134&view=rev
Log:
[AArch64] Fix i64 nontemporal high-half extraction.

Since we only have pair - not single - nontemporal store instructions,
an i64 nontemporal store is emitted as an STNP of its two 32-bit halves,
so we have to extract the high half into a separate register first.

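When the initial nontemporal codegen support was added, I wrote the
extract using a nonsensical UBFX of bits [0,32) (lsb 0, width 32), which
selects the low half again instead of the high half.
Use the correct LSR #32 form (UBFM with immr=32, imms=63) instead.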

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/trunk/test/CodeGen/AArch64/nontemporal.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=259134&r1=259133&r2=259134&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td Thu Jan 28 19:08:41 2016
@@ -5982,7 +5982,7 @@ def : NTStore64Pat<v8i8>;
 def : Pat<(nontemporalstore GPR64:$Rt,
             (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
           (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
-                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 0, 31), sub_32),
+                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
                   GPR64sp:$Rn, simm7s4:$offset)>;
 } // AddedComplexity=10
 } // Predicates = [IsLE]

Modified: llvm/trunk/test/CodeGen/AArch64/nontemporal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/nontemporal.ll?rev=259134&r1=259133&r2=259134&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/nontemporal.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/nontemporal.ll Thu Jan 28 19:08:41 2016
@@ -112,7 +112,7 @@ define void @test_stnp_v1i64(<1 x i64>*
 
 define void @test_stnp_i64(i64* %p, i64 %v) #0 {
 ; CHECK-LABEL: test_stnp_i64:
-; CHECK-NEXT:  ubfx x[[HI:[0-9]+]], x1, #0, #32
+; CHECK-NEXT:  lsr x[[HI:[0-9]+]], x1, #32
 ; CHECK-NEXT:  stnp w1, w[[HI]], [x0]
 ; CHECK-NEXT:  ret
   store i64 %v, i64* %p, align 1, !nontemporal !0
@@ -162,7 +162,7 @@ define void @test_stnp_v2f32_offset_neg(
 
 define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 {
 ; CHECK-LABEL: test_stnp_i64_offset:
-; CHECK-NEXT:  ubfx x[[HI:[0-9]+]], x1, #0, #32
+; CHECK-NEXT:  lsr x[[HI:[0-9]+]], x1, #32
 ; CHECK-NEXT:  stnp w1, w[[HI]], [x0, #8]
 ; CHECK-NEXT:  ret
   %tmp0 = getelementptr i64, i64* %p, i32 1
@@ -172,7 +172,7 @@ define void @test_stnp_i64_offset(i64* %
 
 define void @test_stnp_i64_offset_neg(i64* %p, i64 %v) #0 {
 ; CHECK-LABEL: test_stnp_i64_offset_neg:
-; CHECK-NEXT:  ubfx x[[HI:[0-9]+]], x1, #0, #32
+; CHECK-NEXT:  lsr x[[HI:[0-9]+]], x1, #32
 ; CHECK-NEXT:  stnp w1, w[[HI]], [x0, #-8]
 ; CHECK-NEXT:  ret
   %tmp0 = getelementptr i64, i64* %p, i32 -1




More information about the llvm-commits mailing list