[llvm] [X86] Support hoisting load/store with conditional faulting (PR #95515)

Shengchen Kan via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 23 09:56:05 PDT 2024


================
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64 -mattr=+cf -verify-machineinstrs | FileCheck %s
+
+define void @basic(i32 %a, ptr %b, ptr %p, ptr %q) {
+; CHECK-LABEL: basic:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    sete %dil
+; CHECK-NEXT:    negb %dil
+; CHECK-NEXT:    cfcmovnel (%rsi), %esi
+; CHECK-NEXT:    cfcmovnel %esi, (%rdx)
+; CHECK-NEXT:    negb %al
+; CHECK-NEXT:    movl $1, %eax
+; CHECK-NEXT:    cfcmovneq %rax, (%rdx)
+; CHECK-NEXT:    movw $2, %ax
+; CHECK-NEXT:    cfcmovnew %ax, (%rcx)
+; CHECK-NEXT:    retq
+entry:
+  %cond = icmp eq i32 %a, 0
+  %0 = bitcast i1 %cond to <1 x i1>
+  %1 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i32> poison)
+  call void @llvm.masked.store.v1i32.p0(<1 x i32> %1, ptr %p, i32 4, <1 x i1> %0)
+  %2 = xor i1 %cond, true
+  %3 = bitcast i1 %2 to <1 x i1>
+  call void @llvm.masked.store.v1i64.p0(<1 x i64> <i64 1>, ptr %p, i32 8, <1 x i1> %3)
+  call void @llvm.masked.store.v1i16.p0(<1 x i16> <i16 2>, ptr %q, i32 8, <1 x i1> %3)
+  ret void
+}
+
+define i16 @cload_passthru_zero(i16 %a, ptr %b) {
+; CHECK-LABEL: cload_passthru_zero:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    testw %di, %di
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    negb %al
+; CHECK-NEXT:    cfcmovnew (%rsi), %ax
+; CHECK-NEXT:    retq
+entry:
+  %cond = icmp eq i16 %a, 0
+  %0 = bitcast i1 %cond to <1 x i1>
+  %1 = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i16> <i16 0>)
+  %2 = bitcast <1 x i16> %1 to i16
+  ret i16 %2
+}
+
+define i64 @cload_passthru_not_zero(i64 %a, ptr %b) {
+; CHECK-LABEL: cload_passthru_not_zero:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    testq %rdi, %rdi
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    negb %al
+; CHECK-NEXT:    cfcmovneq (%rsi), %rdi, %rax
+; CHECK-NEXT:    retq
+entry:
+  %cond = icmp eq i64 %a, 0
+  %0 = bitcast i1 %cond to <1 x i1>
+  %va = bitcast i64 %a to <1 x i64>
+  %1 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i64> %va)
+  %2 = bitcast <1 x i64> %1 to i64
+  ret i64 %2
+}
+
+define i16 @cond_false(ptr %b) {
+; CHECK-LABEL: cond_false:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    negb %al
+; CHECK-NEXT:    cfcmovnew (%rdi), %ax
+; CHECK-NEXT:    retq
+entry:
+  %0 = bitcast i1 false to <1 x i1>
+  %1 = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i16> <i16 0>)
+  %2 = bitcast <1 x i16> %1 to i16
+  ret i16 %2
+}
+
+define i64 @cond_true(ptr %b) {
+; CHECK-LABEL: cond_true:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movb $1, %al
+; CHECK-NEXT:    negb %al
+; CHECK-NEXT:    cfcmovneq (%rdi), %rax
+; CHECK-NEXT:    retq
----------------
KanRobert wrote:

No need to optimize the generated assembly for cond_false/cond_true, because such IR should never be emitted by the middle end.
These tests only verify that it is legal to feed the backend with IR of this form.
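For context, a minimal sketch (not part of the patch) of what the middle end would normally fold these constant-mask cases into, assuming InstCombine's constant-mask folds apply: an all-false mask reduces the masked load to its passthru value, and an all-true mask reduces it to a plain load. The function names below are hypothetical, and the i64 width for the cond_true case is an assumption since its IR body is not shown in the quote above.

; Hypothetical folded forms (assumption: InstCombine's constant-mask folds):

; cond_false: the all-false mask folds the masked load to its passthru <i16 0>.
define i16 @cond_false_folded(ptr %b) {
entry:
  ret i16 0
}

; cond_true: the all-true mask folds the masked load to an unconditional load.
define i64 @cond_true_folded(ptr %b) {
entry:
  %v = load i64, ptr %b, align 4
  ret i64 %v
}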

https://github.com/llvm/llvm-project/pull/95515

