[llvm] [X86][SelectionDAG] - Add support for llvm.canonicalize intrinsic (PR #106370)

Tue Sep 17 02:55:21 PDT 2024

================
@@ -58159,6 +58160,25 @@ static SDValue combineINTRINSIC_VOID(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue combineCanonicalize(SDNode *N, SelectionDAG &DAG) {
+  SDValue Operand = N->getOperand(0);
+  EVT VT = Operand.getValueType();
+  SDLoc dl(N);
+
+  // Canonicalize scalar variable FP Nodes.
+  SDValue One =
+      DAG.getNode(ISD::SINT_TO_FP, dl, VT, DAG.getConstant(1, dl, MVT::i32));
----------------
pawan-nirpal-031 wrote:

I tried this suggestion, but I'm running into a crash for f80 scalar input. What I realized while debugging, though, is that changeTypeToInteger may not be required. I made the following change, and I see that vector inputs are handled pretty seamlessly:

Change 
```
-  // Canonicalize scalar variable FP Nodes.
-  SDValue One =
-      DAG.getNode(ISD::SINT_TO_FP, dl, VT, DAG.getConstant(1, dl, MVT::i32));
+  SDValue One = DAG.getConstantFP(1.0, dl, VT);
+
```



input 
```
define <4 x float> @canon_fp32_varargsv4f32(<4 x float> %a) {
  %canonicalized = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %a)
  ret <4 x float> %canonicalized
}

```

result 
```
.LCPI9_0:
	.long	0x3f800000                      # float 1
	.long	0x3f800000                      # float 1
	.long	0x3f800000                      # float 1
	.long	0x3f800000                      # float 1
	.text
	.globl	canon_fp32_varargsv4f32
	.p2align	4, 0x90
	.type	canon_fp32_varargsv4f32,@function
canon_fp32_varargsv4f32:                # @canon_fp32_varargsv4f32
	.cfi_startproc
# %bb.0:
	mulps	.LCPI9_0(%rip), %xmm0
```

input 
```
define <4 x double> @canon_fp64_varargsv4f64(<4 x double> %a) {
  %canonicalized = call <4 x double> @llvm.canonicalize.v4f32(<4 x double> %a)
  ret <4 x double> %canonicalized
}
```

result
```
.LCPI10_0:
	.quad	0x3ff0000000000000              # double 1
	.quad	0x3ff0000000000000              # double 1
	.text
	.globl	canon_fp64_varargsv4f64
	.p2align	4, 0x90
	.type	canon_fp64_varargsv4f64, at function
canon_fp64_varargsv4f64:                # @canon_fp64_varargsv4f64
	.cfi_startproc
# %bb.0:
	movapd	.LCPI10_0(%rip), %xmm2          # xmm2 = [1.0E+0,1.0E+0]
	mulpd	%xmm2, %xmm0
	mulpd	%xmm2, %xmm1
	retq
```

input 
```
define void @vec_canonicalize_x86_fp80(<4 x x86_fp80> addrspace(1)* %out) #1 {
  %val = load <4 x x86_fp80>, <4 x x86_fp80> addrspace(1)* %out
  %canonicalized = call <4 x x86_fp80> @llvm.canonicalize.f80(<4 x x86_fp80> %val)
  store <4 x x86_fp80> %canonicalized, <4 x x86_fp80> addrspace(1)* %out
  ret void
}
```

result
```
# %bb.0:
	fldt	30(%rdi)
	fldt	20(%rdi)
	fldt	10(%rdi)
	fldt	(%rdi)
	fld1
	fmul	%st, %st(1)
	fmul	%st, %st(2)
	fmul	%st, %st(3)
	fmulp	%st, %st(4)
	fxch	%st(3)
	fstpt	30(%rdi)
	fxch	%st(1)
	fstpt	20(%rdi)
	fstpt	10(%rdi)
	fstpt	(%rdi)
	retq
```

https://github.com/llvm/llvm-project/pull/106370