[PATCH] [AArch64 NEON] Fix wrong imm used in BIC instruction.
Kevin Qin
kevinqindev at gmail.com
Thu Jan 2 19:24:09 PST 2014
Hi,
For below IR,
define <4 x i16> @bicimm4h_lsl8_b(<4 x i16> %a) {
%tmp1 = and <4 x i16> %a, < i16 255, i16 255, i16 255, i16 255>
ret <4 x i16> %tmp1
}
We expect
bic v0.8h, #0xff, lsl #8
But we got
bic v0.8h, #0x00, lsl #8
which equals
%tmp1 = and <4 x i16> %a, <4 x i16> All_One
It means all high 8 bits don't be set to zero as IR request and result is wrong.
Also, this patch fix some overflow immediate about BIC instruction in test case. Please review. Thanks.
http://llvm-reviews.chandlerc.com/D2500
Files:
lib/Target/AArch64/AArch64InstrNEON.td
test/CodeGen/AArch64/neon-bitwise-instructions.ll
Index: lib/Target/AArch64/AArch64InstrNEON.td
===================================================================
--- lib/Target/AArch64/AArch64InstrNEON.td
+++ lib/Target/AArch64/AArch64InstrNEON.td
@@ -1220,8 +1220,8 @@
!strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
[(set (v2i32 VPR64:$Rd),
(v2i32 (opnode (v2i32 VPR64:$src),
- (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
- neon_mov_imm_LSL_operand:$Simm)))))))],
+ (v2i32 (neonopnode timm:$Imm,
+ neon_mov_imm_LSL_operand:$Simm)))))],
NoItinerary> {
bits<2> Simm;
let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
@@ -1234,8 +1234,8 @@
!strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
[(set (v4i32 VPR128:$Rd),
(v4i32 (opnode (v4i32 VPR128:$src),
- (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
- neon_mov_imm_LSL_operand:$Simm)))))))],
+ (v4i32 (neonopnode timm:$Imm,
+ neon_mov_imm_LSL_operand:$Simm)))))],
NoItinerary> {
bits<2> Simm;
let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
@@ -1249,8 +1249,8 @@
!strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
[(set (v4i16 VPR64:$Rd),
(v4i16 (opnode (v4i16 VPR64:$src),
- (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
- neon_mov_imm_LSL_operand:$Simm)))))))],
+ (v4i16 (neonopnode timm:$Imm,
+ neon_mov_imm_LSL_operand:$Simm)))))],
NoItinerary> {
bit Simm;
let cmode = {0b1, 0b0, Simm, 0b1};
@@ -1263,8 +1263,8 @@
!strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
[(set (v8i16 VPR128:$Rd),
(v8i16 (opnode (v8i16 VPR128:$src),
- (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
- neon_mov_imm_LSL_operand:$Simm)))))))],
+ (v8i16 (neonopnode timm:$Imm,
+ neon_mov_imm_LSL_operand:$Simm)))))],
NoItinerary> {
bit Simm;
let cmode = {0b1, 0b0, Simm, 0b1};
@@ -1350,21 +1350,20 @@
return (HasShift && !ShiftOnesIn); }],
neon_mov_imm_LSLH_transform_XFORM>;
-// Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0x00, LSL 8)
-// Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0x00)
+// Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0xff, LSL 8)
+// Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0xff)
def : Pat<(v4i16 (and VPR64:$src,
(v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
- (BICvi_lsl_4H VPR64:$src, 0,
+ (BICvi_lsl_4H VPR64:$src, 255,
neon_mov_imm_LSLH_transform_operand:$Simm)>;
-// Transform (and A, (8h Neon_movi 8h 0xff)) -> BIC 8h (A, 0x00, LSL 8)
-// Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0x00)
+// Transform (and A, (8h Neon_movi 8h 0xff)) -> BIC 8h (A, 0xff, LSL 8)
+// Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0xff)
def : Pat<(v8i16 (and VPR128:$src,
(v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
- (BICvi_lsl_8H VPR128:$src, 0,
+ (BICvi_lsl_8H VPR128:$src, 255,
neon_mov_imm_LSLH_transform_operand:$Simm)>;
-
multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
SDPatternOperator neonopnode,
Instruction INST4H,
Index: test/CodeGen/AArch64/neon-bitwise-instructions.ll
===================================================================
--- test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -163,19 +163,19 @@
define <2 x i32> @bicimm2s_lsl8(<2 x i32> %a) {
;CHECK: bic {{v[0-9]+}}.2s, #0x10, lsl #8
- %tmp1 = and <2 x i32> %a, < i32 18446744073709547519, i32 18446744073709547519 >
+ %tmp1 = and <2 x i32> %a, < i32 4294963199, i32 4294963199 >
ret <2 x i32> %tmp1
}
define <2 x i32> @bicimm2s_lsl16(<2 x i32> %a) {
;CHECK: bic {{v[0-9]+}}.2s, #0x10, lsl #16
- %tmp1 = and <2 x i32> %a, < i32 18446744073708503039, i32 18446744073708503039 >
+ %tmp1 = and <2 x i32> %a, < i32 4293918719, i32 4293918719 >
ret <2 x i32> %tmp1
}
define <2 x i32> @bicimm2s_lsl124(<2 x i32> %a) {
;CHECK: bic {{v[0-9]+}}.2s, #0x10, lsl #24
- %tmp1 = and <2 x i32> %a, < i32 18446744073441116159, i32 18446744073441116159>
+ %tmp1 = and <2 x i32> %a, < i32 4026531839, i32 4026531839>
ret <2 x i32> %tmp1
}
@@ -187,68 +187,68 @@
define <4 x i32> @bicimm4s_lsl8(<4 x i32> %a) {
;CHECK: bic {{v[0-9]+}}.4s, #0x10, lsl #8
- %tmp1 = and <4 x i32> %a, < i32 18446744073709547519, i32 18446744073709547519, i32 18446744073709547519, i32 18446744073709547519 >
+ %tmp1 = and <4 x i32> %a, < i32 4294963199, i32 4294963199, i32 4294963199, i32 4294963199 >
ret <4 x i32> %tmp1
}
define <4 x i32> @bicimm4s_lsl16(<4 x i32> %a) {
;CHECK: bic {{v[0-9]+}}.4s, #0x10, lsl #16
- %tmp1 = and <4 x i32> %a, < i32 18446744073708503039, i32 18446744073708503039, i32 18446744073708503039, i32 18446744073708503039 >
+ %tmp1 = and <4 x i32> %a, < i32 4293918719, i32 4293918719, i32 4293918719, i32 4293918719 >
ret <4 x i32> %tmp1
}
define <4 x i32> @bicimm4s_lsl124(<4 x i32> %a) {
;CHECK: bic {{v[0-9]+}}.4s, #0x10, lsl #24
- %tmp1 = and <4 x i32> %a, < i32 18446744073441116159, i32 18446744073441116159, i32 18446744073441116159, i32 18446744073441116159>
+ %tmp1 = and <4 x i32> %a, < i32 4026531839, i32 4026531839, i32 4026531839, i32 4026531839>
ret <4 x i32> %tmp1
}
define <4 x i16> @bicimm4h_lsl0_a(<4 x i16> %a) {
;CHECK: bic {{v[0-9]+}}.4h, #0x10
- %tmp1 = and <4 x i16> %a, < i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599 >
+ %tmp1 = and <4 x i16> %a, < i16 4294967279, i16 4294967279, i16 4294967279, i16 4294967279 >
ret <4 x i16> %tmp1
}
define <4 x i16> @bicimm4h_lsl0_b(<4 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.4h, #0x0
+;CHECK: bic {{v[0-9]+}}.4h, #0xff
%tmp1 = and <4 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280 >
ret <4 x i16> %tmp1
}
define <4 x i16> @bicimm4h_lsl8_a(<4 x i16> %a) {
;CHECK: bic {{v[0-9]+}}.4h, #0x10, lsl #8
- %tmp1 = and <4 x i16> %a, < i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519>
+ %tmp1 = and <4 x i16> %a, < i16 4294963199, i16 4294963199, i16 4294963199, i16 4294963199>
ret <4 x i16> %tmp1
}
define <4 x i16> @bicimm4h_lsl8_b(<4 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.4h, #0x0, lsl #8
+;CHECK: bic {{v[0-9]+}}.4h, #0xff, lsl #8
%tmp1 = and <4 x i16> %a, < i16 255, i16 255, i16 255, i16 255>
ret <4 x i16> %tmp1
}
define <8 x i16> @bicimm8h_lsl0_a(<8 x i16> %a) {
;CHECK: bic {{v[0-9]+}}.8h, #0x10
- %tmp1 = and <8 x i16> %a, < i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599,
- i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599 >
+ %tmp1 = and <8 x i16> %a, < i16 4294967279, i16 4294967279, i16 4294967279, i16 4294967279,
+ i16 4294967279, i16 4294967279, i16 4294967279, i16 4294967279 >
ret <8 x i16> %tmp1
}
define <8 x i16> @bicimm8h_lsl0_b(<8 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.8h, #0x0
+;CHECK: bic {{v[0-9]+}}.8h, #0xff
%tmp1 = and <8 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 >
ret <8 x i16> %tmp1
}
define <8 x i16> @bicimm8h_lsl8_a(<8 x i16> %a) {
;CHECK: bic {{v[0-9]+}}.8h, #0x10, lsl #8
- %tmp1 = and <8 x i16> %a, < i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519,
- i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519>
+ %tmp1 = and <8 x i16> %a, < i16 4294963199, i16 4294963199, i16 4294963199, i16 4294963199,
+ i16 4294963199, i16 4294963199, i16 4294963199, i16 4294963199>
ret <8 x i16> %tmp1
}
define <8 x i16> @bicimm8h_lsl8_b(<8 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.8h, #0x0, lsl #8
+;CHECK: bic {{v[0-9]+}}.8h, #0xff, lsl #8
%tmp1 = and <8 x i16> %a, < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
ret <8 x i16> %tmp1
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2500.1.patch
Type: text/x-patch
Size: 8646 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140102/990d0a23/attachment.bin>
More information about the llvm-commits
mailing list