Skip to content

Commit 1ccc6c8

Browse files
committed
Adding missed optimisation
1 parent 0ca5720 commit 1ccc6c8

File tree

3 files changed

+57
-47
lines changed

3 files changed

+57
-47
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+26
Original file line numberDiff line numberDiff line change
@@ -4301,6 +4301,29 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
43014301
Op.getOperand(1));
43024302
}
43034303

4304+
// Converts SETCC (AND X Y) Z ULT -> SETCC (AND X (Y & ~(Z - 1)) 0 EQ when Y is
4305+
// a power of 2. This is then lowered to ANDS X (Y & ~(Z - 1)) instead of SUBS
4306+
// (AND X Y) Z which produces a better opt with EmitComparison
4307+
static void simplifySetCCIntoEq(ISD::CondCode &CC, SDValue &LHS, SDValue &RHS,
4308+
SelectionDAG &DAG, const SDLoc dl) {
4309+
if (CC == ISD::SETULT && LHS.getOpcode() == ISD::AND && LHS->hasOneUse()) {
4310+
ConstantSDNode *LHSConstOp = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
4311+
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
4312+
if (LHSConstOp && RHSConst) {
4313+
uint64_t lhsConstValue = LHSConstOp->getZExtValue();
4314+
uint64_t rhsConstant = RHSConst->getZExtValue();
4315+
if (isPowerOf2_64(rhsConstant)) {
4316+
uint64_t newMaskValue = lhsConstValue & ~(rhsConstant - 1);
4317+
LHS =
4318+
DAG.getNode(ISD::AND, dl, LHS.getValueType(), LHS.getOperand(0),
4319+
DAG.getConstant(newMaskValue, dl, LHS.getValueType()));
4320+
RHS = DAG.getConstant(0, dl, RHS.getValueType());
4321+
CC = ISD::SETEQ;
4322+
}
4323+
}
4324+
}
4325+
}
4326+
43044327
SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
43054328
SelectionDAG &DAG) const {
43064329
EVT VT = Op.getValueType();
@@ -10589,6 +10612,9 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1058910612
}
1059010613

1059110614
if (LHS.getValueType().isInteger()) {
10615+
10616+
simplifySetCCIntoEq(CC, LHS, RHS, DAG, dl);
10617+
1059210618
SDValue CCVal;
1059310619
SDValue Cmp = getAArch64Cmp(
1059410620
LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);

llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll

+21-32
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@
55
define i1 @lt8_u8(i8 %0) {
66
; CHECK-LABEL: lt8_u8:
77
; CHECK: // %bb.0:
8-
; CHECK-NEXT: and w8, w0, #0xff
9-
; CHECK-NEXT: cmp w8, #8
10-
; CHECK-NEXT: cset w0, lo
8+
; CHECK-NEXT: tst w0, #0xf8
9+
; CHECK-NEXT: cset w0, eq
1110
; CHECK-NEXT: ret
1211
%2 = icmp ult i8 %0, 8
1312
ret i1 %2
@@ -16,9 +15,8 @@ define i1 @lt8_u8(i8 %0) {
1615
define i1 @lt32_u8(i8 %0) {
1716
; CHECK-LABEL: lt32_u8:
1817
; CHECK: // %bb.0:
19-
; CHECK-NEXT: and w8, w0, #0xff
20-
; CHECK-NEXT: cmp w8, #32
21-
; CHECK-NEXT: cset w0, lo
18+
; CHECK-NEXT: tst w0, #0xe0
19+
; CHECK-NEXT: cset w0, eq
2220
; CHECK-NEXT: ret
2321
%2 = icmp ult i8 %0, 32
2422
ret i1 %2
@@ -27,9 +25,8 @@ define i1 @lt32_u8(i8 %0) {
2725
define i1 @lt64_u8(i8 %0) {
2826
; CHECK-LABEL: lt64_u8:
2927
; CHECK: // %bb.0:
30-
; CHECK-NEXT: and w8, w0, #0xff
31-
; CHECK-NEXT: cmp w8, #64
32-
; CHECK-NEXT: cset w0, lo
28+
; CHECK-NEXT: tst w0, #0xc0
29+
; CHECK-NEXT: cset w0, eq
3330
; CHECK-NEXT: ret
3431
%2 = icmp ult i8 %0, 64
3532
ret i1 %2
@@ -98,10 +95,9 @@ define i1 @lt64_u64(i64 %0) {
9895
define i1 @lt8_u16_and_5(i8 %0) {
9996
; CHECK-LABEL: lt8_u16_and_5:
10097
; CHECK: // %bb.0:
101-
; CHECK-NEXT: mov w8, #5 // =0x5
102-
; CHECK-NEXT: and w8, w0, w8
103-
; CHECK-NEXT: cmp w8, #16
104-
; CHECK-NEXT: cset w0, lo
98+
; CHECK-NEXT: mov w8, wzr
99+
; CHECK-NEXT: cmp w8, #0
100+
; CHECK-NEXT: cset w0, eq
105101
; CHECK-NEXT: ret
106102
%2 = and i8 %0, 5
107103
%3 = icmp ult i8 %2, 16
@@ -111,10 +107,8 @@ define i1 @lt8_u16_and_5(i8 %0) {
111107
define i1 @lt8_u16_and_19(i8 %0) {
112108
; CHECK-LABEL: lt8_u16_and_19:
113109
; CHECK: // %bb.0:
114-
; CHECK-NEXT: mov w8, #19 // =0x13
115-
; CHECK-NEXT: and w8, w0, w8
116-
; CHECK-NEXT: cmp w8, #16
117-
; CHECK-NEXT: cset w0, lo
110+
; CHECK-NEXT: tst w0, #0x10
111+
; CHECK-NEXT: cset w0, eq
118112
; CHECK-NEXT: ret
119113
%2 = and i8 %0, 19
120114
%3 = icmp ult i8 %2, 16
@@ -124,9 +118,9 @@ define i1 @lt8_u16_and_19(i8 %0) {
124118
define i1 @lt32_u16_and_7(i32 %0) {
125119
; CHECK-LABEL: lt32_u16_and_7:
126120
; CHECK: // %bb.0:
127-
; CHECK-NEXT: and w8, w0, #0x7
128-
; CHECK-NEXT: cmp w8, #16
129-
; CHECK-NEXT: cset w0, lo
121+
; CHECK-NEXT: mov w8, wzr
122+
; CHECK-NEXT: cmp w8, #0
123+
; CHECK-NEXT: cset w0, eq
130124
; CHECK-NEXT: ret
131125
%2 = and i32 %0, 7
132126
%3 = icmp ult i32 %2, 16
@@ -136,10 +130,8 @@ define i1 @lt32_u16_and_7(i32 %0) {
136130
define i1 @lt32_u16_and_21(i32 %0) {
137131
; CHECK-LABEL: lt32_u16_and_21:
138132
; CHECK: // %bb.0:
139-
; CHECK-NEXT: mov w8, #21 // =0x15
140-
; CHECK-NEXT: and w8, w0, w8
141-
; CHECK-NEXT: cmp w8, #16
142-
; CHECK-NEXT: cset w0, lo
133+
; CHECK-NEXT: tst w0, #0x10
134+
; CHECK-NEXT: cset w0, eq
143135
; CHECK-NEXT: ret
144136
%2 = and i32 %0, 21
145137
%3 = icmp ult i32 %2, 16
@@ -149,10 +141,9 @@ define i1 @lt32_u16_and_21(i32 %0) {
149141
define i1 @lt64_u16_and_9(i64 %0) {
150142
; CHECK-LABEL: lt64_u16_and_9:
151143
; CHECK: // %bb.0:
152-
; CHECK-NEXT: mov w8, #9 // =0x9
153-
; CHECK-NEXT: and x8, x0, x8
154-
; CHECK-NEXT: cmp x8, #16
155-
; CHECK-NEXT: cset w0, lo
144+
; CHECK-NEXT: mov x8, xzr
145+
; CHECK-NEXT: cmp x8, #0
146+
; CHECK-NEXT: cset w0, eq
156147
; CHECK-NEXT: ret
157148
%2 = and i64 %0, 9
158149
%3 = icmp ult i64 %2, 16
@@ -162,10 +153,8 @@ define i1 @lt64_u16_and_9(i64 %0) {
162153
define i1 @lt64_u16_and_23(i64 %0) {
163154
; CHECK-LABEL: lt64_u16_and_23:
164155
; CHECK: // %bb.0:
165-
; CHECK-NEXT: mov w8, #23 // =0x17
166-
; CHECK-NEXT: and x8, x0, x8
167-
; CHECK-NEXT: cmp x8, #16
168-
; CHECK-NEXT: cset w0, lo
156+
; CHECK-NEXT: tst x0, #0x10
157+
; CHECK-NEXT: cset w0, eq
169158
; CHECK-NEXT: ret
170159
%2 = and i64 %0, 23
171160
%3 = icmp ult i64 %2, 16

llvm/test/CodeGen/AArch64/signed-truncation-check.ll

+10-15
Original file line numberDiff line numberDiff line change
@@ -287,9 +287,8 @@ define i1 @add_ultcmp_bad_i16_i8_add(i16 %x, i16 %y) nounwind {
287287
; CHECK-LABEL: add_ultcmp_bad_i16_i8_add:
288288
; CHECK: // %bb.0:
289289
; CHECK-NEXT: add w8, w0, w1
290-
; CHECK-NEXT: and w8, w8, #0xffff
291-
; CHECK-NEXT: cmp w8, #256
292-
; CHECK-NEXT: cset w0, lo
290+
; CHECK-NEXT: tst w8, #0xff00
291+
; CHECK-NEXT: cset w0, eq
293292
; CHECK-NEXT: ret
294293
%tmp0 = add i16 %x, %y
295294
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
@@ -328,9 +327,8 @@ define i1 @add_ultcmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
328327
; CHECK-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo:
329328
; CHECK: // %bb.0:
330329
; CHECK-NEXT: add w8, w0, #192
331-
; CHECK-NEXT: and w8, w8, #0xffff
332-
; CHECK-NEXT: cmp w8, #256
333-
; CHECK-NEXT: cset w0, lo
330+
; CHECK-NEXT: tst w8, #0xff00
331+
; CHECK-NEXT: cset w0, eq
334332
; CHECK-NEXT: ret
335333
%tmp0 = add i16 %x, 192 ; (1U << (8-1)) + (1U << (8-1-1))
336334
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
@@ -356,9 +354,8 @@ define i1 @add_ultcmp_bad_i16_i8_magic(i16 %x) nounwind {
356354
; CHECK-LABEL: add_ultcmp_bad_i16_i8_magic:
357355
; CHECK: // %bb.0:
358356
; CHECK-NEXT: add w8, w0, #64
359-
; CHECK-NEXT: and w8, w8, #0xffff
360-
; CHECK-NEXT: cmp w8, #256
361-
; CHECK-NEXT: cset w0, lo
357+
; CHECK-NEXT: tst w8, #0xff00
358+
; CHECK-NEXT: cset w0, eq
362359
; CHECK-NEXT: ret
363360
%tmp0 = add i16 %x, 64 ; 1U << (8-1-1)
364361
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
@@ -370,9 +367,8 @@ define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind {
370367
; CHECK-LABEL: add_ultcmp_bad_i16_i4:
371368
; CHECK: // %bb.0:
372369
; CHECK-NEXT: add w8, w0, #8
373-
; CHECK-NEXT: and w8, w8, #0xffff
374-
; CHECK-NEXT: cmp w8, #16
375-
; CHECK-NEXT: cset w0, lo
370+
; CHECK-NEXT: tst w8, #0xfff0
371+
; CHECK-NEXT: cset w0, eq
376372
; CHECK-NEXT: ret
377373
%tmp0 = add i16 %x, 8 ; 1U << (4-1)
378374
%tmp1 = icmp ult i16 %tmp0, 16 ; 1U << 4
@@ -384,9 +380,8 @@ define i1 @add_ultcmp_bad_i24_i8(i24 %x) nounwind {
384380
; CHECK-LABEL: add_ultcmp_bad_i24_i8:
385381
; CHECK: // %bb.0:
386382
; CHECK-NEXT: add w8, w0, #128
387-
; CHECK-NEXT: and w8, w8, #0xffffff
388-
; CHECK-NEXT: cmp w8, #256
389-
; CHECK-NEXT: cset w0, lo
383+
; CHECK-NEXT: tst w8, #0xffff00
384+
; CHECK-NEXT: cset w0, eq
390385
; CHECK-NEXT: ret
391386
%tmp0 = add i24 %x, 128 ; 1U << (8-1)
392387
%tmp1 = icmp ult i24 %tmp0, 256 ; 1U << 8

0 commit comments

Comments
 (0)