Skip to content

Commit d38bff4

Browse files
authored
[AArch64] SimplifyDemandedBitsForTargetNode - add AArch64ISD::BICi handling (llvm#76644)
Fold BICi if all of the destination bits it would clear are already known to be zero.

```llvm
define <8 x i16> @haddu_known(<8 x i8> %a0, <8 x i8> %a1) {
  %x0 = zext <8 x i8> %a0 to <8 x i16>
  %x1 = zext <8 x i8> %a1 to <8 x i16>
  %hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
  %res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
```

```
haddu_known: // @haddu_known
  ushll v0.8h, v0.8b, #0
  ushll v1.8h, v1.8b, #0
  uhadd v0.8h, v0.8h, v1.8h
  bic v0.8h, #254, lsl #8   <-- this one will be removed, as we know the high bits are zero (the inputs are zero-extended)
  ret
```

Fixes llvm#53881
Fixes llvm#53622
1 parent 4cb110a commit d38bff4

File tree

2 files changed

+30
-4
lines changed

2 files changed

+30
-4
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+30
Original file line numberDiff line numberDiff line change
@@ -24555,6 +24555,18 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
2455524555
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
2455624556
return R;
2455724557
return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
24558+
case AArch64ISD::BICi: {
24559+
APInt DemandedBits =
24560+
APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits());
24561+
APInt DemandedElts =
24562+
APInt::getAllOnes(N->getValueType(0).getVectorNumElements());
24563+
24564+
if (DAG.getTargetLoweringInfo().SimplifyDemandedBits(
24565+
SDValue(N, 0), DemandedBits, DemandedElts, DCI))
24566+
return SDValue();
24567+
24568+
break;
24569+
}
2455824570
case ISD::XOR:
2455924571
return performXorCombine(N, DAG, DCI, Subtarget);
2456024572
case ISD::MUL:
@@ -27595,6 +27607,24 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
2759527607
// used - simplify to just Val.
2759627608
return TLO.CombineTo(Op, ShiftR->getOperand(0));
2759727609
}
27610+
case AArch64ISD::BICi: {
27611+
// Fold BICi to its first operand when every bit it would clear is already
// known to be zero in that operand.
27612+
SDValue Op0 = Op.getOperand(0);
27613+
KnownBits KnownOp0 =
27614+
TLO.DAG.computeKnownBits(Op0, OriginalDemandedElts, Depth + 1);
27615+
// Op0 &= ~(ConstantOperandVal(1) << ConstantOperandVal(2))
27616+
uint64_t BitsToClear = Op->getConstantOperandVal(1)
27617+
<< Op->getConstantOperandVal(2);
27618+
APInt AlreadyZeroedBitsToClear = BitsToClear & KnownOp0.Zero;
27619+
if (APInt(Known.getBitWidth(), BitsToClear)
27620+
.isSubsetOf(AlreadyZeroedBitsToClear))
27621+
return TLO.CombineTo(Op, Op0);
27622+
27623+
Known = KnownOp0 &
27624+
KnownBits::makeConstant(APInt(Known.getBitWidth(), ~BitsToClear));
27625+
27626+
return false;
27627+
}
2759827628
case ISD::INTRINSIC_WO_CHAIN: {
2759927629
if (auto ElementSize = IsSVECntIntrinsic(Op)) {
2760027630
unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();

llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll

-4
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
1212
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
1313
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
1414
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
15-
; CHECK-NEXT: bic v0.8h, #254, lsl #8
1615
; CHECK-NEXT: ret
1716
%x0 = zext <8 x i8> %a0 to <8 x i16>
1817
%x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -27,7 +26,6 @@ define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
2726
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
2827
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
2928
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
30-
; CHECK-NEXT: bic v0.8h, #254, lsl #8
3129
; CHECK-NEXT: ret
3230
%x0 = zext <8 x i8> %a0 to <8 x i16>
3331
%x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -42,7 +40,6 @@ define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
4240
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
4341
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
4442
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
45-
; CHECK-NEXT: bic v0.8h, #254, lsl #8
4643
; CHECK-NEXT: ret
4744
%x0 = zext <8 x i8> %a0 to <8 x i16>
4845
%x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -57,7 +54,6 @@ define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
5754
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
5855
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
5956
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
60-
; CHECK-NEXT: bic v0.8h, #254, lsl #8
6157
; CHECK-NEXT: ret
6258
%x0 = zext <8 x i8> %a0 to <8 x i16>
6359
%x1 = zext <8 x i8> %a1 to <8 x i16>

0 commit comments

Comments
 (0)