Skip to content

Commit 94aeaa1

Browse files
committed
aarch64: Support FEAT_LSFE
1 parent 813bf8f commit 94aeaa1

File tree

12 files changed

+400
-16
lines changed

12 files changed

+400
-16
lines changed

.github/.cspell/project-dictionary.txt

+4
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ lcgr
6969
ldar
7070
ldaxp
7171
ldclrp
72+
ldfadd
73+
ldfmaxnm
74+
ldfminnm
7275
ldiapp
7376
ldrexd
7477
ldsetp
@@ -182,6 +185,7 @@ versatilepb
182185
virt
183186
vmlinux
184187
vmovdqa
188+
vreg
185189
vtable
186190
vtables
187191
wfxt

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ See the [`atomic128` module's readme](https://github.com/taiki-e/portable-atomic
7575
- When unstable `--cfg portable_atomic_unstable_f128` is also enabled, `AtomicF128` for [unstable `f128`](https://github.com/rust-lang/rust/issues/116909) is also provided.
7676

7777
Note:
78-
- Most of `fetch_*` operations of atomic floats are implemented using CAS loops, which can be slower than equivalent operations of atomic integers. (AArch64 with FEAT_LSFE and GPU targets have atomic instructions for float, [so we plan to use these instructions for them in the future.](https://github.com/taiki-e/portable-atomic/issues/34))
78+
- Atomic floats' `fetch_{add,sub,min,max}` are usually implemented using CAS loops, which can be slower than the equivalent operations of atomic integers. As an exception, AArch64 with FEAT_LSFE and GPU targets have atomic float instructions, and we use them on AArch64 when the `lsfe` target feature is available at compile-time. We [plan to use atomic float instructions for GPU targets as well in the future](https://github.com/taiki-e/portable-atomic/issues/34).
7979
- Unstable cfgs are outside of the normal semver guarantees and minor or patch versions of portable-atomic may make breaking changes to them at any time.
8080

8181
- **`std`**<br>

build.rs

+5-2
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ fn main() {
4848

4949
if version.minor >= 80 {
5050
println!(
51-
r#"cargo:rustc-check-cfg=cfg(target_feature,values("fast-serialization","load-store-on-cond","distinct-ops","miscellaneous-extensions-3"))"#
51+
r#"cargo:rustc-check-cfg=cfg(target_feature,values("lsfe","fast-serialization","load-store-on-cond","distinct-ops","miscellaneous-extensions-3"))"#
5252
);
5353

5454
// Custom cfgs set by build script. Not public API.
@@ -59,7 +59,7 @@ fn main() {
5959
// TODO: handle multi-line target_feature_fallback
6060
// grep -F 'target_feature_fallback("' build.rs | grep -Ev '^ *//' | sed -E 's/^.*target_feature_fallback\(//; s/",.*$/"/' | LC_ALL=C sort -u | tr '\n' ',' | sed -E 's/,$/\n/'
6161
println!(
62-
r#"cargo:rustc-check-cfg=cfg(portable_atomic_target_feature,values("cmpxchg16b","distinct-ops","fast-serialization","load-store-on-cond","lse","lse128","lse2","mclass","miscellaneous-extensions-3","quadword-atomics","rcpc3","v6","zaamo","zabha","zacas"))"#
62+
r#"cargo:rustc-check-cfg=cfg(portable_atomic_target_feature,values("cmpxchg16b","distinct-ops","fast-serialization","load-store-on-cond","lse","lse128","lse2","lsfe","mclass","miscellaneous-extensions-3","quadword-atomics","rcpc3","v6","zaamo","zabha","zacas"))"#
6363
);
6464
}
6565

@@ -286,6 +286,9 @@ fn main() {
286286
target_feature_fallback("lse", lse);
287287
}
288288
}
289+
// As of rustc 1.85, target_feature "lsfe" is not available on the rustc side:
290+
// https://github.com/rust-lang/rust/blob/1.85.0/compiler/rustc_target/src/target_features.rs
291+
target_feature_fallback("lsfe", false);
289292

290293
// As of Apple M1/M1 Pro, on Apple hardware, CAS-loop-based RMW is much slower than
291294
// LL/SC-loop-based RMW: https://github.com/taiki-e/portable-atomic/pull/89

src/imp/atomic128/aarch64.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -434,10 +434,10 @@ macro_rules! atomic_rmw_inst {
434434
};
435435
($op:ident, $order:ident, write = $write:ident) => {
436436
match $order {
437-
Ordering::Relaxed => $op!("2", ""),
438-
Ordering::Acquire => $op!("a", ""),
439-
Ordering::Release => $op!("6", ""),
440-
Ordering::AcqRel => $op!("e", ""),
437+
Ordering::Relaxed => $op!("2", ""), // ""
438+
Ordering::Acquire => $op!("a", ""), // "a"
439+
Ordering::Release => $op!("6", ""), // "l"
440+
Ordering::AcqRel => $op!("e", ""), // "al"
441441
// In MSVC environments, SeqCst stores/writes need fences after writes.
442442
// https://reviews.llvm.org/D141748
443443
#[cfg(target_env = "msvc")]

src/imp/detect/aarch64_aa64reg.rs

+60-7
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,20 @@ include!("common.rs");
4444
struct AA64Reg {
4545
aa64isar0: u64,
4646
aa64isar1: u64,
47+
#[cfg(test)]
48+
aa64isar3: u64,
4749
aa64mmfr2: u64,
4850
}
4951

5052
#[cold]
5153
fn _detect(info: &mut CpuInfo) {
52-
let AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 } = imp::aa64reg();
54+
let AA64Reg {
55+
aa64isar0,
56+
aa64isar1,
57+
#[cfg(test)]
58+
aa64isar3,
59+
aa64mmfr2,
60+
} = imp::aa64reg();
5361

5462
// ID_AA64ISAR0_EL1, AArch64 Instruction Set Attribute Register 0
5563
// https://developer.arm.com/documentation/ddi0601/2024-12/AArch64-Registers/ID-AA64ISAR0-EL1--AArch64-Instruction-Set-Attribute-Register-0
@@ -75,6 +83,14 @@ fn _detect(info: &mut CpuInfo) {
7583
if extract(aa64isar1, 23, 20) >= 0b0011 {
7684
info.set(CpuInfoFlag::rcpc3);
7785
}
86+
// ID_AA64ISAR3_EL1, AArch64 Instruction Set Attribute Register 3
87+
// https://developer.arm.com/documentation/ddi0601/2024-12/AArch64-Registers/ID-AA64ISAR3-EL1--AArch64-Instruction-Set-Attribute-Register-3
88+
// LSFE, bits [19:16]
89+
// > FEAT_LSFE implements the functionality identified by the value 0b0001
90+
#[cfg(test)]
91+
if extract(aa64isar3, 19, 16) >= 0b0001 {
92+
info.set(CpuInfoFlag::lsfe);
93+
}
7894
// ID_AA64MMFR2_EL1, AArch64 Memory Model Feature Register 2
7995
// https://developer.arm.com/documentation/ddi0601/2024-12/AArch64-Registers/ID-AA64MMFR2-EL1--AArch64-Memory-Model-Feature-Register-2
8096
// AT, bits [35:32]
@@ -115,13 +131,27 @@ mod imp {
115131
out(reg) aa64isar1,
116132
options(pure, nomem, nostack, preserves_flags),
117133
);
134+
#[cfg(test)]
135+
let aa64isar3: u64;
136+
#[cfg(test)]
137+
asm!(
138+
"mrs {0}, ID_AA64ISAR3_EL1",
139+
out(reg) aa64isar3,
140+
options(pure, nomem, nostack, preserves_flags),
141+
);
118142
let aa64mmfr2: u64;
119143
asm!(
120144
"mrs {0}, ID_AA64MMFR2_EL1",
121145
out(reg) aa64mmfr2,
122146
options(pure, nomem, nostack, preserves_flags),
123147
);
124-
AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 }
148+
AA64Reg {
149+
aa64isar0,
150+
aa64isar1,
151+
#[cfg(test)]
152+
aa64isar3,
153+
aa64mmfr2,
154+
}
125155
}
126156
}
127157
}
@@ -213,6 +243,8 @@ mod imp {
213243
Some(AA64Reg {
214244
aa64isar0: buf.ac_aa64isar0,
215245
aa64isar1: buf.ac_aa64isar1,
246+
#[cfg(test)]
247+
aa64isar3: 0,
216248
aa64mmfr2: buf.ac_aa64mmfr2,
217249
})
218250
}
@@ -226,7 +258,13 @@ mod imp {
226258
// https://github.com/golang/sys/commit/ef9fd89ba245e184bdd308f7f2b4f3c551fa5b0f
227259
match sysctl_cpu_id(c!("machdep.cpu0.cpu_id")) {
228260
Some(cpu_id) => cpu_id,
229-
None => AA64Reg { aa64isar0: 0, aa64isar1: 0, aa64mmfr2: 0 },
261+
None => AA64Reg {
262+
aa64isar0: 0,
263+
aa64isar1: 0,
264+
#[cfg(test)]
265+
aa64isar3: 0,
266+
aa64mmfr2: 0,
267+
},
230268
}
231269
}
232270
}
@@ -285,7 +323,13 @@ mod imp {
285323
let aa64isar0 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64ISAR0]).unwrap_or(0);
286324
let aa64isar1 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64ISAR1]).unwrap_or(0);
287325
let aa64mmfr2 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64MMFR2]).unwrap_or(0);
288-
AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 }
326+
AA64Reg {
327+
aa64isar0,
328+
aa64isar1,
329+
#[cfg(test)]
330+
aa64isar3: 0,
331+
aa64mmfr2,
332+
}
289333
}
290334

291335
fn sysctl64(mib: &[ffi::c_int]) -> Option<u64> {
@@ -330,11 +374,12 @@ mod tests {
330374
#[test]
331375
#[cfg_attr(portable_atomic_test_detect_false, ignore)]
332376
fn test_aa64reg() {
333-
let AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 } = imp::aa64reg();
377+
let AA64Reg { aa64isar0, aa64isar1, aa64isar3, aa64mmfr2 } = imp::aa64reg();
334378
test_helper::eprintln_nocapture!(
335-
"aa64isar0={},aa64isar1={},aa64mmfr2={}",
379+
"aa64isar0={},aa64isar1={},aa64isar3={},aa64mmfr2={}",
336380
aa64isar0,
337381
aa64isar1,
382+
aa64isar3,
338383
aa64mmfr2,
339384
);
340385
let atomic = extract(aa64isar0, 23, 20);
@@ -353,6 +398,12 @@ mod tests {
353398
} else {
354399
assert!(lrcpc < 0b0011, "{}", lrcpc);
355400
}
401+
let lsfe = extract(aa64isar3, 19, 16);
402+
if detect().lsfe() {
403+
assert_eq!(lsfe, 0b0001);
404+
} else {
405+
assert_eq!(lsfe, 0b0000);
406+
}
356407
let at = extract(aa64mmfr2, 35, 32);
357408
if detect().lse2() {
358409
assert_eq!(at, 0b0001);
@@ -484,6 +535,7 @@ mod tests {
484535
Ok(AA64Reg {
485536
aa64isar0: buf.ac_aa64isar0,
486537
aa64isar1: buf.ac_aa64isar1,
538+
aa64isar3: 0,
487539
aa64mmfr2: buf.ac_aa64mmfr2,
488540
})
489541
}
@@ -520,10 +572,11 @@ mod tests {
520572
}
521573
}
522574

523-
let AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 } = imp::aa64reg();
575+
let AA64Reg { aa64isar0, aa64isar1, aa64isar3, aa64mmfr2 } = imp::aa64reg();
524576
let sysctl_output = SysctlMachdepOutput::new();
525577
assert_eq!(aa64isar0, sysctl_output.field("machdep.id_aa64isar0").unwrap_or(0));
526578
assert_eq!(aa64isar1, sysctl_output.field("machdep.id_aa64isar1").unwrap_or(0));
579+
assert_eq!(aa64isar3, sysctl_output.field("machdep.id_aa64isar3").unwrap_or(0));
527580
assert_eq!(aa64mmfr2, sysctl_output.field("machdep.id_aa64mmfr2").unwrap_or(0));
528581
}
529582
}

src/imp/detect/aarch64_apple.rs

+3
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ fn _detect(info: &mut CpuInfo) {
9898
check!(lse, "hw.optional.arm.FEAT_LSE" || "hw.optional.armv8_1_atomics");
9999
check!(lse2, "hw.optional.arm.FEAT_LSE2");
100100
check!(lse128, "hw.optional.arm.FEAT_LSE128");
101+
#[cfg(test)]
102+
check!(lsfe, "hw.optional.arm.FEAT_LSFE");
101103
check!(rcpc3, "hw.optional.arm.FEAT_LRCPC3");
102104
}
103105

@@ -257,6 +259,7 @@ mod tests {
257259
(c!("hw.optional.armv8_1_atomics"), Some(1)),
258260
(c!("hw.optional.arm.FEAT_LSE2"), Some(1)),
259261
(c!("hw.optional.arm.FEAT_LSE128"), None),
262+
(c!("hw.optional.arm.FEAT_LSFE"), None),
260263
(c!("hw.optional.arm.FEAT_LRCPC"), Some(1)),
261264
(c!("hw.optional.arm.FEAT_LRCPC2"), Some(1)),
262265
(c!("hw.optional.arm.FEAT_LRCPC3"), None),

src/imp/detect/common.rs

+7
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,13 @@ flags! {
119119
// > If FEAT_LSE128 is implemented, then FEAT_LSE is implemented.
120120
#[cfg_attr(not(test), allow(dead_code))]
121121
lse128("lse128", any(target_feature /* nightly */, portable_atomic_target_feature)),
122+
// FEAT_LSFE, Large System Float Extension
123+
// https://developer.arm.com/documentation/109697/2024_12/Feature-descriptions/The-Armv9-6-architecture-extension
124+
// > This feature is supported in AArch64 state only.
125+
// > FEAT_LSFE is OPTIONAL from Armv9.3.
126+
// > If FEAT_LSFE is implemented, then FEAT_FP is implemented.
127+
#[cfg(test)]
128+
lsfe("lsfe", any(target_feature /* N/A */, portable_atomic_target_feature)),
122129
}
123130

124131
// LLVM definitions: https://github.com/llvm/llvm-project/blob/llvmorg-20.1.0/llvm/lib/Target/PowerPC/PPC.td

0 commit comments

Comments
 (0)