Detect CPU ISA features with single-file
CPU | ✅ x86, x86-64 ✅ arm, aarch64 ✅ mips ✅ powerpc ✅ s390x ✅ loongarch ✅ risc-v ✅ openrisc |
#define RUAPU_IMPLEMENTATION
#include "ruapu.h"
int main()
{
// initialize ruapu once
ruapu_init();
// now, tell me if this cpu has avx2
int has_avx2 = ruapu_supports("avx2");
// loop all supported features
const char* const* supported = ruapu_rua();
while (*supported)
{
fprintf(stderr, "%s\n", *supported);
supported++;
}
return 0;
} |
OS | ✅ Windows ✅ Linux ✅ macOS ✅ Android ✅ iOS ✅ FreeBSD ✅ NetBSD ✅ OpenBSD ✅ DragonflyBSD ✅ Solaris ✅ SyterKit | |
Compiler | ✅ GCC ✅ Clang ✅ MSVC ✅ MinGW |
- Create one
ruapu.c
for your project ruapu.c
is ONLY #define RUAPU_IMPLEMENTATION
and #include "ruapu.h"
- Other sources
#include "ruapu.h"
but NO #define RUAPU_IMPLEMENTATION
- Detect CPU ISA with single-file
sse2
,avx
,avx512f
,neon
, etc. - Detect vendor extended ISA
apple
amx
, risc-v vendor ISA, etc. - Detect richer ISA on Windows ARM
IsProcessorFeaturePresent()
returns little ISA information - Detect
x86-avx512
on macOS correctly; macOS hides it in cpuid
- Detect new CPU's ISA on old systems
they are usually not exposed in
auxv
or MISA
- Detect CPU hidden ISA
fma4
on zen1, ISA in hypervisor, etc.
CPU | ISA |
---|---|
x86 | mmx sse sse2 sse3 ssse3 sse41 sse42 sse4a xop avx f16c fma fma4 avx2 avx512f avx512bw avx512cd avx512dq avx512vl avx512vnni avx512bf16 avx512ifma avx512vbmi avx512vbmi2 avx512fp16 avx512er avx5124fmaps avx5124vnniw avxvnni avxvnniint8 avxvnniint16 avxifma amxfp16 amxbf16 amxint8 amxtile |
arm | half edsp neon vfpv4 idiv |
aarch64 | neon vfpv4 lse cpuid asimdrdm asimdhp asimddp asimdfhm bf16 i8mm frint jscvt fcma mte mte2 sve sve2 svebf16 svei8mm svef32mm svef64mm sme smef16f16 smef64f64 smei64i64 pmull crc32 aes sha1 sha2 sha3 sha512 sm3 sm4 svepmull svebitperm sveaes svesha3 svesm4 amx |
mips | msa mmi sx asx msa2 crypto |
powerpc | vsx |
s390x | zvector |
loongarch | lsx lasx |
risc-v | i m a f d c v zba zbb zbc zbs zbkb zbkc zbkx zfa zfbfmin zfh zfhmin zicond zicsr zifencei zmmul zvbb zvbc zvfh zvfhmin zvfbfmin zvfbfwma zvkb zvl32b zvl64b zvl128b zvl256b zvl512b zvl1024b xtheadba xtheadbb xtheadbs xtheadcondmov xtheadfmemidx xtheadfmv xtheadmac xtheadmemidx xtheadmempair xtheadsync xtheadvdot |
openrisc | orbis32 orbis64 orfpx32 orfpx64 orvdx64 |
Compile ruapu test program # GCC / MinGW
gcc main.c -o ruapu # Clang
clang main.c -o ruapu # MSVC
cl.exe /Fe: ruapu.exe main.c |
Run ruapu in command line ./ruapu
mmx = 1
sse = 1
sse2 = 1
sse3 = 1
ssse3 = 1
sse41 = 1
sse42 = 1
sse4a = 1
xop = 0
... more lines omitted ... |
Compile and install ruapu library # from pypi
pip3 install ruapu # from source code
pip3 install ./python |
Use ruapu in python import ruapu
ruapu.supports("avx2")
# True
ruapu.supports(isa="avx2")
# True
ruapu.rua()
# ('mmx', 'sse', 'sse2', 'sse3', 'ssse3', 'sse41', 'sse42', 'avx', 'f16c', 'fma', 'avx2') |
Compile ruapu library # from source code
cd rust
cargo build --release |
Use ruapu in Rust extern crate ruapu;
fn main() {
println!("supports neon: {}", ruapu::supports("neon").unwrap());
println!("supports avx2: {}", ruapu::supports("avx2").unwrap());
println!("rua: {:?}", ruapu::rua());
} |
Compile ruapu library # from source code
cd lua
# lua binding has been tested on Lua 5.2~5.4
luarocks make |
Use ruapu in Lua ruapu = require "ruapu";
print(ruapu.supports("mmx"));
for _, ext in ipairs(ruapu.rua()) do
print(ext);
end |
Compile ruapu library % add this to deps list
% in your rebar.config
{ruapu, "0.1.0"} |
Use ruapu in Erlang ruapu:rua().
{ok,["neon","vfpv4","asimdrdm","asimdhp","asimddp",
"asimdfhm","bf16","i8mm","pmull","crc32","aes","sha1",
"sha2","sha3","sha512","amx"]}
> ruapu:supports("neon").
true
> ruapu:supports(neon).
true
> ruapu:supports(<<"neon">>).
true
> ruapu:supports("avx2").
false
> ruapu:supports(avx2).
false
> ruapu:supports(<<"avx2">>).
false |
Compile ruapu library # from source code
cd fortran
cmake -B build
cmake --build build |
Use ruapu in Fortran program main
use ruapu, only: ruapu_init, ruapu_supports, ruapu_rua
implicit none
character(len=:), allocatable :: isa_supported(:)
integer :: i
call ruapu_init()
print *, "supports sse: ", ruapu_supports("sse")
print *, "supports neon: ", ruapu_supports("neon")
isa_supported = ruapu_rua()
do i = 1, size(isa_supported)
print *, trim(isa_supported(i))
end do
end program main
|
Compile ruapu library cd go
go build -o ruapu-go |
Use ruapu in Golang package main
import (
"fmt"
"ruapu-go/ruapu"
"strconv"
)
func main() {
ruapu.Init()
avx2Status := ruapu.Supports("avx2")
fmt.Println("avx2:" + strconv.Itoa(avx2Status))
rua := ruapu.Rua()
fmt.Println(rua)
} |
Add ruapu library to your project
|
Use ruapu in Haskell import Ruapu
-- Ruapu.rua :: IO [String]
-- Ruapu.supports :: String -> IO Bool
main = do
Ruapu.init
Ruapu.supports "mmx" >>= putStrLn . show
Ruapu.rua >>= sequence_ . map putStrLn |
Compile ruapu library cd vlang
v . |
Use ruapu in Vlang module main
import ruapu
fn main() {
ruapu.ruapu_init()
mut avx2_status := ruapu.ruapu_supports('avx2')
if avx2_status {
println('avx2: ' + avx2_status.str())
}
println(ruapu.ruapu_rua())
} |
Compile ruapu library cd pascal
sudo apt install fpc
cmake .
make
fpc ruapu.lpr |
Use ruapu in Pascal program ruapu;
uses ruapu_pascal;
var
has_avx2: integer;
supported: PPAnsiChar;
begin
// initialize ruapu once
ruapu_init();
// now, tell me if this cpu has avx2
has_avx2 := ruapu_supports('avx2');
// loop all supported features
supported := ruapu_rua();
while supported^ <> nil do
begin
writeln(supported^);
inc(supported);
end;
readln();
end.
|
Compile ruapu library and example ./gradlew build Run example java -cp \
./build/libs/ruapu-1.0-SNAPSHOT.jar \
./Example.java |
Use ruapu in Java import ruapu.Ruapu;
import java.util.*;
class Example {
public static void main(String args[]) {
Ruapu ruapu = new Ruapu();
System.out.println("avx: " + ruapu.supports("avx"));
// avx: 1
System.out.println(Arrays.toString(ruapu.rua()));
// [mmx, sse, sse2, sse3, ssse3, sse41, sse42, avx, f16c, fma, avx2]
}
}
|
Github-hosted runner result (Linux)
mmx = 1
sse = 1
sse2 = 1
sse3 = 1
ssse3 = 1
sse41 = 1
sse42 = 1
sse4a = 1
xop = 0
avx = 1
f16c = 1
fma = 1
avx2 = 1
avx512f = 0
avx512bw = 0
avx512cd = 0
avx512dq = 0
avx512vl = 0
avx512vnni = 0
avx512bf16 = 0
avx512ifma = 0
avx512vbmi = 0
avx512vbmi2 = 0
avx512fp16 = 0
avx512er = 0
avx5124fmaps = 0
avx5124vnniw = 0
avxvnni = 0
avxvnniint8 = 0
avxifma = 0
amxfp16 = 0
amxbf16 = 0
amxint8 = 0
amxtile = 0
Github-hosted runner result (macOS)
mmx = 1
sse = 1
sse2 = 1
sse3 = 1
ssse3 = 1
sse41 = 1
sse42 = 1
sse4a = 0
xop = 0
avx = 1
f16c = 1
fma = 1
avx2 = 1
avx512f = 0
avx512bw = 0
avx512cd = 0
avx512dq = 0
avx512vl = 0
avx512vnni = 0
avx512bf16 = 0
avx512ifma = 0
avx512vbmi = 0
avx512vbmi2 = 0
avx512fp16 = 0
avx512er = 0
avx5124fmaps = 0
avx5124vnniw = 0
avxvnni = 0
avxvnniint8 = 0
avxifma = 0
amxfp16 = 0
amxbf16 = 0
amxint8 = 0
amxtile = 0
Github-hosted runner result (macOS M1)
neon = 1
vfpv4 = 1
cpuid = 0
asimdhp = 1
asimddp = 1
asimdfhm = 1
bf16 = 0
i8mm = 0
sve = 0
sve2 = 0
svebf16 = 0
svei8mm = 0
svef32mm = 0
Github-hosted runner result (Windows)
mmx = 1
sse = 1
sse2 = 1
sse3 = 1
ssse3 = 1
sse41 = 1
sse42 = 1
sse4a = 1
xop = 0
avx = 1
f16c = 1
fma = 1
avx2 = 1
avx512f = 0
avx512bw = 0
avx512cd = 0
avx512dq = 0
avx512vl = 0
avx512vnni = 0
avx512bf16 = 0
avx512ifma = 0
avx512vbmi = 0
avx512vbmi2 = 0
avx512fp16 = 0
avx512er = 0
avx5124fmaps = 0
avx5124vnniw = 0
avxvnni = 0
avxvnniint8 = 0
avxifma = 0
amxfp16 = 0
amxbf16 = 0
amxint8 = 0
amxtile = 0
FreeBSD/NetBSD/OpenBSD VM result (x86_64)
mmx = 1
sse = 1
sse2 = 1
sse3 = 1
ssse3 = 1
sse41 = 1
sse42 = 1
sse4a = 1
xop = 0
avx = 1
f16c = 1
fma = 1
fma4 = 0
avx2 = 1
avx512f = 0
avx512bw = 0
avx512cd = 0
avx512dq = 0
avx512vl = 0
avx512vnni = 0
avx512bf16 = 0
avx512ifma = 0
avx512vbmi = 0
avx512vbmi2 = 0
avx512fp16 = 0
avx512er = 0
avx5124fmaps = 0
avx5124vnniw = 0
avxvnni = 0
avxvnniint8 = 0
avxifma = 0
amxfp16 = 0
amxbf16 = 0
amxint8 = 0
amxtile = 0
ruapu is implemented in C language to ensure the widest possible portability.
ruapu determines whether the CPU supports certain instruction sets by trying to execute instructions and detecting whether an Illegal Instruction
exception occurs. ruapu does not rely on the cpuid instructions and registers related to the CPU architecture, nor does it rely on the MISA
information and system calls of the operating system. This can help us get more detailed CPU ISA information.
ruapu is the abbreviation of rua-cpu, which means using various extended instructions to harass and amuse the CPU (rua!). Based on whether the CPU reacts violently (throws an illegal instruction exception), it is inferred whether the CPU supports a certain extended instruction set.
We consider the gcc builtin functions to be good practice, namely __builtin_cpu_init()
and __builtin_cpu_supports()
. ruapu follows this design, so it can serve as a 1:1 replacement for the gcc functions while supporting more operating systems and compilers, giving it better portability.
Why does SIGILL occur when executing in debugger or simulator, such as gdb
, lldb
, qemu-user
, sde
etc.
Because debuggers and simulators capture the signal and, by default, stop the program before the ruapu signal handler function runs. You can simply continue execution at that point, or configure the debugger to pass the signal through, such as handle SIGILL nostop
in gdb. ruapu technically cannot prevent programs from stopping in debuggers and emulators.
Assume that the new extended instruction set is named rua
- Add
RUAPU_INSTCODE(rua, rua-inst-hex) // rua r0,r0
and RUAPU_ISAENTRY(rua)
in ruapu.h
- Add
PRINT_ISA_SUPPORT(rua)
in main.c
to print the detection result - Add entries about
rua
in README.md - Create a pull request!
https://godbolt.org/ is a good helper to view the compiled binary code of instructions.
- ncnn High-performance neural network inference framework
- libllm Efficient inference of large language models
- @nihui Write the initial POC code; ruapu maintainer
- @kernelbin Implement exception handling for Windows
- @zchrissirhcz Detect x86 FMA4
- @MollySophia Fix C++ export symbol
- @strongtz Detect more aarch64 ISA
- @monkeyking Detect apple arm64 AMX
- @junchao-loongson Add loongarch support
- @ziyao233 Detect more risc-v ISA
- @dreamcmi Detect more risc-v ISA
- @cocoa-xu Add FreeBSD support, python support
- @YuzukiTsuru Add OpenRISC support
- @whyb Detect x86 AMX
MIT License