最近在學習AArch64的SVE技術時,發現目前能夠在網上找到的gcc版本都不支持以SVE intrinsic方式調用。查閱文檔後發現,GCC要到2020年發佈的GCC 10才會支持:
在github上看到gcc倉庫裏存在一個名爲aarch64/sve-acle-branch的分支:
然後將這個分支的代碼下載下來進行交叉編譯,再用編譯出的工具鏈去編譯以SVE intrinsic方式實現的SVE測試程序,發現果然可以編譯通過,也可以運行。編譯方法參考《交叉編譯用於生成aarch64指令的GCC (9.2)》,這裏有一份已經編譯好的:鏈接: https://pan.baidu.com/s/1s3FWLgwavaGqR427NcW-hA 提取碼: kpng
測試程序:
#include <stdlib.h> #include <stdio.h> #include <arm_sve.h> // Scalar version. void add_arrays(double * restrict dst, double *src, double c, const int N) { for (int i = 0; i < N; i++) dst[i] = src[i] + c; } // Vector version void vla_add_arrays(double * restrict dst, double *src, double c, const int N) { int64_t i = 0; svbool_t pg = svwhilelt_b64(i, (int64_t)N); while (svptest_any(svptrue_b64(), pg)) { svfloat64_t vsrc = svld1(pg, src + i); svfloat64_t vdst = svadd_x(pg, vsrc, c); svst1(pg, dst + i, vdst); i += svcntd(); pg = svwhilelt_b64(i, (int64_t)N); } } // Vector version void vla_add_arrays_2(double *dst, double *src, double c, const int N) { for (int i = 0; i < N; i += svcntd()) { svbool_t Pg = svwhilelt_b64(i, N); svfloat64_t vsrc = svld1(Pg, &src[i]); svfloat64_t vdst = svadd_x(Pg, vsrc, c); svst1(Pg, &dst[i], vdst); } } int main(void) { double src[100]; double c; double dst_serial[100], dst_vla[100], dst_vla2[100]; for (int i = 0; i < 100; ++i) { src[i] = (double) i / ((double) i + 1); } c = src[rand() % 100]; add_arrays(dst_serial, src, c, 100); vla_add_arrays(dst_vla, src, c, 100); vla_add_arrays_2(dst_vla2, src, c, 100); for (int i = 0; i < 100; ++i) { printf("%f %f %f, %f, %f\n", dst_serial[i], dst_vla[i], dst_vla2[i], src[i], c); } return 0; }
編譯:
# Cross toolchain built from GCC's aarch64/sve-acle-branch.
CC = /home/pengdl/work/SVE/cross_compile/install/bin/aarch64-linux-gnu-gcc
# +sve enables the SVE instructions and the <arm_sve.h> intrinsics.
CFLAGS = -g -Wall -march=armv8.2-a+sve -O1
# Link statically so the binary can run directly under qemu-aarch64.
LDFLAGS = --static

.PHONY: all clean

all: demo

# No recipe here: make's built-in link rule runs
# $(CC) $(LDFLAGS) demo.o -o demo
demo: demo.o

# Compile each C source into an object file. The recipe is explicit because
# a recipe-less "%.o: %.c" rule would cancel make's built-in compile rule.
%.o: %.c
	$(CC) $(CFLAGS) -c -o $@ $<

clean:
	$(RM) *.o demo
測試運行:
$ make clean;make;
rm -f *.o demo
/home/pengdl/work/SVE/cross_compile/install/bin/aarch64-linux-gnu-gcc -g -Wall -march=armv8.2-a+sve -O1 -c -o demo.o demo.c
/home/pengdl/work/SVE/cross_compile/install/bin/aarch64-linux-gnu-gcc --static demo.o -o demo
pengdl@pengdl-dell:~/work/SVE/cross_compile/teset
$ qemu-aarch64 ./demo
0.988095 0.988095 0.988095, 0.000000, 0.988095
1.488095 1.488095 1.488095, 0.500000, 0.988095
1.654762 1.654762 1.654762, 0.666667, 0.988095
1.738095 1.738095 1.738095, 0.750000, 0.988095
1.788095 1.788095 1.788095, 0.800000, 0.988095
1.821429 1.821429 1.821429, 0.833333, 0.988095
1.845238 1.845238 1.845238, 0.857143, 0.988095
1.863095 1.863095 1.863095, 0.875000, 0.988095
1.876984 1.876984 1.876984, 0.888889, 0.988095
1.888095 1.888095 1.888095, 0.900000, 0.988095
... ...
完。