交叉編譯支持SVE ACLE的gcc

最近在學習AArch64的SVE技術時,發現目前能在網上找到的gcc版本都不支持以SVE intrinsic的方式調用;查閱文檔後發現,GCC要到2020年發佈的GCC 10纔會支持:

 

 在github上看到gcc倉庫裏有一個名爲aarch64/sve-acle-branch的分支:

 

然後將這個分支的代碼下載下來進行交叉編譯,再用它去編譯以SVE intrinsic方式實現的SVE測試程序,發現果然可以編譯通過,也可以運行。編譯方法參考《交叉編譯用於生成aarch64指令的GCC (9.2)》,這裏有一份已經編譯好的:鏈接: https://pan.baidu.com/s/1s3FWLgwavaGqR427NcW-hA 提取碼: kpng

 

測試程序:

#include <stdlib.h>
#include <stdio.h>
#include <arm_sve.h>

// Scalar reference implementation: dst[k] = src[k] + c for every k in [0, N).
// Nothing is written when N <= 0.
void add_arrays(double * restrict dst, double *src, double c, const int N) {
    int k = 0;
    while (k < N) {
        dst[k] = src[k] + c;
        ++k;
    }
}

// SVE version driven by the loop predicate: dst[k] = src[k] + c for every
// k in [0, N). The code never assumes a particular vector length.
void vla_add_arrays(double * restrict dst, double *src, double c, const int N) {
    int64_t offset = 0;

    for (;;) {
        // Rebuild the predicate for the current chunk; stop once no lane is active.
        svbool_t active = svwhilelt_b64(offset, (int64_t)N);
        if (!svptest_any(svptrue_b64(), active)) {
            break;
        }

        svfloat64_t in = svld1(active, src + offset);
        svfloat64_t out = svadd_x(active, in, c);
        svst1(active, dst + offset, out);

        // Advance by the number of 64-bit lanes in one vector.
        offset += svcntd();
    }
}

// SVE version written as a counted for-loop: dst[i] = src[i] + c for every
// i in [0, N). Nothing is written when N <= 0.
void vla_add_arrays_2(double *dst, double *src, double c, const int N) {
    // Index with int64_t, as vla_add_arrays does. svcntd() returns a 64-bit
    // unsigned count, and adding it to a plain int could wrap to a negative
    // index when N is close to INT_MAX.
    const int64_t n = N;

    for (int64_t i = 0; i < n; i += (int64_t)svcntd()) {
        // Predicate covering the elements of this chunk that are still below n,
        // so the final partial chunk needs no scalar tail loop.
        svbool_t pg = svwhilelt_b64(i, n);
        svfloat64_t vsrc = svld1(pg, &src[i]);
        svfloat64_t vdst = svadd_x(pg, vsrc, c);
        svst1(pg, &dst[i], vdst);
    }
}

// Demo driver: fills a 100-element input, runs the scalar and both SVE
// implementations with the same addend, and prints the results side by side
// so they can be compared by eye.
int main(void) {
    enum { COUNT = 100 };

    double src[COUNT];
    double dst_serial[COUNT], dst_vla[COUNT], dst_vla2[COUNT];

    // src[k] = k / (k + 1): 0, 1/2, 2/3, ...
    for (int k = 0; k < COUNT; ++k) {
        src[k] = (double) k / ((double) k + 1);
    }

    // rand() is never seeded here, so the same element is picked on every run.
    const double c = src[rand() % COUNT];

    add_arrays(dst_serial, src, c, COUNT);
    vla_add_arrays(dst_vla, src, c, COUNT);
    vla_add_arrays_2(dst_vla2, src, c, COUNT);

    // Columns: scalar result, SVE result, SVE (for-loop) result, input, addend.
    for (int k = 0; k < COUNT; ++k) {
        printf("%f %f %f, %f, %f\n", dst_serial[k], dst_vla[k], dst_vla2[k], src[k], c);
    }
    return 0;
}

 

編譯:

 

# Cross compiler to use; adjust the path to wherever your toolchain is installed.
CC=/home/pengdl/work/SVE/cross_compile/install/bin/aarch64-linux-gnu-gcc
# -march=...+sve enables the SVE extension that the <arm_sve.h> intrinsics need.
CFLAGS=-g -Wall -march=armv8.2-a+sve -O1
# Produce a statically linked executable.
LDFLAGS=--static

# These targets do not name files, so always run them when requested.
.PHONY: all clean

all: demo

# Linked by make's built-in rule: $(CC) $(LDFLAGS) demo.o -o demo
demo: demo.o

# Explicit compile rule. A "%.o: %.c" pattern rule without a recipe would
# cancel make's built-in rule, so the recipe is spelled out here.
%.o: %.c
	$(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<

# Recipe lines must start with a tab character, not spaces.
clean:
	$(RM) *.o demo

 

 

測試運行:

$ make clean;make;
rm -f *.o demo
/home/pengdl/work/SVE/cross_compile/install/bin/aarch64-linux-gnu-gcc -g -Wall -march=armv8.2-a+sve -O1   -c -o demo.o demo.c
/home/pengdl/work/SVE/cross_compile/install/bin/aarch64-linux-gnu-gcc --static  demo.o   -o demo
pengdl@pengdl-dell:~/work/SVE/cross_compile/teset
$ qemu-aarch64 ./demo
0.988095 0.988095 0.988095, 0.000000, 0.988095
1.488095 1.488095 1.488095, 0.500000, 0.988095
1.654762 1.654762 1.654762, 0.666667, 0.988095
1.738095 1.738095 1.738095, 0.750000, 0.988095
1.788095 1.788095 1.788095, 0.800000, 0.988095
1.821429 1.821429 1.821429, 0.833333, 0.988095
1.845238 1.845238 1.845238, 0.857143, 0.988095
1.863095 1.863095 1.863095, 0.875000, 0.988095
1.876984 1.876984 1.876984, 0.888889, 0.988095
1.888095 1.888095 1.888095, 0.900000, 0.988095
... ...

 

完。

相關文章
相關標籤/搜索