/*
 * References:
 * https://blog.csdn.net/liujiayu2/article/details/39964813
 * https://blog.csdn.net/u010839382/article/details/52743664
 * http://www.javashuo.com/article/p-xlnniefl-r.html
 * https://blog.csdn.net/yutianzuijin/article/details/79944292html
 */
#include <stdio.h>
#include <conio.h>
#include <time.h>
#include <windows.h>
#include <xmmintrin.h>
/*
 * Scalar reference implementation of the dot product.
 *
 * A, B: input arrays, each holding at least n floats; they are only read.
 * n:    number of element pairs to multiply; n <= 0 yields 0.
 *
 * Returns the sum of A[i] * B[i] for i in [0, n), accumulated in single
 * precision in index order.
 */
float dot(const float* A, const float* B, int n)
{
    float sum = 0.0f;
    int i;

    for (i = 0; i < n; i++)
    {
        sum += A[i] * B[i];
    }
    return sum;
}
/*
 * SSE implementation of the dot product.
 *
 * A, B: input arrays, each holding at least n floats; they are only read.
 *       Unaligned loads are used, so no particular alignment is required.
 * n:    number of element pairs to multiply; n <= 0 yields 0.
 *
 * Full groups of four elements are multiplied and accumulated lane-wise in
 * an __m128 register; the four lanes are then added together and any
 * remaining 1-3 trailing elements are handled with scalar code.
 *
 * Returns the single-precision sum of A[i] * B[i] for i in [0, n).
 */
float dot_sse(const float* A, const float* B, int n)
{
    __m128 acc = _mm_setzero_ps();
    float lanes[4];
    float sum_s;
    int i = 0;

    /* Vector part: written as n - i >= 4 so that no load ever reaches
     * past element n - 1 and the comparison cannot overflow. */
    for (; n - i >= 4; i += 4)
    {
        __m128 aa = _mm_loadu_ps(A + i);
        __m128 bb = _mm_loadu_ps(B + i);
        acc = _mm_add_ps(acc, _mm_mul_ps(aa, bb));
    }

    /* Horizontal sum through a plain float array instead of the
     * compiler-specific m128_f32 member. */
    _mm_storeu_ps(lanes, acc);
    sum_s = lanes[0] + lanes[1] + lanes[2] + lanes[3];

    /* Scalar tail for the elements that do not fill a whole vector. */
    for (; i < n; i++)
    {
        sum_s += A[i] * B[i];
    }
    return sum_s;
}
/*
 * Benchmark driver: times `count` calls of dot() and of dot_sse() on the same
 * pair of 4-element vectors using the Windows high-resolution performance
 * counter, then prints both results, both elapsed times in seconds, and the
 * scalar/SSE time ratio. Waits for a key press before exiting.
 */
int main(void)
{
    int i;
    int count = 50000;
    float sum = 0.0f, sum_sse = 0.0f;
    double time_c, time_sse;
    LARGE_INTEGER start;
    LARGE_INTEGER end;
    LARGE_INTEGER freq;
    float A[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
    float B[4] = { 6.0f, 7.0f, 8.0f, 9.0f };
    /* Element count derived from the array so it cannot drift from the
     * declaration above. */
    int n = (int)(sizeof A / sizeof A[0]);

    /* Counter ticks per second; used to convert tick deltas to seconds. */
    QueryPerformanceFrequency(&freq);

    /* Scalar version. Only the last result is kept; the repetition exists
     * to accumulate a measurable amount of time. */
    QueryPerformanceCounter(&start);
    for (i = 0; i < count; i++)
    {
        sum = dot(A, B, n);
    }
    QueryPerformanceCounter(&end);
    time_c = (double)(end.QuadPart - start.QuadPart) / (double)(freq.QuadPart);

    /* SSE version, measured the same way. */
    QueryPerformanceCounter(&start);
    for (i = 0; i < count; i++)
    {
        sum_sse = dot_sse(A, B, n);
    }
    QueryPerformanceCounter(&end);
    time_sse = (double)(end.QuadPart - start.QuadPart) / (double)(freq.QuadPart);

    printf("sum_C: %f, sum_sse: %f\n", sum, sum_sse);
    printf("time_c: %f, time_sse: %f\n", time_c, time_sse);
    /* Avoid dividing by zero when the SSE loop finished within a single
     * counter tick. */
    if (time_sse > 0.0)
    {
        printf("ratio: %f", time_c / time_sse);
    }
    else
    {
        printf("ratio: n/a (time_sse is zero)");
    }
    _getch();
    return 0;
}