SIMD homework
This commit is contained in:
parent
5c97a99839
commit
46ee06406f
@ -40,6 +40,13 @@ extern "C" void read_1x2_high(char *buffer, u64 size);
|
|||||||
extern "C" void read_2x2(char *buffer, u64 size);
|
extern "C" void read_2x2(char *buffer, u64 size);
|
||||||
extern "C" void read_4x2(char *buffer, u64 size);
|
extern "C" void read_4x2(char *buffer, u64 size);
|
||||||
extern "C" void read_8x2(char *buffer, u64 size);
|
extern "C" void read_8x2(char *buffer, u64 size);
|
||||||
|
// Read kernels with C linkage; their definitions are not in this translation
// unit. Each takes a buffer pointer and a byte count.
extern "C" {
void read_4x2_simd(char *buffer, u64 size);
void read_8x2_simd(char *buffer, u64 size);
void read_16x2_simd(char *buffer, u64 size);
void read_32x2_simd_offset(char *buffer, u64 size);
void read_32x2_simd_no_offset(char *buffer, u64 size);
void read_16x4_simd(char *buffer, u64 size);
void read_32x4_simd(char *buffer, u64 size);
}
|
||||||
|
|
||||||
void test_fread(reptester *tester, alloc_type type);
|
void test_fread(reptester *tester, alloc_type type);
|
||||||
void test_read(reptester *tester, alloc_type type);
|
void test_read(reptester *tester, alloc_type type);
|
||||||
@ -75,6 +82,13 @@ void test_read_1x2_high(reptester *tester, alloc_type type);
|
|||||||
void test_read_2x2(reptester *tester, alloc_type type);
|
void test_read_2x2(reptester *tester, alloc_type type);
|
||||||
void test_read_4x2(reptester *tester, alloc_type type);
|
void test_read_4x2(reptester *tester, alloc_type type);
|
||||||
void test_read_8x2(reptester *tester, alloc_type type);
|
void test_read_8x2(reptester *tester, alloc_type type);
|
||||||
|
void test_read_4x2_simd(reptester *tester, alloc_type type);
|
||||||
|
void test_read_8x2_simd(reptester *tester, alloc_type type);
|
||||||
|
void test_read_16x2_simd(reptester *tester, alloc_type type);
|
||||||
|
void test_read_32x2_simd_offset(reptester *tester, alloc_type type);
|
||||||
|
void test_read_32x2_simd_no_offset(reptester *tester, alloc_type type);
|
||||||
|
void test_read_16x4_simd(reptester *tester, alloc_type type);
|
||||||
|
void test_read_32x4_simd(reptester *tester, alloc_type type);
|
||||||
u64 get_file_length(FILE *fp);
|
u64 get_file_length(FILE *fp);
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
@ -163,11 +177,20 @@ int main(int argc, char *argv[]) {
|
|||||||
// {{"WRITE 3", "WRITE 3 WITH MALLOC"}, test_write_3},
|
// {{"WRITE 3", "WRITE 3 WITH MALLOC"}, test_write_3},
|
||||||
// {{"WRITE 4", "WRITE 4 WITH MALLOC"}, test_write_4},
|
// {{"WRITE 4", "WRITE 4 WITH MALLOC"}, test_write_4},
|
||||||
// {{"WRITE 8", "WRITE 8 WITH MALLOC"}, test_write_8},
|
// {{"WRITE 8", "WRITE 8 WITH MALLOC"}, test_write_8},
|
||||||
{{"READ 1x2 LOW", "READ 1x2 LOW WITH MALLOC"}, test_read_1x2_low},
|
// {{"READ 1x2 LOW", "READ 1x2 LOW WITH MALLOC"}, test_read_1x2_low},
|
||||||
{{"READ 1x2 HIGH", "READ 1x2 HIGH WITH MALLOC"}, test_read_1x2_high},
|
// {{"READ 1x2 HIGH", "READ 1x2 HIGH WITH MALLOC"}, test_read_1x2_high},
|
||||||
{{"READ 2x2", "READ 2x2 WITH MALLOC"}, test_read_2x2},
|
// {{"READ 2x2", "READ 2x2 WITH MALLOC"}, test_read_2x2},
|
||||||
{{"READ 4x2", "READ 4x2 WITH MALLOC"}, test_read_4x2},
|
// {{"READ 4x2", "READ 4x2 WITH MALLOC"}, test_read_4x2},
|
||||||
{{"READ 8x2", "READ 8x2 WITH MALLOC"}, test_read_8x2},
|
// {{"READ 8x2", "READ 8x2 WITH MALLOC"}, test_read_8x2},
|
||||||
|
{{"READ 4x2_simd", "READ 4x2_simd WITH MALLOC"}, test_read_4x2_simd},
|
||||||
|
{{"READ 8x2_simd", "READ 8x2_simd WITH MALLOC"}, test_read_8x2_simd},
|
||||||
|
{{"READ 16x2_simd", "READ 16x2_simd WITH MALLOC"}, test_read_16x2_simd},
|
||||||
|
{{"READ 32x2_simd_offset", "READ 32x2_simd_offset WITH MALLOC"},
|
||||||
|
test_read_32x2_simd_offset},
|
||||||
|
{{"READ 32x2_simd_no_offset", "READ 32x2_simd_no_offset WITH MALLOC"},
|
||||||
|
test_read_32x2_simd_no_offset},
|
||||||
|
{{"READ 16x4_simd", "READ 16x4_simd WITH MALLOC"}, test_read_16x4_simd},
|
||||||
|
{{"READ 32x4_simd", "READ 32x4_simd WITH MALLOC"}, test_read_32x4_simd},
|
||||||
};
|
};
|
||||||
|
|
||||||
tester.params.read_size = get_file_length(fp);
|
tester.params.read_size = get_file_length(fp);
|
||||||
@ -1063,6 +1086,184 @@ void test_read_8x2(reptester *tester, alloc_type type) {
|
|||||||
handle_free(tester, type);
|
handle_free(tester, type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs read_4x2_simd once over the tester's buffer and stores the outcome in
// tester->results: the byte count passed to the kernel, the difference of the
// two read_cpu_timer() readings, and the difference of the two
// page_fault_count() readings. The measured interval starts before
// handle_alloc(), so allocation is part of the timing.
void test_read_4x2_simd(reptester *tester, alloc_type type) {
  const u64 timer_begin = read_cpu_timer();
  const u64 faults_begin = page_fault_count();

  handle_alloc(tester, type);

  const u64 byte_count = tester->params.read_size * tester->params.read_count;
  read_4x2_simd(tester->params.buffer, byte_count);

  // Sample the fault counter first, then the timer, mirroring the start order.
  const u64 faults_end = page_fault_count();
  const u64 timer_end = read_cpu_timer();

  tester->results = {
      byte_count,
      timer_end - timer_begin,
      faults_end - faults_begin,
  };

  handle_free(tester, type);
}
|
||||||
|
|
||||||
|
// Runs read_8x2_simd once over the tester's buffer and stores the outcome in
// tester->results: the byte count passed to the kernel, the difference of the
// two read_cpu_timer() readings, and the difference of the two
// page_fault_count() readings. The measured interval starts before
// handle_alloc(), so allocation is part of the timing.
void test_read_8x2_simd(reptester *tester, alloc_type type) {
  const u64 timer_begin = read_cpu_timer();
  const u64 faults_begin = page_fault_count();

  handle_alloc(tester, type);

  const u64 byte_count = tester->params.read_size * tester->params.read_count;
  read_8x2_simd(tester->params.buffer, byte_count);

  // Sample the fault counter first, then the timer, mirroring the start order.
  const u64 faults_end = page_fault_count();
  const u64 timer_end = read_cpu_timer();

  tester->results = {
      byte_count,
      timer_end - timer_begin,
      faults_end - faults_begin,
  };

  handle_free(tester, type);
}
|
||||||
|
|
||||||
|
// Runs read_16x2_simd once over the tester's buffer and stores the outcome in
// tester->results: the byte count passed to the kernel, the difference of the
// two read_cpu_timer() readings, and the difference of the two
// page_fault_count() readings. The measured interval starts before
// handle_alloc(), so allocation is part of the timing.
void test_read_16x2_simd(reptester *tester, alloc_type type) {
  const u64 timer_begin = read_cpu_timer();
  const u64 faults_begin = page_fault_count();

  handle_alloc(tester, type);

  const u64 byte_count = tester->params.read_size * tester->params.read_count;
  read_16x2_simd(tester->params.buffer, byte_count);

  // Sample the fault counter first, then the timer, mirroring the start order.
  const u64 faults_end = page_fault_count();
  const u64 timer_end = read_cpu_timer();

  tester->results = {
      byte_count,
      timer_end - timer_begin,
      faults_end - faults_begin,
  };

  handle_free(tester, type);
}
|
||||||
|
|
||||||
|
// Runs read_32x2_simd_offset once over the tester's buffer and stores the
// outcome in tester->results: the byte count passed to the kernel, the
// difference of the two read_cpu_timer() readings, and the difference of the
// two page_fault_count() readings. The measured interval starts before
// handle_alloc(), so allocation is part of the timing.
void test_read_32x2_simd_offset(reptester *tester, alloc_type type) {
  const u64 timer_begin = read_cpu_timer();
  const u64 faults_begin = page_fault_count();

  handle_alloc(tester, type);

  const u64 byte_count = tester->params.read_size * tester->params.read_count;
  read_32x2_simd_offset(tester->params.buffer, byte_count);

  // Sample the fault counter first, then the timer, mirroring the start order.
  const u64 faults_end = page_fault_count();
  const u64 timer_end = read_cpu_timer();

  tester->results = {
      byte_count,
      timer_end - timer_begin,
      faults_end - faults_begin,
  };

  handle_free(tester, type);
}
|
||||||
|
|
||||||
|
// Runs read_32x2_simd_no_offset once over the tester's buffer and stores the
// outcome in tester->results: the byte count passed to the kernel, the
// difference of the two read_cpu_timer() readings, and the difference of the
// two page_fault_count() readings. The measured interval starts before
// handle_alloc(), so allocation is part of the timing.
void test_read_32x2_simd_no_offset(reptester *tester, alloc_type type) {
  const u64 timer_begin = read_cpu_timer();
  const u64 faults_begin = page_fault_count();

  handle_alloc(tester, type);

  const u64 byte_count = tester->params.read_size * tester->params.read_count;
  read_32x2_simd_no_offset(tester->params.buffer, byte_count);

  // Sample the fault counter first, then the timer, mirroring the start order.
  const u64 faults_end = page_fault_count();
  const u64 timer_end = read_cpu_timer();

  tester->results = {
      byte_count,
      timer_end - timer_begin,
      faults_end - faults_begin,
  };

  handle_free(tester, type);
}
|
||||||
|
|
||||||
|
// Runs read_16x4_simd once over the tester's buffer and stores the outcome in
// tester->results: the byte count passed to the kernel, the difference of the
// two read_cpu_timer() readings, and the difference of the two
// page_fault_count() readings. The measured interval starts before
// handle_alloc(), so allocation is part of the timing.
void test_read_16x4_simd(reptester *tester, alloc_type type) {
  const u64 timer_begin = read_cpu_timer();
  const u64 faults_begin = page_fault_count();

  handle_alloc(tester, type);

  const u64 byte_count = tester->params.read_size * tester->params.read_count;
  read_16x4_simd(tester->params.buffer, byte_count);

  // Sample the fault counter first, then the timer, mirroring the start order.
  const u64 faults_end = page_fault_count();
  const u64 timer_end = read_cpu_timer();

  tester->results = {
      byte_count,
      timer_end - timer_begin,
      faults_end - faults_begin,
  };

  handle_free(tester, type);
}
|
||||||
|
|
||||||
|
// Runs read_32x4_simd once over the tester's buffer and stores the outcome in
// tester->results: the byte count passed to the kernel, the difference of the
// two read_cpu_timer() readings, and the difference of the two
// page_fault_count() readings. The measured interval starts before
// handle_alloc(), so allocation is part of the timing.
void test_read_32x4_simd(reptester *tester, alloc_type type) {
  const u64 timer_begin = read_cpu_timer();
  const u64 faults_begin = page_fault_count();

  handle_alloc(tester, type);

  const u64 byte_count = tester->params.read_size * tester->params.read_count;
  read_32x4_simd(tester->params.buffer, byte_count);

  // Sample the fault counter first, then the timer, mirroring the start order.
  const u64 faults_end = page_fault_count();
  const u64 timer_end = read_cpu_timer();

  tester->results = {
      byte_count,
      timer_end - timer_begin,
      faults_end - faults_begin,
  };

  handle_free(tester, type);
}
|
||||||
|
|
||||||
u64 get_file_length(FILE *fp) {
|
u64 get_file_length(FILE *fp) {
|
||||||
if (!fp) {
|
if (!fp) {
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -29,6 +29,13 @@ global read_1x2_high
|
|||||||
global read_2x2
|
global read_2x2
|
||||||
global read_4x2
|
global read_4x2
|
||||||
global read_8x2
|
global read_8x2
|
||||||
|
global read_4x2_simd
|
||||||
|
global read_8x2_simd
|
||||||
|
global read_16x2_simd
|
||||||
|
global read_32x2_simd_offset
|
||||||
|
global read_32x2_simd_no_offset
|
||||||
|
global read_16x4_simd
|
||||||
|
global read_32x4_simd
|
||||||
|
|
||||||
mov_all_bytes_asm:
|
mov_all_bytes_asm:
|
||||||
xor rax, rax
|
xor rax, rax
|
||||||
@ -390,3 +397,98 @@ read_8x2:
|
|||||||
jnle .loop
|
jnle .loop
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
; read_4x2_simd(buffer, size)
; Each iteration performs two 4-byte loads, from [rdi] and [rdi + 4], and adds
; 8 to the running count in rax until rax >= rsi. rdi is never advanced, so the
; same 8 bytes are re-read every iteration; the loaded values are discarded.
; The bound is tested after the loads, so the body runs at least once.
; NOTE(review): assumes the System V AMD64 convention (rdi = buffer,
; rsi = size) -- confirm against the build target.
read_4x2_simd:
    xor eax, eax                ; 32-bit write zero-extends: rax = 0

    align 64
.next_pair:
    mov r8d, dword [rdi]
    mov r8d, dword [rdi + 4]
    add rax, 8
    cmp rax, rsi
    jb .next_pair

    ret
|
||||||
|
|
||||||
|
; read_8x2_simd(buffer, size)
; Each iteration performs two 8-byte loads, from [rdi] and [rdi + 8], and adds
; 16 to the running count in rax until rax >= rsi. rdi is never advanced, so
; the same 16 bytes are re-read every iteration; the loaded values are
; discarded. The bound is tested after the loads, so the body runs at least
; once.
; NOTE(review): assumes the System V AMD64 convention (rdi = buffer,
; rsi = size) -- confirm against the build target.
read_8x2_simd:
    xor eax, eax                ; 32-bit write zero-extends: rax = 0

    align 64
.next_pair:
    mov r8, qword [rdi]
    mov r8, qword [rdi + 8]
    add rax, 16
    cmp rax, rsi
    jb .next_pair

    ret
|
||||||
|
|
||||||
|
; read_16x2_simd(buffer, size)
; Each iteration performs two unaligned 16-byte loads into xmm0, from [rdi] and
; [rdi + 16], and adds 32 to the running count in rax until rax >= rsi. rdi is
; never advanced, so the same 32 bytes are re-read every iteration; the loaded
; values are discarded. The bound is tested after the loads, so the body runs
; at least once.
; NOTE(review): assumes the System V AMD64 convention (rdi = buffer,
; rsi = size) -- confirm against the build target.
read_16x2_simd:
    xor eax, eax                ; 32-bit write zero-extends: rax = 0

    align 64
.next_pair:
    vmovdqu xmm0, oword [rdi]
    vmovdqu xmm0, oword [rdi + 16]
    add rax, 32
    cmp rax, rsi
    jb .next_pair

    ret
|
||||||
|
|
||||||
|
; read_32x2_simd_offset(buffer, size)
; Each iteration performs two unaligned 32-byte loads into ymm0, from [rdi] and
; [rdi + 32], and adds 64 to the running count in rax until rax >= rsi. rdi is
; never advanced, so the same 64 bytes are re-read every iteration; the loaded
; values are discarded. The bound is tested after the loads, so the body runs
; at least once.
; NOTE(review): assumes the System V AMD64 convention (rdi = buffer,
; rsi = size) -- confirm against the build target.
read_32x2_simd_offset:
    xor rax, rax

    align 64
.loop:
    vmovdqu ymm0, [rdi]
    vmovdqu ymm0, [rdi + 32]
    add rax, 64
    cmp rax, rsi
    jb .loop

    ; The loop leaves the upper half of ymm0 dirty. Clear the upper ymm state
    ; before returning so legacy-SSE code in the caller does not incur an
    ; AVX/SSE transition penalty.
    vzeroupper
    ret
|
||||||
|
|
||||||
|
; read_32x2_simd_no_offset(buffer, size)
; Each iteration performs two unaligned 32-byte loads into ymm0, both from
; [rdi], and adds 64 to the running count in rax until rax >= rsi. rdi is never
; advanced, so the same 32 bytes are re-read on every load; the loaded values
; are discarded. The bound is tested after the loads, so the body runs at
; least once.
; NOTE(review): assumes the System V AMD64 convention (rdi = buffer,
; rsi = size) -- confirm against the build target.
read_32x2_simd_no_offset:
    xor rax, rax

    align 64
.loop:
    vmovdqu ymm0, [rdi]
    vmovdqu ymm0, [rdi]
    add rax, 64
    cmp rax, rsi
    jb .loop

    ; The loop leaves the upper half of ymm0 dirty. Clear the upper ymm state
    ; before returning so legacy-SSE code in the caller does not incur an
    ; AVX/SSE transition penalty.
    vzeroupper
    ret
|
||||||
|
|
||||||
|
; read_16x4_simd(buffer, size)
; Each iteration performs four unaligned 16-byte loads into xmm0, alternating
; between [rdi] and [rdi + 16], and adds 64 to the running count in rax until
; rax >= rsi. rdi is never advanced, so the same 32 bytes are re-read every
; iteration; the loaded values are discarded. The bound is tested after the
; loads, so the body runs at least once.
; NOTE(review): assumes the System V AMD64 convention (rdi = buffer,
; rsi = size) -- confirm against the build target.
read_16x4_simd:
    xor eax, eax                ; 32-bit write zero-extends: rax = 0

    align 64
.next_group:
    ; Two copies of the [rdi] / [rdi + 16] load pair, written out by hand.
    vmovdqu xmm0, oword [rdi]
    vmovdqu xmm0, oword [rdi + 16]
    vmovdqu xmm0, oword [rdi]
    vmovdqu xmm0, oword [rdi + 16]
    add rax, 64
    cmp rax, rsi
    jb .next_group

    ret
|
||||||
|
|
||||||
|
; read_32x4_simd(buffer, size)
; Each iteration performs four unaligned 32-byte loads into ymm0, all from
; [rdi], and adds 128 to the running count in rax until rax >= rsi. rdi is
; never advanced, so the same 32 bytes are re-read on every load; the loaded
; values are discarded. The bound is tested after the loads, so the body runs
; at least once.
; NOTE(review): assumes the System V AMD64 convention (rdi = buffer,
; rsi = size) -- confirm against the build target.
read_32x4_simd:
    xor rax, rax

    align 64
.loop:
%rep 2
    vmovdqu ymm0, [rdi]
    vmovdqu ymm0, [rdi]
%endrep
    add rax, 128
    cmp rax, rsi
    jb .loop

    ; The loop leaves the upper half of ymm0 dirty. Clear the upper ymm state
    ; before returning so legacy-SSE code in the caller does not incur an
    ; AVX/SSE transition penalty.
    vzeroupper
    ret
|
||||||
|
Loading…
Reference in New Issue
Block a user