Switch to using mmap and test unaligned loads
This commit is contained in:
parent
37d3340df9
commit
7177add4ce
@ -5,6 +5,7 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
// Number of elements in the array ARR.
// ARR must be a real array (not a pointer or a decayed array parameter).
// The argument and the whole expansion are parenthesized so the macro can be
// used inside larger expressions (e.g. `x / ARR_LEN(a)`) without operator
// precedence changing the result.
#define ARR_LEN(ARR) (sizeof(ARR) / sizeof(*(ARR)))
|
||||
@ -51,6 +52,7 @@ extern "C" void read_32x2_simd_no_offset(char *buffer, u64 size);
|
||||
extern "C" void read_16x4_simd(char *buffer, u64 size);
|
||||
extern "C" void read_32x4_simd(char *buffer, u64 size);
|
||||
extern "C" void cache_test(char *buffer, u64 size, u64 mask);
|
||||
extern "C" void cache_test_unaligned(char *buffer, u64 size, u64 mask);
|
||||
|
||||
void test_fread(reptester *tester, alloc_type type);
|
||||
void test_read(reptester *tester, alloc_type type);
|
||||
@ -94,7 +96,9 @@ void test_read_32x2_simd_no_offset(reptester *tester, alloc_type type);
|
||||
void test_read_16x4_simd(reptester *tester, alloc_type type);
|
||||
void test_read_32x4_simd(reptester *tester, alloc_type type);
|
||||
void test_cache_test_16k(reptester *tester, alloc_type type);
|
||||
void test_cache_test_16k_unaligned(reptester *tester, alloc_type type);
|
||||
void test_cache_test_32k(reptester *tester, alloc_type type);
|
||||
void test_cache_test_32k_unaligned(reptester *tester, alloc_type type);
|
||||
void test_cache_test_64k(reptester *tester, alloc_type type);
|
||||
void test_cache_test_128k(reptester *tester, alloc_type type);
|
||||
void test_cache_test_256k(reptester *tester, alloc_type type);
|
||||
@ -216,30 +220,39 @@ int main(int argc, char *argv[]) {
|
||||
// {{"READ 32x4_simd", "READ 32x4_simd WITH MALLOC"},
|
||||
// test_read_32x4_simd},
|
||||
{{"CACHE TEST 16K", "CACHE TEST 16K WITH MALLOC"}, test_cache_test_16k},
|
||||
{{"CACHE TEST 32K", "CACHE TEST 32K WITH MALLOC"}, test_cache_test_32k},
|
||||
{{"CACHE TEST 64K", "CACHE TEST 64K WITH MALLOC"}, test_cache_test_64k},
|
||||
{{"CACHE TEST 128K", "CACHE TEST 128K WITH MALLOC"},
|
||||
test_cache_test_128k},
|
||||
{{"CACHE TEST 256K", "CACHE TEST 256K WITH MALLOC"},
|
||||
test_cache_test_256k},
|
||||
{{"CACHE TEST 512K", "CACHE TEST 512K WITH MALLOC"},
|
||||
test_cache_test_512k},
|
||||
{{"CACHE TEST 1M", "CACHE TEST 1M WITH MALLOC"}, test_cache_test_1m},
|
||||
{{"CACHE TEST 2M", "CACHE TEST 2M WITH MALLOC"}, test_cache_test_2m},
|
||||
{{"CACHE TEST 4M", "CACHE TEST 4M WITH MALLOC"}, test_cache_test_4m},
|
||||
{{"CACHE TEST 8M", "CACHE TEST 8M WITH MALLOC"}, test_cache_test_8m},
|
||||
{{"CACHE TEST 16M", "CACHE TEST 16M WITH MALLOC"}, test_cache_test_16m},
|
||||
{{"CACHE TEST 32M", "CACHE TEST 32M WITH MALLOC"}, test_cache_test_32m},
|
||||
{{"CACHE TEST 64M", "CACHE TEST 64M WITH MALLOC"}, test_cache_test_64m},
|
||||
{{"CACHE TEST 512M", "CACHE TEST 512M WITH MALLOC"},
|
||||
test_cache_test_512m},
|
||||
{{"CACHE TEST FULL", "CACHE TEST FULL WITH MALLOC"},
|
||||
test_cache_test_full},
|
||||
{{"CACHE TEST 16K UNALIGNED", "CACHE TEST 16K UNALIGNED WITH MALLOC"},
|
||||
test_cache_test_16k_unaligned},
|
||||
// {{"CACHE TEST 32K", "CACHE TEST 32K WITH MALLOC"},
|
||||
// test_cache_test_32k},
|
||||
// {{"CACHE TEST 64K", "CACHE TEST 64K WITH MALLOC"},
|
||||
// test_cache_test_64k},
|
||||
// {{"CACHE TEST 128K", "CACHE TEST 128K WITH MALLOC"},
|
||||
// test_cache_test_128k},
|
||||
// {{"CACHE TEST 256K", "CACHE TEST 256K WITH MALLOC"},
|
||||
// test_cache_test_256k},
|
||||
// {{"CACHE TEST 512K", "CACHE TEST 512K WITH MALLOC"},
|
||||
// test_cache_test_512k},
|
||||
// {{"CACHE TEST 1M", "CACHE TEST 1M WITH MALLOC"}, test_cache_test_1m},
|
||||
// {{"CACHE TEST 2M", "CACHE TEST 2M WITH MALLOC"}, test_cache_test_2m},
|
||||
// {{"CACHE TEST 4M", "CACHE TEST 4M WITH MALLOC"}, test_cache_test_4m},
|
||||
// {{"CACHE TEST 8M", "CACHE TEST 8M WITH MALLOC"}, test_cache_test_8m},
|
||||
// {{"CACHE TEST 16M", "CACHE TEST 16M WITH MALLOC"},
|
||||
// test_cache_test_16m},
|
||||
// {{"CACHE TEST 32M", "CACHE TEST 32M WITH MALLOC"},
|
||||
// test_cache_test_32m},
|
||||
// {{"CACHE TEST 64M", "CACHE TEST 64M WITH MALLOC"},
|
||||
// test_cache_test_64m},
|
||||
// {{"CACHE TEST 512M", "CACHE TEST 512M WITH MALLOC"},
|
||||
// test_cache_test_512m},
|
||||
// {{"CACHE TEST FULL", "CACHE TEST FULL WITH MALLOC"},
|
||||
// test_cache_test_full},
|
||||
};
|
||||
|
||||
tester.params.read_size = get_file_length(fp);
|
||||
tester.params.read_count = 1;
|
||||
tester.params.buffer = (char *)malloc(tester.params.read_size + 1);
|
||||
tester.params.buffer =
|
||||
(char *)mmap(NULL, tester.params.read_size + 1, PROT_READ | PROT_WRITE,
|
||||
MAP_ANON | MAP_SHARED | MAP_NORESERVE, -1, 0);
|
||||
memset(tester.params.buffer, 0, tester.params.read_size + 1);
|
||||
|
||||
for (u64 i = 0; i < tester.params.read_size; ++i) {
|
||||
@ -256,7 +269,7 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
fclose(fp);
|
||||
|
||||
free(tester.params.buffer);
|
||||
munmap(tester.params.buffer, tester.params.read_size + 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1333,6 +1346,31 @@ void test_cache_test_16k(reptester *tester, alloc_type type) {
|
||||
handle_free(tester, type);
|
||||
}
|
||||
|
||||
// Repetition-test body: runs the unaligned cache read routine with a 0x3fff
// offset mask (read offsets wrap inside a 16 KiB window) and records the
// outcome in tester->results.
//
// The timer and page-fault counter are sampled before handle_alloc() and
// again right after the read routine returns, so the recorded deltas include
// whatever handle_alloc() does for the given alloc type.
//
// tester: supplies params.buffer / read_size / read_count; receives results.
// type:   forwarded unchanged to handle_alloc() and handle_free().
void test_cache_test_16k_unaligned(reptester *tester, alloc_type type) {
  const u64 timer_begin = read_cpu_timer();
  const u64 faults_begin = page_fault_count();

  handle_alloc(tester, type);

  // Bytes to process: one read's size times the number of reads.
  const u64 byte_count = tester->params.read_size * tester->params.read_count;
  cache_test_unaligned(tester->params.buffer, byte_count, 0x3fff);

  // Sample the fault counter first, then the timer (reverse of the start).
  const u64 faults_delta = page_fault_count() - faults_begin;
  const u64 ticks_delta = read_cpu_timer() - timer_begin;

  // Stored in order: bytes processed, elapsed timer ticks, page faults.
  tester->results = {byte_count, ticks_delta, faults_delta};

  handle_free(tester, type);
}
|
||||
|
||||
void test_cache_test_32k(reptester *tester, alloc_type type) {
|
||||
u64 start = read_cpu_timer();
|
||||
u64 fault_count_start = page_fault_count();
|
||||
@ -1358,6 +1396,31 @@ void test_cache_test_32k(reptester *tester, alloc_type type) {
|
||||
handle_free(tester, type);
|
||||
}
|
||||
|
||||
// Repetition-test body: runs the unaligned cache read routine with a 0x7fff
// offset mask (read offsets wrap inside a 32 KiB window) and records the
// outcome in tester->results.
//
// The timer and page-fault counter are sampled before handle_alloc() and
// again right after the read routine returns, so the recorded deltas include
// whatever handle_alloc() does for the given alloc type.
//
// tester: supplies params.buffer / read_size / read_count; receives results.
// type:   forwarded unchanged to handle_alloc() and handle_free().
void test_cache_test_32k_unaligned(reptester *tester, alloc_type type) {
  const u64 timer_begin = read_cpu_timer();
  const u64 faults_begin = page_fault_count();

  handle_alloc(tester, type);

  // Bytes to process: one read's size times the number of reads.
  const u64 byte_count = tester->params.read_size * tester->params.read_count;
  cache_test_unaligned(tester->params.buffer, byte_count, 0x7fff);

  // Sample the fault counter first, then the timer (reverse of the start).
  const u64 faults_delta = page_fault_count() - faults_begin;
  const u64 ticks_delta = read_cpu_timer() - timer_begin;

  // Stored in order: bytes processed, elapsed timer ticks, page faults.
  tester->results = {byte_count, ticks_delta, faults_delta};

  handle_free(tester, type);
}
|
||||
|
||||
void test_cache_test_64k(reptester *tester, alloc_type type) {
|
||||
u64 start = read_cpu_timer();
|
||||
u64 fault_count_start = page_fault_count();
|
||||
|
@ -37,6 +37,7 @@ global read_32x2_simd_no_offset
|
||||
global read_16x4_simd
|
||||
global read_32x4_simd
|
||||
global cache_test ; Expects 3 inputs (pointer, read_count, mask)
|
||||
global cache_test_unaligned ; Expects 3 inputs (pointer, read_count, mask)
|
||||
|
||||
mov_all_bytes_asm:
|
||||
xor rax, rax
|
||||
@ -509,3 +510,20 @@ cache_test:
|
||||
sub rsi, 128 ; Decrement count
|
||||
ja .loop
|
||||
ret
|
||||
|
||||
; cache_test_unaligned(pointer, read_count, mask)
;   rdi = buffer pointer
;   rsi = number of bytes to read (consumed 128 bytes per iteration)
;   rdx = mask applied to the running offset so reads wrap inside a window
;
; Each iteration issues four 32-byte unaligned loads (128 bytes total) from
; (pointer + 5 + offset). The offset starts at 0, advances by 128 and is ANDed
; with the mask after every step.
;
; Only scratch registers are used (r10, r11, ymm0-ymm3). The base pointer is
; kept in r11 rather than rbx, because rbx is callee-saved in the System V
; AMD64 ABI and must not be modified without being saved and restored.
cache_test_unaligned:
    xor r10, r10                ; Zero the running offset
    add rdi, 5                  ; Unalign pointer
    mov r11, rdi                ; Save unaligned base pointer (scratch register)
.loop:
    add rdi, r10                ; Advance the pointer by the current offset
    add r10, 128                ; Next offset
    and r10, rdx                ; Mask offset so it wraps inside the window
    vmovdqu ymm0, [rdi + 0]
    vmovdqu ymm1, [rdi + 32]
    vmovdqu ymm2, [rdi + 64]
    vmovdqu ymm3, [rdi + 96]
    mov rdi, r11                ; Restore unaligned base pointer
    sub rsi, 128                ; Decrement remaining byte count
    ja .loop                    ; Continue while the count stays above zero (unsigned)
    ret
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
@ -10,7 +11,9 @@ void handle_alloc(reptester *tester, alloc_type type) {
|
||||
switch (type) {
|
||||
case ALLOC_TYPE_WITH_MALLOC:
|
||||
if (!(tester->params.buffer)) {
|
||||
tester->params.buffer = (char *)malloc(tester->params.read_size + 1);
|
||||
tester->params.buffer = (char *)mmap(
|
||||
NULL, tester->params.read_size + 1, PROT_READ | PROT_WRITE,
|
||||
MAP_ANON | MAP_SHARED | MAP_NORESERVE, -1, 0);
|
||||
memset(tester->params.buffer, 0, tester->params.read_size + 1);
|
||||
}
|
||||
|
||||
@ -24,7 +27,7 @@ void handle_free(reptester *tester, alloc_type type) {
|
||||
switch (type) {
|
||||
case ALLOC_TYPE_WITH_MALLOC:
|
||||
if (tester->params.buffer) {
|
||||
free(tester->params.buffer);
|
||||
munmap(tester->params.buffer, tester->params.read_size + 1);
|
||||
tester->params.buffer = NULL;
|
||||
}
|
||||
|
||||
@ -58,7 +61,9 @@ void run_func_test(reptester *tester, reptest_func func, const char *func_name,
|
||||
|
||||
if (type == ALLOC_TYPE_WITH_MALLOC) {
|
||||
buffer = tester->params.buffer;
|
||||
tester->params.buffer = (char *)malloc(tester->params.read_size + 1);
|
||||
tester->params.buffer =
|
||||
(char *)mmap(NULL, tester->params.read_size + 1, PROT_READ | PROT_WRITE,
|
||||
MAP_ANON | MAP_SHARED | MAP_NORESERVE, -1, 0);
|
||||
memset(tester->params.buffer, 0, tester->params.read_size + 1);
|
||||
}
|
||||
|
||||
@ -100,7 +105,7 @@ void run_func_test(reptester *tester, reptest_func func, const char *func_name,
|
||||
}
|
||||
|
||||
if (type == ALLOC_TYPE_WITH_MALLOC) {
|
||||
free(tester->params.buffer);
|
||||
munmap(tester->params.buffer, tester->params.read_size + 1);
|
||||
tester->params.buffer = buffer;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user