提交 47fe2bc9 编写于 作者: W William Wang

maprobe: fix usage of volatile to avoid extra store

上级 421b3b8f
......@@ -71,6 +71,7 @@ extern void test_pointer_tracing_latency(uint64_t size, int step, int iter, int
/*
 * Memory micro-benchmark entry points.
 * In the definitions visible in this change, `iter` is the number of timed
 * loop iterations and a non-zero `to_csv` selects the comma-separated output
 * line instead of the human-readable one.
 * NOTE(review): presumably the same holds for the tests whose bodies are not
 * shown here -- confirm against their definitions.
 */
extern void test_linear_access_latency(uint64_t size, uint64_t step, int iter, int to_csv);
extern void test_random_access_latency(uint64_t num_access, uint64_t test_range, uint64_t test_align, int pregen_addr, int iter, int to_csv);
/* Performs `iter` loads from one fixed address and reports cycles per load. */
extern void test_same_address_load_latency(int iter, int to_csv);
/* Accumulates `iter` loads into a volatile sum so each step also writes memory. */
extern void test_read_after_write_latency(int iter, int to_csv);
extern void legacy_test_mem_throughput(uint64_t iter);
extern void legacy_test_mem_throughput_same_set(uint64_t iter);
......
......@@ -50,7 +50,7 @@ void test_pointer_tracing_latency(uint64_t size, int step, int iter, int to_csv)
{
// printf("pointer tracing latency test\n");
// printf("range (B), read latency, iters, samples, cycles\n");
volatile uint64_t result = 0; // make sure compiler will not opt read_pointer_tracing_linklist
register uint64_t result = 0; // make sure compiler will not opt read_pointer_tracing_linklist
_perf_start_timer();
uint64_t nnode = setup_pointer_tracing_linklist(_PERF_TEST_ADDR_BASE, _PERF_TEST_ADDR_BASE + size, step);
_perf_end_timer();
......@@ -79,13 +79,13 @@ void test_same_address_load_latency(int iter, int to_csv)
{
// printf("same address load latency test\n", step);
// printf("range (B), read latency, iters, samples, cycles\n");
volatile uint64_t result = 0; // make sure compiler will not opt read_pointer_tracing_linklist
register uint64_t result = 0; // make sure compiler will not opt read_pointer_tracing_linklist
// _perf_print_timer();
_perf_start_timer();
uint64_t address = _PERF_TEST_ADDR_BASE;
for (int i = 0; i < iter; i++) {
result += *((uint64_t*) (address));
result += *((volatile uint64_t*) (address));
}
_perf_end_timer();
// _perf_print_timer();
......@@ -102,11 +102,39 @@ void test_same_address_load_latency(int iter, int to_csv)
_perf_g_total_samples += total_access;
}
/*
 * Read-after-write latency test.
 *
 * Loads `iter` consecutive uint64_t words starting at _PERF_TEST_ADDR_BASE and
 * adds each one to a volatile accumulator. Because the accumulator is
 * volatile, every `+=` must read it from memory and write it back, so each
 * iteration stores to a location that the next iteration reads again.
 * NOTE(review): presumably that store -> load dependency on `result` is the
 * quantity being measured -- confirm with the benchmark author.
 *
 * iter   - number of timed loop iterations (one sample each).
 * to_csv - non-zero prints "range, latency, iters, samples, cycles" as a
 *          comma-separated line; zero prints a human-readable summary.
 *
 * Adds `iter` to the global sample counter _perf_g_total_samples.
 */
void test_read_after_write_latency(int iter, int to_csv)
{
    // Must stay volatile: forces the compiler to store the sum to memory.
    volatile uint64_t result = 0;

    _perf_start_timer();
    uint64_t address = _PERF_TEST_ADDR_BASE;
    for (int i = 0; i < iter; i++) {
        result += *((uint64_t*) (address));
        address += sizeof(uint64_t);
    }
    _perf_end_timer();

    uint64_t total_access = iter;
    if (to_csv) {
        // The range column is unused by this test; pass a long zero so the
        // argument type matches the %ld conversion (a plain int 0 does not).
        printf("%ld, %f, %d, %ld, %ld\n", 0L, (float)perf.cycle / total_access, iter, total_access, perf.cycle);
    } else {
        printf("read after write latency %f, throughput %f B/cycle (%ld samples, %ld cycles)\n",
            (float)perf.cycle / total_access, total_access * 8 * BYTE / (float)perf.cycle, total_access, perf.cycle
        );
    }

    // Consume the accumulated value so the loads cannot be discarded.
    _perf_blackhole(result);
    _perf_g_total_samples += total_access;
}
void test_linear_access_latency(uint64_t size, uint64_t step, int iter, int to_csv)
{
// printf("stride %d linear access latency test\n", step);
// printf("range (B), read latency, iters, samples, cycles\n");
volatile uint64_t result = 0; // make sure compiler will not opt read_pointer_tracing_linklist
register uint64_t result = 0; // make sure compiler will not opt read_pointer_tracing_linklist
uint64_t num_access = size / step;
// _perf_print_timer();
......@@ -139,7 +167,7 @@ void test_random_access_latency(uint64_t num_access, uint64_t test_range, uint64
// test_align, pregen_addr ? "use pregen addr array" : "gen rand addr at run time"
// );
// printf("range (B), read latency, iters, samples, cycles\n");
volatile uint64_t result = 0; // make sure compiler will not opt read_pointer_tracing_linklist
register uint64_t result = 0; // make sure compiler will not opt read_pointer_tracing_linklist
// _perf_print_timer();
// alloc memory for random access addr array and data
......
......@@ -27,11 +27,11 @@ void typical_linear_load_test_set()
test_linear_access_latency(_PERF_L2_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 1, 0);
test_linear_access_latency(_PERF_L2_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
printf("L1 (L1 same set) linear cache line load:\n");
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 1, 0);
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 10, 0);
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 100, 0);
printf("L2 (L1 same set) linear cache line load:\n");
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 1, 0);
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 2, 0);
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 4, 0);
printf("L1 (L2 same slice) linear cache line load:\n");
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 1, 0);
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 2, 0);
......@@ -39,11 +39,11 @@ void typical_linear_load_test_set()
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 1, 0);
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 2, 0);
printf("L1 (page traverse) linear cache line load:\n");
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 1, 0);
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 10, 0);
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 100, 0);
printf("L2 (page traverse) linear cache line load:\n");
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 1, 0);
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 2, 0);
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 4, 0);
printf("total samples: %ld\n", _perf_g_total_samples);
}
......@@ -103,6 +103,10 @@ void typical_memory_disambiuation_test_set()
test_same_address_load_latency(1024, 0);
test_same_address_load_latency(1024, 0);
test_same_address_load_latency(1024, 0);
printf("load then store to the same address:\n");
test_read_after_write_latency(1024, 0);
test_read_after_write_latency(1024, 0);
test_read_after_write_latency(1024, 0);
// more to be added
}
......@@ -141,10 +145,12 @@ void latency_test_example()
_perf_calibrate();
printf("latency test example:\n");
test_pointer_tracing_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 5, 0);
test_linear_access_latency(_PERF_PAGE_SIZE_BYTE, sizeof(uint64_t), 5, 0);
test_linear_access_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 5, 0);
test_random_access_latency(4096, 1024*MB, _PERF_CACHELINE_SIZE_BYTE, 0, 1, 0);
test_random_access_latency(4096, 1024*MB, _PERF_CACHELINE_SIZE_BYTE, 1, 1, 0);
test_same_address_load_latency(1024, 0);
test_read_after_write_latency(1024, 0);
printf("total samples: %ld\n", _perf_g_total_samples);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册