提交 421b3b8f 编写于 作者: W William Wang

maprobe: add linear read, random read, l-l vio test

上级 4f4982b1
#include "maprobe.h"
struct perf perf;
uint64_t _perf_g_total_samples = 0;
void _perf_start_timer()
{
......
// #define _PERF_TEST_ADDR_BASE 0x2000400000
#endif

// Cache geometry constants.
// NOTE: the stale pre-update duplicates (NOALIAS 32KB, L2 512KB, L3 2MB)
// were diff residue and have been dropped to avoid macro redefinition.
#define _PERF_CACHELINE_SIZE_BYTE (64 * BYTE)
#define _PERF_PAGE_SIZE_BYTE (4 * KB)
#define _PERF_L1_NOALIAS_SIZE_BYTE (16 * KB)
#define _PERF_L1_SIZE_BYTE (64 * KB)
#define _PERF_L2_SIZE_BYTE (1 * MB)
#define _PERF_L3_SIZE_BYTE (6 * MB)
#define _PERF_MEM_SIZE_BYTE (1024 * MB)
#define _PERF_L1_NUM_WAYS 4
#define _PERF_SET_SIZE_BYTE (_PERF_L1_SIZE_BYTE / _PERF_L1_NUM_WAYS)
#define _PERF_L1_NUM_SETS 256
#define _PERF_L2_NUM_SLICES 4
// #define _PERF_L2_NUM_SETS 512
// Address strides that stay in the same L1 bank / L1 set / L2 slice,
// or step to the next page.
#define _PERF_ADDR_STRIDE_L1_SAME_BANK _PERF_CACHELINE_SIZE_BYTE
#define _PERF_ADDR_STRIDE_L1_SAME_SET (_PERF_L1_NUM_SETS * _PERF_CACHELINE_SIZE_BYTE)
#define _PERF_ADDR_STRIDE_L2_SAME_SLICE (_PERF_L2_NUM_SLICES * _PERF_CACHELINE_SIZE_BYTE)
// #define _PERF_ADDR_STRIDE_L2_SAME_SET (_PERF_L2_NUM_SETS * _PERF_CACHELINE_SIZE_BYTE)
#define _PERF_ADDR_STRIDE_NEXT_PAGE (_PERF_PAGE_SIZE_BYTE)
// probe const
#define _PERF_BLACKHOLE _PERF_TEST_ADDR_BASE
......@@ -42,9 +52,10 @@ struct perf
uint64_t cycle;
uint64_t instrcnt;
};
extern struct perf perf;
extern uint64_t _perf_g_total_samples;
// common perf tools
extern void _perf_start_timer();
extern void _perf_end_timer();
......@@ -53,11 +64,15 @@ extern void _perf_calibrate();
extern void _perf_blackhole(uint64_t value);
// latency test
extern uint64_t setup_latency_test_linklist(uint64_t base_addr, uint64_t end_addr, uint64_t step);
extern uint64_t read_latency_test_linklist(uint64_t base_addr, uint64_t num_valid_node);
extern uint64_t setup_pointer_tracing_linklist(uint64_t base_addr, uint64_t end_addr, uint64_t step);
extern uint64_t read_pointer_tracing_linklist(uint64_t base_addr, uint64_t num_valid_node);
extern void latency_test_warmup(uint64_t base_addr, uint64_t end_addr);
extern void test_latency(uint64_t size, int iter);
extern void test_mem_throughput(uint64_t iter);
extern void test_mem_throughput_same_set(uint64_t iter);
extern void test_pointer_tracing_latency(uint64_t size, int step, int iter, int to_csv);
extern void test_linear_access_latency(uint64_t size, uint64_t step, int iter, int to_csv);
extern void test_random_access_latency(uint64_t num_access, uint64_t test_range, uint64_t test_align, int pregen_addr, int iter, int to_csv);
extern void test_same_address_load_latency(int iter, int to_csv);
extern void legacy_test_mem_throughput(uint64_t iter);
extern void legacy_test_mem_throughput_same_set(uint64_t iter);
#endif
\ No newline at end of file
#include "maprobe.h"
// (stale, semicolon-less prototype of the renamed setup_latency_test_linklist removed)
// inline uint64_t get_next_linear_address(uint64_t current_addr, uint64_t step) {
//     return current_addr + step;
// }

/*
 * Return a pseudo-random address in [base_addr, end_addr), rounded DOWN to a
 * multiple of align.
 * Requires end_addr > base_addr and align > 0 (division/modulo by zero
 * otherwise). If base_addr itself is not align-aligned, the rounded result
 * can land slightly below base_addr — callers pass aligned bases.
 * `static` added: a plain C99 `inline` with no external definition can fail
 * to link when a call is not inlined (e.g. at -O0).
 */
static inline uint64_t generate_rand_address(uint64_t base_addr, uint64_t end_addr, uint64_t align) {
    return (rand() % (end_addr - base_addr) + base_addr) / align * align;
}
/*
 * Fill dest[0 .. number) with pseudo-random addresses drawn from
 * [base_addr, end_addr), each rounded down to a multiple of align
 * (see generate_rand_address).
 */
void generate_rand_address_array(uint64_t* dest, uint64_t base_addr, uint64_t end_addr, uint64_t align, int number) {
    for (int idx = 0; idx < number; idx++) {
        dest[idx] = generate_rand_address(base_addr, end_addr, align);
    }
}
// Build the pointer-tracing linklist covering [base_addr, end_addr) at the
// given step and return the number of valid nodes. Thin alias of
// setup_pointer_tracing_linklist, kept for naming symmetry with the other
// generate_* helpers.
uint64_t generate_pointer_tracing_address(uint64_t base_addr, uint64_t end_addr, uint64_t step) {
    return setup_pointer_tracing_linklist(base_addr, end_addr, step);
}
uint64_t setup_pointer_tracing_linklist(uint64_t base_addr, uint64_t end_addr, uint64_t step)
{
uint64_t num_valid_node = 0;
assert(step % 8 == 0);
......@@ -14,7 +32,7 @@ uint64_t setup_latency_test_linklist(uint64_t base_addr, uint64_t end_addr, uint
return num_valid_node;
}
uint64_t read_latency_test_linklist(uint64_t base_addr, uint64_t num_valid_node)
uint64_t read_pointer_tracing_linklist(uint64_t base_addr, uint64_t num_valid_node)
{
uint64_t cur_addr = base_addr;
for (int i = 0; i < num_valid_node; i++) {
......@@ -25,31 +43,145 @@ uint64_t read_latency_test_linklist(uint64_t base_addr, uint64_t num_valid_node)
/*
 * Warm up caches/TLB over [base_addr, end_addr) by building the
 * pointer-tracing linklist at cache-line stride.
 * (Removed the stale duplicate call to setup_latency_test_linklist — diff
 * residue of the function's old name; that symbol no longer exists.)
 */
void latency_test_warmup(uint64_t base_addr, uint64_t end_addr)
{
    setup_pointer_tracing_linklist(base_addr, end_addr, _PERF_CACHELINE_SIZE_BYTE);
}
void test_latency(uint64_t size, int iter)
void test_pointer_tracing_latency(uint64_t size, int step, int iter, int to_csv)
{
volatile uint64_t result = 0; // make sure compiler will not opt read_latency_test_linklist
printf("range 0x%xB (%d iters) latency test\n", size, iter);
// printf("pointer tracing latency test\n");
// printf("range (B), read latency, iters, samples, cycles\n");
volatile uint64_t result = 0; // make sure compiler will not opt read_pointer_tracing_linklist
_perf_start_timer();
uint64_t nnode = setup_latency_test_linklist(_PERF_TEST_ADDR_BASE, _PERF_TEST_ADDR_BASE + size, _PERF_CACHELINE_SIZE_BYTE);
uint64_t nnode = setup_pointer_tracing_linklist(_PERF_TEST_ADDR_BASE, _PERF_TEST_ADDR_BASE + size, step);
_perf_end_timer();
uint64_t total_node = nnode * iter;
// _perf_print_timer();
_perf_start_timer();
for (int i = 0; i < iter; i++) {
result += read_latency_test_linklist(_PERF_TEST_ADDR_BASE, nnode);
result += read_pointer_tracing_linklist(_PERF_TEST_ADDR_BASE, nnode);
}
_perf_end_timer();
// _perf_print_timer();
printf("range 0x%xB (%d iters) read latency %f (%d samples)\n", size, iter, (float)perf.cycle / total_node, total_node);
if (to_csv) {
printf("%ld, %f, %d, %ld, %ld\n", size, (float)perf.cycle / total_node, iter, total_node, perf.cycle);
} else {
printf("range %ldKB (%d iters) pointer tracing read latency %f, throughput %f B/cycle (%ld samples, %ld cycles)\n",
size/KB, iter, (float)perf.cycle / total_node, total_node * 8 * BYTE / (float)perf.cycle, total_node, perf.cycle
);
}
_perf_blackhole(result);
_perf_g_total_samples += total_node;
}
/*
 * Latency of `iter` back-to-back loads from one fixed address
 * (_PERF_TEST_ADDR_BASE). Measures the best case: same cache line, no
 * address dependency between loads.
 * to_csv: print "size, latency, iters, samples, cycles" (size column is 0 —
 * there is no meaningful range for this test).
 */
void test_same_address_load_latency(int iter, int to_csv)
{
    volatile uint64_t result = 0; // volatile accumulator keeps the loop alive
    _perf_start_timer();
    uint64_t address = _PERF_TEST_ADDR_BASE;
    for (int i = 0; i < iter; i++) {
        // volatile-qualified load: the read is loop-invariant and would
        // otherwise be hoisted out of the loop — it is the thing being timed
        result += *((volatile uint64_t*) (address));
    }
    _perf_end_timer();
    uint64_t total_access = iter;
    if (to_csv) {
        // 0L: %ld previously received a plain int 0 (undefined behavior in varargs)
        printf("%ld, %f, %d, %ld, %ld\n", 0L, (float)perf.cycle / total_access, iter, total_access, perf.cycle);
    } else {
        printf("same address read latency %f, throughput %f B/cycle (%ld samples, %ld cycles)\n",
            (float)perf.cycle / total_access, total_access * 8 * BYTE / (float)perf.cycle, total_access, perf.cycle
        );
    }
    _perf_blackhole(result);
    _perf_g_total_samples += total_access;
}
/*
 * Linear (strided) read latency test: walk `size` bytes with stride `step`,
 * repeated `iter` times, loading one uint64_t per step.
 * Fixes:
 *  - `address` is now reset each iteration so every pass re-walks the same
 *    [base, base+size) range (previously iter > 1 kept marching past the
 *    stated range, so repeat passes hit cold memory);
 *  - stride printf used %d for a uint64_t (undefined behavior).
 * to_csv: print "size, latency, iters, samples, cycles".
 */
void test_linear_access_latency(uint64_t size, uint64_t step, int iter, int to_csv)
{
    volatile uint64_t result = 0; // volatile accumulator keeps the loads alive
    uint64_t num_access = size / step;
    _perf_start_timer();
    for (int i = 0; i < iter; i++) {
        uint64_t address = _PERF_TEST_ADDR_BASE; // restart the walk each pass
        for (uint64_t j = 0; j < num_access; j++) {
            result += *((volatile uint64_t*) (address));
            address += step;
        }
    }
    _perf_end_timer();
    uint64_t total_access = num_access * iter;
    if (to_csv) {
        printf("%ld, %f, %d, %ld, %ld\n", size, (float)perf.cycle / total_access, iter, total_access, perf.cycle);
    } else {
        printf("range %ldKB (%d iters) linear read latency %f, throughput %f B/cycle (%ld samples, %ld cycles), stride %ldB\n",
            size/KB, iter, (float)perf.cycle / total_access, total_access * 8 * BYTE / (float)perf.cycle, total_access, perf.cycle, step
        );
    }
    _perf_blackhole(result);
    _perf_g_total_samples += total_access;
}
/*
 * Random read latency test: `num_access` loads spread over `test_range`
 * bytes, addresses aligned down to `test_align`, repeated `iter` times.
 * pregen_addr != 0: random addresses are generated up front into an array at
 * _PERF_TEST_ADDR_BASE (the target region starts just past that array), and
 * the timed loop loads each stored address and then loads THROUGH it.
 * Fix: the pregen loop previously only read the address array itself (a
 * linear walk) and never dereferenced the random targets, unlike the
 * runtime-generation branch. It now performs the dependent second load.
 * pregen_addr == 0: each address is produced by rand() inside the timed
 * loop, so rand() cost is included in the measurement.
 */
void test_random_access_latency(uint64_t num_access, uint64_t test_range, uint64_t test_align, int pregen_addr, int iter, int to_csv)
{
    volatile uint64_t result = 0; // volatile accumulator keeps the loads alive
    // need at least a double word per slot
    assert(test_align >= 8 * BYTE);
    if (pregen_addr) {
        // target region sits right after the pregenerated pointer array
        uint64_t test_array_base_addr = _PERF_TEST_ADDR_BASE + num_access * sizeof(uint64_t*);
        uint64_t address_array_base_addr = _PERF_TEST_ADDR_BASE;
        generate_rand_address_array((uint64_t*)address_array_base_addr, test_array_base_addr, test_array_base_addr + test_range, test_align, num_access);
        _perf_start_timer();
        for (int i = 0; i < iter; i++) {
            for (uint64_t j = 0; j < num_access; j++) {
                // load the pregenerated random address, then load through it
                uint64_t target = *((uint64_t*) (address_array_base_addr + j * sizeof(uint64_t*)));
                result += *((volatile uint64_t*) target);
            }
        }
        _perf_end_timer();
    } else {
        _perf_start_timer();
        for (int i = 0; i < iter; i++) {
            for (uint64_t j = 0; j < num_access; j++) {
                result += *((volatile uint64_t*) (generate_rand_address(_PERF_TEST_ADDR_BASE, _PERF_TEST_ADDR_BASE + test_range, test_align)));
            }
        }
        _perf_end_timer();
    }
    uint64_t total_access = num_access * iter;
    if (to_csv) {
        printf("%ld, %f, %d, %ld, %ld\n", test_range, (float)perf.cycle / total_access, iter, total_access, perf.cycle);
    } else {
        printf("range %ldKB, access cover %ldKB (%d iters) random read latency %f, throughput %f B/cycle (%ld samples, %ld cycles), align %ldB, %s\n",
            test_range/KB, total_access*8*_PERF_CACHELINE_SIZE_BYTE/KB, iter, (float)perf.cycle / (total_access), total_access * 8 * BYTE / (float)perf.cycle, total_access, perf.cycle, test_align,
            pregen_addr ? "pregen addr" : "runtime addr"
        );
    }
    _perf_blackhole(result);
    _perf_g_total_samples += total_access;
}
void test_mem_throughput(uint64_t iter)
void legacy_test_mem_throughput(uint64_t iter)
{
uint64_t remain = iter;
uint64_t result = 0;
......@@ -64,7 +196,7 @@ void test_mem_throughput(uint64_t iter)
printf("mem band width %f B/cycle (%d samples)\n", (float)iter * _PERF_CACHELINE_SIZE_BYTE / perf.cycle, iter);
}
void test_mem_throughput_same_set(uint64_t iter)
void legacy_test_mem_throughput_same_set(uint64_t iter)
{
uint64_t remain = iter;
uint64_t result = 0;
......@@ -72,7 +204,7 @@ void test_mem_throughput_same_set(uint64_t iter)
_perf_start_timer();
while (remain--) {
result += *(uint64_t*) access_addr;
access_addr += _PERF_SET_SIZE_BYTE;
access_addr += _PERF_ADDR_STRIDE_L1_SAME_SET;
}
_perf_end_timer();
*(uint64_t*) _PERF_BLACKHOLE = result;
......
#include <klib.h>
#include "maprobe.h"
int main()
void typical_linear_load_test_set()
{
_perf_calibrate();
printf("------------- linear load test set -------------\n");
printf("page size linear double word load:\n");
test_linear_access_latency(_PERF_PAGE_SIZE_BYTE, sizeof(uint64_t), 1, 0);
test_linear_access_latency(_PERF_PAGE_SIZE_BYTE, sizeof(uint64_t), 2, 0);
printf("page size linear cache line load:\n");
test_linear_access_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 1, 0);
test_linear_access_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
printf("dcache/2 linear double word load:\n");
test_linear_access_latency(_PERF_L1_SIZE_BYTE / 2, sizeof(uint64_t), 1, 0);
test_linear_access_latency(_PERF_L1_SIZE_BYTE / 2, sizeof(uint64_t), 2, 0);
printf("dcache/2 linear cache line load:\n");
test_linear_access_latency(_PERF_L1_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 1, 0);
test_linear_access_latency(_PERF_L1_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
printf("dcache linear double word load:\n");
test_linear_access_latency(_PERF_L1_SIZE_BYTE, sizeof(uint64_t), 1, 0);
test_linear_access_latency(_PERF_L1_SIZE_BYTE, sizeof(uint64_t), 2, 0);
printf("dcache linear cache line load:\n");
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 1, 0);
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
printf("L2 linear cache line load:\n");
test_linear_access_latency(_PERF_L2_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 1, 0);
test_linear_access_latency(_PERF_L2_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
printf("L1 (L1 same set) linear cache line load:\n");
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 1, 0);
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 10, 0);
printf("L2 (L1 same set) linear cache line load:\n");
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 1, 0);
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 2, 0);
printf("L1 (L2 same slice) linear cache line load:\n");
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 1, 0);
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 2, 0);
printf("L2 (L2 same slice) linear cache line load:\n");
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 1, 0);
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 2, 0);
printf("L1 (page traverse) linear cache line load:\n");
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 1, 0);
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 10, 0);
printf("L2 (page traverse) linear cache line load:\n");
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 1, 0);
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 2, 0);
printf("total samples: %ld\n", _perf_g_total_samples);
}
/*
 * Random load latency regression set.
 * For each footprint (page / L1-half / L1 / L2) and granularity (word /
 * cache line), runs test_random_access_latency twice: once with addresses
 * pregenerated into an array (pregen_addr=1) and once generating them
 * inside the timed loop (pregen_addr=0).
 */
void typical_random_load_test_set()
{
    printf("------------- random load test set -------------\n");
    printf("from page size random load (word):\n");
    test_random_access_latency(1024, _PERF_PAGE_SIZE_BYTE, 8*BYTE, 1, 1, 0);
    test_random_access_latency(1024, _PERF_PAGE_SIZE_BYTE, 8*BYTE, 0, 1, 0);
    printf("from page size random load (cache line):\n");
    test_random_access_latency(1024, _PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 1, 1, 0);
    test_random_access_latency(1024, _PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 0, 1, 0);
    printf("from dcache/2 size random load (word):\n");
    test_random_access_latency(1024, _PERF_L1_SIZE_BYTE/2, 8*BYTE, 1, 1, 0);
    test_random_access_latency(1024, _PERF_L1_SIZE_BYTE/2, 8*BYTE, 0, 1, 0);
    printf("from dcache/2 size random load (cache line):\n");
    test_random_access_latency(1024, _PERF_L1_SIZE_BYTE/2, _PERF_CACHELINE_SIZE_BYTE, 1, 1, 0);
    test_random_access_latency(1024, _PERF_L1_SIZE_BYTE/2, _PERF_CACHELINE_SIZE_BYTE, 0, 1, 0);
    printf("from dcache size random load (word):\n");
    // num_access = 2x the number of cache lines in L1, so the range is well covered
    test_random_access_latency(_PERF_L1_SIZE_BYTE/_PERF_CACHELINE_SIZE_BYTE*2, _PERF_L1_SIZE_BYTE, 8*BYTE, 1, 1, 0);
    test_random_access_latency(_PERF_L1_SIZE_BYTE/_PERF_CACHELINE_SIZE_BYTE*2, _PERF_L1_SIZE_BYTE, 8*BYTE, 0, 1, 0);
    printf("from dcache size random load (cache line):\n");
    test_random_access_latency(_PERF_L1_SIZE_BYTE/_PERF_CACHELINE_SIZE_BYTE*2, _PERF_L1_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 1, 1, 0);
    test_random_access_latency(_PERF_L1_SIZE_BYTE/_PERF_CACHELINE_SIZE_BYTE*2, _PERF_L1_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 0, 1, 0);
    printf("from l2 size random load (word):\n");
    test_random_access_latency(_PERF_L1_SIZE_BYTE/_PERF_CACHELINE_SIZE_BYTE*2, _PERF_L2_SIZE_BYTE, 8*BYTE, 1, 1, 0);
    test_random_access_latency(_PERF_L1_SIZE_BYTE/_PERF_CACHELINE_SIZE_BYTE*2, _PERF_L2_SIZE_BYTE, 8*BYTE, 0, 1, 0);
    printf("from l2 size random load (cache line):\n");
    test_random_access_latency(_PERF_L1_SIZE_BYTE/_PERF_CACHELINE_SIZE_BYTE*2, _PERF_L2_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 1, 1, 0);
    test_random_access_latency(_PERF_L1_SIZE_BYTE/_PERF_CACHELINE_SIZE_BYTE*2, _PERF_L2_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 0, 1, 0);
    printf("total samples: %ld\n", _perf_g_total_samples);
}
/*
 * Pointer-tracing (dependent load) latency regression set.
 * Traces a linklist cache line by cache line across growing footprints
 * (page up to L3/2), then page by page (every access on a new page, which
 * also exercises TLB behavior).
 */
void typical_pointer_tracing_load_test_set()
{
    printf("------------- pointer tracing load test set -------------\n");
    printf("cacheline by cacheline tracing:\n");
    test_pointer_tracing_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 10, 0);
    test_pointer_tracing_latency(_PERF_L1_SIZE_BYTE/2, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    test_pointer_tracing_latency(_PERF_L1_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    test_pointer_tracing_latency(_PERF_L2_SIZE_BYTE/2, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    test_pointer_tracing_latency(_PERF_L2_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 1, 0);
    test_pointer_tracing_latency(_PERF_L3_SIZE_BYTE/2, _PERF_CACHELINE_SIZE_BYTE, 1, 0);
    printf("page by page, tracing:\n");
    test_pointer_tracing_latency(_PERF_PAGE_SIZE_BYTE*2, _PERF_PAGE_SIZE_BYTE, 10, 0);
    test_pointer_tracing_latency(_PERF_L1_SIZE_BYTE/2, _PERF_PAGE_SIZE_BYTE, 10, 0);
    test_pointer_tracing_latency(_PERF_L1_SIZE_BYTE, _PERF_PAGE_SIZE_BYTE, 10, 0);
    test_pointer_tracing_latency(_PERF_L2_SIZE_BYTE/2, _PERF_PAGE_SIZE_BYTE, 10, 0);
    test_pointer_tracing_latency(_PERF_L2_SIZE_BYTE, _PERF_PAGE_SIZE_BYTE, 10, 0);
    printf("total samples: %ld\n", _perf_g_total_samples);
}
/*
 * Memory disambiguation test set — currently only repeated same-address
 * loads; more cases are planned (see comment below).
 * NOTE(review): "disambiuation" in the name is a typo for "disambiguation";
 * kept because typical_latency_test() calls it by this name.
 */
void typical_memory_disambiuation_test_set()
{
    printf("------------- memory disambiuation test set -------------\n");
    printf("load from the same address:\n");
    test_same_address_load_latency(1024, 0);
    test_same_address_load_latency(1024, 0);
    test_same_address_load_latency(1024, 0);
    // more to be added
}
// typical latency test for fast regression
// Resets the global sample counter, then runs the four standard test sets
// (linear, random, pointer tracing, memory disambiguation) in order.
void typical_latency_test()
{
    _perf_g_total_samples = 0;
    typical_linear_load_test_set();
    typical_random_load_test_set();
    typical_pointer_tracing_load_test_set();
    typical_memory_disambiuation_test_set();
}
/*
 * Emit CSV data for a latency-vs-footprint graph using pointer tracing:
 * 1KB steps up to 64KB, 64KB steps up to 1MB, then 1MB steps up to 8MB
 * (fewer iterations for the larger, slower ranges).
 */
void pointer_tracing_graph()
{
    _perf_g_total_samples = 0;
    _perf_calibrate();
    printf("data for pointer tracing latency graph:\n");
    printf("range (B), read latency, iters, samples\n");
    for (int i = 1*KB; i < 64*KB; i = i + 1*KB) {
        test_pointer_tracing_latency(i, _PERF_CACHELINE_SIZE_BYTE, 2, 1);
    }
    for (int i = 64*KB; i < 1024*KB; i = i + 64*KB) {
        test_pointer_tracing_latency(i, _PERF_CACHELINE_SIZE_BYTE, 1, 1);
    }
    test_pointer_tracing_latency(1024*KB, _PERF_CACHELINE_SIZE_BYTE, 1, 1);
    for (int i = 1*MB; i <8*MB; i = i + 1*MB) {
        test_pointer_tracing_latency(i, _PERF_CACHELINE_SIZE_BYTE, 1, 1);
    }
    printf("total samples: %ld\n", _perf_g_total_samples);
}
// a simple test set used to check if test is working correctly
// Calibrates the timer, then runs one small instance of each test kind
// (pointer tracing, linear, random with both address modes, same-address).
void latency_test_example()
{
    _perf_calibrate();
    printf("latency test example:\n");
    test_pointer_tracing_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 5, 0);
    test_linear_access_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 5, 0);
    test_random_access_latency(4096, 1024*MB, _PERF_CACHELINE_SIZE_BYTE, 0, 1, 0);
    test_random_access_latency(4096, 1024*MB, _PERF_CACHELINE_SIZE_BYTE, 1, 1, 0);
    test_same_address_load_latency(1024, 0);
    printf("total samples: %ld\n", _perf_g_total_samples);
}
/*
 * Legacy latency/throughput driver, kept for comparison with old results.
 * (Reconstructed: diff residue still called the removed/renamed
 * test_mem_throughput and test_latency symbols next to their replacements,
 * which would fail to link; only the renamed calls remain.)
 */
void legacy_latency_throughput_test()
{
    _perf_calibrate();
    printf("Memory throughput:\n");
    legacy_test_mem_throughput(1024);
    printf("L1 latency:\n");
    test_pointer_tracing_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 5, 0);
    test_pointer_tracing_latency(_PERF_L1_NOALIAS_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    test_pointer_tracing_latency(_PERF_L1_SIZE_BYTE/2, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    test_pointer_tracing_latency(_PERF_L1_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    printf("L2 latency:\n");
    test_pointer_tracing_latency(_PERF_L2_SIZE_BYTE/2, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    // test_pointer_tracing_latency(_PERF_L2_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    printf("L3 latency:\n");
    test_pointer_tracing_latency(_PERF_L3_SIZE_BYTE/2, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    // test_pointer_tracing_latency(_PERF_L3_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE,2, 0);
    // printf("MEM:\n");
    // test_pointer_tracing_latency(_PERF_L3_SIZE_BYTE*2, _PERF_CACHELINE_SIZE_BYTE,2, 0);
    printf("total samples: %ld\n", _perf_g_total_samples);
}
// Entry point: run the quick sanity-check set, then the typical latency
// regression. The other drivers (graph data, legacy comparison) are kept
// commented out for manual runs.
int main()
{
    latency_test_example();
    typical_latency_test();
    // pointer_tracing_graph();
    // latency_test();
    // legacy_latency_throughput_test();
    return 0;
}
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册