Skip to content

Commit bc84f9b

Browse files
committed
Add detailed MMU cache statistics for fetch path
Add detailed MMU cache statistics for fetch path. Introduce fine-grained counters to replace the previous aggregated fetch hit/miss statistics. The new metrics track: total fetches; icache hits/misses; victim cache hits/misses; and TLB hits/misses. The old MMU statistics only distinguished total fetch hits/misses, which no longer reflect real performance after the addition of the icache and victim cache layers. This change enables more accurate profiling and debugging: it provides a clearer view of instruction fetch performance and helps identify which cache level contributes most to stalls or misses.
1 parent efde8b1 commit bc84f9b

File tree

3 files changed

+83
-20
lines changed

3 files changed

+83
-20
lines changed

main.c

Lines changed: 49 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,11 +1024,30 @@ static void print_mmu_cache_stats(vm_t *vm)
10241024
fprintf(stderr, "\n=== MMU Cache Statistics ===\n");
10251025
for (uint32_t i = 0; i < vm->n_hart; i++) {
10261026
hart_t *hart = vm->hart[i];
1027-
uint64_t fetch_hits = 0, fetch_misses = 0;
1028-
fetch_hits = hart->cache_fetch[1].hits + hart->cache_fetch[2].hits;
1029-
fetch_misses =
1030-
hart->cache_fetch[1].misses + hart->cache_fetch[2].misses;
1031-
uint64_t fetch_total = fetch_hits + fetch_misses;
1027+
1028+
/* Combine 2-entry tlb statistics */
1029+
uint64_t fetch_hits_tlb = 0, fetch_misses_tlb = 0;
1030+
fetch_hits_tlb =
1031+
hart->cache_fetch[0].tlb_hits + hart->cache_fetch[1].tlb_hits;
1032+
fetch_misses_tlb =
1033+
hart->cache_fetch[0].tlb_misses + hart->cache_fetch[1].tlb_misses;
1034+
1035+
/* Combine icache statistics */
1036+
uint64_t fetch_hits_icache = 0, fetch_misses_icache = 0;
1037+
fetch_hits_icache =
1038+
hart->cache_fetch[0].icache_hits + hart->cache_fetch[1].icache_hits;
1039+
fetch_misses_icache = hart->cache_fetch[0].icache_misses +
1040+
hart->cache_fetch[1].icache_misses;
1041+
1042+
/* Combine victim cache statistics */
1043+
uint64_t fetch_hits_vcache = 0, fetch_misses_vcache = 0;
1044+
fetch_hits_vcache =
1045+
hart->cache_fetch[0].vcache_hits + hart->cache_fetch[1].vcache_hits;
1046+
fetch_misses_vcache = hart->cache_fetch[0].vcache_misses +
1047+
hart->cache_fetch[1].vcache_misses;
1048+
1049+
uint64_t access_total =
1050+
hart->cache_fetch[0].total_fetch + hart->cache_fetch[1].total_fetch;
10321051

10331052
/* Combine 8-set × 2-way load cache statistics */
10341053
uint64_t load_hits = 0, load_misses = 0;
@@ -1050,14 +1069,32 @@ static void print_mmu_cache_stats(vm_t *vm)
10501069
}
10511070
uint64_t store_total = store_hits + store_misses;
10521071

1053-
fprintf(stderr, "\nHart %u:\n", i);
1054-
fprintf(stderr, " Fetch: %12llu hits, %12llu misses", fetch_hits,
1055-
fetch_misses);
1056-
if (fetch_total > 0)
1057-
fprintf(stderr, " (%.2f%% hit rate)",
1058-
100.0 * fetch_hits / fetch_total);
1059-
fprintf(stderr, "\n");
10601072

1073+
fprintf(stderr, "\n=== Introduction Cache Statistics ===\n");
1074+
fprintf(stderr, " Total access: %12llu\n", access_total);
1075+
fprintf(stderr, " Icache hits: %12llu (%.2f%%)\n", fetch_hits_icache,
1076+
(fetch_hits_icache * 100.0) / access_total);
1077+
fprintf(stderr, " Icache misses: %12llu (%.2f%%)\n",
1078+
fetch_misses_icache,
1079+
(fetch_misses_icache * 100.0) / access_total);
1080+
fprintf(stderr,
1081+
" ├ Vcache hits: %8llu (%.2f%% of Icache misses)\n",
1082+
fetch_hits_vcache,
1083+
(fetch_hits_vcache * 100.0) / fetch_misses_icache,
1084+
(fetch_hits_vcache * 100.0) / access_total);
1085+
fprintf(stderr,
1086+
" └ Vcache misses: %8llu (%.2f%% of Icache misses)\n",
1087+
fetch_misses_vcache,
1088+
(fetch_misses_vcache * 100.0) / fetch_misses_icache,
1089+
(fetch_misses_vcache * 100.0) / access_total);
1090+
fprintf(stderr, " ├ TLB hits: %4llu (%.2f%%)\n",
1091+
fetch_hits_tlb,
1092+
(fetch_hits_tlb * 100.0) / (fetch_hits_tlb + fetch_misses_tlb));
1093+
fprintf(
1094+
stderr, " └ TLB misses: %4llu (%.2f%%)\n", fetch_misses_tlb,
1095+
(fetch_misses_tlb * 100.0) / (fetch_hits_tlb + fetch_misses_tlb));
1096+
1097+
fprintf(stderr, "\n=== Data Cache Statistics ===\n");
10611098
fprintf(stderr, " Load: %12llu hits, %12llu misses (8x2)", load_hits,
10621099
load_misses);
10631100
if (load_total > 0)

riscv.c

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -371,29 +371,40 @@ static void mmu_fence(hart_t *vm, uint32_t insn UNUSED)
371371

372372
static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
373373
{
374-
/* cache hit */
375374
uint32_t idx = (addr >> ICACHE_OFFSET_BITS) & ICACHE_INDEX_MASK;
376375
uint32_t tag = addr >> (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS);
377376
icache_block_t *blk = &vm->icache.i_block[idx];
378377
uint32_t vpn = addr >> RV_PAGE_SHIFT;
379378
uint32_t index = __builtin_parity(vpn) & 0x1;
380379

380+
#ifdef MMU_CACHE_STATS
381+
vm->cache_fetch[index].total_fetch++;
382+
#endif
383+
384+
/* icache lookup */
381385
if (likely(blk->valid && blk->tag == tag)) {
382386
#ifdef MMU_CACHE_STATS
383-
vm->cache_fetch[index].hits++;
387+
vm->cache_fetch[index].icache_hits++;
384388
#endif
385389
uint32_t ofs = addr & ICACHE_BLOCK_MASK;
386390
*value = *(const uint32_t *) (blk->base + ofs);
387391
return;
388392
}
389393

390-
/* search the victim cache */
394+
/* icache miss, try victim cache */
395+
#ifdef MMU_CACHE_STATS
396+
vm->cache_fetch[index].icache_misses++;
397+
#endif
398+
391399
uint32_t vcache_key = addr >> ICACHE_OFFSET_BITS;
392400
for (int i = 0; i < VCACHE_BLOCKS; i++) {
393401
victim_cache_block_t *vblk = &vm->icache.v_block[i];
394402

395-
/* victim cache hit, swap blocks */
396403
if (vblk->valid && vblk->tag == vcache_key) {
404+
/* victim cache hit, swap blocks */
405+
#ifdef MMU_CACHE_STATS
406+
vm->cache_fetch[index].vcache_hits++;
407+
#endif
397408
icache_block_t tmp = *blk;
398409
*blk = *vblk;
399410
*vblk = tmp;
@@ -407,11 +418,15 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
407418
}
408419

409420
#ifdef MMU_CACHE_STATS
410-
vm->cache_fetch[index].misses++;
421+
vm->cache_fetch[index].vcache_misses++;
411422
#endif
412423

413-
/* cache miss, Continue using the original va->pa*/
424+
/* TLB lookup */
414425
if (unlikely(vpn != vm->cache_fetch[index].n_pages)) {
426+
/*TLB miss: need to translate VA to PA*/
427+
#ifdef MMU_CACHE_STATS
428+
vm->cache_fetch[index].tlb_misses++;
429+
#endif
415430
mmu_translate(vm, &addr, (1 << 3), (1 << 6), false, RV_EXC_FETCH_FAULT,
416431
RV_EXC_FETCH_PFAULT);
417432
if (vm->error)
@@ -423,6 +438,12 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
423438
vm->cache_fetch[index].n_pages = vpn;
424439
vm->cache_fetch[index].page_addr = page_addr;
425440
}
441+
/*TLB hit*/
442+
else {
443+
#ifdef MMU_CACHE_STATS
444+
vm->cache_fetch[index].tlb_hits++;
445+
#endif
446+
}
426447

427448
*value =
428449
vm->cache_fetch[index].page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)];

riscv.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,13 @@ typedef struct {
3636
uint32_t n_pages;
3737
uint32_t *page_addr;
3838
#ifdef MMU_CACHE_STATS
39-
uint64_t hits;
40-
uint64_t misses;
39+
uint64_t total_fetch;
40+
uint64_t tlb_hits;
41+
uint64_t tlb_misses;
42+
uint64_t icache_hits;
43+
uint64_t icache_misses;
44+
uint64_t vcache_hits;
45+
uint64_t vcache_misses;
4146
#endif
4247
} mmu_fetch_cache_t;
4348

0 commit comments

Comments
 (0)