202109060639

05b25e73 · 饶先宏 · 72c20b52 · 05b25e73 · 05b25e73 · 05b25e73
379 changed file
--- a/examples/hdl4se_riscv/de2/de2_riscv_v4.qws
+++ b/examples/hdl4se_riscv/de2/de2_riscv_v4.qws
--- a/examples/hdl4se_riscv/de2/de2_riscv_v4.sof
+++ b/examples/hdl4se_riscv/de2/de2_riscv_v4.sof
--- a/examples/hdl4se_riscv/riscv-tests/.gitignore
+++ b/examples/hdl4se_riscv/riscv-tests/.gitignore
+*~
+*.riscv
+*.host
+*.o
+*.dump
+*.out
+*.hex
+.*.swp
+*.pyc
+/autom4te.cache
+/Makefile
+/config.log
+/config.status
+/build
--- a/examples/hdl4se_riscv/riscv-tests/.gitmodules
+++ b/examples/hdl4se_riscv/riscv-tests/.gitmodules
+[submodule "env"]
+	path = env
+	url = https://github.com/riscv/riscv-test-env.git
--- a/examples/hdl4se_riscv/riscv-tests/LICENSE
+++ b/examples/hdl4se_riscv/riscv-tests/LICENSE
+Copyright (c) 2012-2015, The Regents of the University of California (Regents).
+All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+3. Neither the name of the Regents nor the
+   names of its contributors may be used to endorse or promote products
+   derived from this software without specific prior written permission.
+
+IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
+SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
+OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS
+BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
+HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
+MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
--- a/examples/hdl4se_riscv/riscv-tests/Makefile.in
+++ b/examples/hdl4se_riscv/riscv-tests/Makefile.in
+prefix          := @prefix@
+abs_top_src_dir := @abs_top_srcdir@
+XLEN            := @XLEN@
+target_alias    := @target_alias@
+ifeq ($(target_alias),)
+RISCV_PREFIX_VAR :=
+else
+RISCV_PREFIX_VAR := RISCV_PREFIX=@target_alias@-
+endif
+instbasedir     := $(DESTDIR)$(prefix)
+bmarkdir        := $(abs_top_src_dir)/benchmarks
+isa_src_dir     := $(abs_top_src_dir)/isa
+debug_src_dir   := $(abs_top_src_dir)/debug
+
+all: benchmarks isa
+
+install: all
+	install -d $(instbasedir)/share/riscv-tests/isa
+	install -d $(instbasedir)/share/riscv-tests/benchmarks
+	install -p -m 644 `find isa -maxdepth 1 -type f` $(instbasedir)/share/riscv-tests/isa
+	install -p -m 644 `find benchmarks -maxdepth 1 -type f` $(instbasedir)/share/riscv-tests/benchmarks
+
+benchmarks:
+	mkdir -p benchmarks
+	$(MAKE) -C benchmarks -f $(bmarkdir)/Makefile src_dir=$(bmarkdir) XLEN=$(XLEN) $(RISCV_PREFIX_VAR)
+
+isa:
+	mkdir -p isa
+	$(MAKE) -C isa -f $(isa_src_dir)/Makefile src_dir=$(isa_src_dir) XLEN=$(XLEN) $(RISCV_PREFIX_VAR)
+
+debug-check:
+	mkdir -p debug
+	$(MAKE) -C debug -f $(debug_src_dir)/Makefile src_dir=$(debug_src_dir) XLEN=$(XLEN)
+
+debug-check-fast:
+	mkdir -p debug
+	$(MAKE) -C debug -f $(debug_src_dir)/Makefile src_dir=$(debug_src_dir) XLEN=$(XLEN) spike$(XLEN)
+
+clean:
+	[ ! -d isa ]        || $(MAKE) -C isa -f $(isa_src_dir)/Makefile src_dir=$(isa_src_dir) clean
+	[ ! -d benchmarks ] || $(MAKE) -C benchmarks -f $(bmarkdir)/Makefile src_dir=$(bmarkdir) clean
+	[ ! -d debug ]      || $(MAKE) -C debug -f $(debug_src_dir)/Makefile src_dir=$(debug_src_dir) clean
+
+# None of these targets produces a file of its own name, so declare them all
+# phony. `benchmarks` and `isa` additionally collide with the directories
+# that their own recipes create via `mkdir -p`.
+.PHONY: all install benchmarks isa debug-check debug-check-fast clean
+
--- a/examples/hdl4se_riscv/riscv-tests/README.md
+++ b/examples/hdl4se_riscv/riscv-tests/README.md
+riscv-tests
+================
+
+About
+-----------
+
+This repository hosts unit tests for RISC-V processors.
+
+Building from repository
+-----------------------------
+
+We assume that the RISCV environment variable is set to the RISC-V tools
+install path, and that the riscv-gnu-toolchain package is installed.
+
+    $ git clone https://github.com/riscv/riscv-tests
+    $ cd riscv-tests
+    $ git submodule update --init --recursive
+    $ autoconf
+    $ ./configure --prefix=$RISCV/target
+    $ make
+    $ make install
+
+The rest of this document describes the format of test programs for the RISC-V
+architecture.
+
+Test Virtual Machines
+-------------------------
+
+To allow maximum reuse of a given test, each test program is constrained to
+only use features of a given *test virtual machine* or TVM. A TVM hides
+differences between alternative implementations by defining:
+
+* The set of registers and instructions that can be used. 
+* Which portions of memory can be accessed.
+* The way the test program starts and ends execution. 
+* The way that test data is input.
+* The way that test results are output.
+
+The following table shows the TVMs currently defined for RISC-V. All of these
+TVMs only support a single hardware thread.
+
+TVM Name | Description
+--- | ---
+`rv32ui` | RV32 user-level, integer only
+`rv32si` | RV32 supervisor-level, integer only
+`rv64ui` | RV64 user-level, integer only
+`rv64uf` | RV64 user-level, integer and floating-point
+`rv64uv` | RV64 user-level, integer, floating-point, and vector
+`rv64si` | RV64 supervisor-level, integer only
+`rv64sv` | RV64 supervisor-level, integer and vector
+
+A test program for RISC-V is written within a single assembly language file,
+which is passed through the C preprocessor, and all regular assembly
+directives can be used. An example test program is shown below. Each test
+program should first include the `riscv_test.h` header file, which defines the
+macros used by the TVM. The header file will have different contents depending
+on the target environment for which the test will be built.  One of the goals
+of the various TVMs is to allow the same test program to be compiled and run
+on very different target environments yet still produce the same results. The
+following table shows the target environments currently defined.
+
+Target Environment Name | Description
+--- | ---
+`p` | virtual memory is disabled, only core 0 boots up
+`pm` | virtual memory is disabled, all cores boot up
+`pt` | virtual memory is disabled, timer interrupt fires every 100 cycles
+`v` | virtual memory is enabled
+
+Each test program must next specify for which TVM it is designed by including
+the appropriate TVM macro, `RVTEST_RV64U` in this example. This specification
+can change the way in which subsequent macros are interpreted, and supports
+a static check of the TVM functionality used by the program.
+
+The test program will begin execution at the first instruction after
+`RVTEST_CODE_BEGIN`, and continue until execution reaches an `RVTEST_PASS`
+macro or the `RVTEST_CODE_END` macro, which is implicitly a success. A test
+can explicitly fail by invoking the `RVTEST_FAIL` macro.
+
+The example program contains self-checking code to test the result of the add.
+However, self-checks rely on correct functioning of the processor instructions
+used to implement the self check (e.g., the branch) and so cannot be the only
+testing strategy.
+
+All tests should also contain a test data section, delimited by
+`RVTEST_DATA_BEGIN` and `RVTEST_DATA_END`. There is no alignment guarantee for
+the start of the test data section, so regular assembler alignment
+instructions should be used to ensure desired alignment of data values. This
+region of memory will be captured at the end of the test to act as a signature
+from the test. The signature can be compared with that from a run on the
+golden model.
+
+Any given test environment for running tests should also include a timeout
+facility, which will class a test as failing if it does not successfully
+complete a test within a reasonable time bound.
+
+    #include "riscv_test.h"
+
+    RVTEST_RV64U        # Define TVM used by program.
+
+    # Test code region.
+    RVTEST_CODE_BEGIN   # Start of test code.
+            lw      x2, testdata
+            addi    x2, x2, 1     # x2 should now hold 42.
+            sw      x2, result    # Store result into memory overwriting 1s.
+            li      x3, 42        # Desired result.
+            bne     x2, x3, fail  # Fail out if doesn't match.
+            RVTEST_PASS           # Signal success.
+    fail:
+            RVTEST_FAIL
+    RVTEST_CODE_END     # End of test code.
+
+    # Input data section.
+    # This section is optional, and this data is NOT saved in the output.
+    .data
+            .align 3
+    testdata:
+            .dword 41
+
+    # Output data section.
+    RVTEST_DATA_BEGIN   # Start of test output data region.
+            .align 3
+    result:
+            .dword -1
+    RVTEST_DATA_END     # End of test output data region.
+
+User-Level TVMs
+--------------------
+
+Test programs for the `rv32u*` and `rv64u*` TVMs can contain all instructions
+from the respective base user-level ISA (RV32 or RV64), except for those with
+the SYSTEM major opcode (syscall, break, rdcycle, rdtime, rdinstret). All user
+registers (pc, x0-x31, f0-f31, fsr) can be accessed.
+
+The `rv32ui` and `rv64ui` TVMs are integer-only subsets of `rv32u` and `rv64u`
+respectively. These subsets can not use any floating-point instructions (major
+opcodes: LOAD-FP, STORE-FP, MADD, MSUB, NMSUB, NMADD, OP-FP), and hence cannot
+access the floating-point register state (f0-f31 and fsr). The integer-only
+TVMs are useful for initial processor bringup and to test simpler
+implementations that lack a hardware FPU.
+
+Note that any `rv32ui` test program is also valid for the `rv32u` TVM, and
+similarly `rv64ui` is a strict subset of `rv64u`. To allow a given test to run
+on the widest possible set of implementations, it is desirable to write any
+given test to run on the smallest or least capable TVM possible. For example,
+any simple tests of integer functionality should be written for the `rv64ui`
+TVM, as the same test can then be run on RV64 implementations with or without a
+hardware FPU. As another example, all tests for these base user-level TVMs will
+also be valid for more advanced processors with instruction-set extensions.
+
+At the start of execution, the values of all registers are undefined. All
+branch and jump destinations must be to labels within the test code region of
+the assembler source file. The code and data sections will be relocated
+differently for the various implementations of the test environment, and so
+test program results shall not depend on absolute addresses of instructions or
+data memory. The test build environment should support randomization of the
+section relocation to provide better coverage and to ensure test signatures do
+not contain absolute addresses.
+
+Supervisor-Level TVMs
+--------------------------
+
+The supervisor-level TVMs allow testing of supervisor-level state and
+instructions.  As with the user-level TVMs, we provide integer-only
+supervisor-level TVMs indicated with a trailing `i`.
+
+History and Acknowledgements
+---------------------------------
+
+This style of test virtual machine originated with the T0 (Torrent-0) vector
+microprocessor project at UC Berkeley and ICSI, begun in 1992. The main
+developers of this test strategy were Krste Asanovic and David Johnson. A
+precursor to `torture` was `rantor` developed by Phil Kohn at ICSI.
+
+A variant of this testing approach was also used for the Scale vector-thread
+processor at MIT, begun in 2000. Ronny Krashinsky and Christopher Batten were
+the principal architects of the Scale chip. Jeffrey Cohen and Mark Hampton
+developed a version of torture capable of generating vector-thread code.
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/Makefile
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/Makefile
+#=======================================================================
+# UCB VLSI FLOW: Makefile for riscv-bmarks
+#-----------------------------------------------------------------------
+# Yunsup Lee (yunsup@cs.berkeley.edu)
+#
+
+XLEN ?= 64
+
+default: all
+
+src_dir = .
+
+instname = riscv-bmarks
+instbasedir = $(UCB_VLSI_HOME)/install
+
+#--------------------------------------------------------------------
+# Sources
+#--------------------------------------------------------------------
+
+bmarks = \
+	median \
+	qsort \
+	rsort \
+	towers \
+	vvadd \
+	multiply \
+	mm \
+	dhrystone \
+	spmv \
+	mt-vvadd \
+	mt-matmul \
+	pmp \
+
+#--------------------------------------------------------------------
+# Build rules
+#--------------------------------------------------------------------
+
+RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf-
+RISCV_GCC ?= $(RISCV_PREFIX)gcc
+RISCV_GCC_OPTS ?= -DPREALLOCATE=1 -mcmodel=medany -static -std=gnu99 -O2 -ffast-math -fno-common -fno-builtin-printf -fno-tree-loop-distribute-patterns
+RISCV_LINK ?= $(RISCV_GCC) -T $(src_dir)/common/test.ld $(incs)
+RISCV_LINK_OPTS ?= -static -nostdlib -nostartfiles -lm -lgcc -T $(src_dir)/common/test.ld
+RISCV_OBJDUMP ?= $(RISCV_PREFIX)objdump --disassemble-all --disassemble-zeroes --section=.text --section=.text.startup --section=.text.init --section=.data
+RISCV_SIM ?= spike --isa=rv$(XLEN)gc
+
+incs  += -I$(src_dir)/../env -I$(src_dir)/common $(addprefix -I$(src_dir)/, $(bmarks))
+objs  :=
+
+# compile_template: rule generator for one benchmark; $(1) is the benchmark
+# name (also the name of its source directory under $(src_dir)).
+# The generated rule builds $(1).riscv from every file in the benchmark's
+# directory plus everything in common/, compiling and linking in one step.
+# $(1) and the $(wildcard ...) calls are expanded when $(call) runs; the
+# $$-escaped references ($$(RISCV_GCC), $$@, ...) are left for the rule
+# produced by $(eval) and are expanded when that rule is used.
+define compile_template
+$(1).riscv: $(wildcard $(src_dir)/$(1)/*) $(wildcard $(src_dir)/common/*)
+	$$(RISCV_GCC) $$(incs) $$(RISCV_GCC_OPTS) -o $$@ $(wildcard $(src_dir)/$(1)/*.c) $(wildcard $(src_dir)/common/*.c) $(wildcard $(src_dir)/common/*.S) $$(RISCV_LINK_OPTS)
+endef
+
+# Instantiate the template once per entry in $(bmarks).
+$(foreach bmark,$(bmarks),$(eval $(call compile_template,$(bmark))))
+
+#------------------------------------------------------------
+# Build and run benchmarks on riscv simulator
+
+bmarks_riscv_bin  = $(addsuffix .riscv,  $(bmarks))
+bmarks_riscv_dump = $(addsuffix .riscv.dump, $(bmarks))
+bmarks_riscv_out  = $(addsuffix .riscv.out,  $(bmarks))
+
+$(bmarks_riscv_dump): %.riscv.dump: %.riscv
+	$(RISCV_OBJDUMP) $< > $@
+
+$(bmarks_riscv_out): %.riscv.out: %.riscv
+	$(RISCV_SIM) $< > $@
+
+riscv: $(bmarks_riscv_dump)
+run: $(bmarks_riscv_out)
+
+junk += $(bmarks_riscv_bin) $(bmarks_riscv_dump) $(bmarks_riscv_hex) $(bmarks_riscv_out)
+
+#------------------------------------------------------------
+# Default
+
+all: riscv
+
+#------------------------------------------------------------
+# Install
+
+# Expanded once (:=) so that every use of $(install_dir) within a single make
+# run names the same directory, even if the clock rolls over to the next
+# minute between the mkdir and the cp below.
+date_suffix := $(shell date +%Y-%m-%d_%H-%M)
+install_dir = $(instbasedir)/$(instname)-$(date_suffix)
+# Kept lazy (=) so the directory listing is only taken when install-link is run.
+latest_install = $(shell ls -1 -d $(instbasedir)/$(instname)* | tail -n 1)
+
+# Neither target creates a file of its own name.
+.PHONY: install install-link
+
+# Copy the benchmark binaries and their disassembly into a timestamped
+# directory. -p creates missing parents and tolerates a re-install that lands
+# in the same minute.
+install:
+	mkdir -p $(install_dir)
+	cp -r $(bmarks_riscv_bin) $(bmarks_riscv_dump) $(install_dir)
+
+# Re-point $(instbasedir)/$(instname) at the entry that `ls` lists last.
+install-link:
+	rm -rf $(instbasedir)/$(instname)
+	ln -s $(latest_install) $(instbasedir)/$(instname)
+
+#------------------------------------------------------------
+# Clean up
+
+clean:
+	rm -rf $(objs) $(junk)
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/common/crt.S
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/common/crt.S
+# See LICENSE for license details.
+
+#include "encoding.h"
+
+#if __riscv_xlen == 64
+# define LREG ld
+# define SREG sd
+# define REGBYTES 8
+#else
+# define LREG lw
+# define SREG sw
+# define REGBYTES 4
+#endif
+
+  .section ".text.init"
+  .globl _start
+_start:
+  li  x1, 0
+  li  x2, 0
+  li  x3, 0
+  li  x4, 0
+  li  x5, 0
+  li  x6, 0
+  li  x7, 0
+  li  x8, 0
+  li  x9, 0
+  li  x10,0
+  li  x11,0
+  li  x12,0
+  li  x13,0
+  li  x14,0
+  li  x15,0
+  li  x16,0
+  li  x17,0
+  li  x18,0
+  li  x19,0
+  li  x20,0
+  li  x21,0
+  li  x22,0
+  li  x23,0
+  li  x24,0
+  li  x25,0
+  li  x26,0
+  li  x27,0
+  li  x28,0
+  li  x29,0
+  li  x30,0
+  li  x31,0
+
+  # enable FPU and accelerator if present
+  li t0, MSTATUS_FS | MSTATUS_XS
+  csrs mstatus, t0
+
+  # make sure XLEN agrees with compilation choice
+  li t0, 1
+  slli t0, t0, 31
+#if __riscv_xlen == 64
+  bgez t0, 1f
+#else
+  bltz t0, 1f
+#endif
+2:
+  li a0, 1
+  sw a0, tohost, t0
+  j 2b
+1:
+
+#ifdef __riscv_flen
+  # initialize FPU if we have one
+  la t0, 1f
+  csrw mtvec, t0
+
+  fssr    x0
+  fmv.s.x f0, x0
+  fmv.s.x f1, x0
+  fmv.s.x f2, x0
+  fmv.s.x f3, x0
+  fmv.s.x f4, x0
+  fmv.s.x f5, x0
+  fmv.s.x f6, x0
+  fmv.s.x f7, x0
+  fmv.s.x f8, x0
+  fmv.s.x f9, x0
+  fmv.s.x f10,x0
+  fmv.s.x f11,x0
+  fmv.s.x f12,x0
+  fmv.s.x f13,x0
+  fmv.s.x f14,x0
+  fmv.s.x f15,x0
+  fmv.s.x f16,x0
+  fmv.s.x f17,x0
+  fmv.s.x f18,x0
+  fmv.s.x f19,x0
+  fmv.s.x f20,x0
+  fmv.s.x f21,x0
+  fmv.s.x f22,x0
+  fmv.s.x f23,x0
+  fmv.s.x f24,x0
+  fmv.s.x f25,x0
+  fmv.s.x f26,x0
+  fmv.s.x f27,x0
+  fmv.s.x f28,x0
+  fmv.s.x f29,x0
+  fmv.s.x f30,x0
+  fmv.s.x f31,x0
+1:
+#endif
+
+  # initialize trap vector
+  la t0, trap_entry
+  csrw mtvec, t0
+
+  # initialize global pointer
+.option push
+.option norelax
+  la gp, __global_pointer$
+.option pop
+
+  la  tp, _end + 63
+  and tp, tp, -64
+
+  # get core id
+  csrr a0, mhartid
+  # for now, assume only 1 core
+  li a1, 1
+1:bgeu a0, a1, 1b
+
+  # give each core 128KB of stack + TLS
+#define STKSHIFT 17
+  add sp, a0, 1
+  sll sp, sp, STKSHIFT
+  add sp, sp, tp
+  sll a2, a0, STKSHIFT
+  add tp, tp, a2
+
+  j _init
+
+  .align 2
+trap_entry:
+  addi sp, sp, -272
+
+  SREG x1, 1*REGBYTES(sp)
+  SREG x2, 2*REGBYTES(sp)
+  SREG x3, 3*REGBYTES(sp)
+  SREG x4, 4*REGBYTES(sp)
+  SREG x5, 5*REGBYTES(sp)
+  SREG x6, 6*REGBYTES(sp)
+  SREG x7, 7*REGBYTES(sp)
+  SREG x8, 8*REGBYTES(sp)
+  SREG x9, 9*REGBYTES(sp)
+  SREG x10, 10*REGBYTES(sp)
+  SREG x11, 11*REGBYTES(sp)
+  SREG x12, 12*REGBYTES(sp)
+  SREG x13, 13*REGBYTES(sp)
+  SREG x14, 14*REGBYTES(sp)
+  SREG x15, 15*REGBYTES(sp)
+  SREG x16, 16*REGBYTES(sp)
+  SREG x17, 17*REGBYTES(sp)
+  SREG x18, 18*REGBYTES(sp)
+  SREG x19, 19*REGBYTES(sp)
+  SREG x20, 20*REGBYTES(sp)
+  SREG x21, 21*REGBYTES(sp)
+  SREG x22, 22*REGBYTES(sp)
+  SREG x23, 23*REGBYTES(sp)
+  SREG x24, 24*REGBYTES(sp)
+  SREG x25, 25*REGBYTES(sp)
+  SREG x26, 26*REGBYTES(sp)
+  SREG x27, 27*REGBYTES(sp)
+  SREG x28, 28*REGBYTES(sp)
+  SREG x29, 29*REGBYTES(sp)
+  SREG x30, 30*REGBYTES(sp)
+  SREG x31, 31*REGBYTES(sp)
+
+  csrr a0, mcause
+  csrr a1, mepc
+  mv a2, sp
+  jal handle_trap
+  csrw mepc, a0
+
+  # Remain in M-mode after eret
+  li t0, MSTATUS_MPP
+  csrs mstatus, t0
+
+  LREG x1, 1*REGBYTES(sp)
+  LREG x2, 2*REGBYTES(sp)
+  LREG x3, 3*REGBYTES(sp)
+  LREG x4, 4*REGBYTES(sp)
+  LREG x5, 5*REGBYTES(sp)
+  LREG x6, 6*REGBYTES(sp)
+  LREG x7, 7*REGBYTES(sp)
+  LREG x8, 8*REGBYTES(sp)
+  LREG x9, 9*REGBYTES(sp)
+  LREG x10, 10*REGBYTES(sp)
+  LREG x11, 11*REGBYTES(sp)
+  LREG x12, 12*REGBYTES(sp)
+  LREG x13, 13*REGBYTES(sp)
+  LREG x14, 14*REGBYTES(sp)
+  LREG x15, 15*REGBYTES(sp)
+  LREG x16, 16*REGBYTES(sp)
+  LREG x17, 17*REGBYTES(sp)
+  LREG x18, 18*REGBYTES(sp)
+  LREG x19, 19*REGBYTES(sp)
+  LREG x20, 20*REGBYTES(sp)
+  LREG x21, 21*REGBYTES(sp)
+  LREG x22, 22*REGBYTES(sp)
+  LREG x23, 23*REGBYTES(sp)
+  LREG x24, 24*REGBYTES(sp)
+  LREG x25, 25*REGBYTES(sp)
+  LREG x26, 26*REGBYTES(sp)
+  LREG x27, 27*REGBYTES(sp)
+  LREG x28, 28*REGBYTES(sp)
+  LREG x29, 29*REGBYTES(sp)
+  LREG x30, 30*REGBYTES(sp)
+  LREG x31, 31*REGBYTES(sp)
+
+  addi sp, sp, 272
+  mret
+
+.section ".tohost","aw",@progbits
+.align 6
+.globl tohost
+tohost: .dword 0
+.align 6
+.globl fromhost
+fromhost: .dword 0
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/common/syscalls.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/common/syscalls.c
+// See LICENSE for license details.
+
+#include <stdint.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <limits.h>
+#include <sys/signal.h>
+#include "util.h"
+
+#define SYS_write 64
+
+#undef strcmp
+
+extern volatile uint64_t tohost;
+extern volatile uint64_t fromhost;
+
+// Issues a request to the host through the tohost/fromhost mailbox pair.
+// The request number and its three arguments are written to a 64-byte-aligned
+// scratch buffer whose address is then stored to tohost; the function spins
+// until fromhost becomes non-zero, clears it, and returns slot 0 of the buffer.
+static uintptr_t syscall(uintptr_t which, uint64_t arg0, uint64_t arg1, uint64_t arg2)
+{
+  volatile uint64_t magic_mem[8] __attribute__((aligned(64)));
+  magic_mem[0] = which;
+  magic_mem[1] = arg0;
+  magic_mem[2] = arg1;
+  magic_mem[3] = arg2;
+  // Full barrier: the buffer contents must be written before its address is published.
+  __sync_synchronize();
+
+  tohost = (uintptr_t)magic_mem;
+  // Busy-wait for the response flag, then reset it for the next request.
+  while (fromhost == 0)
+    ;
+  fromhost = 0;
+
+  // Full barrier before reading back slot 0 (presumably rewritten by the host — confirm).
+  __sync_synchronize();
+  return magic_mem[0];
+}
+
+#define NUM_COUNTERS 2
+static uintptr_t counters[NUM_COUNTERS];
+static char* counter_names[NUM_COUNTERS];
+
+void setStats(int enable)
+{
+  int i = 0;
+#define READ_CTR(name) do { \
+    while (i >= NUM_COUNTERS) ; \
+    uintptr_t csr = read_csr(name); \
+    if (!enable) { csr -= counters[i]; counter_names[i] = #name; } \
+    counters[i++] = csr; \
+  } while (0)
+
+  READ_CTR(mcycle);
+  READ_CTR(minstret);
+
+#undef READ_CTR
+}
+
+void __attribute__((noreturn)) tohost_exit(uintptr_t code)
+{
+  tohost = (code << 1) | 1;
+  while (1);
+}
+
+uintptr_t __attribute__((weak)) handle_trap(uintptr_t cause, uintptr_t epc, uintptr_t regs[32])
+{
+  tohost_exit(1337);
+}
+
+void exit(int code)
+{
+  tohost_exit(code);
+}
+
+void abort()
+{
+  exit(128 + SIGABRT);
+}
+
+void printstr(const char* s)
+{
+  syscall(SYS_write, 1, (uintptr_t)s, strlen(s));
+}
+
+void __attribute__((weak)) thread_entry(int cid, int nc)
+{
+  // multi-threaded programs override this function.
+  // for the case of single-threaded programs, only let core 0 proceed.
+  while (cid != 0);
+}
+
+int __attribute__((weak)) main(int argc, char** argv)
+{
+  // single-threaded programs override this function.
+  printstr("Implement main(), foo!\n");
+  return -1;
+}
+
+static void init_tls()
+{
+  register void* thread_pointer asm("tp");
+  extern char _tdata_begin, _tdata_end, _tbss_end;
+  size_t tdata_size = &_tdata_end - &_tdata_begin;
+  memcpy(thread_pointer, &_tdata_begin, tdata_size);
+  size_t tbss_size = &_tbss_end - &_tdata_end;
+  memset(thread_pointer + tdata_size, 0, tbss_size);
+}
+
+/*
+ * C entry point, jumped to from the startup code in crt.S.  Sets up
+ * thread-local storage, hands control to thread_entry(), then runs main()
+ * and prints every non-zero counter recorded by setStats() before exiting
+ * with main()'s return value.
+ */
+void _init(int cid, int nc)
+{
+  init_tls();
+  thread_entry(cid, nc);
+
+  // only single-threaded programs should ever get here.
+  int ret = main(0, 0);
+
+  char buf[NUM_COUNTERS * 32] __attribute__((aligned(64)));
+  char* pbuf = buf;
+  for (int i = 0; i < NUM_COUNTERS; i++)
+    if (counters[i])
+      // counters[] holds uintptr_t values: format them as unsigned long so the
+      // conversion matches the argument instead of reading a (truncated) int.
+      pbuf += sprintf(pbuf, "%s = %lu\n", counter_names[i], (unsigned long)counters[i]);
+  if (pbuf != buf)
+    printstr(buf);
+
+  exit(ret);
+}
+
+#undef putchar
+int putchar(int ch)
+{
+  static __thread char buf[64] __attribute__((aligned(64)));
+  static __thread int buflen = 0;
+
+  buf[buflen++] = ch;
+
+  if (ch == '\n' || buflen == sizeof(buf))
+  {
+    syscall(SYS_write, 1, (uintptr_t)buf, buflen);
+    buflen = 0;
+  }
+
+  return 0;
+}
+
+// Prints x via printstr() as exactly 16 lowercase hexadecimal digits,
+// zero-padded on the left.
+void printhex(uint64_t x)
+{
+  char str[17];
+  char *out = &str[16];
+
+  // Terminate first, then fill the digits from least to most significant.
+  *out = 0;
+  while (out != str)
+  {
+    unsigned nibble = x & 0xF;
+    *--out = nibble < 10 ? '0' + nibble : 'a' + (nibble - 10);
+    x >>= 4;
+  }
+
+  printstr(str);
+}
+
+static inline void printnum(void (*putch)(int, void**), void **putdat,
+                    unsigned long long num, unsigned base, int width, int padc)
+{
+  unsigned digs[sizeof(num)*CHAR_BIT];
+  int pos = 0;
+
+  while (1)
+  {
+    digs[pos++] = num % base;
+    if (num < base)
+      break;
+    num /= base;
+  }
+
+  while (width-- > pos)
+    putch(padc, putdat);
+
+  while (pos-- > 0)
+    putch(digs[pos] + (digs[pos] >= 10 ? 'a' - 10 : '0'), putdat);
+}
+
+static unsigned long long getuint(va_list *ap, int lflag)
+{
+  if (lflag >= 2)
+    return va_arg(*ap, unsigned long long);
+  else if (lflag)
+    return va_arg(*ap, unsigned long);
+  else
+    return va_arg(*ap, unsigned int);
+}
+
+static long long getint(va_list *ap, int lflag)
+{
+  if (lflag >= 2)
+    return va_arg(*ap, long long);
+  else if (lflag)
+    return va_arg(*ap, long);
+  else
+    return va_arg(*ap, int);
+}
+
+static void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_list ap)
+{
+  register const char* p;
+  const char* last_fmt;
+  register int ch, err;
+  unsigned long long num;
+  int base, lflag, width, precision, altflag;
+  char padc;
+
+  while (1) {
+    while ((ch = *(unsigned char *) fmt) != '%') {
+      if (ch == '\0')
+        return;
+      fmt++;
+      putch(ch, putdat);
+    }
+    fmt++;
+
+    // Process a %-escape sequence
+    last_fmt = fmt;
+    padc = ' ';
+    width = -1;
+    precision = -1;
+    lflag = 0;
+    altflag = 0;
+  reswitch:
+    switch (ch = *(unsigned char *) fmt++) {
+
+    // flag to pad on the right
+    case '-':
+      padc = '-';
+      goto reswitch;
+      
+    // flag to pad with 0's instead of spaces
+    case '0':
+      padc = '0';
+      goto reswitch;
+
+    // width field
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+      for (precision = 0; ; ++fmt) {
+        precision = precision * 10 + ch - '0';
+        ch = *fmt;
+        if (ch < '0' || ch > '9')
+          break;
+      }
+      goto process_precision;
+
+    case '*':
+      precision = va_arg(ap, int);
+      goto process_precision;
+
+    case '.':
+      if (width < 0)
+        width = 0;
+      goto reswitch;
+
+    case '#':
+      altflag = 1;
+      goto reswitch;
+
+    process_precision:
+      if (width < 0)
+        width = precision, precision = -1;
+      goto reswitch;
+
+    // long flag (doubled for long long)
+    case 'l':
+      lflag++;
+      goto reswitch;
+
+    // character
+    case 'c':
+      putch(va_arg(ap, int), putdat);
+      break;
+
+    // string
+    case 's':
+      if ((p = va_arg(ap, char *)) == NULL)
+        p = "(null)";
+      if (width > 0 && padc != '-')
+        for (width -= strnlen(p, precision); width > 0; width--)
+          putch(padc, putdat);
+      for (; (ch = *p) != '\0' && (precision < 0 || --precision >= 0); width--) {
+        putch(ch, putdat);
+        p++;
+      }
+      for (; width > 0; width--)
+        putch(' ', putdat);
+      break;
+
+    // (signed) decimal
+    case 'd':
+      num = getint(&ap, lflag);
+      if ((long long) num < 0) {
+        putch('-', putdat);
+        num = -(long long) num;
+      }
+      base = 10;
+      goto signed_number;
+
+    // unsigned decimal
+    case 'u':
+      base = 10;
+      goto unsigned_number;
+
+    // (unsigned) octal
+    case 'o':
+      // should do something with padding so it's always 3 octits
+      base = 8;
+      goto unsigned_number;
+
+    // pointer
+    case 'p':
+      static_assert(sizeof(long) == sizeof(void*));
+      lflag = 1;
+      putch('0', putdat);
+      putch('x', putdat);
+      /* fall through to 'x' */
+
+    // (unsigned) hexadecimal
+    case 'x':
+      base = 16;
+    unsigned_number:
+      num = getuint(&ap, lflag);
+    signed_number:
+      printnum(putch, putdat, num, base, width, padc);
+      break;
+
+    // escaped '%' character
+    case '%':
+      putch(ch, putdat);
+      break;
+      
+    // unrecognized escape sequence - just print it literally
+    default:
+      putch('%', putdat);
+      fmt = last_fmt;
+      break;
+    }
+  }
+}
+
+int printf(const char* fmt, ...)
+{
+  va_list ap;
+  va_start(ap, fmt);
+
+  vprintfmt((void*)putchar, 0, fmt, ap);
+
+  va_end(ap);
+  return 0; // incorrect return value, but who cares, anyway?
+}
+
+int sprintf(char* str, const char* fmt, ...)
+{
+  va_list ap;
+  char* str0 = str;
+  va_start(ap, fmt);
+
+  void sprintf_putch(int ch, void** data)
+  {
+    char** pstr = (char**)data;
+    **pstr = ch;
+    (*pstr)++;
+  }
+
+  vprintfmt(sprintf_putch, (void**)&str, fmt, ap);
+  *str = 0;
+
+  va_end(ap);
+  return str - str0;
+}
+
+void* memcpy(void* dest, const void* src, size_t len)
+{
+  if ((((uintptr_t)dest | (uintptr_t)src | len) & (sizeof(uintptr_t)-1)) == 0) {
+    const uintptr_t* s = src;
+    uintptr_t *d = dest;
+    while (d < (uintptr_t*)(dest + len))
+      *d++ = *s++;
+  } else {
+    const char* s = src;
+    char *d = dest;
+    while (d < (char*)(dest + len))
+      *d++ = *s++;
+  }
+  return dest;
+}
+
+void* memset(void* dest, int byte, size_t len)
+{
+  if ((((uintptr_t)dest | len) & (sizeof(uintptr_t)-1)) == 0) {
+    uintptr_t word = byte & 0xFF;
+    word |= word << 8;
+    word |= word << 16;
+    word |= word << 16 << 16;
+
+    uintptr_t *d = dest;
+    while (d < (uintptr_t*)(dest + len))
+      *d++ = word;
+  } else {
+    char *d = dest;
+    while (d < (char*)(dest + len))
+      *d++ = byte;
+  }
+  return dest;
+}
+
+// Returns the number of characters in s that precede its terminating NUL.
+size_t strlen(const char *s)
+{
+  size_t n = 0;
+  while (s[n] != '\0')
+    n++;
+  return n;
+}
+
+size_t strnlen(const char *s, size_t n)
+{
+  const char *p = s;
+  while (n-- && *p)
+    p++;
+  return p - s;
+}
+
+int strcmp(const char* s1, const char* s2)
+{
+  unsigned char c1, c2;
+
+  do {
+    c1 = *s1++;
+    c2 = *s2++;
+  } while (c1 != 0 && c1 == c2);
+
+  return c1 - c2;
+}
+
+// Copies src, including its terminating NUL, into dest and returns dest.
+char* strcpy(char* dest, const char* src)
+{
+  size_t i = 0;
+  for (;;) {
+    char c = src[i];
+    dest[i] = c;
+    if (c == '\0')
+      break;
+    i++;
+  }
+  return dest;
+}
+
+/*
+ * Minimal atol(): skips leading spaces, accepts one optional '+' or '-'
+ * sign, then accumulates decimal digits.  Parsing stops at the first
+ * character that is not a digit, so trailing text (e.g. "42\n") does not
+ * get folded into the result.  Returns 0 when no digits are present.
+ */
+long atol(const char* str)
+{
+  long res = 0;
+  int sign = 0;
+
+  while (*str == ' ')
+    str++;
+
+  if (*str == '-' || *str == '+') {
+    sign = *str == '-';
+    str++;
+  }
+
+  // Only consume decimal digits; anything else ends the number.
+  while (*str >= '0' && *str <= '9') {
+    res *= 10;
+    res += *str++ - '0';
+  }
+
+  return sign ? -res : res;
+}
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/common/test.ld
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/common/test.ld
+/*======================================================================*/
+/* Proxy kernel linker script                                           */
+/*======================================================================*/
+/* This is the linker script used when building the proxy kernel. */
+
+/*----------------------------------------------------------------------*/
+/* Setup                                                                */
+/*----------------------------------------------------------------------*/
+
+/* The OUTPUT_ARCH command specifies the machine architecture where the
+   argument is one of the names used in the BFD library. More
+   specifically one of the entries in bfd/cpu-mips.c */
+
+OUTPUT_ARCH( "riscv" )
+ENTRY(_start)
+
+/*----------------------------------------------------------------------*/
+/* Sections                                                             */
+/*----------------------------------------------------------------------*/
+
+SECTIONS
+{
+
+  /* text: test code section */
+  . = 0x00000000;
+  .text.init : { *(.text.init) }
+
+  . = ALIGN(0x1000);
+  .tohost : { *(.tohost) }
+
+  . = ALIGN(0x1000);
+  .text : { *(.text) }
+
+  /* data segment */
+  .data : { *(.data) }
+
+  .sdata : {
+    __global_pointer$ = . + 0x800;
+    *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*)
+    *(.sdata .sdata.* .gnu.linkonce.s.*)
+  }
+
+  /* bss segment */
+  .sbss : {
+    *(.sbss .sbss.* .gnu.linkonce.sb.*)
+    *(.scommon)
+  }
+  .bss : { *(.bss) }
+
+  /* thread-local data segment */
+  .tdata :
+  {
+    _tdata_begin = .;
+    *(.tdata)
+    _tdata_end = .;
+  }
+  .tbss :
+  {
+    *(.tbss)
+    _tbss_end = .;
+  }
+
+  /* End of uninitialized data segment */
+  _end = .;
+}
+
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/common/util.h
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/common/util.h
+// See LICENSE for license details.
+
+#ifndef __UTIL_H
+#define __UTIL_H
+
+extern void setStats(int enable);
+
+#include <stdint.h>
+
+#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }
+
+// Compares the first n ints of test against verify.  Returns 0 when every
+// element matches, otherwise the 1-based position of the first mismatch.
+static int verify(int n, const volatile int* test, const int* verify)
+{
+  const int paired = n / 2 * 2;   // elements handled two at a time
+  int i;
+  // Unrolled for faster verification
+  for (i = 0; i < paired; i += 2)
+  {
+    int t0 = test[i], t1 = test[i+1];
+    int v0 = verify[i], v1 = verify[i+1];
+    if (t0 != v0)
+      return i + 1;
+    if (t1 != v1)
+      return i + 2;
+  }
+  // Odd n leaves one trailing element that the paired loop did not visit.
+  if ((n & 1) && test[n-1] != verify[n-1])
+    return n;
+  return 0;
+}
+
+// Compare the first n doubles of `test` against the reference array `verify`
+// using exact equality. Returns 0 when every element matches; otherwise
+// returns the 1-based position of the first element that differs.
+static int verifyDouble(int n, const volatile double* test, const double* verify)
+{
+  const int paired = n/2*2;  // n rounded toward zero to an even count
+  int idx;
+
+  // Two elements are checked per step.
+  for (idx = 0; idx < paired; idx += 2)
+  {
+    const double got_lo = test[idx];
+    const double got_hi = test[idx+1];
+    const double want_lo = verify[idx];
+    const double want_hi = verify[idx+1];
+    const int lo_ok = (got_lo == want_lo);
+    const int hi_ok = (got_hi == want_hi);
+
+    if (!lo_ok)
+      return idx+1;
+    if (!hi_ok)
+      return idx+2;
+  }
+
+  // An odd n leaves exactly one trailing element to check.
+  if (n % 2 == 0)
+    return 0;
+  return (test[n-1] != verify[n-1]) ? n : 0;
+}
+
+// Block until `ncores` callers have reached this point.
+//
+// Each caller flips its own thread-local `threadsense` and atomically
+// increments the shared `count`. The caller whose increment observes
+// ncores-1 (i.e. the last to arrive) resets `count` and publishes its
+// `threadsense` through the shared `sense`; every other caller spins until
+// `sense` equals its own `threadsense`. Flipping the flag on every call is
+// what lets the same variables be reused by the next barrier.
+static void __attribute__((noinline)) barrier(int ncores)
+{
+  static volatile int sense;          // shared: value published by the last arriver
+  static volatile int count;          // shared: number of arrivals so far
+  static __thread int threadsense;    // per-thread: value this caller waits for
+
+  // Memory barrier on entry (GCC __sync builtin).
+  __sync_synchronize();
+
+  threadsense = !threadsense;
+  if (__sync_fetch_and_add(&count, 1) == ncores-1)
+  {
+    // Last arriver: reset the counter before releasing the waiters.
+    count = 0;
+    sense = threadsense;
+  }
+  else while(sense != threadsense)
+    ;  // busy-wait until released
+
+  // Memory barrier on exit.
+  __sync_synchronize();
+}
+
+// Advance a shift register by one step: x is shifted right by one and the
+// XOR of its two lowest bits is fed back in at bit 62.
+static uint64_t lfsr(uint64_t x)
+{
+  const uint64_t shifted = x >> 1;
+  const uint64_t feedback = (x ^ shifted) & 1;
+  return shifted | (feedback << 62);
+}
+
+// Return the length in bytes (2 or 4) of the RISC-V instruction at address pc.
+//
+// Only the first 16-bit parcel is read. In the RISC-V length encoding a
+// parcel whose two low bits are both set (0b11) begins a 32-bit instruction;
+// the other three values (0b00, 0b01, 0b10) are 16-bit compressed
+// instructions. The previous test `(parcel & 3) ? 4 : 2` misreported the
+// 0b01 and 0b10 compressed quadrants as 4 bytes long.
+// Encodings longer than 32 bits are not distinguished and report 4.
+static uintptr_t insn_len(uintptr_t pc)
+{
+  unsigned short parcel = *(unsigned short*)pc;
+  return ((parcel & 3) == 3) ? 4 : 2;
+}
+
+#ifdef __riscv
+#include "encoding.h"
+#endif
+
+// stringify(s) turns s into a string literal after macro-expanding it;
+// stringify_1 is the inner step that applies the # operator.
+#define stringify_1(s) #s
+#define stringify(s) stringify_1(s)
+
+// stats(code, iter): run `code` once and print how many cycles and retired
+// instructions it took, as reported by the mcycle and minstret CSRs.
+//   code - statement(s) to measure; also printed verbatim as the label.
+//   iter - iteration count used for the cycles/iter figure. It is
+//          parenthesized at each use so that an expression argument such as
+//          `a+b` divides as a whole.
+// The counters start at the negated CSR value and the value read afterwards
+// is added, leaving the elapsed delta. Output is printed only when `cid`
+// is 0.
+// The expansion site must provide `cid`, `printf` and `read_csr`
+// (read_csr presumably comes from encoding.h - confirm).
+// NOTE(review): _c and _i are unsigned long but are printed with %ld; the
+// format string is left unchanged here.
+#define stats(code, iter) do { \
+    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
+    code; \
+    _c += read_csr(mcycle), _i += read_csr(minstret); \
+    if (cid == 0) \
+      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
+             stringify(code), _c, _c/(iter), 10*_c/(iter)%10, _c/_i, 10*_c/_i%10); \
+  } while(0)
+
+#endif //__UTIL_H
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/dhrystone/dhrystone.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/dhrystone/dhrystone.c
+// See LICENSE for license details.
+
+#pragma GCC optimize ("no-inline")
+
+#include "dhrystone.h"
+
+#ifndef REG
+#define REG
+        /* REG becomes defined as empty */
+        /* i.e. no register variables   */
+#else
+#undef REG
+#define REG register
+#endif
+
+extern  int     Int_Glob;
+extern  char    Ch_1_Glob;
+
+
+/* Proc_6: store through Enum_Ref_Par a value derived from Enum_Val_Par.
+ *
+ *   Ident_1 -> Ident_1
+ *   Ident_2 -> Ident_1 if Int_Glob > 100, otherwise Ident_4
+ *   Ident_3 -> Ident_2
+ *   Ident_4 -> left as set before the switch (Ident_4)
+ *   Ident_5 -> Ident_3
+ *
+ * Old-style (K&R) definition with no declared return type; no value is
+ * returned. Func_3 is called before its definition appears in this file.
+ */
+Proc_6 (Enum_Val_Par, Enum_Ref_Par)
+/*********************************/
+    /* executed once */
+    /* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */
+
+Enumeration  Enum_Val_Par;
+Enumeration *Enum_Ref_Par;
+{
+  *Enum_Ref_Par = Enum_Val_Par;
+  /* Func_3 is true only for Ident_3; any other input is preset to Ident_4 */
+  if (! Func_3 (Enum_Val_Par))
+    /* then, not executed */
+    *Enum_Ref_Par = Ident_4;
+  switch (Enum_Val_Par)
+  {
+    case Ident_1: 
+      *Enum_Ref_Par = Ident_1;
+      break;
+    case Ident_2: 
+      if (Int_Glob > 100)
+        /* then */
+      *Enum_Ref_Par = Ident_1;
+      else *Enum_Ref_Par = Ident_4;
+      break;
+    case Ident_3: /* executed */
+      *Enum_Ref_Par = Ident_2;
+      break;
+    case Ident_4: break;
+    case Ident_5: 
+      *Enum_Ref_Par = Ident_3;
+      break;
+  } /* switch */
+} /* Proc_6 */
+
+
+/* Proc_7: store Int_1_Par_Val + 2 + Int_2_Par_Val through Int_Par_Ref.
+ * Old-style (K&R) definition with no declared return type; no value is
+ * returned.
+ */
+Proc_7 (Int_1_Par_Val, Int_2_Par_Val, Int_Par_Ref)
+/**********************************************/
+    /* executed three times                                      */ 
+    /* first call:      Int_1_Par_Val == 2, Int_2_Par_Val == 3,  */
+    /*                  Int_Par_Ref becomes 7                    */
+    /* second call:     Int_1_Par_Val == 10, Int_2_Par_Val == 5, */
+    /*                  Int_Par_Ref becomes 17                   */
+    /* third call:      Int_1_Par_Val == 6, Int_2_Par_Val == 10, */
+    /*                  Int_Par_Ref becomes 18                   */
+One_Fifty       Int_1_Par_Val;
+One_Fifty       Int_2_Par_Val;
+One_Fifty      *Int_Par_Ref;
+{
+  One_Fifty Int_Loc;
+
+  Int_Loc = Int_1_Par_Val + 2;
+  *Int_Par_Ref = Int_2_Par_Val + Int_Loc;
+} /* Proc_7 */
+
+
+/* Proc_8: write a fixed pattern into the two arrays and set Int_Glob to 5.
+ *
+ * With Int_Loc = Int_1_Par_Val + 5, the routine writes
+ * Arr_1_Par_Ref[Int_Loc], [Int_Loc+1] and [Int_Loc+30], and
+ * Arr_2_Par_Ref[Int_Loc][Int_Loc..Int_Loc+1], [Int_Loc][Int_Loc-1] and
+ * [Int_Loc+20][Int_Loc]. No bounds checks are performed; Arr_1_Dim and
+ * Arr_2_Dim are declared with 50 elements per dimension, so the caller
+ * must keep Int_1_Par_Val small enough for those indices to stay in range.
+ *
+ * Old-style (K&R) definition with no declared return type; no value is
+ * returned.
+ */
+Proc_8 (Arr_1_Par_Ref, Arr_2_Par_Ref, Int_1_Par_Val, Int_2_Par_Val)
+/*********************************************************************/
+    /* executed once      */
+    /* Int_1_Par_Val == 3 */
+    /* Int_2_Par_Val == 7 */
+Arr_1_Dim       Arr_1_Par_Ref;
+Arr_2_Dim       Arr_2_Par_Ref;
+int             Int_1_Par_Val;
+int             Int_2_Par_Val;
+{
+  REG One_Fifty Int_Index;
+  REG One_Fifty Int_Loc;
+
+  Int_Loc = Int_1_Par_Val + 5;
+  Arr_1_Par_Ref [Int_Loc] = Int_2_Par_Val;
+  Arr_1_Par_Ref [Int_Loc+1] = Arr_1_Par_Ref [Int_Loc];
+  Arr_1_Par_Ref [Int_Loc+30] = Int_Loc;
+  /* two iterations: columns Int_Loc and Int_Loc+1 of row Int_Loc */
+  for (Int_Index = Int_Loc; Int_Index <= Int_Loc+1; ++Int_Index)
+    Arr_2_Par_Ref [Int_Loc] [Int_Index] = Int_Loc;
+  /* this element is incremented, not assigned, so it accumulates per call */
+  Arr_2_Par_Ref [Int_Loc] [Int_Loc-1] += 1;
+  Arr_2_Par_Ref [Int_Loc+20] [Int_Loc] = Arr_1_Par_Ref [Int_Loc];
+  Int_Glob = 5;
+} /* Proc_8 */
+
+
+/* Func_1: compare two characters.
+ * Returns Ident_1 when they differ. When they are equal, stores the
+ * character in Ch_1_Glob and returns Ident_2.
+ * Old-style (K&R) parameter declarations.
+ */
+Enumeration Func_1 (Ch_1_Par_Val, Ch_2_Par_Val)
+/*************************************************/
+    /* executed three times                                         */
+    /* first call:      Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R'    */
+    /* second call:     Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C'    */
+    /* third call:      Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C'    */
+
+Capital_Letter   Ch_1_Par_Val;
+Capital_Letter   Ch_2_Par_Val;
+{
+  Capital_Letter        Ch_1_Loc;
+  Capital_Letter        Ch_2_Loc;
+
+  /* the copies through two locals are part of the benchmark workload */
+  Ch_1_Loc = Ch_1_Par_Val;
+  Ch_2_Loc = Ch_1_Loc;
+  if (Ch_2_Loc != Ch_2_Par_Val)
+    /* then, executed */
+    return (Ident_1);
+  else  /* not executed */
+  {
+    Ch_1_Glob = Ch_1_Loc;
+    return (Ident_2);
+   }
+} /* Func_1 */
+
+
+/* Func_2: compare two strings.
+ *
+ * First calls Func_1 on Str_1_Par_Ref[2] and Str_2_Par_Ref[3] in a loop.
+ * The loop exits only when Func_1 returns Ident_1 (the two characters
+ * differ); nothing in the loop changes if they are equal, so equal
+ * characters at those positions would make it spin forever.
+ * After the loop Ch_Loc is 'A', so the two tests on Ch_Loc are false and
+ * the result comes from strcmp: returns true, and sets Int_Glob to
+ * Int_Loc + 7, when Str_1_Par_Ref compares greater than Str_2_Par_Ref;
+ * returns false otherwise.
+ * Old-style (K&R) parameter declarations.
+ */
+Boolean Func_2 (Str_1_Par_Ref, Str_2_Par_Ref)
+/*************************************************/
+    /* executed once */
+    /* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */
+    /* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */
+
+Str_30  Str_1_Par_Ref;
+Str_30  Str_2_Par_Ref;
+{
+  REG One_Thirty        Int_Loc;
+      Capital_Letter    Ch_Loc;
+
+  Int_Loc = 2;
+  while (Int_Loc <= 2) /* loop body executed once */
+    if (Func_1 (Str_1_Par_Ref[Int_Loc],
+                Str_2_Par_Ref[Int_Loc+1]) == Ident_1)
+      /* then, executed */
+    {
+      /* Ch_Loc is assigned only here; the increment ends the loop */
+      Ch_Loc = 'A';
+      Int_Loc += 1;
+    } /* if, while */
+  if (Ch_Loc >= 'W' && Ch_Loc < 'Z')
+    /* then, not executed */
+    Int_Loc = 7;
+  if (Ch_Loc == 'R')
+    /* then, not executed */
+    return (true);
+  else /* executed */
+  {
+    if (strcmp (Str_1_Par_Ref, Str_2_Par_Ref) > 0)
+      /* then, not executed */
+    {
+      Int_Loc += 7;
+      Int_Glob = Int_Loc;
+      return (true);
+    }
+    else /* executed */
+      return (false);
+  } /* if Ch_Loc */
+} /* Func_2 */
+
+
+/* Func_3: return true when Enum_Par_Val is Ident_3, false otherwise.
+ * Old-style (K&R) parameter declaration.
+ */
+Boolean Func_3 (Enum_Par_Val)
+/***************************/
+    /* executed once        */
+    /* Enum_Par_Val == Ident_3 */
+Enumeration Enum_Par_Val;
+{
+  Enumeration Enum_Loc;
+
+  /* the copy into a local is part of the benchmark workload */
+  Enum_Loc = Enum_Par_Val;
+  if (Enum_Loc == Ident_3)
+    /* then, executed */
+    return (true);
+  else /* not executed */
+    return (false);
+} /* Func_3 */
+
+/* No-op stand-in with a printf-style signature: the format string and any
+ * further arguments are accepted and ignored, so nothing is printed. */
+void debug_printf(const char* str, ...)
+{
+}
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/dhrystone/dhrystone.h
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/dhrystone/dhrystone.h
+// See LICENSE for license details.
+
+#ifndef _DHRYSTONE_H
+#define _DHRYSTONE_H
+
+/****************** "DHRYSTONE" Benchmark Program ***************************/
+#define Version "C, Version 2.2"
+/*  File:       dhry_1.c (part 2 of 3)
+ *  Author:     Reinhold P. Weicker
+ *              Siemens Nixdorf, Paderborn/Germany
+ *              weicker@specbench.org
+ *  Date:       May 25, 1988
+ *  Modified:	Steven Pemberton, CWI, Amsterdam; Steven.Pemberton@cwi.nl
+ *  Date:       October, 1993; March 1995
+ *              Included both files into one source, that gets compiled
+ *              in two passes. Made program auto-compiling, and auto-running,
+ *              and generally made it much easier to use.
+ *
+ *              Original Version (in Ada) published in
+ *              "Communications of the ACM" vol. 27., no. 10 (Oct. 1984),
+ *              pp. 1013 - 1030, together with the statistics
+ *              on which the distribution of statements etc. is based.
+ *
+ *              In this C version, the following C library functions are used:
+ *              - strcpy, strcmp (inside the measurement loop)
+ *              - printf, scanf (outside the measurement loop)
+ *              In addition, Berkeley UNIX system calls "times ()" or "time ()"
+ *              are used for execution time measurement. For measurements
+ *              on other systems, these calls have to be changed.
+ *
+ *  Collection of Results:
+ *              Reinhold Weicker (address see above) and
+ *              
+ *              Rick Richardson
+ *              PC Research. Inc.
+ *              94 Apple Orchard Drive
+ *              Tinton Falls, NJ 07724
+ *                      Phone:  (201) 389-8963 (9-17 EST)               
+ *                      Usenet: ...!uunet!pcrat!rick
+ *
+ *      Please send results to Rick Richardson and/or Reinhold Weicker.
+ *      Complete information should be given on hardware and software used.
+ *      Hardware information includes: Machine type, CPU, type and size
+ *      of caches; for microprocessors: clock frequency, memory speed
+ *      (number of wait states).
+ *      Software information includes: Compiler (and runtime library)
+ *      manufacturer and version, compilation switches, OS version.
+ *      The Operating System version may give an indication about the compiler;
+ *      Dhrystone itself performs no OS calls in the measurement loop.
+ *
+ *      The complete output generated by the program should be mailed
+ *      such that at least some checks for correctness can be made.
+ *
+ ***************************************************************************
+ *
+ * Defines:     The following "Defines" are possible:
+ *      -DREG          (default: Not defined)
+ *              As an approximation to what an average C programmer
+ *              might do, causes the "register" storage class to be applied
+ *              - for local variables, if they are used (dynamically)
+ *                five or more times
+ *              - for parameters if they are used (dynamically)
+ *                six or more times
+ *              Note that an optimal "register" strategy is
+ *              compiler-dependent, and that "register" declarations
+ *              do not necessarily lead to faster execution.
+ *      -DNOSTRUCTASSIGN        (default: Not defined)
+ *              Define if the C compiler does not support
+ *              assignment of structures.
+ *      -DNOENUM                (default: Not defined)
+ *              Define if the C compiler does not support
+ *              enumeration types.
+ *      -DTIMES                 (default)
+ *      -DTIME
+ *              The "times" function of UNIX (returning process times)
+ *              or the "time" function (returning wallclock time)
+ *              is used for measurement. 
+ *              For single user machines, "time ()" is adequate. For
+ *              multi-user machines where you cannot get single-user
+ *              access, use the "times ()" function. If you have
+ *              neither, use a stopwatch in the dead of night.
+ *              "printf"s are provided marking the points "Start Timer"
+ *              and "Stop Timer". DO NOT use the UNIX "time(1)"
+ *              command, as this will measure the total time to
+ *              run this program, which will (erroneously) include
+ *              the time to allocate storage (malloc) and to perform
+ *              the initialization.
+ *      -DHZ=nnn
+ *              In Berkeley UNIX, the function "times" returns process
+ *              time in 1/HZ seconds, with HZ = 60 for most systems.
+ *              CHECK YOUR SYSTEM DESCRIPTION BEFORE YOU JUST APPLY
+ *              A VALUE.
+ *
+ ***************************************************************************
+ *
+ *  History:	Version C/2.1 was made for two reasons:
+ *
+ *	1) There was an obvious need for a common C version of
+ *      Dhrystone, since C is at present the most popular system
+ *      programming language for the class of processors
+ *      (microcomputers, minicomputers) where Dhrystone is used most.
+ *      There should be, as far as possible, only one C version of
+ *      Dhrystone such that results can be compared without
+ *      restrictions. In the past, the C versions distributed
+ *      by Rick Richardson (Version 1.1) and by Reinhold Weicker
+ *      had small (though not significant) differences.
+ *
+ *      2) As far as it is possible without changes to the Dhrystone
+ *      statistics, optimizing compilers should be prevented from
+ *      removing significant statements.
+ *
+ *      This C version has been developed in cooperation with
+ *      Rick Richardson (Tinton Falls, NJ), it incorporates many
+ *      ideas from the "Version 1.1" distributed previously by
+ *      him over the UNIX network Usenet.
+ *      I also thank Chaim Benedelac (National Semiconductor),
+ *      David Ditzel (SUN), Earl Killian and John Mashey (MIPS),
+ *      Alan Smith and Rafael Saavedra-Barrera (UC at Berkeley)
+ *      for their help with comments on earlier versions of the
+ *      benchmark.
+ *
+ *  Changes:    In the initialization part, this version follows mostly
+ *      Rick Richardson's version distributed via Usenet, not the
+ *      version distributed earlier via floppy disk by Reinhold Weicker.
+ *      As a concession to older compilers, names have been made
+ *      unique within the first 8 characters.
+ *      Inside the measurement loop, this version follows the
+ *      version previously distributed by Reinhold Weicker.
+ *
+ *      At several places in the benchmark, code has been added,
+ *      but within the measurement loop only in branches that 
+ *      are not executed. The intention is that optimizing compilers
+ *      should be prevented from moving code out of the measurement
+ *      loop, or from removing code altogether. Since the statements
+ *      that are executed within the measurement loop have NOT been
+ *      changed, the numbers defining the "Dhrystone distribution"
+ *      (distribution of statements, operand types and locality)
+ *      still hold. Except for sophisticated optimizing compilers,
+ *      execution times for this version should be the same as
+ *      for previous versions.
+ *
+ *      Since it has proven difficult to subtract the time for the
+ *      measurement loop overhead in a correct way, the loop check
+ *      has been made a part of the benchmark. This does have
+ *      an impact - though a very minor one - on the distribution
+ *      statistics which have been updated for this version.
+ *
+ *      All changes within the measurement loop are described
+ *      and discussed in the companion paper "Rationale for
+ *      Dhrystone version 2".
+ *
+ *      Because of the self-imposed limitation that the order and
+ *      distribution of the executed statements should not be
+ *      changed, there are still cases where optimizing compilers
+ *      may not generate code for some statements. To a certain
+ *      degree, this is unavoidable for small synthetic benchmarks.
+ *      Users of the benchmark are advised to check code listings
+ *      whether code is generated for all statements of Dhrystone.
+ *
+ *      Version 2.1 is identical to version 2.0 distributed via
+ *      the UNIX network Usenet in March 1988 except that it corrects
+ *      some minor deficiencies that were found by users of version 2.0.
+ *      The only change within the measurement loop is that a
+ *      non-executed "else" part was added to the "if" statement in
+ *      Func_3, and a non-executed "else" part removed from Proc_3.
+ *
+ * Version C/2.2, Steven Pemberton, October 1993
+ *	Functionally identical to version 2.1; the changes are in
+ *	how you compile and use it:
+ *	- Everything is in one file now, but compiled in 2 passes
+ *	- Compile (and run) by running the file through the shell: "sh dhry.c"
+ *	- Uses the system definition of HZ if one can be found
+ *	- HZ must be defined, otherwise it won't compile (no defaults here)
+ *	- The (uninteresting) output is printed to stderr (dhry2 > /dev/null)
+ *	- The number of loops is passed as a parameter, rather than read
+ *	  (dhry2 500000)
+ *	- If the number of loops is insufficient to get a good result,
+ *	  it repeats it with loops*10 until it is enough (rather than just
+ *	  stopping)
+ *	- Output says which sort of clock it is using, and the HZ value
+ *	- You can use -DREG instead of the -DREG=register of previous versions
+ *	- Some stylistic cleanups.
+ *		
+ ***************************************************************************
+ *
+ *  Compilation model and measurement (IMPORTANT):
+ *
+ *  The following "ground rules" apply for measurements:
+ *  - Separate compilation
+ *  - No procedure merging
+ *  - Otherwise, compiler optimizations are allowed but should be indicated
+ *  - Default results are those without register declarations
+ *  See the companion paper "Rationale for Dhrystone Version 2" for a more
+ *  detailed discussion of these ground rules.
+ *
+ *  For 16-Bit processors (e.g. 80186, 80286), times for all compilation
+ *  models ("small", "medium", "large" etc.) should be given if possible,
+ *  together with a definition of these models for the compiler system used.
+ *
+ **************************************************************************
+ *
+ *  Dhrystone (C version) statistics:
+ *
+ *  [Comment from the first distribution, updated for version 2.
+ *   Note that because of language differences, the numbers are slightly
+ *   different from the Ada version.]
+ *
+ *  The following program contains statements of a high level programming
+ *  language (here: C) in a distribution considered representative:           
+ *
+ *    assignments                  52 (51.0 %)
+ *    control statements           33 (32.4 %)
+ *    procedure, function calls    17 (16.7 %)
+ *
+ *  103 statements are dynamically executed. The program is balanced with
+ *  respect to the three aspects:                                             
+ *
+ *    - statement type
+ *    - operand type
+ *    - operand locality
+ *         operand global, local, parameter, or constant.                     
+ *
+ *  The combination of these three aspects is balanced only approximately.    
+ *
+ *  1. Statement Type:                                                        
+ *  -----------------             number
+ *
+ *     V1 = V2                     9
+ *       (incl. V1 = F(..)
+ *     V = Constant               12
+ *     Assignment,                 7
+ *       with array element
+ *     Assignment,                 6
+ *       with record component
+ *                                --
+ *                                34       34
+ *
+ *     X = Y +|-|"&&"|"|" Z        5
+ *     X = Y +|-|"==" Constant     6
+ *     X = X +|- 1                 3
+ *     X = Y *|/ Z                 2
+ *     X = Expression,             1
+ *           two operators
+ *     X = Expression,             1
+ *           three operators
+ *                                --
+ *                                18       18
+ *
+ *     if ....                    14
+ *       with "else"      7
+ *       without "else"   7
+ *           executed        3
+ *           not executed    4
+ *     for ...                     7  |  counted every time
+ *     while ...                   4  |  the loop condition
+ *     do ... while                1  |  is evaluated
+ *     switch ...                  1
+ *     break                       1
+ *     declaration with            1
+ *       initialization
+ *                                --
+ *                                34       34
+ *
+ *     P (...)  procedure call    11
+ *       user procedure      10
+ *       library procedure    1
+ *     X = F (...)
+ *             function  call      6
+ *       user function        5                                         
+ *       library function     1                                               
+ *                                --                                          
+ *                                17       17
+ *                                        ---
+ *                                        103
+ *
+ *    The average number of parameters in procedure or function calls
+ *    is 1.82 (not counting the function values as implicit parameters).
+ *
+ *  2. Operators
+ *  ------------
+ *                          number    approximate
+ *                                    percentage
+ *
+ *    Arithmetic             32          50.8                                 
+ *
+ *       +                     21          33.3                              
+ *       -                      7          11.1                              
+ *       *                      3           4.8
+ *       / (int div)            1           1.6
+ *
+ *    Comparison             27           42.8
+ *
+ *       ==                     9           14.3
+ *       /=                     4            6.3
+ *       >                      1            1.6
+ *       <                      3            4.8
+ *       >=                     1            1.6
+ *       <=                     9           14.3
+ *
+ *    Logic                   4            6.3
+ *
+ *       && (AND-THEN)          1            1.6
+ *       |  (OR)                1            1.6
+ *       !  (NOT)               2            3.2
+ * 
+ *                           --          -----
+ *                           63          100.1
+ *
+ *
+ *  3. Operand Type (counted once per operand reference):
+ *  ---------------
+ *                          number    approximate
+ *                                    percentage
+ *
+ *     Integer               175        72.3 %
+ *     Character              45        18.6 %
+ *     Pointer                12         5.0 %
+ *     String30                6         2.5 %
+ *     Array                   2         0.8 %
+ *     Record                  2         0.8 %
+ *                           ---       -------
+ *                           242       100.0 %
+ *
+ *  When there is an access path leading to the final operand (e.g. a record
+ *  component), only the final data type on the access path is counted.       
+ *
+ *
+ *  4. Operand Locality:                                                      
+ *  -------------------
+ *                                number    approximate
+ *                                          percentage
+ *
+ *     local variable              114        47.1 %
+ *     global variable              22         9.1 %
+ *     parameter                    45        18.6 %
+ *        value                        23         9.5 %
+ *        reference                    22         9.1 %
+ *     function result               6         2.5 %
+ *     constant                     55        22.7 %
+ *                                 ---       -------
+ *                                 242       100.0 %
+ *
+ *  The program does not compute anything meaningful, but it is syntactically
+ *  and semantically correct. All variables have a value assigned to them
+ *  before they are used as a source operand.
+ *
+ *  There has been no explicit effort to account for the effects of a
+ *  cache, or to balance the use of long or short displacements for code or
+ *  data.
+ *
+ ***************************************************************************
+ */
+
+/* Compiler and system dependent definitions: */
+
+/* variables for time measurement: */
+
+/*
+ * Timer selection. Exactly one branch below is compiled, and it provides
+ * CLOCK_TYPE, Too_Small_Time, Start_Timer() and Stop_Timer():
+ *
+ *   TIME defined       - time(), HZ forced to 1 (whole seconds)
+ *   MSC_CLOCK defined  - clock() from <time.h>, HZ forced to CLK_TCK
+ *   __riscv defined    - the mcycle CSR read through read_csr(); that macro
+ *                        is not defined in this header and must be supplied
+ *                        by the including file
+ *   otherwise          - times(), user time in 1/HZ seconds; HZ comes from
+ *                        -DHZ=nnn or, failing that, from <sys/param.h>
+ *
+ * Start_Timer()/Stop_Timer() assign to Begin_Time/End_Time, which the
+ * including file must declare. In the times() branch they expand to two
+ * statements, so they must not be used as the unbraced body of an if/else.
+ */
+
+#ifdef TIME
+
+#define CLOCK_TYPE "time()"
+#undef HZ
+#define HZ	(1) /* time() returns time in seconds */
+extern long     time(); /* see library function "time"  */
+#define Too_Small_Time 2 /* Measurements should last at least 2 seconds */
+#define Start_Timer() Begin_Time = time ( (long *) 0)
+#define Stop_Timer()  End_Time   = time ( (long *) 0)
+
+#else
+
+#ifdef MSC_CLOCK /* Use Microsoft C hi-res clock */
+
+#undef HZ
+#undef TIMES
+#include <time.h>
+#define HZ	CLK_TCK
+#define CLOCK_TYPE "MSC clock()"
+extern clock_t	clock();
+#define Too_Small_Time (2*HZ)
+#define Start_Timer() Begin_Time = clock()
+#define Stop_Timer()  End_Time   = clock()
+
+#elif defined(__riscv)
+
+#define HZ 1000000
+#define Too_Small_Time 1
+#define CLOCK_TYPE "rdcycle()"
+#define Start_Timer() Begin_Time = read_csr(mcycle)
+#define Stop_Timer() End_Time = read_csr(mcycle)
+
+#else
+                /* Use times(2) time function unless    */
+                /* explicitly defined otherwise         */
+#define CLOCK_TYPE "times()"
+#include <sys/types.h>
+#include <sys/times.h>
+#ifndef HZ	/* Added by SP 900619 */
+#include <sys/param.h> /* If your system doesn't have this, use -DHZ=xxx */
+#endif /* HZ */
+                /* The "must define HZ" diagnostic used to sit in the     */
+                /* #else of the test above, so it fired when HZ WAS       */
+                /* supplied with -DHZ=nnn and never when it was missing.  */
+                /* It now fires only if HZ is still undefined here.       */
+#ifndef HZ
+#error You must define HZ (for example -DHZ=60)
+#endif /* HZ */
+#ifndef PASS2
+struct tms      time_info;
+#endif
+/*extern  int     times ();*/
+                /* see library function "times" */
+#define Too_Small_Time (2*HZ)
+                /* Measurements should last at least about 2 seconds */
+#define Start_Timer() times(&time_info); Begin_Time=(long)time_info.tms_utime
+#define Stop_Timer()  times(&time_info); End_Time = (long)time_info.tms_utime
+
+#endif /* MSC_CLOCK */
+#endif /* TIME */
+
+
+#define Mic_secs_Per_Second     1000000
+#define NUMBER_OF_RUNS		500 /* Default number of runs */
+
+/* structassign(d, s): copy s into d. Uses plain assignment unless
+ * NOSTRUCTASSIGN is defined, in which case it copies sizeof(d) bytes
+ * with memcpy. */
+#ifdef  NOSTRUCTASSIGN
+#define structassign(d, s)      memcpy(&(d), &(s), sizeof(d))
+#else
+#define structassign(d, s)      d = s
+#endif
+
+/* Enumeration: the five values Ident_1..Ident_5, numbered 0..4.
+ * A real enum is used unless NOENUM is defined, in which case the values
+ * become integer macros and Enumeration is an int. */
+#ifdef  NOENUM
+#define Ident_1 0
+#define Ident_2 1
+#define Ident_3 2
+#define Ident_4 3
+#define Ident_5 4
+  typedef int   Enumeration;
+#else
+  typedef       enum    {Ident_1, Ident_2, Ident_3, Ident_4, Ident_5}
+                Enumeration;
+#endif
+        /* for boolean and enumeration types in Ada, Pascal */
+
+/* General definitions: */
+
+#include <stdio.h>
+#include <string.h>
+                /* for strcpy, strcmp */
+
+#define Null 0 
+                /* Value of a Null pointer */
+#define true  1
+#define false 0
+
+/* Scalar and array types used by the benchmark routines. The names of
+ * One_Thirty and One_Fifty suggest value ranges, but both are plain int
+ * and no range is enforced. */
+typedef int     One_Thirty;
+typedef int     One_Fifty;
+typedef char    Capital_Letter;
+typedef int     Boolean;
+typedef char    Str_30 [31];        /* 31 bytes: room for 30 characters plus NUL */
+typedef int     Arr_1_Dim [50];
+typedef int     Arr_2_Dim [50] [50];
+
+/* Rec_Type: a linked record with a discriminator and three alternative
+ * payloads sharing storage in the `variant` union. Rec_Pointer is a
+ * pointer to it. */
+typedef struct record 
+    {
+    struct record *Ptr_Comp;   /* link to another record */
+    Enumeration    Discr;      /* presumably selects the active variant - confirm against users */
+    union {
+          struct {
+                  Enumeration Enum_Comp;
+                  int         Int_Comp;
+                  char        Str_Comp [31];
+                  } var_1;
+          struct {
+                  Enumeration E_Comp_2;
+                  char        Str_2_Comp [31];
+                  } var_2;
+          struct {
+                  char        Ch_1_Comp;
+                  char        Ch_2_Comp;
+                  } var_3;
+          } variant;
+      } Rec_Type, *Rec_Pointer;
+
+#endif
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/dhrystone/dhrystone_main.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/dhrystone/dhrystone_main.c
+// See LICENSE for license details.
+
+//**************************************************************************
+// Dhrystone benchmark
+//--------------------------------------------------------------------------
+//
+// This is the classic Dhrystone synthetic integer benchmark.
+//
+
+#pragma GCC optimize ("no-inline")
+
+#include "dhrystone.h"
+
+void debug_printf(const char* str, ...);
+
+#include "util.h"
+
+#include <alloca.h>
+
+/* Global Variables: */
+
+/* Benchmark state shared between this file and the procedures compiled
+ * separately (dhrystone.c declares Int_Glob and Ch_1_Glob as extern). */
+Rec_Pointer     Ptr_Glob,
+                Next_Ptr_Glob;
+int             Int_Glob;
+Boolean         Bool_Glob;
+char            Ch_1_Glob,
+                Ch_2_Glob;
+int             Arr_1_Glob [50];
+int             Arr_2_Glob [50] [50];
+
+Enumeration     Func_1 ();
+  /* forward declaration necessary since Enumeration may not simply be int */
+
+/* REG selection: when REG is not defined on the command line it is defined
+ * as empty and Reg records false; otherwise REG is redefined as the
+ * `register` keyword and Reg records true. */
+#ifndef REG
+        Boolean Reg = false;
+#define REG
+        /* REG becomes defined as empty */
+        /* i.e. no register variables   */
+#else
+        Boolean Reg = true;
+#undef REG
+#define REG register
+#endif
+
+/* Set to true by main once a measurement lasted at least Too_Small_Time. */
+Boolean		Done;
+
+/* Begin_Time and End_Time are written by Start_Timer()/Stop_Timer();
+ * User_Time is their difference. */
+long            Begin_Time,
+                End_Time,
+                User_Time;
+long            Microseconds,
+                Dhrystones_Per_Second;
+
+/* end of variables for time measurement */
+
+
+int main (int argc, char** argv)
+/*****/
+  /* main program, corresponds to procedures        */
+  /* Main and Proc_0 in the Ada version             */
+{
+        One_Fifty       Int_1_Loc;
+  REG   One_Fifty       Int_2_Loc;
+        One_Fifty       Int_3_Loc;
+  REG   char            Ch_Index;
+        Enumeration     Enum_Loc;
+        Str_30          Str_1_Loc;
+        Str_30          Str_2_Loc;
+  REG   int             Run_Index;
+  REG   int             Number_Of_Runs;
+
+  /* Arguments */
+  Number_Of_Runs = NUMBER_OF_RUNS;
+
+  /* Initializations */
+
+  Next_Ptr_Glob = (Rec_Pointer) alloca (sizeof (Rec_Type));
+  Ptr_Glob = (Rec_Pointer) alloca (sizeof (Rec_Type));
+
+  Ptr_Glob->Ptr_Comp                    = Next_Ptr_Glob;
+  Ptr_Glob->Discr                       = Ident_1;
+  Ptr_Glob->variant.var_1.Enum_Comp     = Ident_3;
+  Ptr_Glob->variant.var_1.Int_Comp      = 40;
+  strcpy (Ptr_Glob->variant.var_1.Str_Comp, 
+          "DHRYSTONE PROGRAM, SOME STRING");
+  strcpy (Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING");
+
+  Arr_2_Glob [8][7] = 10;
+        /* Was missing in published program. Without this statement,    */
+        /* Arr_2_Glob [8][7] would have an undefined value.             */
+        /* Warning: With 16-Bit processors and Number_Of_Runs > 32000,  */
+        /* overflow may occur for this array element.                   */
+
+  debug_printf("\n");
+  debug_printf("Dhrystone Benchmark, Version %s\n", Version);
+  if (Reg)
+  {
+    debug_printf("Program compiled with 'register' attribute\n");
+  }
+  else
+  {
+    debug_printf("Program compiled without 'register' attribute\n");
+  }
+  debug_printf("Using %s, HZ=%d\n", CLOCK_TYPE, HZ);
+  debug_printf("\n");
+
+  Done = false;
+  while (!Done) {
+    debug_printf("Trying %d runs through Dhrystone:\n", Number_Of_Runs);
+
+    /***************/
+    /* Start timer */
+    /***************/
+
+    setStats(1);
+    Start_Timer();
+
+    for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index)
+    {
+
+      Proc_5();
+      Proc_4();
+	/* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */
+      Int_1_Loc = 2;
+      Int_2_Loc = 3;
+      strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
+      Enum_Loc = Ident_2;
+      Bool_Glob = ! Func_2 (Str_1_Loc, Str_2_Loc);
+	/* Bool_Glob == 1 */
+      while (Int_1_Loc < Int_2_Loc)  /* loop body executed once */
+      {
+	Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc;
+	  /* Int_3_Loc == 7 */
+	Proc_7 (Int_1_Loc, Int_2_Loc, &Int_3_Loc);
+	  /* Int_3_Loc == 7 */
+	Int_1_Loc += 1;
+      } /* while */
+	/* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
+      Proc_8 (Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc);
+	/* Int_Glob == 5 */
+      Proc_1 (Ptr_Glob);
+      for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index)
+			       /* loop body executed twice */
+      {
+	if (Enum_Loc == Func_1 (Ch_Index, 'C'))
+	    /* then, not executed */
+	  {
+	  Proc_6 (Ident_1, &Enum_Loc);
+	  strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING");
+	  Int_2_Loc = Run_Index;
+	  Int_Glob = Run_Index;
+	  }
+      }
+	/* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
+      Int_2_Loc = Int_2_Loc * Int_1_Loc;
+      Int_1_Loc = Int_2_Loc / Int_3_Loc;
+      Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc;
+	/* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */
+      Proc_2 (&Int_1_Loc);
+	/* Int_1_Loc == 5 */
+
+    } /* loop "for Run_Index" */
+
+    /**************/
+    /* Stop timer */
+    /**************/
+
+    Stop_Timer();
+    setStats(0);
+
+    User_Time = End_Time - Begin_Time;
+
+    if (User_Time < Too_Small_Time)
+    {
+      printf("Measured time too small to obtain meaningful results\n");
+      Number_Of_Runs = Number_Of_Runs * 10;
+      printf("\n");
+    } else Done = true;
+  }
+
+  debug_printf("Final values of the variables used in the benchmark:\n");
+  debug_printf("\n");
+  debug_printf("Int_Glob:            %d\n", Int_Glob);
+  debug_printf("        should be:   %d\n", 5);
+  debug_printf("Bool_Glob:           %d\n", Bool_Glob);
+  debug_printf("        should be:   %d\n", 1);
+  debug_printf("Ch_1_Glob:           %c\n", Ch_1_Glob);
+  debug_printf("        should be:   %c\n", 'A');
+  debug_printf("Ch_2_Glob:           %c\n", Ch_2_Glob);
+  debug_printf("        should be:   %c\n", 'B');
+  debug_printf("Arr_1_Glob[8]:       %d\n", Arr_1_Glob[8]);
+  debug_printf("        should be:   %d\n", 7);
+  debug_printf("Arr_2_Glob[8][7]:    %d\n", Arr_2_Glob[8][7]);
+  debug_printf("        should be:   Number_Of_Runs + 10\n");
+  debug_printf("Ptr_Glob->\n");
+  debug_printf("  Ptr_Comp:          %d\n", (long) Ptr_Glob->Ptr_Comp);
+  debug_printf("        should be:   (implementation-dependent)\n");
+  debug_printf("  Discr:             %d\n", Ptr_Glob->Discr);
+  debug_printf("        should be:   %d\n", 0);
+  debug_printf("  Enum_Comp:         %d\n", Ptr_Glob->variant.var_1.Enum_Comp);
+  debug_printf("        should be:   %d\n", 2);
+  debug_printf("  Int_Comp:          %d\n", Ptr_Glob->variant.var_1.Int_Comp);
+  debug_printf("        should be:   %d\n", 17);
+  debug_printf("  Str_Comp:          %s\n", Ptr_Glob->variant.var_1.Str_Comp);
+  debug_printf("        should be:   DHRYSTONE PROGRAM, SOME STRING\n");
+  debug_printf("Next_Ptr_Glob->\n");
+  debug_printf("  Ptr_Comp:          %d\n", (long) Next_Ptr_Glob->Ptr_Comp);
+  debug_printf("        should be:   (implementation-dependent), same as above\n");
+  debug_printf("  Discr:             %d\n", Next_Ptr_Glob->Discr);
+  debug_printf("        should be:   %d\n", 0);
+  debug_printf("  Enum_Comp:         %d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
+  debug_printf("        should be:   %d\n", 1);
+  debug_printf("  Int_Comp:          %d\n", Next_Ptr_Glob->variant.var_1.Int_Comp);
+  debug_printf("        should be:   %d\n", 18);
+  debug_printf("  Str_Comp:          %s\n",
+                                Next_Ptr_Glob->variant.var_1.Str_Comp);
+  debug_printf("        should be:   DHRYSTONE PROGRAM, SOME STRING\n");
+  debug_printf("Int_1_Loc:           %d\n", Int_1_Loc);
+  debug_printf("        should be:   %d\n", 5);
+  debug_printf("Int_2_Loc:           %d\n", Int_2_Loc);
+  debug_printf("        should be:   %d\n", 13);
+  debug_printf("Int_3_Loc:           %d\n", Int_3_Loc);
+  debug_printf("        should be:   %d\n", 7);
+  debug_printf("Enum_Loc:            %d\n", Enum_Loc);
+  debug_printf("        should be:   %d\n", 1);
+  debug_printf("Str_1_Loc:           %s\n", Str_1_Loc);
+  debug_printf("        should be:   DHRYSTONE PROGRAM, 1'ST STRING\n");
+  debug_printf("Str_2_Loc:           %s\n", Str_2_Loc);
+  debug_printf("        should be:   DHRYSTONE PROGRAM, 2'ND STRING\n");
+  debug_printf("\n");
+
+
+  Microseconds = ((User_Time / Number_Of_Runs) * Mic_secs_Per_Second) / HZ;
+  Dhrystones_Per_Second = (HZ * Number_Of_Runs) / User_Time;
+
+  printf("Microseconds for one run through Dhrystone: %ld\n", Microseconds);
+  printf("Dhrystones per Second:                      %ld\n", Dhrystones_Per_Second);
+
+  return 0;
+}
+
+
+Proc_1 (Ptr_Val_Par)
+/******************/
+/* Record-manipulation step of the benchmark loop.                      */
+/* Works on the record Ptr_Val_Par points to and on the record reached  */
+/* through its Ptr_Comp link (cached in Next_Record).                   */
+/* K&R-style definition: no declared return type and no return value.   */
+
+REG Rec_Pointer Ptr_Val_Par;
+    /* executed once per benchmark run */
+{
+  REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp;  
+                                        /* == Ptr_Glob_Next */
+  /* Local variable, initialized with Ptr_Val_Par->Ptr_Comp,    */
+  /* corresponds to "rename" in Ada, "with" in Pascal           */
+  
+  /* structassign is defined outside this chunk; presumably it  */
+  /* copies its second argument into its first - TODO confirm   */
+  structassign (*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob); 
+  Ptr_Val_Par->variant.var_1.Int_Comp = 5;
+  Next_Record->variant.var_1.Int_Comp 
+        = Ptr_Val_Par->variant.var_1.Int_Comp;
+  Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp;
+  /* Proc_3 may overwrite Next_Record->Ptr_Comp through the pointer */
+  Proc_3 (&Next_Record->Ptr_Comp);
+    /* Ptr_Val_Par->Ptr_Comp->Ptr_Comp 
+                        == Ptr_Glob->Ptr_Comp */
+  if (Next_Record->Discr == Ident_1)
+    /* then, executed */
+  {
+    Next_Record->variant.var_1.Int_Comp = 6;
+    Proc_6 (Ptr_Val_Par->variant.var_1.Enum_Comp, 
+           &Next_Record->variant.var_1.Enum_Comp);
+    Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp;
+    /* Int_Comp is passed both by value and as the output location */
+    Proc_7 (Next_Record->variant.var_1.Int_Comp, 10, 
+           &Next_Record->variant.var_1.Int_Comp);
+  }
+  else /* not executed */
+    structassign (*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp);
+} /* Proc_1 */
+
+
+Proc_2 (Int_Par_Ref)
+/******************/
+    /* executed once per benchmark run */
+    /* Stores (*Int_Par_Ref + 10 - 1) - Int_Glob back into *Int_Par_Ref. */
+    /* In the main loop *Int_Par_Ref == 1 and Int_Glob == 5 on entry,    */
+    /* so the value becomes 5 (the final check in main expects 5).       */
+
+One_Fifty   *Int_Par_Ref;
+{
+  One_Fifty  Int_Loc;  
+  Enumeration   Enum_Loc;
+
+  Int_Loc = *Int_Par_Ref + 10;
+  /* NOTE(review): Enum_Loc is only assigned inside the if-branch, so   */
+  /* the loop condition reads an uninitialized value if Ch_1_Glob is    */
+  /* ever not 'A'; the benchmark always sets it to 'A' in Proc_5.       */
+  do /* executed once */
+    if (Ch_1_Glob == 'A')
+      /* then, executed */
+    {
+      Int_Loc -= 1;
+      *Int_Par_Ref = Int_Loc - Int_Glob;
+      Enum_Loc = Ident_1;
+    } /* if */
+  while (Enum_Loc != Ident_1); /* false after the first pass: loop exits */
+} /* Proc_2 */
+
+
+Proc_3 (Ptr_Ref_Par)
+/******************/
+    /* executed once per benchmark run */
+    /* *Ptr_Ref_Par becomes Ptr_Glob->Ptr_Comp (when Ptr_Glob is not Null), */
+    /* then Proc_7 is called with Ptr_Glob's Int_Comp as output location.   */
+
+Rec_Pointer *Ptr_Ref_Par;
+
+{
+  if (Ptr_Glob != Null)
+    /* then, executed */
+    *Ptr_Ref_Par = Ptr_Glob->Ptr_Comp;
+  /* NOTE(review): this call is outside the Null check above and */
+  /* dereferences Ptr_Glob unconditionally.                      */
+  Proc_7 (10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp);
+} /* Proc_3 */
+
+
+Proc_4 () /* without parameters */
+/*******/
+    /* executed once per benchmark run */
+    /* ORs (Ch_1_Glob == 'A') into Bool_Glob and sets Ch_2_Glob to 'B'. */
+{
+  Boolean Bool_Loc;
+
+  Bool_Loc = Ch_1_Glob == 'A';
+  /* bitwise OR, not logical ||: both operands are always evaluated */
+  Bool_Glob = Bool_Loc | Bool_Glob;
+  Ch_2_Glob = 'B';
+} /* Proc_4 */
+
+
+Proc_5 () /* without parameters */
+/*******/
+    /* executed once per benchmark run */
+    /* Resets the globals: Ch_1_Glob to 'A' and Bool_Glob to false. */
+{
+  Ch_1_Glob = 'A';
+  Bool_Glob = false;
+} /* Proc_5 */
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/median/dataset1.h
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/median/dataset1.h
+// See LICENSE for license details.
+
+
+#define DATA_SIZE 400
+
+int input_data[DATA_SIZE] =
+{
+   41, 454, 833, 335, 564,   1, 187, 989, 749, 365, 350, 572, 132,  64, 949, 153, 584, 216, 805, 140,
+  621, 210,   6, 572, 931, 339, 890, 593, 392, 898, 694, 228, 961,  12, 110, 883, 116, 750, 296, 646,
+  426, 500, 314, 436, 659, 701, 774, 812, 319, 981, 678, 150, 875, 696, 376, 564, 474, 272, 938, 258,
+  539, 647, 569, 509, 203,  88, 280, 703, 759, 669, 606, 375, 511, 551, 657, 936, 195, 592,  81, 569,
+  267, 952, 229, 800, 337, 584, 944, 643, 902, 368, 241, 489, 913, 328, 826, 313, 933, 592, 985, 388,
+  195, 543, 960, 649, 566, 979, 350, 997, 649, 814, 657,  79, 181, 208, 111, 998, 859, 629,  65, 847,
+  288, 704, 349, 997, 141, 253, 905, 715, 886, 430, 264, 415, 576, 538, 979, 700, 761,   4, 241, 494,
+  478, 100, 499, 864, 403, 693, 222, 416, 444, 296, 721, 285, 676, 620, 317,  78, 224, 351, 937, 540,
+  288, 646, 119, 169, 615, 527, 606, 289, 389, 796, 351, 801, 455, 720, 278, 758, 367, 745, 358,  92,
+  584, 989,  62, 271, 985, 853, 403, 788, 346, 531, 517, 222, 559, 461, 908, 241, 775, 358, 255, 332,
+  778, 684, 598, 740, 143, 446,  33, 311, 125, 743, 941, 557, 933, 479, 799, 557, 553, 925, 431, 796,
+  648, 357, 952, 891, 287, 666,  19, 514,  49, 557,  86, 870,  95, 853, 441, 440, 587,  61, 614, 678,
+  382, 396, 280,   9, 808,  17, 971, 170, 819, 291, 344, 380, 450, 536, 512, 185, 965, 917, 347, 539,
+  808, 983, 882, 887, 537,  54, 946, 612, 701, 951, 356, 479, 567, 151, 891,   7,  22, 641, 568, 335,
+  665, 730, 423,  95, 434, 728, 158, 280,   2, 395,  84, 688, 247, 911,  49, 476, 435, 815, 792, 729,
+  869, 265, 486, 127, 414, 236, 369, 214, 548, 180, 518,   6, 888, 503, 682, 596, 284, 173, 264, 643,
+  499, 346, 290, 599, 897,  68, 215, 849, 731, 658, 688, 619, 251, 121, 786, 131, 555, 828, 302, 667,
+  528, 433, 544, 487, 322, 753, 947, 125, 287, 626, 824,  14, 304,  10, 788, 403, 733, 106, 959, 703,
+  366, 818, 722, 964, 294, 406, 975, 874, 653, 856, 748,  86,  91,  60, 378, 660, 105, 667, 102, 153,
+  381, 121, 651,  98, 825, 412, 840, 236, 356,  12, 148, 423,  54, 965, 140, 216, 955, 621, 343, 361
+};
+
+int verify_data[DATA_SIZE] =
+{
+    0, 454, 454, 564, 335, 187, 187, 749, 749, 365, 365, 350, 132, 132, 153, 584, 216, 584, 216, 621,
+  210, 210, 210, 572, 572, 890, 593, 593, 593, 694, 694, 694, 228, 110, 110, 116, 750, 296, 646, 426,
+  500, 426, 436, 436, 659, 701, 774, 774, 812, 678, 678, 678, 696, 696, 564, 474, 474, 474, 272, 539,
+  539, 569, 569, 509, 203, 203, 280, 703, 703, 669, 606, 511, 511, 551, 657, 657, 592, 195, 569, 267,
+  569, 267, 800, 337, 584, 584, 643, 902, 643, 368, 368, 489, 489, 826, 328, 826, 592, 933, 592, 388,
+  388, 543, 649, 649, 649, 566, 979, 649, 814, 657, 657, 181, 181, 181, 208, 859, 859, 629, 629, 288,
+  704, 349, 704, 349, 253, 253, 715, 886, 715, 430, 415, 415, 538, 576, 700, 761, 700, 241, 241, 478,
+  478, 478, 499, 499, 693, 403, 416, 416, 416, 444, 296, 676, 620, 620, 317, 224, 224, 351, 540, 540,
+  540, 288, 169, 169, 527, 606, 527, 389, 389, 389, 796, 455, 720, 455, 720, 367, 745, 367, 358, 358,
+  584, 584, 271, 271, 853, 853, 788, 403, 531, 517, 517, 517, 461, 559, 461, 775, 358, 358, 332, 332,
+  684, 684, 684, 598, 446, 143, 311, 125, 311, 743, 743, 933, 557, 799, 557, 557, 557, 553, 796, 648,
+  648, 648, 891, 891, 666, 287, 514,  49, 514,  86, 557,  95, 853, 441, 441, 441, 440, 587, 614, 614,
+  396, 382, 280, 280,  17, 808, 170, 819, 291, 344, 344, 380, 450, 512, 512, 512, 917, 917, 539, 539,
+  808, 882, 887, 882, 537, 537, 612, 701, 701, 701, 479, 479, 479, 567, 151,  22,  22, 568, 568, 568,
+  665, 665, 423, 423, 434, 434, 280, 158, 280,  84, 395, 247, 688, 247, 476, 435, 476, 792, 792, 792,
+  729, 486, 265, 414, 236, 369, 236, 369, 214, 518, 180, 518, 503, 682, 596, 596, 284, 264, 264, 499,
+  499, 346, 346, 599, 599, 215, 215, 731, 731, 688, 658, 619, 251, 251, 131, 555, 555, 555, 667, 528,
+  528, 528, 487, 487, 487, 753, 753, 287, 287, 626, 626, 304,  14, 304, 403, 733, 403, 733, 703, 703,
+  703, 722, 818, 722, 406, 406, 874, 874, 856, 748, 748,  91,  86,  91, 378, 378, 660, 105, 153, 153,
+  153, 381, 121, 651, 412, 825, 412, 356, 236, 148, 148, 148, 423, 140, 216, 216, 621, 621, 361,   0
+};
+
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/median/median.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/median/median.c
+// See LICENSE for license details.
+
+//**************************************************************************
+// Median filter (c version)
+//--------------------------------------------------------------------------
+
+// Three-element median filter over input[0..n-1].
+//
+//   n       - number of elements in both input[] and results[]
+//   input   - samples to filter (read only)
+//   results - receives the filtered samples; the first and last entries
+//             are set to 0 because they have no two-sided neighbourhood
+//
+// For every interior index i, results[i] is the median of
+// input[i-1], input[i] and input[i+1]. When n <= 0 nothing is written.
+void median( int n, int input[], int results[] )
+{
+  int A, B, C, i;
+
+  // Empty input: without this guard results[0] and results[n-1] would
+  // be written outside the caller's buffer
+  if ( n <= 0 )
+    return;
+
+  // Zero the ends
+  results[0]   = 0;
+  results[n-1] = 0;
+
+  // Do the filter
+  for ( i = 1; i < (n-1); i++ ) {
+
+    A = input[i-1];
+    B = input[i];
+    C = input[i+1];
+
+    // A < B: the median is B unless C falls at or below B
+    if ( A < B ) {
+      if ( B < C )
+        results[i] = B;
+      else if ( C < A )
+        results[i] = A;
+      else
+        results[i] = C;
+    }
+
+    // B <= A: the median is A unless C falls at or below A
+    else {
+      if ( A < C )
+        results[i] = A;
+      else if ( C < B )
+        results[i] = B;
+      else
+        results[i] = C;
+    }
+
+  }
+
+}
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/median/median.h
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/median/median.h
+// See LICENSE for license details.
+
+//**************************************************************************
+// Median filters
+//--------------------------------------------------------------------------
+
+// Simple C version
+void median( int n, int input[], int results[] );
+
+// Simple assembly version
+void median_asm( int n, int input[], int results[] );
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/median/median_gendata.pl
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/median/median_gendata.pl
+#!/usr/bin/perl -w
+#==========================================================================
+# median_gendata.pl
+#
+# Author : Christopher Batten (cbatten@mit.edu)
+# Date   : May 9, 2005
+#
+(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
+#
+# Simple script which creates an input data set and the reference data
+# for the median benchmark.
+#
+ENDMSG
+
+use strict "vars";
+use warnings;
+no  warnings("once");
+use Getopt::Long;
+
+#--------------------------------------------------------------------------
+# Command line processing
+#--------------------------------------------------------------------------
+
+our %opts;
+
+sub usage()
+{
+  # Option summary first, then the banner text captured in $usageMsg
+  my @helpLines = (
+    "\n",
+    " Usage: median_gendata.pl [options] \n",
+    "\n",
+    " Options:\n",
+    "  --help  print this message\n",
+    "  --size  size of input data [750]\n",
+    "  --seed  random seed [1]\n",
+    "$usageMsg",
+  );
+  print @helpLines;
+
+  # Printing the help always ends the script
+  exit();
+}
+
+sub processCommandLine()
+{
+  # Defaults; GetOptions overwrites whichever ones appear on the command line
+  @opts{ "help", "size", "seed" } = ( 0, 750, 1 );
+
+  my $parsedOk = Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' );
+
+  # Both a parse failure and an explicit --help end in usage(), which exits
+  usage() unless $parsedOk;
+  usage() if $opts{"help"};
+}
+
+#--------------------------------------------------------------------------
+# Helper Functions
+#--------------------------------------------------------------------------
+
+# Print a C array definition "int <name>[DATA_SIZE] = { ... };" to STDOUT.
+#   $_[0] - array name used in the C declaration
+#   $_[1] - reference to the Perl array holding the values
+# Values are written 20 per row, each formatted with %3d and followed by
+# ", " except for the very last element of the array.
+sub printArray
+{
+  my ( $arrayName, $arrayRef ) = @_;
+
+  my $numCols  = 20;
+  my $arrayLen = scalar(@{$arrayRef});
+  my $lastIdx  = $arrayLen - 1;
+
+  print "int ".$arrayName."[DATA_SIZE] = \n";
+  print "{\n";
+
+  # do/while so that an empty array still yields one indented, empty row
+  my $rowStart = 0;
+  do {
+    my $rowEnd = $rowStart + $numCols - 1;
+    $rowEnd = $lastIdx if ( $rowEnd > $lastIdx );
+
+    my $rowText = "  ";
+    foreach my $index ( $rowStart .. $rowEnd ) {
+      $rowText .= sprintf("%3d",$arrayRef->[$index]);
+      $rowText .= ", " unless ( $index == $lastIdx );
+    }
+    print $rowText, "\n";
+
+    $rowStart += $numCols;
+  } while ( $rowStart < $arrayLen );
+
+  print  "};\n\n";
+}
+
+#--------------------------------------------------------------------------
+# Main
+#--------------------------------------------------------------------------
+
+# Generate the data set: parse options, draw --size pseudo-random values
+# seeded with --seed, compute the three-element median reference, and print
+# both arrays as C source on STDOUT.
+sub main()
+{
+
+  processCommandLine();
+
+  # A size below 1 cannot describe a valid data set: 0 would emit
+  # "DATA_SIZE 0" together with a one-element verify_data, and negative
+  # values fail later with an internal array-subscript error.
+  my $size = $opts{"size"};
+  if ( $size < 1 ) {
+    die "median_gendata.pl: --size must be at least 1 (got $size)\n";
+  }
+
+  srand($opts{"seed"});
+
+  # Input samples: integers in the range 0..998
+  my @values;
+  for ( my $i = 0; $i < $size; $i++ ) {
+    push( @values, int(rand(999)) );
+  }
+
+  # Reference output: both ends are 0, interior entries are the median of
+  # each sample and its two neighbours
+  my @median;
+  $median[0] = 0;
+  $median[$size-1] = 0;
+  for ( my $i = 1; $i < $size-1; $i++ ) {
+    my @tempList = ( $values[$i-1], $values[$i], $values[$i+1] );
+    my @sorted = sort { $a <=> $b } @tempList;
+    $median[$i] = $sorted[1];
+  }
+
+  print "\n\#define DATA_SIZE ".$size." \n\n";
+  printArray( "input_data", \@values );
+  printArray( "verify_data", \@median );
+
+}
+
+main();
+
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/median/median_main.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/median/median_main.c
+// See LICENSE for license details.
+
+//**************************************************************************
+// Median filter benchmark
+//--------------------------------------------------------------------------
+//
+// This benchmark performs a 1D three element median filter. The
+// input data (and reference data) should be generated using the
+// median_gendata.pl perl script and dumped to a file named
+// dataset1.h.
+
+#include "util.h"
+
+#include "median.h"
+
+//--------------------------------------------------------------------------
+// Input/Reference Data
+
+#include "dataset1.h"
+
+//--------------------------------------------------------------------------
+// Main
+
+// Benchmark driver: filters input_data into a local buffer and returns
+// whatever verify() reports when comparing it against verify_data.
+int main( int argc, char* argv[] )
+{
+  // Output buffer, one slot per input sample
+  int filtered[DATA_SIZE];
+  int status;
+
+#if PREALLOCATE
+  // Optional untimed pass so everything is already in the caches
+  median( DATA_SIZE, input_data, filtered );
+#endif
+
+  // The pass of interest, bracketed by setStats(1) / setStats(0)
+  setStats(1);
+  median( DATA_SIZE, input_data, filtered );
+  setStats(0);
+
+  // Compare against the reference data
+  status = verify( DATA_SIZE, filtered, verify_data );
+  return status;
+}
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/mm/common.h
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/mm/common.h
+// See LICENSE for license details.
+
+// Shared definitions for the mm (matrix multiply) benchmark: the element
+// type t, helper macros, the generated register-block kernel (rb.h) and
+// the mm() entry point.
+#ifndef _MM_H
+#define _MM_H
+
+#include <string.h>
+#include <stdint.h>
+#include <math.h>
+
+// Element type: float when SP is defined (fma is then mapped onto fmaf),
+// double otherwise.
+#ifdef SP
+typedef float t;
+#define fma fmaf
+#else
+typedef double t;
+#endif
+
+// From here on every "inline" also carries always_inline; this must stay
+// ahead of the rb.h include so the generated kernel picks it up.
+#define inline inline __attribute__((always_inline))
+
+// alloca() s bytes aligned to a: over-allocates by a-1 bytes and rounds the
+// pointer up. The mask arithmetic only works when a is a power of two.
+// The includer must make alloca itself visible (no header for it is
+// included here).
+#define alloca_aligned(s, a) ((void*)(((uintptr_t)alloca((s)+(a)-1)+(a)-1)&~((a)-1)))
+
+// Generated file: RBM/RBN/RBK, CBM/CBN/CBK constants and kloop().
+// It uses the type t, so it has to come after the typedef above.
+#include "rb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Matrix multiply-accumulate over an m x p matrix a, a p x n matrix b and
+// an m x n matrix c, with leading dimensions lda, ldb and ldc.
+void mm(size_t m, size_t n, size_t p,
+        t* a, size_t lda, t* b, size_t ldb, t* c, size_t ldc);
+
+#ifdef __cplusplus
+}
+#endif
+
+//void rb(t* a, t* b, t* c, size_t lda, size_t ldb, size_t ldc);
+
+#endif
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/mm/gen.scala
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/mm/gen.scala
+import scala.sys.process._
+// Generator for rb.h: emits the fully unrolled register-block kernel
+// kloop() plus the RB*/CB* block-size constants, then builds and runs the
+// benchmark through make.
+object MMGen {
+  // Lets Int values be passed where a String is expected (e.g. in r(...))
+  implicit def i2s(i: Int) = i.toString
+  // Write contents to the file called name, replacing any previous content
+  def writeFile(name: String, contents: String) = {
+    val f = new java.io.FileWriter(name)
+    f.write(contents)
+    f.close
+  }
+
+  // Current nesting depth of the generated C code; two spaces per level
+  var indent = 0
+  def spacing = "  " * indent
+  // One indented C assignment statement "lhs = rhs;"
+  def assign(lhs: String, rhs: String) =
+    spacing + lhs + " = " + rhs + ";\n"
+  // One indented C declaration with initializer "t n = v;"
+  def init(t: String, n: String, v: String) =
+    assign(t+" "+n, v)
+  // Optional header text s followed by "{"; increases the indent level
+  def open_block(s: String = "") = {
+    val result = (if (s != "") spacing + s else "") + spacing + "{\n"
+    indent = indent + 1
+    result
+  }
+  // Decreases the indent level and emits the matching "}"
+  def close_block = {
+    indent = indent - 1
+    spacing + "}\n"
+  }
+
+  // C array access "m[i]"
+  def ar(m: String, i: String) = m+"["+i+"]"
+  // Joins the parts with "_" to form a variable name, e.g. c_1_2
+  def r(a: String, b: String*) = (a :: b.toList).reduceLeft(_+"_"+_)
+
+  // C source of kloop() for an m x n register block with the k loop
+  // unrolled p times, followed by a remainder loop unrolled once
+  def rb(m: Int, n: Int, p: Int) = {
+    var s = open_block("static inline void kloop(size_t p, t* a0, size_t lda, t* b0, size_t ldb, t* c, size_t ldc)\n")
+
+    // Row pointers into c and one local accumulator per c element
+    for (i <- 0 until m)
+      s += init("t*", r("c", i), "&"+ar("c", "ldc*"+i))
+    for (i <- 0 until m; j <- 0 until n)
+      s += init("t", r("c", i, j), ar(r("c", i), j))
+
+    // Loop body: row pointers into a and b, then one fma per (k, i, j)
+    def doit(m: Int, n: Int, p: Int) = {
+      for (i <- 0 until m)
+        s += init("t*", r("a", i), "&"+ar("a", "lda*"+i))
+      for (k <- 0 until p)
+        s += init("t*", r("b", k), "&"+ar("b", "ldb*"+k))
+      for (k <- 0 until p; i <- 0 until m; j <- 0 until n)
+        s += assign(r("c", i, j), "fma(" + ar(r("a", i), k) + ", " + ar(r("b", k), j) + ", " + r("c", i, j) + ")")
+    }
+
+    s += open_block("for (t *a = a0, *b = b0; a < a0 + p/RBK*RBK; a += RBK, b += RBK*ldb)\n")
+    doit(m, n, p)
+    s += close_block
+
+    s += open_block("for (t *a = a0 + p/RBK*RBK, *b = b0 + p/RBK*RBK*ldb; a < a0 + p; a++, b += ldb)\n")
+    doit(m, n, 1)
+    s += close_block
+
+    // Store the accumulators back into c
+    for (i <- 0 until m; j <- 0 until n)
+      s += assign(ar(r("c", i), j), r("c", i, j))
+    s += close_block
+
+    s
+  }
+  // Greatest common divisor / least common multiple helpers
+  def gcd(a: Int, b: Int): Int = if (b == 0) a else gcd(b, a%b)
+  def lcm(a: Int, b: Int): Int = a*b/gcd(a, b)
+  def lcm(a: Seq[Int]): Int = {
+    if (a.tail.isEmpty) a.head
+    else lcm(a.head, lcm(a.tail))
+  }
+  // Writes rb.h for register block (m, n, p) and cache block (m1, n1, p1),
+  // runs "make run", then copies a.out to b.<m>.<n>.<p>.<m1>.<n1>.<p1>.run
+  def test1(m: Int, n: Int, p: Int, m1: Int, n1: Int, p1: Int) = {
+    val decl = "static const int RBM = "+m+", RBN = "+n+", RBK = "+p+";\n" +
+               "static const int CBM = "+m1+", CBN = "+n1+", CBK = "+p1+";\n"
+    writeFile("rb.h", decl + rb(m, n, p))
+    //"make"!!
+
+    "make run"!
+
+    ("cp a.out " + Seq("b", m, n, p, m1, n1, p1, "run").reduce(_+"."+_))!
+  }
+  // Generates and runs the single configuration used for the checked-in rb.h
+  def main(args: Array[String]): Unit = {
+    test1(4, 5, 6, 24, 25, 24)
+    //for (i <- 4 to 6; j <- 4 to 6; k <- 4 to 6)
+    //  test1(i, j, k, if (i == 5) 35 else 36, if (j == 5) 35 else 36, if (k == 5) 35 else 36)
+  }
+}
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/mm/mm.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/mm/mm.c
+// See LICENSE for license details.
+
+#include "common.h"
+#include <assert.h>
+#include <math.h>
+#include <stdint.h>
+#include <alloca.h>
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+// Straightforward c += a*b for an m x p matrix a, a p x n matrix b and an
+// m x n matrix c (leading dimensions lda, ldb, ldc). The k loop is unrolled
+// by four into independent accumulators; the k tail is folded into the
+// first accumulator, and the four are summed pairwise at the end.
+static void mm_naive(size_t m, size_t n, size_t p,
+                            t* a, size_t lda, t* b, size_t ldb, t* c, size_t ldc)
+{
+  // Largest multiple of four that does not exceed p
+  const size_t p4 = p/4*4;
+
+  for (size_t row = 0; row < m; row++)
+  {
+    t* a_row = a + row*lda;
+    t* c_row = c + row*ldc;
+
+    for (size_t col = 0; col < n; col++)
+    {
+      t* b_col = b + col;
+      t acc0 = c_row[col], acc1 = 0, acc2 = 0, acc3 = 0;
+      size_t k = 0;
+
+      while (k < p4)
+      {
+        acc0 = fma(a_row[k+0], b_col[(k+0)*ldb], acc0);
+        acc1 = fma(a_row[k+1], b_col[(k+1)*ldb], acc1);
+        acc2 = fma(a_row[k+2], b_col[(k+2)*ldb], acc2);
+        acc3 = fma(a_row[k+3], b_col[(k+3)*ldb], acc3);
+        k += 4;
+      }
+
+      // Leftover k values (fewer than four)
+      for (; k < p; k++)
+        acc0 = fma(a_row[k], b_col[k*ldb], acc0);
+
+      c_row[col] = (acc0 + acc1) + (acc2 + acc3);
+    }
+  }
+}
+
+// Register-blocked c += a*b: full RBM x RBN tiles go through kloop(), the
+// columns beyond the last full tile and the rows beyond the last full tile
+// row are handled by mm_naive().
+static inline void mm_rb(size_t m, size_t n, size_t p,
+                         t* a, size_t lda, t* b, size_t ldb, t* c, size_t ldc)
+{
+  const size_t full_rows = m/RBM*RBM;
+  const size_t full_cols = n/RBN*RBN;
+
+  size_t i = 0;
+  while (i < full_rows)
+  {
+    t* a_blk = a + i*lda;
+    t* c_blk = c + i*ldc;
+
+    for (size_t j = 0; j < full_cols; j += RBN)
+      kloop(p, a_blk, lda, b+j, ldb, c_blk+j, ldc);
+
+    // Columns to the right of the last full tile in this tile row
+    mm_naive(RBM, n - full_cols, p, a_blk, lda, b+full_cols, ldb, c_blk+full_cols, ldc);
+
+    i += RBM;
+  }
+
+  // Rows below the last full tile row, across all columns
+  mm_naive(m - full_rows, n, p, a+full_rows*lda, lda, b, ldb, c+full_rows*ldc, ldc);
+}
+
+// Copy an m x p block from a0 (leading dimension lda0) into a (leading
+// dimension lda). Each row is moved eight elements at a time, reading all
+// eight before writing any of them, followed by an element-wise tail.
+static inline void repack(t* a, size_t lda, const t* a0, size_t lda0, size_t m, size_t p)
+{
+  // Largest multiple of eight that does not exceed p
+  const size_t p8 = p/8*8;
+
+  for (size_t row = 0; row < m; row++)
+  {
+    t* dst = a + row*lda;
+    const t* src = a0 + row*lda0;
+    size_t col;
+
+    for (col = 0; col < p8; col += 8)
+    {
+      t v0 = src[col+0];
+      t v1 = src[col+1];
+      t v2 = src[col+2];
+      t v3 = src[col+3];
+      t v4 = src[col+4];
+      t v5 = src[col+5];
+      t v6 = src[col+6];
+      t v7 = src[col+7];
+      dst[col+0] = v0;
+      dst[col+1] = v1;
+      dst[col+2] = v2;
+      dst[col+3] = v3;
+      dst[col+4] = v4;
+      dst[col+5] = v5;
+      dst[col+6] = v6;
+      dst[col+7] = v7;
+    }
+
+    // Leftover columns (fewer than eight)
+    for (; col < p; col++)
+      dst[col] = src[col];
+  }
+}
+
+// Cache-blocked c += a*b. The part of the problem covered by whole
+// CBM x CBN x CBK blocks is copied into packed, block-contiguous buffers
+// (a1, b1, c1), multiplied block by block with mm_rb(), and copied back
+// into c at the end. Rows, columns and k ranges beyond the last whole
+// block are processed straight from the original arrays.
+static void mm_cb(size_t m, size_t n, size_t p,
+                  t* a, size_t lda, t* b, size_t ldb, t* c, size_t ldc)
+{
+  // Number of whole blocks per dimension, and the extent they cover
+  size_t nmb = m/CBM, nnb = n/CBN, npb = p/CBK;
+  size_t mb = nmb*CBM, nb = nnb*CBN, pb = npb*CBK;
+  // Packed copies are taken from the stack via alloca, 8192-byte aligned
+  //t a1[mb*pb], b1[pb*nb], c1[mb*nb];
+  t* a1 = (t*)alloca_aligned(sizeof(t)*mb*pb, 8192);
+  t* b1 = (t*)alloca_aligned(sizeof(t)*pb*nb, 8192);
+  t* c1 = (t*)alloca_aligned(sizeof(t)*mb*nb, 8192);
+
+    // Pack a: block (i/CBM, j/CBK) is stored contiguously with leading
+    // dimension CBK at block index npb*(i/CBM) + j/CBK
+    for (size_t i = 0; i < mb; i += CBM)
+      for (size_t j = 0; j < pb; j += CBK)
+        repack(a1 + (npb*(i/CBM) + j/CBK)*(CBM*CBK), CBK, a + i*lda + j, lda, CBM, CBK);
+
+  // Pack b the same way: block index nnb*(i/CBK) + j/CBN, leading dim CBN
+  for (size_t i = 0; i < pb; i += CBK)
+    for (size_t j = 0; j < nb; j += CBN)
+      repack(b1 + (nnb*(i/CBK) + j/CBN)*(CBK*CBN), CBN, b + i*ldb + j, ldb, CBK, CBN);
+
+    // Pack the current contents of c, since the product accumulates into it
+    for (size_t i = 0; i < mb; i += CBM)
+      for (size_t j = 0; j < nb; j += CBN)
+        repack(c1 + (nnb*(i/CBM) + j/CBN)*(CBM*CBN), CBN, c + i*ldc + j, ldc, CBM, CBN);
+
+  for (size_t i = 0; i < mb; i += CBM)
+  {
+    for (size_t j = 0; j < nb; j += CBN)
+    {
+      // Whole blocks: packed a1 * packed b1 accumulated into packed c1
+      for (size_t k = 0; k < pb; k += CBK)
+      {
+        mm_rb(CBM, CBN, CBK,
+              a1 + (npb*(i/CBM) + k/CBK)*(CBM*CBK), CBK,
+              b1 + (nnb*(k/CBK) + j/CBN)*(CBK*CBN), CBN,
+              c1 + (nnb*(i/CBM) + j/CBN)*(CBM*CBN), CBN);
+      }
+      // Leftover k range: read the unpacked a and b, still accumulate in c1
+      if (pb < p)
+      {
+        mm_rb(CBM, CBN, p - pb,
+              a + i*lda + pb, lda,
+              b + pb*ldb + j, ldb,
+              c1 + (nnb*(i/CBM) + j/CBN)*(CBM*CBN), CBN);
+      }
+    }
+    // Leftover columns of this block row: operate directly on c
+    if (nb < n)
+    {
+      for (size_t k = 0; k < p; k += CBK)
+      {
+        mm_rb(CBM, n - nb, MIN(p - k, CBK),
+              a + i*lda + k, lda,
+              b + k*ldb + nb, ldb,
+              c + i*ldc + nb, ldc);
+      }
+    }
+  }
+  // Leftover rows below the last whole block row: operate directly on c
+  if (mb < m)
+  {
+    for (size_t j = 0; j < n; j += CBN)
+    {
+      for (size_t k = 0; k < p; k += CBK)
+      {
+        mm_rb(m - mb, MIN(n - j, CBN), MIN(p - k, CBK),
+              a + mb*lda + k, lda,
+              b + k*ldb + j, ldb,
+              c + mb*ldc + j, ldc);
+      }
+    }
+  }
+
+    // Unpack: copy the accumulated blocks from c1 back into c
+    for (size_t i = 0; i < mb; i += CBM)
+      for (size_t j = 0; j < nb; j += CBN)
+        repack(c + i*ldc + j, ldc, c1 + (nnb*(i/CBM) + j/CBN)*(CBM*CBN), CBN, CBM, CBN);
+}
+
+// Public entry point for c += a*b. Problems no larger than two cache
+// blocks in every dimension (the case hinted as likely) go straight to
+// the register-blocked kernel; anything larger takes the cache-blocked
+// path.
+void mm(size_t m, size_t n, size_t p,
+        t* a, size_t lda, t* b, size_t ldb, t* c, size_t ldc)
+{
+  if (__builtin_expect(!(m <= 2*CBM && n <= 2*CBN && p <= 2*CBK), 0))
+  {
+    mm_cb(m, n, p, a, lda, b, ldb, c, ldc);
+    return;
+  }
+
+  mm_rb(m, n, p, a, lda, b, ldb, c, ldc);
+}
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/mm/mm_main.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/mm/mm_main.c
+// See LICENSE for license details.
+
+#include "common.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "util.h"
+
+#pragma GCC optimize ("unroll-loops")
+
+// Benchmark body: builds CBM x CBK and CBK x CBN operands from an LFSR
+// sequence, calls mm() R times on the same zero-initialised result,
+// prints the counters of the last call and checks the result against a
+// plain triple loop.
+//
+//   cid - id used as the "C<cid>:" prefix of every output line
+//         (presumably the core id - confirm against the caller)
+//   nc  - value handed to barrier() before exiting
+//         (presumably the number of cores - confirm against the caller)
+//
+// Does not return: exits with status 1 on the first mismatch, otherwise
+// with status 0 after the barrier.
+void thread_entry(int cid, int nc)
+{
+  const int R = 8;
+  int m, n, p;
+  uint64_t s = 0xdeadbeefU;
+  
+  m = CBM;
+  n = CBN;
+  p = CBK;
+
+  t a[m*p];
+  t b[p*n];
+  t c[m*n];
+
+  // Fill the operands from the LFSR stream; c starts at zero
+  for (size_t i = 0; i < m; i++)
+    for (size_t j = 0; j < p; j++)
+      a[i*p+j] = (t)(s = lfsr(s));
+  for (size_t i = 0; i < p; i++)
+    for (size_t j = 0; j < n; j++)
+      b[i*n+j] = (t)(s = lfsr(s));
+  memset(c, 0, m*n*sizeof(c[0]));
+
+  // The counters are re-armed on every iteration, so after the loop they
+  // describe the last mm() call only
+  size_t instret, cycles;
+  for (int i = 0; i < R; i++)
+  {
+    instret = -read_csr(minstret);
+    cycles = -read_csr(mcycle);
+    mm(m, n, p, a, p, b, n, c, n);
+    instret += read_csr(minstret);
+    cycles += read_csr(mcycle);
+  }
+
+  asm volatile("fence");
+
+  printf("C%d: reg block %dx%dx%d, cache block %dx%dx%d\n",
+         cid, RBM, RBN, RBK, CBM, CBN, CBK);
+  printf("C%d: %d instructions\n", cid, (int)(instret));
+  printf("C%d: %d cycles\n", cid, (int)(cycles));
+  printf("C%d: %d flops\n", cid, 2*m*n*p);
+  // The quotient has type size_t; cast it so it matches the %d conversion
+  printf("C%d: %d Mflops @ 1 GHz\n", cid, (int)(2000*m*n*p/(cycles)));
+
+#if 1
+  // mm() accumulates into c, so after R calls every element should be
+  // R times the plain dot product
+  for (size_t i = 0; i < m; i++)
+  {
+    for (size_t j = 0; j < n; j++)
+    {
+      t ref = 0;
+      for (size_t k = 0; k < p; k++)
+        ref += a[i*p+k] * b[k*n+j];
+      ref *= R;
+      if (fabs(c[i*n+j]-ref) > fabs(1e-6*ref))
+      {
+        // size_t indices are cast to match the %lu conversions
+        printf("C%d: c[%lu][%lu] %f != %f\n", cid,
+               (unsigned long)i, (unsigned long)j, c[i*n+j], ref);
+        exit(1);
+      }
+    }
+  }
+#endif
+
+  barrier(nc);
+  exit(0);
+}
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/mm/rb.h
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/mm/rb.h
+static const int RBM = 4, RBN = 5, RBK = 6;
+static const int CBM = 24, CBN = 25, CBK = 24;
+// Generated register-block kernel (see gen.scala): accumulates a 4 x 5
+// tile of c over p steps of k. a0 points at the first of four rows of a
+// (row stride lda), b0 at the first of p rows of b (row stride ldb), and
+// c at the tile's first row (row stride ldc). The tile is held in twenty
+// local accumulators, updated with one fma per (k, row, column), and
+// written back once at the end.
+static inline void kloop(size_t p, t* a0, size_t lda, t* b0, size_t ldb, t* c, size_t ldc)
+{
+  // Row pointers into the c tile and the accumulators loaded from it
+  t* c_0 = &c[ldc*0];
+  t* c_1 = &c[ldc*1];
+  t* c_2 = &c[ldc*2];
+  t* c_3 = &c[ldc*3];
+  t c_0_0 = c_0[0];
+  t c_0_1 = c_0[1];
+  t c_0_2 = c_0[2];
+  t c_0_3 = c_0[3];
+  t c_0_4 = c_0[4];
+  t c_1_0 = c_1[0];
+  t c_1_1 = c_1[1];
+  t c_1_2 = c_1[2];
+  t c_1_3 = c_1[3];
+  t c_1_4 = c_1[4];
+  t c_2_0 = c_2[0];
+  t c_2_1 = c_2[1];
+  t c_2_2 = c_2[2];
+  t c_2_3 = c_2[3];
+  t c_2_4 = c_2[4];
+  t c_3_0 = c_3[0];
+  t c_3_1 = c_3[1];
+  t c_3_2 = c_3[2];
+  t c_3_3 = c_3[3];
+  t c_3_4 = c_3[4];
+  // Main loop: RBK (six) k steps per iteration, fully unrolled
+  for (t *a = a0, *b = b0; a < a0 + p/RBK*RBK; a += RBK, b += RBK*ldb)
+  {
+    t* a_0 = &a[lda*0];
+    t* a_1 = &a[lda*1];
+    t* a_2 = &a[lda*2];
+    t* a_3 = &a[lda*3];
+    t* b_0 = &b[ldb*0];
+    t* b_1 = &b[ldb*1];
+    t* b_2 = &b[ldb*2];
+    t* b_3 = &b[ldb*3];
+    t* b_4 = &b[ldb*4];
+    t* b_5 = &b[ldb*5];
+    // k offset 0
+    c_0_0 = fma(a_0[0], b_0[0], c_0_0);
+    c_0_1 = fma(a_0[0], b_0[1], c_0_1);
+    c_0_2 = fma(a_0[0], b_0[2], c_0_2);
+    c_0_3 = fma(a_0[0], b_0[3], c_0_3);
+    c_0_4 = fma(a_0[0], b_0[4], c_0_4);
+    c_1_0 = fma(a_1[0], b_0[0], c_1_0);
+    c_1_1 = fma(a_1[0], b_0[1], c_1_1);
+    c_1_2 = fma(a_1[0], b_0[2], c_1_2);
+    c_1_3 = fma(a_1[0], b_0[3], c_1_3);
+    c_1_4 = fma(a_1[0], b_0[4], c_1_4);
+    c_2_0 = fma(a_2[0], b_0[0], c_2_0);
+    c_2_1 = fma(a_2[0], b_0[1], c_2_1);
+    c_2_2 = fma(a_2[0], b_0[2], c_2_2);
+    c_2_3 = fma(a_2[0], b_0[3], c_2_3);
+    c_2_4 = fma(a_2[0], b_0[4], c_2_4);
+    c_3_0 = fma(a_3[0], b_0[0], c_3_0);
+    c_3_1 = fma(a_3[0], b_0[1], c_3_1);
+    c_3_2 = fma(a_3[0], b_0[2], c_3_2);
+    c_3_3 = fma(a_3[0], b_0[3], c_3_3);
+    c_3_4 = fma(a_3[0], b_0[4], c_3_4);
+    // k offset 1
+    c_0_0 = fma(a_0[1], b_1[0], c_0_0);
+    c_0_1 = fma(a_0[1], b_1[1], c_0_1);
+    c_0_2 = fma(a_0[1], b_1[2], c_0_2);
+    c_0_3 = fma(a_0[1], b_1[3], c_0_3);
+    c_0_4 = fma(a_0[1], b_1[4], c_0_4);
+    c_1_0 = fma(a_1[1], b_1[0], c_1_0);
+    c_1_1 = fma(a_1[1], b_1[1], c_1_1);
+    c_1_2 = fma(a_1[1], b_1[2], c_1_2);
+    c_1_3 = fma(a_1[1], b_1[3], c_1_3);
+    c_1_4 = fma(a_1[1], b_1[4], c_1_4);
+    c_2_0 = fma(a_2[1], b_1[0], c_2_0);
+    c_2_1 = fma(a_2[1], b_1[1], c_2_1);
+    c_2_2 = fma(a_2[1], b_1[2], c_2_2);
+    c_2_3 = fma(a_2[1], b_1[3], c_2_3);
+    c_2_4 = fma(a_2[1], b_1[4], c_2_4);
+    c_3_0 = fma(a_3[1], b_1[0], c_3_0);
+    c_3_1 = fma(a_3[1], b_1[1], c_3_1);
+    c_3_2 = fma(a_3[1], b_1[2], c_3_2);
+    c_3_3 = fma(a_3[1], b_1[3], c_3_3);
+    c_3_4 = fma(a_3[1], b_1[4], c_3_4);
+    // k offset 2
+    c_0_0 = fma(a_0[2], b_2[0], c_0_0);
+    c_0_1 = fma(a_0[2], b_2[1], c_0_1);
+    c_0_2 = fma(a_0[2], b_2[2], c_0_2);
+    c_0_3 = fma(a_0[2], b_2[3], c_0_3);
+    c_0_4 = fma(a_0[2], b_2[4], c_0_4);
+    c_1_0 = fma(a_1[2], b_2[0], c_1_0);
+    c_1_1 = fma(a_1[2], b_2[1], c_1_1);
+    c_1_2 = fma(a_1[2], b_2[2], c_1_2);
+    c_1_3 = fma(a_1[2], b_2[3], c_1_3);
+    c_1_4 = fma(a_1[2], b_2[4], c_1_4);
+    c_2_0 = fma(a_2[2], b_2[0], c_2_0);
+    c_2_1 = fma(a_2[2], b_2[1], c_2_1);
+    c_2_2 = fma(a_2[2], b_2[2], c_2_2);
+    c_2_3 = fma(a_2[2], b_2[3], c_2_3);
+    c_2_4 = fma(a_2[2], b_2[4], c_2_4);
+    c_3_0 = fma(a_3[2], b_2[0], c_3_0);
+    c_3_1 = fma(a_3[2], b_2[1], c_3_1);
+    c_3_2 = fma(a_3[2], b_2[2], c_3_2);
+    c_3_3 = fma(a_3[2], b_2[3], c_3_3);
+    c_3_4 = fma(a_3[2], b_2[4], c_3_4);
+    // k offset 3
+    c_0_0 = fma(a_0[3], b_3[0], c_0_0);
+    c_0_1 = fma(a_0[3], b_3[1], c_0_1);
+    c_0_2 = fma(a_0[3], b_3[2], c_0_2);
+    c_0_3 = fma(a_0[3], b_3[3], c_0_3);
+    c_0_4 = fma(a_0[3], b_3[4], c_0_4);
+    c_1_0 = fma(a_1[3], b_3[0], c_1_0);
+    c_1_1 = fma(a_1[3], b_3[1], c_1_1);
+    c_1_2 = fma(a_1[3], b_3[2], c_1_2);
+    c_1_3 = fma(a_1[3], b_3[3], c_1_3);
+    c_1_4 = fma(a_1[3], b_3[4], c_1_4);
+    c_2_0 = fma(a_2[3], b_3[0], c_2_0);
+    c_2_1 = fma(a_2[3], b_3[1], c_2_1);
+    c_2_2 = fma(a_2[3], b_3[2], c_2_2);
+    c_2_3 = fma(a_2[3], b_3[3], c_2_3);
+    c_2_4 = fma(a_2[3], b_3[4], c_2_4);
+    c_3_0 = fma(a_3[3], b_3[0], c_3_0);
+    c_3_1 = fma(a_3[3], b_3[1], c_3_1);
+    c_3_2 = fma(a_3[3], b_3[2], c_3_2);
+    c_3_3 = fma(a_3[3], b_3[3], c_3_3);
+    c_3_4 = fma(a_3[3], b_3[4], c_3_4);
+    // k offset 4
+    c_0_0 = fma(a_0[4], b_4[0], c_0_0);
+    c_0_1 = fma(a_0[4], b_4[1], c_0_1);
+    c_0_2 = fma(a_0[4], b_4[2], c_0_2);
+    c_0_3 = fma(a_0[4], b_4[3], c_0_3);
+    c_0_4 = fma(a_0[4], b_4[4], c_0_4);
+    c_1_0 = fma(a_1[4], b_4[0], c_1_0);
+    c_1_1 = fma(a_1[4], b_4[1], c_1_1);
+    c_1_2 = fma(a_1[4], b_4[2], c_1_2);
+    c_1_3 = fma(a_1[4], b_4[3], c_1_3);
+    c_1_4 = fma(a_1[4], b_4[4], c_1_4);
+    c_2_0 = fma(a_2[4], b_4[0], c_2_0);
+    c_2_1 = fma(a_2[4], b_4[1], c_2_1);
+    c_2_2 = fma(a_2[4], b_4[2], c_2_2);
+    c_2_3 = fma(a_2[4], b_4[3], c_2_3);
+    c_2_4 = fma(a_2[4], b_4[4], c_2_4);
+    c_3_0 = fma(a_3[4], b_4[0], c_3_0);
+    c_3_1 = fma(a_3[4], b_4[1], c_3_1);
+    c_3_2 = fma(a_3[4], b_4[2], c_3_2);
+    c_3_3 = fma(a_3[4], b_4[3], c_3_3);
+    c_3_4 = fma(a_3[4], b_4[4], c_3_4);
+    // k offset 5
+    c_0_0 = fma(a_0[5], b_5[0], c_0_0);
+    c_0_1 = fma(a_0[5], b_5[1], c_0_1);
+    c_0_2 = fma(a_0[5], b_5[2], c_0_2);
+    c_0_3 = fma(a_0[5], b_5[3], c_0_3);
+    c_0_4 = fma(a_0[5], b_5[4], c_0_4);
+    c_1_0 = fma(a_1[5], b_5[0], c_1_0);
+    c_1_1 = fma(a_1[5], b_5[1], c_1_1);
+    c_1_2 = fma(a_1[5], b_5[2], c_1_2);
+    c_1_3 = fma(a_1[5], b_5[3], c_1_3);
+    c_1_4 = fma(a_1[5], b_5[4], c_1_4);
+    c_2_0 = fma(a_2[5], b_5[0], c_2_0);
+    c_2_1 = fma(a_2[5], b_5[1], c_2_1);
+    c_2_2 = fma(a_2[5], b_5[2], c_2_2);
+    c_2_3 = fma(a_2[5], b_5[3], c_2_3);
+    c_2_4 = fma(a_2[5], b_5[4], c_2_4);
+    c_3_0 = fma(a_3[5], b_5[0], c_3_0);
+    c_3_1 = fma(a_3[5], b_5[1], c_3_1);
+    c_3_2 = fma(a_3[5], b_5[2], c_3_2);
+    c_3_3 = fma(a_3[5], b_5[3], c_3_3);
+    c_3_4 = fma(a_3[5], b_5[4], c_3_4);
+  }
+  // Remainder loop: the k steps left over after the last full group of
+  // RBK, one per iteration
+  for (t *a = a0 + p/RBK*RBK, *b = b0 + p/RBK*RBK*ldb; a < a0 + p; a++, b += ldb)
+  {
+    t* a_0 = &a[lda*0];
+    t* a_1 = &a[lda*1];
+    t* a_2 = &a[lda*2];
+    t* a_3 = &a[lda*3];
+    t* b_0 = &b[ldb*0];
+    c_0_0 = fma(a_0[0], b_0[0], c_0_0);
+    c_0_1 = fma(a_0[0], b_0[1], c_0_1);
+    c_0_2 = fma(a_0[0], b_0[2], c_0_2);
+    c_0_3 = fma(a_0[0], b_0[3], c_0_3);
+    c_0_4 = fma(a_0[0], b_0[4], c_0_4);
+    c_1_0 = fma(a_1[0], b_0[0], c_1_0);
+    c_1_1 = fma(a_1[0], b_0[1], c_1_1);
+    c_1_2 = fma(a_1[0], b_0[2], c_1_2);
+    c_1_3 = fma(a_1[0], b_0[3], c_1_3);
+    c_1_4 = fma(a_1[0], b_0[4], c_1_4);
+    c_2_0 = fma(a_2[0], b_0[0], c_2_0);
+    c_2_1 = fma(a_2[0], b_0[1], c_2_1);
+    c_2_2 = fma(a_2[0], b_0[2], c_2_2);
+    c_2_3 = fma(a_2[0], b_0[3], c_2_3);
+    c_2_4 = fma(a_2[0], b_0[4], c_2_4);
+    c_3_0 = fma(a_3[0], b_0[0], c_3_0);
+    c_3_1 = fma(a_3[0], b_0[1], c_3_1);
+    c_3_2 = fma(a_3[0], b_0[2], c_3_2);
+    c_3_3 = fma(a_3[0], b_0[3], c_3_3);
+    c_3_4 = fma(a_3[0], b_0[4], c_3_4);
+  }
+  // Write the accumulators back into the c tile
+  c_0[0] = c_0_0;
+  c_0[1] = c_0_1;
+  c_0[2] = c_0_2;
+  c_0[3] = c_0_3;
+  c_0[4] = c_0_4;
+  c_1[0] = c_1_0;
+  c_1[1] = c_1_1;
+  c_1[2] = c_1_2;
+  c_1[3] = c_1_3;
+  c_1[4] = c_1_4;
+  c_2[0] = c_2_0;
+  c_2[1] = c_2_1;
+  c_2[2] = c_2_2;
+  c_2[3] = c_2_3;
+  c_2[4] = c_2_4;
+  c_3[0] = c_3_0;
+  c_3[1] = c_3_1;
+  c_3[2] = c_3_2;
+  c_3[3] = c_3_3;
+  c_3[4] = c_3_4;
+}
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/mt-matmul/dataset.h
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/mt-matmul/dataset.h
+// See LICENSE for license details.
+
+#ifndef __DATASET_H
+#define __DATASET_H
+#define ARRAY_SIZE 256 
+
+
+#define DIM_SIZE 16 
+
+
+typedef int data_t;static data_t input1_data[ARRAY_SIZE] = 
+{
+    0,   3,   2,   0,   3,   1,   0,   3,   2,   3,   2,   0,   3,   3,   1,   2,   3,   0,   0,   1, 
+    1,   1,   2,   3,   1,   2,   3,   1,   1,   3,   2,   2,   0,   1,   3,   2,   2,   2,   0,   0, 
+    1,   0,   1,   3,   3,   0,   3,   3,   3,   3,   0,   3,   2,   1,   2,   2,   0,   0,   3,   0, 
+    1,   1,   0,   3,   3,   1,   2,   3,   3,   0,   1,   2,   1,   0,   1,   2,   2,   1,   0,   3, 
+    1,   0,   2,   2,   1,   1,   1,   1,   1,   1,   2,   0,   3,   1,   1,   2,   2,   3,   3,   1, 
+    3,   2,   0,   0,   0,   3,   3,   3,   2,   1,   2,   3,   1,   0,   0,   0,   0,   1,   2,   2, 
+    1,   1,   3,   3,   3,   1,   1,   2,   3,   1,   3,   3,   2,   3,   2,   1,   2,   3,   0,   2, 
+    2,   1,   1,   0,   0,   0,   0,   0,   1,   3,   3,   1,   1,   1,   2,   2,   3,   2,   1,   1, 
+    1,   1,   3,   0,   2,   2,   1,   3,   2,   1,   2,   2,   1,   3,   1,   3,   1,   3,   2,   3, 
+    1,   2,   1,   3,   2,   2,   0,   1,   0,   0,   1,   2,   3,   3,   1,   0,   0,   0,   3,   1, 
+    2,   3,   2,   3,   2,   0,   0,   0,   0,   0,   3,   1,   3,   0,   0,   0,   3,   1,   1,   1, 
+    1,   2,   1,   2,   3,   2,   0,   0,   2,   2,   3,   0,   3,   0,   0,   3,   0,   3,   1,   3, 
+    3,   1,   1,   1,   2,   2,   1,   3,   0,   3,   3,   1,   0,   0,   3,   2
+};
+
+static data_t input2_data[ARRAY_SIZE] = 
+{
+    1,   1,   0,   3,   1,   2,   0,   0,   0,   0,   0,   2,   1,   2,   3,   0,   0,   3,   3,   2, 
+    2,   1,   2,   3,   3,   0,   2,   2,   1,   1,   2,   2,   0,   2,   2,   1,   2,   3,   2,   2, 
+    3,   3,   2,   2,   1,   1,   1,   1,   2,   1,   2,   2,   3,   3,   3,   0,   0,   3,   2,   3, 
+    2,   3,   1,   2,   1,   1,   2,   2,   0,   1,   0,   3,   2,   1,   1,   1,   2,   0,   1,   2, 
+    2,   0,   2,   1,   3,   3,   2,   3,   2,   0,   3,   1,   3,   3,   2,   0,   1,   0,   1,   1, 
+    2,   2,   1,   1,   2,   2,   1,   2,   3,   3,   1,   3,   2,   2,   2,   3,   3,   1,   0,   2, 
+    1,   0,   0,   0,   1,   1,   2,   0,   3,   2,   3,   3,   0,   2,   3,   1,   0,   0,   2,   1, 
+    2,   0,   2,   1,   1,   2,   3,   1,   3,   2,   1,   0,   0,   0,   0,   0,   2,   2,   0,   2, 
+    1,   2,   0,   3,   2,   2,   0,   0,   3,   2,   1,   1,   3,   0,   2,   0,   0,   1,   0,   2, 
+    3,   3,   1,   3,   3,   0,   0,   2,   2,   0,   0,   0,   1,   0,   0,   1,   3,   0,   2,   1, 
+    3,   2,   2,   1,   3,   2,   0,   1,   2,   2,   3,   2,   1,   1,   1,   1,   3,   0,   1,   3, 
+    2,   2,   3,   1,   1,   2,   0,   2,   1,   1,   2,   3,   1,   0,   1,   0,   1,   1,   0,   0, 
+    2,   0,   3,   0,   3,   0,   3,   2,   2,   3,   3,   2,   1,   0,   2,   2
+};
+
+static data_t verify_data[ARRAY_SIZE] = 
+{
+   36,  44,  57,  50,  54,  36,  38,  46,  55,  25,  38,  34,  51,  30,  40,  32,  37,  34,  38,  52, 
+   51,  40,  28,  32,  41,  22,  26,  35,  49,  35,  42,  23,  26,  26,  33,  36,  52,  40,  45,  49, 
+   50,  34,  41,  35,  44,  25,  23,  23,  31,  29,  39,  46,  50,  36,  31,  32,  42,  32,  34,  41, 
+   44,  33,  43,  30,  31,  28,  39,  46,  50,  40,  35,  37,  43,  35,  33,  43,  43,  29,  37,  29, 
+   27,  22,  30,  33,  43,  31,  32,  25,  36,  31,  31,  29,  40,  28,  26,  22,  29,  42,  48,  51, 
+   65,  52,  43,  54,  63,  34,  42,  44,  56,  33,  38,  32,  26,  22,  23,  38,  49,  32,  26,  30, 
+   43,  22,  24,  27,  45,  24,  26,  17,  35,  35,  47,  51,  59,  59,  43,  42,  43,  28,  37,  43, 
+   56,  48,  36,  32,  28,  19,  28,  34,  46,  34,  28,  34,  45,  20,  29,  28,  50,  32,  26,  21, 
+   37,  38,  51,  50,  55,  45,  38,  49,  56,  28,  38,  40,  50,  29,  44,  26,  32,  35,  50,  43, 
+   53,  44,  41,  41,  34,  24,  35,  34,  39,  33,  34,  29,  21,  33,  31,  45,  48,  42,  27,  29, 
+   40,  17,  21,  32,  45,  30,  29,  26,  26,  27,  38,  33,  29,  31,  32,  31,  35,  25,  29,  29, 
+   34,  15,  25,  23,  34,  28,  44,  45,  41,  41,  37,  45,  45,  17,  34,  44,  46,  30,  43,  29, 
+   31,  36,  37,  50,  54,  44,  28,  40,  38,  22,  27,  28,  45,  32,  36,  22
+};
+
+
+#endif //__DATASET_H
\ No newline at end of file
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/mt-matmul/matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/mt-matmul/matmul.c
+// See LICENSE for license details.
+
+#include "dataset.h"
+#include "util.h"
+#include <stddef.h>
+
+#pragma GCC optimize ("unroll-loops")
+
+// Multiply the lda x lda row-major matrices A and B into C, with the rows of
+// C split across cores: core `coreid` of `ncores` computes one contiguous
+// band of lda / ncores rows.
+//
+//   coreid - index of the calling core, expected to be in [0, ncores)
+//   ncores - number of cores sharing the work
+//   lda    - dimension (and row stride) of A, B and C
+//   A, B   - input matrices
+//   C      - output matrix; only this core's band of rows is written
+//
+// If lda is not a multiple of ncores, the last core additionally computes the
+// lda % ncores leftover rows so that every row of C is produced.
+void matmul(const size_t coreid, const size_t ncores, const size_t lda,  const data_t A[], const data_t B[], data_t C[])
+{
+  size_t i, j, k;
+  size_t block, start, end;
+
+  // A zero core count would divide by zero, and an out-of-range core id would
+  // index past the end of the matrices; neither has any rows to compute.
+  if (ncores == 0 || coreid >= ncores)
+    return;
+
+  block = lda / ncores;
+  start = block * coreid;
+  // The last core absorbs the remainder rows (none when lda % ncores == 0).
+  end = (coreid == ncores - 1) ? lda : (start + block);
+
+  for (i = 0; i < lda; i++) {
+    for (j = start; j < end; j++) {
+      data_t sum = 0;
+      for (k = 0; k < lda; k++)
+        sum += A[j*lda + k] * B[k*lda + i];
+      C[i + j*lda] = sum;
+    }
+  }
+}
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/mt-matmul/matmul_gendata.pl
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/mt-matmul/matmul_gendata.pl
+#!/usr/bin/perl -w
+#==========================================================================
+# matmul_gendata.pl
+#
+# Author : Christopher Batten (cbatten@mit.edu)
+# Date   : April 29, 2005
+#
+(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
+#
+# Simple script which creates an input data set and the reference data
+# for the matmul benchmark.
+#
+ENDMSG
+
+use strict "vars";
+use warnings;
+no  warnings("once");
+use Getopt::Long;
+
+#--------------------------------------------------------------------------
+# Command line processing
+#--------------------------------------------------------------------------
+
+our %opts;
+
+sub usage()
+{
+
+  # Emit the whole help text (option summary followed by the header blurb
+  # captured in $usageMsg) with a single print, then terminate the script.
+  print "\n",
+        " Usage: matmul_gendata.pl [options] \n",
+        "\n",
+        " Options:\n",
+        "  --help  print this message\n",
+        "  --size  size of input data [1000]\n",
+        "  --seed  random seed [1]\n",
+        $usageMsg;
+
+  exit();
+}
+
+sub processCommandLine()
+{
+
+  # Seed the option table with its defaults in one hash-slice assignment.
+  @opts{ "help", "size", "seed" } = ( 0, 1000, 1 );
+
+  # Parse the command line; a parse failure or an explicit --help both end
+  # in the usage text.
+  usage() unless Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' );
+  usage() if $opts{"help"};
+
+}
+
+#--------------------------------------------------------------------------
+# Helper Functions
+#--------------------------------------------------------------------------
+
+# Print @$arrayRef as a C array definition named $arrayName, 20 values per
+# row, each value formatted with %3d and separated by ", ".
+sub printArray
+{
+  my ( $arrayName, $arrayRef ) = @_;
+
+  my $numCols  = 20;
+  my $arrayLen = scalar(@{$arrayRef});
+
+  print "static data_t ".$arrayName."[ARRAY_SIZE] = \n";
+  print "{\n";
+
+  # An empty array still produces one indented, otherwise blank row.
+  print "  \n" if $arrayLen == 0;
+
+  # Walk the array one row at a time; the final row may be short.
+  for ( my $rowStart = 0; $rowStart < $arrayLen; $rowStart += $numCols ) {
+    my $rowEnd = $rowStart + $numCols - 1;
+    $rowEnd = $arrayLen - 1 if $rowEnd > $arrayLen - 1;
+
+    print "  ";
+    foreach my $index ( $rowStart .. $rowEnd ) {
+      print sprintf("%3d",$arrayRef->[$index]);
+      # every value except the very last one is followed by a separator
+      print ", " unless $index == $arrayLen-1;
+    }
+    print "\n";
+  }
+
+  print  "};\n\n";
+}
+
+
+
+#--------------------------------------------------------------------------
+# Matmul
+#--------------------------------------------------------------------------
+
+# http://answers.oreilly.com/topic/418-how-to-multiply-matrices-in-perl/
+
+# Return a reference to the matrix product of the two array-of-arrays
+# references passed in.
+sub mmult {
+    my ($lhs, $rhs) = @_;
+    my ($lhsRows, $lhsCols) = matdim($lhs);
+    my ($rhsRows, $rhsCols) = matdim($rhs);
+
+    my $product = [  ];
+
+    foreach my $row (range($lhsRows)) {
+        foreach my $col (range($rhsCols)) {
+            # accumulate straight into the result cell
+            foreach my $inner (range($lhsCols)) {
+                $product->[$row][$col] += $lhs->[$row][$inner] * $rhs->[$inner][$col];
+            }
+        }
+    }
+    return $product;
+}
+
+# List of the integers 0 .. n-1 for the given n.
+sub range {
+    my ($count) = @_;
+    return 0 .. ($count - 1);
+}
+
+
+# Number of elements in the array referenced by the argument; dies when the
+# argument is not an ARRAY reference.
+sub veclen {
+    my ($ary_ref) = @_;
+    my $type = ref $ary_ref;
+    die "$type is bad array ref for $ary_ref" unless $type eq "ARRAY";
+    return scalar(@{$ary_ref});
+}
+
+# (rows, cols) of an array-of-arrays matrix; the column count is taken from
+# the first row.
+sub matdim {
+    my ($matrix) = @_;
+    my $rows = veclen($matrix);
+    return ($rows, veclen($matrix->[0]));
+}
+
+#--------------------------------------------------------------------------
+# Main
+#--------------------------------------------------------------------------
+
+# Generate two random size x size matrices (values 0..3), multiply them, and
+# print a dataset.h style header with both inputs and the reference product
+# flattened in row-major order.
+sub main()
+{
+
+  processCommandLine();
+  srand($opts{"seed"});
+
+  # A bare "--size" parses as 0, which would leave the matrices undefined and
+  # make mmult() die with an obscure message; reject it up front.
+  my $size = $opts{"size"};
+  $size >= 1 or die "matmul_gendata.pl: --size must be a positive integer\n";
+
+  # create random input arrays (the rand() call order is kept so that a given
+  # seed keeps producing the same data set)
+  my $mat_values1;
+  my $mat_values2;
+  for ( my $i = 0; $i < $size; $i++ ) {
+    for ( my $j = 0; $j < $size; $j++ ) {
+      $mat_values1->[$i][$j] = int(rand(4));
+      $mat_values2->[$i][$j] = int(rand(4));
+    }
+  }
+
+  # perform matmul
+  my $mat_results = mmult( $mat_values1, $mat_values2 );
+
+  # flatten the 2d arrays row by row into 1d lists
+  my @values1 = map { @{$_} } @{$mat_values1};
+  my @values2 = map { @{$_} } @{$mat_values2};
+  my @results = map { @{$_} } @{$mat_results};
+
+  print "\n#ifndef __DATASET_H";
+  print "\n#define __DATASET_H";
+  print "\n\#define ARRAY_SIZE ".($size*$size)." \n\n";
+  print "\n\#define DIM_SIZE ".$size." \n\n";
+  # terminate the typedef line so the first array declaration starts on a
+  # line of its own instead of being glued onto the typedef
+  print "\ntypedef int data_t;\n\n";
+
+  printArray( "input1_data", \@values1 );
+  printArray( "input2_data", \@values2 );
+  printArray( "verify_data", \@results);
+
+  # a C source file must end with a newline
+  print "\n#endif //__DATASET_H\n";
+
+}
+
+main();
+
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/mt-matmul/mt-matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/mt-matmul/mt-matmul.c
+// See LICENSE for license details.
+
+//**************************************************************************
+// Multi-threaded Matrix Multiply benchmark
+//--------------------------------------------------------------------------
+// TA     : Christopher Celio
+// Student: 
+//
+//
+// This benchmark multiplies two 2-D arrays together and writes the results to
+// a third vector. The input data (and reference data) should be generated
+// using the matmul_gendata.pl perl script and dumped to a file named
+// dataset.h. 
+
+//--------------------------------------------------------------------------
+// Includes 
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stddef.h>
+
+
+//--------------------------------------------------------------------------
+// Input/Reference Data
+
+#include "dataset.h"
+ 
+
+//--------------------------------------------------------------------------
+// Basic Utilities and Multi-thread Support
+
+#include "util.h"
+
+   
+//--------------------------------------------------------------------------
+// matmul function
+ 
+extern void matmul(const size_t coreid, const size_t ncores, const size_t lda,  const data_t A[], const data_t B[], data_t C[] );
+
+
+//--------------------------------------------------------------------------
+// Main
+//
+// all threads start executing thread_entry(). Use their "coreid" to
+// differentiate between threads (each thread is running on a separate core).
+  
+void thread_entry(int cid, int nc)
+{
+   // static, so the result buffer is a single object rather than a per-call local
+   static data_t results_data[ARRAY_SIZE];
+
+   // run this core's share of the multiply, then wait at the barrier
+   // NOTE(review): the iteration argument is left exactly as written; how it
+   // is used depends on the stats() macro in util.h - confirm before changing
+   stats(matmul(cid, nc, DIM_SIZE, input1_data, input2_data, results_data); barrier(nc), DIM_SIZE/DIM_SIZE/DIM_SIZE);
+
+   // the verification result becomes the exit status
+   exit(verify(ARRAY_SIZE, results_data, verify_data));
+}
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/mt-vvadd/dataset.h
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/mt-vvadd/dataset.h
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/mt-vvadd/mt-vvadd.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/mt-vvadd/mt-vvadd.c
+// See LICENSE for license details.
+
+//**************************************************************************
+// Vector-vector add benchmark
+//--------------------------------------------------------------------------
+// Author  : Andrew Waterman
+// TA      : Christopher Celio
+// Student : 
+//
+// This benchmark adds two vectors and writes the results to a
+// third vector. The input data (and reference data) should be
+// generated using the vvadd_gendata.pl perl script and dumped
+// to a file named dataset.h 
+
+//--------------------------------------------------------------------------
+// Includes 
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+
+//--------------------------------------------------------------------------
+// Input/Reference Data
+
+#include "dataset.h"
+ 
+  
+//--------------------------------------------------------------------------
+// Basic Utilities and Multi-thread Support
+
+#include "util.h"
+   
+ 
+//--------------------------------------------------------------------------
+// vvadd function
+
+extern void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z);
+
+
+//--------------------------------------------------------------------------
+// Main
+//
+// all threads start executing thread_entry(). Use their "coreid" to
+// differentiate between threads (each thread is running on a separate core).
+  
+void thread_entry(int cid, int nc)
+{
+   // static storage is allocated in the binary, so all threads share this buffer
+   static data_t results_data[DATA_SIZE];
+
+   // Pass 1: out-of-place add, results_data = input1_data + input2_data
+   barrier(nc);
+   stats(vvadd(cid, nc, DATA_SIZE, input1_data, input2_data, results_data); barrier(nc), DATA_SIZE);
+
+   // only core 0 checks the result; a mismatch ends the run with that code
+   if (cid == 0) {
+     int rc = verifyDouble(DATA_SIZE, results_data, verify_data);
+     if (rc != 0)
+       exit(rc);
+   }
+
+   // Pass 2: in-place add. Core 0 first reloads the first operand into the
+   // result buffer; the barrier keeps the other cores from starting early.
+   if (cid == 0) {
+     for (size_t idx = 0; idx < DATA_SIZE; idx++)
+       results_data[idx] = input1_data[idx];
+   }
+   barrier(nc);
+   stats(vvadd(cid, nc, DATA_SIZE, results_data, input2_data, results_data); barrier(nc), DATA_SIZE);
+
+   if (cid == 0) {
+     int rc = verifyDouble(DATA_SIZE, results_data, verify_data);
+     if (rc != 0)
+       exit(rc);
+   }
+
+   barrier(nc);
+   exit(0);
+}
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/mt-vvadd/vvadd.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/mt-vvadd/vvadd.c
+// See LICENSE for license details.
+
+#include "stdlib.h"
+#include "dataset.h"
+
+//--------------------------------------------------------------------------
+// vvadd function
+
+// Element-wise z = x + y over n elements, interleaved across cores: each core
+// starts at its own id and advances by the core count.
+void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z)
+{
+   size_t idx = coreid;
+
+   while (idx < n)
+   {
+      z[idx] = x[idx] + y[idx];
+      idx += ncores;
+   }
+}
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/mt-vvadd/vvadd_gendata.pl
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/mt-vvadd/vvadd_gendata.pl
+#!/usr/bin/perl -w
+#==========================================================================
+# vvadd_gendata.pl
+#
+# Author : Christopher Batten (cbatten@mit.edu)
+# Date   : April 29, 2005
+#
+(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
+#
+# Simple script which creates an input data set and the reference data
+# for the vvadd benchmark.
+#
+ENDMSG
+
+use strict "vars";
+use warnings;
+no  warnings("once");
+use Getopt::Long;
+
+#--------------------------------------------------------------------------
+# Command line processing
+#--------------------------------------------------------------------------
+
+our %opts;
+
+sub usage()
+{
+
+  # Emit the whole help text (option summary followed by the header blurb
+  # captured in $usageMsg) with a single print, then terminate the script.
+  print "\n",
+        " Usage: vvadd_gendata.pl [options] \n",
+        "\n",
+        " Options:\n",
+        "  --help  print this message\n",
+        "  --size  size of input data [1000]\n",
+        "  --seed  random seed [1]\n",
+        $usageMsg;
+
+  exit();
+}
+
+sub processCommandLine()
+{
+
+  # Seed the option table with its defaults in one hash-slice assignment.
+  @opts{ "help", "size", "seed" } = ( 0, 1000, 1 );
+
+  # Parse the command line; a parse failure or an explicit --help both end
+  # in the usage text.
+  usage() unless Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' );
+  usage() if $opts{"help"};
+
+}
+
+#--------------------------------------------------------------------------
+# Helper Functions
+#--------------------------------------------------------------------------
+
+# Print @$arrayRef as a C array definition named $arrayName, 20 values per
+# row, each value formatted with %3.2f and separated by ", ".
+sub printArray
+{
+  my ( $arrayName, $arrayRef ) = @_;
+
+  my $numCols  = 20;
+  my $arrayLen = scalar(@{$arrayRef});
+
+  print "static data_t ".$arrayName."[DATA_SIZE] = \n";
+  print "{\n";
+
+  # An empty array still produces one indented, otherwise blank row.
+  print "  \n" if $arrayLen == 0;
+
+  # Walk the array one row at a time; the final row may be short.
+  for ( my $rowStart = 0; $rowStart < $arrayLen; $rowStart += $numCols ) {
+    my $rowEnd = $rowStart + $numCols - 1;
+    $rowEnd = $arrayLen - 1 if $rowEnd > $arrayLen - 1;
+
+    print "  ";
+    foreach my $index ( $rowStart .. $rowEnd ) {
+      print sprintf("%3.2f",$arrayRef->[$index]);
+      # every value except the very last one is followed by a separator
+      print ", " unless $index == $arrayLen-1;
+    }
+    print "\n";
+  }
+
+  print  "};\n\n";
+}
+
+#--------------------------------------------------------------------------
+# Main
+#--------------------------------------------------------------------------
+
+# Generate two random vectors (integer values 0..18) plus their element-wise
+# sum and print them as the DATA_SIZE define followed by three C arrays.
+sub main()
+{
+
+  processCommandLine();
+  srand($opts{"seed"});
+
+  my ( @values1, @values2, @sum );
+
+  # one pair of operands per element, drawn first-operand-first
+  foreach my $n ( 1 .. $opts{"size"} ) {
+    my $lhs = int(rand(19));
+    my $rhs = int(rand(19));
+    push @values1, $lhs;
+    push @values2, $rhs;
+    push @sum,     $lhs + $rhs;
+  }
+
+  print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
+  printArray( "input1_data", \@values1 );
+  printArray( "input2_data", \@values2 );
+  printArray( "verify_data", \@sum );
+
+}
+
+main();
+
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/multiply/dataset1.h
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/multiply/dataset1.h
+// See LICENSE for license details.
+
+
+#define DATA_SIZE 100 
+
+int input_data1[DATA_SIZE] = 
+{
+   41, 454, 833, 335, 564,   1, 187, 989, 749, 365, 350, 572, 132,  64, 949, 153, 584, 216, 805, 140, 
+  621, 210,   6, 572, 931, 339, 890, 593, 392, 898, 694, 228, 961,  12, 110, 883, 116, 750, 296, 646, 
+  426, 500, 314, 436, 659, 701, 774, 812, 319, 981, 678, 150, 875, 696, 376, 564, 474, 272, 938, 258, 
+  539, 647, 569, 509, 203,  88, 280, 703, 759, 669, 606, 375, 511, 551, 657, 936, 195, 592,  81, 569, 
+  267, 952, 229, 800, 337, 584, 944, 643, 902, 368, 241, 489, 913, 328, 826, 313, 933, 592, 985, 388
+};
+
+int input_data2[DATA_SIZE] = 
+{
+  195, 543, 960, 649, 566, 979, 350, 997, 649, 814, 657,  79, 181, 208, 111, 998, 859, 629,  65, 847, 
+  288, 704, 349, 997, 141, 253, 905, 715, 886, 430, 264, 415, 576, 538, 979, 700, 761,   4, 241, 494, 
+  478, 100, 499, 864, 403, 693, 222, 416, 444, 296, 721, 285, 676, 620, 317,  78, 224, 351, 937, 540, 
+  288, 646, 119, 169, 615, 527, 606, 289, 389, 796, 351, 801, 455, 720, 278, 758, 367, 745, 358,  92, 
+  584, 989,  62, 271, 985, 853, 403, 788, 346, 531, 517, 222, 559, 461, 908, 241, 775, 358, 255, 332
+};
+
+int verify_data[DATA_SIZE] = 
+{
+  7995, 246522, 799680, 217415, 319224, 979, 65450, 986033, 486101, 297110, 229950, 45188, 23892, 13312, 105339, 152694, 501656, 135864, 52325, 118580, 
+  178848, 147840, 2094, 570284, 131271, 85767, 805450, 423995, 347312, 386140, 183216, 94620, 553536, 6456, 107690, 618100, 88276, 3000, 71336, 319124, 
+  203628, 50000, 156686, 376704, 265577, 485793, 171828, 337792, 141636, 290376, 488838, 42750, 591500, 431520, 119192, 43992, 106176, 95472, 878906, 139320, 
+  155232, 417962, 67711, 86021, 124845, 46376, 169680, 203167, 295251, 532524, 212706, 300375, 232505, 396720, 182646, 709488, 71565, 441040, 28998, 52348, 
+  155928, 941528, 14198, 216800, 331945, 498152, 380432, 506684, 312092, 195408, 124597, 108558, 510367, 151208, 750008, 75433, 723075, 211936, 251175, 128816
+};
+
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/multiply/multiply.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/multiply/multiply.c
+// See LICENSE for license details.
+
+// *************************************************************************
+// multiply function (c version)
+// -------------------------------------------------------------------------
+
+// Software shift-and-add multiply: returns the low 32 bits of x * y.
+//
+// The loop runs on unsigned copies of the operands. The multiplicand is
+// shifted left 32 times, which overflows a signed int (undefined behaviour),
+// and right-shifting a negative signed value is implementation-defined;
+// unsigned arithmetic wraps modulo 2^32 and gives the same two's-complement
+// result for every input.
+int multiply( int x, int y )
+{
+
+ int i;
+ unsigned int multiplier   = (unsigned int) x;
+ unsigned int multiplicand = (unsigned int) y;
+ unsigned int result = 0;
+
+ // One iteration per bit of the multiplier; the trip count is fixed at 32
+ // so the amount of work does not depend on the operand values.
+ for (i = 0; i < 32; i++) {
+   if ((multiplier & 0x1u) == 1u)
+     result = result + multiplicand;
+
+   multiplier   = multiplier >> 1;
+   multiplicand = multiplicand << 1;
+ }
+
+ return (int) result;
+
+}
+
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/multiply/multiply.h
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/multiply/multiply.h
+// See LICENSE for license details.
+
+//**************************************************************************
+// Software multiply function
+//--------------------------------------------------------------------------
+
+// Simple C version
+int multiply(int x, int y);
+
+// Simple assembly version
+int multiply_asm(int x, int y);
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/multiply/multiply_gendata.pl
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/multiply/multiply_gendata.pl
+#!/usr/bin/perl -w
+#==========================================================================
+# multiply_gendata.pl
+#
+# Author : Christopher Batten (cbatten@mit.edu)
+# Date   : May 9, 2005
+#
+(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
+#
+# Simple script which creates an input data set and the reference data 
+# for the multiply benchmark.
+#
+ENDMSG
+
+use strict "vars";
+use warnings;
+no  warnings("once");
+use Getopt::Long;
+
+#--------------------------------------------------------------------------
+# Command line processing
+#--------------------------------------------------------------------------
+
+our %opts;
+
+sub usage()
+{
+
+  # Emit the whole help text (option summary followed by the header blurb
+  # captured in $usageMsg) with a single print, then terminate the script.
+  print "\n",
+        " Usage: multiply_gendata.pl [options] \n",
+        "\n",
+        " Options:\n",
+        "  --help  print this message\n",
+        "  --size  size of input data [750]\n",
+        "  --seed  random seed [1]\n",
+        $usageMsg;
+
+  exit();
+}
+
+sub processCommandLine()
+{
+
+  # Seed the option table with its defaults in one hash-slice assignment.
+  @opts{ "help", "size", "seed" } = ( 0, 750, 1 );
+
+  # Parse the command line; a parse failure or an explicit --help both end
+  # in the usage text.
+  usage() unless Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' );
+  usage() if $opts{"help"};
+
+}
+
+#--------------------------------------------------------------------------
+# Helper Functions
+#--------------------------------------------------------------------------
+
+# Print @$arrayRef as a C int array definition named $arrayName, 20 values
+# per row, each value formatted with %3d and separated by ", ".
+sub printArray
+{
+  my ( $arrayName, $arrayRef ) = @_;
+
+  my $numCols  = 20;
+  my $arrayLen = scalar(@{$arrayRef});
+
+  print "int ".$arrayName."[DATA_SIZE] = \n";
+  print "{\n";
+
+  # An empty array still produces one indented, otherwise blank row.
+  print "  \n" if $arrayLen == 0;
+
+  # Walk the array one row at a time; the final row may be short.
+  for ( my $rowStart = 0; $rowStart < $arrayLen; $rowStart += $numCols ) {
+    my $rowEnd = $rowStart + $numCols - 1;
+    $rowEnd = $arrayLen - 1 if $rowEnd > $arrayLen - 1;
+
+    print "  ";
+    foreach my $index ( $rowStart .. $rowEnd ) {
+      print sprintf("%3d",$arrayRef->[$index]);
+      # every value except the very last one is followed by a separator
+      print ", " unless $index == $arrayLen-1;
+    }
+    print "\n";
+  }
+
+  print  "};\n\n";
+}
+
+#--------------------------------------------------------------------------
+# Main
+#--------------------------------------------------------------------------
+
+# Generate two random operand vectors (integer values 0..998) and their
+# element-wise products, and print them as the DATA_SIZE define followed by
+# three C arrays.
+sub main()
+{
+
+  processCommandLine();
+  srand($opts{"seed"});
+
+  my $count = $opts{"size"};
+
+  # all first operands are drawn before any second operand
+  my @values1 = map { int(rand(999)) } ( 1 .. $count );
+  my @values2 = map { int(rand(999)) } ( 1 .. $count );
+
+  # reference results: pairwise products of the two operand lists
+  my @multiply = map { $values1[$_] * $values2[$_] } ( 0 .. $count - 1 );
+
+  print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
+  printArray( "input_data1", \@values1 );
+  printArray( "input_data2", \@values2 );
+  printArray( "verify_data", \@multiply );
+
+}
+
+main();
+
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/multiply/multiply_main.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/multiply/multiply_main.c
+// See LICENSE for license details.
+
+// *************************************************************************
+// multiply filter benchmark
+// -------------------------------------------------------------------------
+//
+// This benchmark tests the software multiply implementation. The
+// input data (and reference data) should be generated using the
+// multiply_gendata.pl perl script and dumped to a file named
+// dataset1.h
+
+#include "util.h"
+
+#include "multiply.h"
+
+//--------------------------------------------------------------------------
+// Input/Reference Data
+
+#include "dataset1.h"
+
+//--------------------------------------------------------------------------
+// Main
+
+int main( int argc, char* argv[] )
+{
+  int results_data[DATA_SIZE];
+  int idx;
+
+#if PREALLOCATE
+  // optional extra pass over the data before the measured one
+  for (idx = 0; idx < DATA_SIZE; ++idx)
+    results_data[idx] = multiply( input_data1[idx], input_data2[idx] );
+#endif
+
+  // measured pass, bracketed by setStats(1) / setStats(0)
+  setStats(1);
+  idx = 0;
+  while (idx < DATA_SIZE)
+  {
+    results_data[idx] = multiply( input_data1[idx], input_data2[idx] );
+    ++idx;
+  }
+  setStats(0);
+
+  // Compare against the reference products; the outcome is the exit status
+  return verify( DATA_SIZE, results_data, verify_data );
+}
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/pmp/pmp.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/pmp/pmp.c
+// See LICENSE for license details.
+
+// Test of PMP functionality.
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "util.h"
+
+volatile int trap_expected;
+volatile int granule;
+
+#define INLINE inline __attribute__((always_inline))
+
+// Trap handler for the PMP test. An illegal-instruction trap ends the test
+// with status 0; any trap other than an expected load access fault ends it
+// with status 1. An expected fault is acknowledged by clearing trap_expected
+// and execution resumes after the faulting instruction.
+uintptr_t handle_trap(uintptr_t cause, uintptr_t epc, uintptr_t regs[32])
+{
+  // no PMP support
+  if (cause == CAUSE_ILLEGAL_INSTRUCTION)
+    exit(0);
+
+  int anticipated = trap_expected && cause == CAUSE_LOAD_ACCESS;
+  if (!anticipated)
+    exit(1);
+
+  trap_expected = 0;
+  return epc + insn_len(epc);
+}
+
+// Virtual address at which the scratch page is mapped (one page above zero).
+#define SCRATCH RISCV_PGSIZE
+// Page-sized, page-aligned buffer that backs the SCRATCH virtual page.
+uintptr_t scratch[RISCV_PGSIZE / sizeof(uintptr_t)] __attribute__((aligned(RISCV_PGSIZE)));
+// Page tables, one page each; l1pt is the root table installed by init_pt().
+uintptr_t l1pt[RISCV_PGSIZE / sizeof(uintptr_t)] __attribute__((aligned(RISCV_PGSIZE)));
+uintptr_t l2pt[RISCV_PGSIZE / sizeof(uintptr_t)] __attribute__((aligned(RISCV_PGSIZE)));
+#if __riscv_xlen == 64
+uintptr_t l3pt[RISCV_PGSIZE / sizeof(uintptr_t)] __attribute__((aligned(RISCV_PGSIZE)));
+#else
+// With a 32-bit xlen there is no third table: l3pt is an alias for l2pt.
+#define l3pt l2pt
+#endif
+
+// Build the page tables that map the SCRATCH virtual page onto the scratch
+// buffer, install the root table in sptbr, and program pmpaddr2 / pmpcfg0.
+static void init_pt()
+{
+  // Root entry 0 points at l2pt (only PTE_V set, so it is not a leaf).
+  l1pt[0] = ((uintptr_t)l2pt >> RISCV_PGSHIFT << PTE_PPN_SHIFT) | PTE_V;
+  // Leaf entry for the SCRATCH page: valid, readable, writable, A/D preset.
+  l3pt[SCRATCH / RISCV_PGSIZE] = ((uintptr_t)scratch >> RISCV_PGSHIFT << PTE_PPN_SHIFT) | PTE_A | PTE_D | PTE_V | PTE_R | PTE_W;
+#if __riscv_xlen == 64
+  // 64-bit: a separate third level exists, linked from l2pt[0].
+  l2pt[0] = ((uintptr_t)l3pt >> RISCV_PGSHIFT << PTE_PPN_SHIFT) | PTE_V;
+  uintptr_t vm_choice = SATP_MODE_SV39;
+#else
+  uintptr_t vm_choice = SATP_MODE_SV32;
+#endif
+  // SATP_MODE & ~(SATP_MODE<<1) keeps only the lowest bit of each run of set
+  // bits in SATP_MODE; multiplying by it moves vm_choice up to that bit
+  // (assumes SATP_MODE is one contiguous mask - TODO confirm).
+  write_csr(sptbr, ((uintptr_t)l1pt >> RISCV_PGSHIFT) |
+                   (vm_choice * (SATP_MODE & ~(SATP_MODE<<1))));
+  // pmpaddr2 is set to all ones and bits 16 and up of pmpcfg0 to NAPOT|R.
+  // NOTE(review): presumably this makes PMP entry 2 a read-only catch-all
+  // region behind the entries under test - confirm against the PMP spec.
+  write_csr(pmpaddr2, -1);
+  write_csr(pmpcfg0, (PMP_NAPOT | PMP_R) << 16);
+}
+
+// Translate an address inside the SCRATCH virtual page to the matching
+// address inside the scratch buffer; any other address ends the test with
+// status 3.
+INLINE uintptr_t va2pa(uintptr_t va)
+{
+  int in_scratch_page = va >= SCRATCH && va < SCRATCH + RISCV_PGSIZE;
+  if (!in_scratch_page)
+    exit(3);
+
+  uintptr_t offset = va - SCRATCH;
+  return offset + (uintptr_t)scratch;
+}
+
+// One PMP configuration under test, as written to the CSRs by set_pmp().
+typedef struct {
+  // configuration value; set_pmp() places its low byte in bits 8-15 of pmpcfg0
+  uintptr_t cfg;
+  // value written to pmpaddr0
+  uintptr_t a0;
+  // value written to pmpaddr1
+  uintptr_t a1;
+} pmpcfg_t;
+
+// Software model of the region described by p: returns non-zero when a
+// size-byte access at virtual address addr either misses the region entirely
+// or lies entirely inside it, and zero when it only partially overlaps.
+// The permission bits of p.cfg are not examined here.
+INLINE int pmp_ok(pmpcfg_t p, uintptr_t addr, uintptr_t size)
+{
+  // Address-matching field is zero: the access is reported as OK.
+  if ((p.cfg & PMP_A) == 0)
+    return 1;
+
+  // Any mode other than TOR: derive [a0, a1) from the encoded a1 value.
+  if ((p.cfg & PMP_A) != PMP_TOR) {
+    // default length of one unit (used as-is when the mode is not NAPOT)
+    uintptr_t range = 1;
+
+    if ((p.cfg & PMP_A) == PMP_NAPOT) {
+      // Strip the trailing one bits from a1; the length starts at two units
+      // and doubles again for every one bit removed.
+      range <<= 1;
+      for (uintptr_t i = 1; i; i <<= 1) {
+        if ((p.a1 & i) == 0)
+          break;
+        p.a1 &= ~i;
+        range <<= 1;
+      }
+    }
+
+    // a1 now holds the base; turn the pair into [base, base + range).
+    p.a0 = p.a1;
+    p.a1 = p.a0 + range;
+  }
+
+  // Scale both bounds by the granule and move addr into the same (physical)
+  // address space. NOTE(review): presumably this converts the bounds to byte
+  // addresses - confirm against how granule is set in detect_granule().
+  p.a0 *= granule;
+  p.a1 *= granule;
+  addr = va2pa(addr);
+
+  // Step through the access one granule at a time, counting covered bytes.
+  uintptr_t hits = 0;
+  for (uintptr_t i = 0; i < size; i += granule) {
+    if (p.a0 <= addr + i && addr + i < p.a1)
+      hits += granule;
+  }
+
+  return hits == 0 || hits >= size;
+}
+
+// Perform one load of `size` bytes (1, 2, 4, or 8 when xlen >= 64) from addr
+// with a temporarily modified mstatus, discarding the loaded value.
+INLINE void test_one(uintptr_t addr, uintptr_t size)
+{
+  // mstatus with the MPP field replaced by MPP & (MPP >> 1) and MPRV set.
+  // NOTE(review): for a two-bit MPP field that expression is its low bit,
+  // presumably selecting supervisor privilege for the load - confirm.
+  uintptr_t new_mstatus = (read_csr(mstatus) & ~MSTATUS_MPP) | (MSTATUS_MPP & (MSTATUS_MPP >> 1)) | MSTATUS_MPRV;
+  // Each case: csrrw swaps new_mstatus into mstatus (old value lands in %0),
+  // the load targets x0 so its result is dropped, and csrw writes the old
+  // mstatus value back.
+  switch (size) {
+    case 1: asm volatile ("csrrw %0, mstatus, %0; lb x0, (%1); csrw mstatus, %0" : "+&r" (new_mstatus) : "r" (addr)); break;
+    case 2: asm volatile ("csrrw %0, mstatus, %0; lh x0, (%1); csrw mstatus, %0" : "+&r" (new_mstatus) : "r" (addr)); break;
+    case 4: asm volatile ("csrrw %0, mstatus, %0; lw x0, (%1); csrw mstatus, %0" : "+&r" (new_mstatus) : "r" (addr)); break;
+#if __riscv_xlen >= 64
+    case 8: asm volatile ("csrrw %0, mstatus, %0; ld x0, (%1); csrw mstatus, %0" : "+&r" (new_mstatus) : "r" (addr)); break;
+#endif
+    // callers only pass the sizes handled above
+    default: __builtin_unreachable();
+  }
+}
+
+/* Try every power-of-two access width from 1 up to sizeof(uintptr_t) at
+ * `addr`, skipping widths for which `addr` is not naturally aligned.
+ * trap_expected is set from pmp_ok() before each access; if it is still set
+ * once the access has returned, the test exits with code 2 (presumably the
+ * trap handler clears it -- the handler is not visible here). */
+INLINE void test_all_sizes(pmpcfg_t p, uintptr_t addr)
+{
+  size_t width = 1;
+
+  while (width <= sizeof(uintptr_t)) {
+    const int aligned = (addr & (width - 1)) == 0;
+
+    if (aligned) {
+      trap_expected = !pmp_ok(p, addr, width);
+      test_one(addr, width);
+      if (trap_expected)
+        exit(2);
+    }
+
+    width *= 2;
+  }
+}
+
+/* Run test_all_sizes() at every granule-spaced address in
+ * [base, base + range). */
+INLINE void test_range_once(pmpcfg_t p, uintptr_t base, uintptr_t range)
+{
+  const uintptr_t end = base + range;
+  uintptr_t cursor = base;
+
+  while (cursor < end) {
+    test_all_sizes(p, cursor);
+    cursor += granule;
+  }
+}
+
+/* Program the hardware from `p` and return `p` unchanged.
+ * Bits 15:8 of pmpcfg0 are cleared first, then pmpaddr0/pmpaddr1 are
+ * written, then bits 15:8 are set from p.cfg; every other pmpcfg0 bit keeps
+ * the value it had on entry.  An sfence.vma follows the final write. */
+INLINE pmpcfg_t set_pmp(pmpcfg_t p)
+{
+  const uintptr_t others = read_csr(pmpcfg0) & ~0xff00;
+  const uintptr_t entry = (p.cfg << 8) & 0xff00;
+
+  write_csr(pmpcfg0, others);
+  write_csr(pmpaddr0, p.a0);
+  write_csr(pmpaddr1, p.a1);
+  write_csr(pmpcfg0, entry | others);
+  asm volatile ("sfence.vma" ::: "memory");
+
+  return p;
+}
+
+/* Install a readable TOR configuration whose two address values are
+ * `base` and `base + range`, each shifted right by PMP_SHIFT. */
+INLINE pmpcfg_t set_pmp_range(uintptr_t base, uintptr_t range)
+{
+  pmpcfg_t p = {
+    .cfg = PMP_TOR | PMP_R,
+    .a0 = base >> PMP_SHIFT,
+    .a1 = (base + range) >> PMP_SHIFT,
+  };
+
+  return set_pmp(p);
+}
+
+/* Install a readable NAPOT configuration (NA4 when `range` does not exceed
+ * the granule).  a0 is zero; a1 is (base + range/2 - 1) >> PMP_SHIFT. */
+INLINE pmpcfg_t set_pmp_napot(uintptr_t base, uintptr_t range)
+{
+  pmpcfg_t p = {
+    .cfg = PMP_R | (range > granule ? PMP_NAPOT : PMP_NA4),
+    .a0 = 0,
+    .a1 = (base + (range/2 - 1)) >> PMP_SHIFT,
+  };
+
+  return set_pmp(p);
+}
+
+/* Test [addr, addr + range) with a TOR configuration, and additionally with
+ * a NAPOT configuration when `range` is a power of two and `addr` is aligned
+ * to it. */
+static void test_range(uintptr_t addr, uintptr_t range)
+{
+  const uintptr_t pa = va2pa(addr);
+
+  test_range_once(set_pmp_range(pa, range), addr, range);
+
+  /* Any bit of range or addr below the top bit of range rules NAPOT out. */
+  if (((range | addr) & (range - 1)) != 0)
+    return;
+
+  test_range_once(set_pmp_napot(pa, range), addr, range);
+}
+
+/* Call test_range() at `addr` for every length granule, 2*granule, ...
+ * that does not exceed `size`. */
+static void test_ranges(uintptr_t addr, uintptr_t size)
+{
+  uintptr_t len = granule;
+
+  while (len <= size) {
+    test_range(addr, len);
+    len += granule;
+  }
+}
+
+/* For every granule-spaced start inside [addr, addr + size), test every
+ * range length that still fits before addr + size. */
+static void exhaustive_test(uintptr_t addr, uintptr_t size)
+{
+  const uintptr_t limit = addr + size;
+  uintptr_t base = addr;
+
+  while (base < limit) {
+    test_ranges(base, limit - base);
+    base += granule;
+  }
+}
+
+/* Probe the PMP address granularity and store it in `granule`.
+ *
+ * pmpcfg0 is written with zero, pmpaddr0 is written with all ones and read
+ * back.  The number of zero bits below the lowest set bit of the read-back
+ * value, plus 2, becomes the log2 of the granule.
+ *
+ * Fixes relative to the previous version: pmpcfg0 is written with the
+ * integer 0 rather than the pointer constant NULL, and the all-ones value
+ * is XLEN-sized instead of a 64-bit literal, so the same source is
+ * type-correct on RV32 and RV64. */
+static void detect_granule(void)
+{
+  write_csr(pmpcfg0, 0);
+  write_csr(pmpaddr0, (uintptr_t)-1);
+  uintptr_t readback = read_csr(pmpaddr0);
+
+  int g = 2;
+  for (uintptr_t bit = 1; bit; bit <<= 1) {
+    if ((readback & bit) != 0)
+      break;
+    g++;
+  }
+
+  /* NOTE(review): if no bit reads back as set, g reaches 2 + XLEN and the
+   * shift below is out of range; behaviour for that case is unchanged. */
+  granule = 1UL << g;
+}
+
+/* Detect the granule, set up the page tables, then run exhaustive tests on
+ * the first and last 32 bytes of the SCRATCH page followed by whole-page
+ * and half-page range tests.  Returns 0 if nothing exited earlier. */
+int main()
+{
+  detect_granule();
+  init_pt();
+
+  const int window = 32;
+  const uintptr_t half_page = RISCV_PGSIZE / 2;
+
+  /* Both ends of the page, exhaustively. */
+  exhaustive_test(SCRATCH, window);
+  exhaustive_test(SCRATCH + RISCV_PGSIZE - window, window);
+
+  /* Whole page, then each half. */
+  test_range(SCRATCH, RISCV_PGSIZE);
+  test_range(SCRATCH, half_page);
+  test_range(SCRATCH + half_page, half_page);
+
+  return 0;
+}
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/qsort/dataset1.h
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/qsort/dataset1.h
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/qsort/qsort_gendata.pl
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/qsort/qsort_gendata.pl
+#!/usr/bin/perl -w
+#==========================================================================
+# qsort_gendata.pl
+#
+# Author : Christopher Batten (cbatten@mit.edu)
+# Date   : April 29, 2005
+#
+(our $usageMsg = <<'ENDMSG') =~ s/^\#//gm;
+#
+# Simple script which creates an input data set and the reference data
+# for the qsort benchmark.
+#
+ENDMSG
+
+use strict "vars";
+use warnings;
+no  warnings("once");
+use Getopt::Long;
+
+#--------------------------------------------------------------------------
+# Command line processing
+#--------------------------------------------------------------------------
+
+our %opts;
+
+# Print the option summary followed by $usageMsg, then exit.
+sub usage()
+{
+
+  my @lines = (
+    "\n",
+    " Usage: qsort_gendata.pl [options] \n",
+    "\n",
+    " Options:\n",
+    "  --help  print this message\n",
+    "  --size  size of input data [250]\n",
+    "  --seed  random seed [1]\n",
+    "$usageMsg",
+  );
+  print $_ foreach @lines;
+
+  exit();
+}
+
+# Fill %opts with defaults (help=0, size=250, seed=1), let GetOptions
+# override them from the command line, and show the usage text when parsing
+# fails or --help was requested.
+sub processCommandLine()
+{
+
+  @opts{ "help", "size", "seed" } = ( 0, 250, 1 );
+  Getopt::Long::GetOptions( \%opts, 'help|?', 'size:i', 'seed:i' ) or usage();
+  usage() if $opts{"help"};
+
+}
+
+#--------------------------------------------------------------------------
+# Helper Functions
+#--------------------------------------------------------------------------
+
+# Print a C array definition "type NAME[DATA_SIZE] = { ... };" for the
+# values in the referenced array, 20 values per row.  Every value is
+# formatted with %3d and followed by ", " unless it is the very last one.
+#
+#   $_[0]  name of the C array
+#   $_[1]  reference to the array of values
+sub printArray
+{
+  my ( $arrayName, $arrayRef ) = @_;
+
+  my $numCols   = 20;
+  my $lastIndex = scalar(@{$arrayRef}) - 1;
+
+  print "type ".$arrayName."[DATA_SIZE] = \n";
+  print "{\n";
+
+  # An empty array still yields a single indented, otherwise empty row.
+  if ( $lastIndex < 0 ) {
+    print "  ";
+    print "\n";
+  }
+
+  for ( my $rowStart = 0; $rowStart <= $lastIndex; $rowStart += $numCols ) {
+    my $rowEnd = $rowStart + $numCols - 1;
+    $rowEnd = $lastIndex if ( $rowEnd > $lastIndex );
+
+    print "  ";
+    foreach my $index ( $rowStart .. $rowEnd ) {
+      print sprintf("%3d",$arrayRef->[$index]);
+      print ", " unless ( $index == $lastIndex );
+    }
+    print "\n";
+  }
+
+  print  "};\n\n";
+}
+
+#--------------------------------------------------------------------------
+# Main
+#--------------------------------------------------------------------------
+
+# Seed the generator, draw --size random integers below 2^31-1, and print
+# the DATA_SIZE define followed by the unsorted array (input_data) and its
+# numerically sorted copy (verify_data).
+sub main()
+{
+
+  processCommandLine();
+  srand($opts{"seed"});
+
+  my @values = map { int(rand((1<<31)-1)) } ( 1 .. $opts{"size"} );
+  my @sorted = sort { $a <=> $b } @values;
+
+  print "\n\#define DATA_SIZE ".$opts{"size"}." \n\n";
+  printArray( "input_data", \@values );
+  printArray( "verify_data", \@sorted );
+
+}
+
+main();
+
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/qsort/qsort_main.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/qsort/qsort_main.c
+// See LICENSE for license details.
+
+//**************************************************************************
+// Quicksort benchmark
+//--------------------------------------------------------------------------
+//
+// This benchmark uses quicksort to sort an array of integers. The
+// implementation is largely adapted from Numerical Recipes for C. The
+// input data (and reference data) should be generated using the
+// qsort_gendata.pl perl script and dumped to a file named
+// dataset1.h.
+
+#include "util.h"
+#include <string.h>
+#include <assert.h>
+
+// The INSERTION_THRESHOLD is the size of the subarray when the
+// algorithm switches to using an insertion sort instead of
+// quick sort.
+
+#define INSERTION_THRESHOLD 10
+
+// NSTACK is the required auxiliary storage.
+// It must be at least 2*lg(DATA_SIZE)
+
+#define NSTACK 50
+
+//--------------------------------------------------------------------------
+// Input/Reference Data
+
+#define type int
+#include "dataset1.h"
+
+// Swap macro for swapping two values.
+
+#define SWAP(a,b) do { typeof(a) temp=(a);(a)=(b);(b)=temp; } while (0)
+#define SWAP_IF_GREATER(a, b) do { if ((a) > (b)) SWAP(a, b); } while (0)
+
+//--------------------------------------------------------------------------
+// Quicksort function
+
+// Sort arr[0..n-1] into ascending order in place by straight insertion:
+// each element is lifted out, larger predecessors are moved up one slot,
+// and the element is dropped into the gap.
+static void insertion_sort(size_t n, type arr[])
+{
+  type* const end = arr + n;
+
+  for (type* cur = arr + 1; cur < end; cur++)
+  {
+    const type value = *cur;
+    type* slot = cur;
+
+    // Stop at the front of the array or at the first predecessor <= value.
+    for ( ; slot != arr && value < slot[-1]; --slot)
+      *slot = slot[-1];
+
+    *slot = value;
+  }
+}
+
+// Sort arr[0..n-1] into ascending order in place: each position is compared
+// with every later position and the pair is swapped when out of order.
+static void selection_sort(size_t n, type arr[])
+{
+  for (size_t lo = 0; lo + 1 < n; lo++)
+  {
+    for (size_t hi = lo + 1; hi < n; hi++)
+    {
+      SWAP_IF_GREATER(arr[lo], arr[hi]);
+    }
+  }
+}
+
+// Sort arr[0..n-1] into ascending order in place.
+//
+// Iterative quicksort: pending subarrays are kept on an explicit stack of
+// pointers instead of recursing, and any subarray shorter than
+// INSERTION_THRESHOLD is finished with insertion_sort().
+//
+// Pointer convention: the current subarray is [l-1, ir), i.e. `l` points one
+// past its first element and `ir` one past its last, which is why the code
+// indexes l[-1] and ir[-1].
+//
+// NOTE: pushes advance stackp without comparing it against NSTACK.
+void sort(size_t n, type arr[])
+{
+  type* ir = arr+n;
+  type* l = arr+1;
+  type* stack[NSTACK];
+  type** stackp = stack;
+
+  for (;;)
+  {
+    // Insertion sort when subarray small enough.
+    if ( ir-l < INSERTION_THRESHOLD )
+    {
+      insertion_sort(ir - l + 1, l - 1);
+
+      // Nothing left on the stack: the whole array is sorted.
+      if ( stackp == stack ) break;
+
+      // Pop stack and begin a new round of partitioning.
+      ir = *stackp--;
+      l = *stackp--;
+    }
+    else
+    {
+      // Choose median of left, center, and right elements as
+      // partitioning element a. Also rearrange so that a[l-1] <= a[l] <= a[ir-1].
+      SWAP(arr[((l-arr) + (ir-arr))/2-1], l[0]);
+      SWAP_IF_GREATER(l[-1], ir[-1]);
+      SWAP_IF_GREATER(l[0], ir[-1]);
+      SWAP_IF_GREATER(l[-1], l[0]);
+
+      // Initialize pointers for partitioning.
+      type* i = l+1;
+      type* j = ir;
+
+      // Partitioning element.
+      type a = l[0];
+
+      for (;;) {                    // Beginning of innermost loop.
+        while (*i++ < a);           // Scan up to find element > a.
+        while (*(j-- - 2) > a);     // Scan down to find element < a.
+        if (j < i) break;           // Pointers crossed. Partitioning complete.
+        SWAP(i[-1], j[-1]);         // Exchange elements.
+      }                             // End of innermost loop.
+
+      // Insert partitioning element.
+      l[0] = j[-1];
+      j[-1] = a;
+      // Reserve two stack slots for the bounds pushed below.
+      stackp += 2;
+
+      // Push pointers to larger subarray on stack,
+      // process smaller subarray immediately.
+
+      if ( ir-i+1 >= j-l )
+      {
+        // Push (i, ir); continue with the part ending at j-1.
+        stackp[0] = ir;
+        stackp[-1] = i;
+        ir = j-1;
+      }
+      else
+      {
+        // Push (l, j-1); continue with the part starting at i.
+        stackp[0] = j-1;
+        stackp[-1] = l;
+        l = i;
+      }
+    }
+  }
+}
+
+//--------------------------------------------------------------------------
+// Main
+
+// Benchmark entry point: sort input_data between setStats(1) and
+// setStats(0), then return the result of comparing it with verify_data.
+int main( int argc, char* argv[] )
+{
+#if PREALLOCATE
+  // Warm-up run on verify_data so everything is preallocated in the caches;
+  // a non-zero verify() result ends the run with status 1.
+  sort(DATA_SIZE, verify_data);
+  if (verify(DATA_SIZE, input_data, input_data) != 0)
+    return 1;
+#endif
+
+  // The sort itself, bracketed by setStats().
+  setStats(1);
+  sort( DATA_SIZE, input_data );
+  setStats(0);
+
+  // The verification result becomes the exit status.
+  const int status = verify( DATA_SIZE, input_data, verify_data );
+  return status;
+}
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/readme.txt
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/readme.txt
+*************************************************************************
+Benchmarks for RISCV Processor
+-------------------------------------------------------------------------
+
+The benchmarks make use of the RISCV C compiler toolchain. You will need
+to include a bmark.mk makefile fragment in each benchmark directory. The
+fragment should include the object files and a rule to actually link
+these object files into an executable. There are a couple of important
+points to make about the toolchain.
+
+ + The toolchain sets the stack pointer to memory address 0x20000 so your
+   main memory _must_ be larger than 0x20000 bytes or else the stack will
+   get wrapped around and overwrite program data.
+
+ + The stack grows down from 0x20000 and your program is loaded at 0x1000.
+   If you have a very large program and have lots of very big arrays
+   declared on the stack your stack could overwrite your program. Be aware.
+
+ + You cannot use standard C library functions (like memcpy or strcat).
+   You cannot use system calls and thus cannot use printf.
+
+ + You cannot access the simulated command line - i.e., you cannot use
+   argc and argv within main.
+
+ + You may have to increase the timeout check in your test harness to
+   allow longer programs to run (you can do this from the command line
+   option +max-cycles with the standard test harness)
+
+ + The compiler loads the program at 0x1000. It does not insert exception
+   setup code. So if you are careful with what C you use it will only
+   generate code in the riscv lab1 subset. If you use multiplies, shorts,
+   and chars it could generate mul, lh, and lb instructions. Be aware.
+
+ + You can write assembly in C - you need to do this to write tohost to 1
+   to indicate when the benchmark is done. Look at the example
+   benchmarks to see how this is done. You can find more information
+   about how to write assembly in C here:
+   http://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html
+
+ + Debugging C compiled code on the RISCV processor is a real pain. It is
+   hard to associate the assembly with the C code and there is no
+   debugger. So if you encounter a bug in your processor when running a C
+   benchmark you can try to debug it, but you might have better luck
+   adding more assembly tests to your test suite.
+
+ + To avoid having the compiler try to use a global pointer (i.e., using
+   register 28 to point to a space where small global variables are
+   stored) you need to use the -G 0 command line option.
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/rsort/dataset1.h
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/rsort/dataset1.h
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/rsort/rsort.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/rsort/rsort.c
+// See LICENSE for license details.
+
+//**************************************************************************
+// Radix sort benchmark
+//--------------------------------------------------------------------------
+//
+// This benchmark uses a radix sort to sort an array of unsigned integers. The
+// implementation is largely adapted from Numerical Recipes for C. The
+// input data (and reference data) should be generated using the
+// qsort_gendata.pl perl script and dumped to a file named
+// dataset1.h
+
+#include "util.h"
+#include <string.h>
+#include <limits.h>
+
+//--------------------------------------------------------------------------
+// Input/Reference Data
+
+#define type unsigned int
+#include "dataset1.h"
+
+#define LOG_BASE 8
+#define BASE (1 << LOG_BASE)
+
+#if 0
+# define fetch_add(ptr, inc) __sync_fetch_and_add(ptr, inc)
+#else
+# define fetch_add(ptr, inc) ((*(ptr) += (inc)) - (inc))
+#endif
+
+// Sort the n elements of arrIn into ascending order, using scratchIn (also
+// n elements) as working storage.  The sorted result is left in arrIn.
+//
+// Least-significant-digit radix sort with LOG_BASE-bit digits.  Each pass
+//   1. counts how many keys fall in each of the BASE buckets,
+//   2. turns the counts into running totals, so bucket[d] holds the number
+//      of keys whose digit is <= d,
+//   3. walks the input from back to front and drops each key just below its
+//      bucket's total, decrementing the total,
+// and then swaps the roles of the two buffers.
+//
+// Fixes relative to the previous version:
+//   - the final copy went from the stale buffer into the result buffer
+//     (memcpy(arr, scratch, ...)), destroying the result and leaving arrIn
+//     unsorted whenever the pass count is odd; it now copies into arrIn;
+//   - the unrolled counting loop is skipped for n < 4, where &arr[n-3]
+//     would be computed from a wrapped-around size_t index.
+void sort(size_t n, type* arrIn, type* scratchIn)
+{
+  size_t log_exp = 0;
+  size_t buckets[BASE];
+  size_t *bucket = buckets;
+  // Empty asm taking bucket as a read/write register operand; presumably it
+  // hides where the pointer came from from the optimizer -- TODO confirm.
+  asm("":"+r"(bucket));
+  type *arr = arrIn, *scratch = scratchIn, *p;
+  size_t *b;
+
+  while (log_exp < CHAR_BIT * sizeof(type))
+  {
+    // Step 1: histogram of the current digit, four keys per iteration.
+    for (b = bucket; b < bucket + BASE; b++)
+      *b = 0;
+
+    for (p = arr; n >= 4 && p < &arr[n-3]; p += 4)
+    {
+      type a0 = p[0];
+      type a1 = p[1];
+      type a2 = p[2];
+      type a3 = p[3];
+      fetch_add(&bucket[(a0 >> log_exp) % BASE], 1);
+      fetch_add(&bucket[(a1 >> log_exp) % BASE], 1);
+      fetch_add(&bucket[(a2 >> log_exp) % BASE], 1);
+      fetch_add(&bucket[(a3 >> log_exp) % BASE], 1);
+    }
+    // Leftover keys that did not fill a group of four.
+    for ( ; p < &arr[n]; p++)
+      bucket[(*p >> log_exp) % BASE]++;
+
+    // Step 2: running totals.  fetch_add returns the old count while adding
+    // the total so far, so prev always holds the sum up to this bucket.
+    size_t prev = bucket[0];
+    prev += fetch_add(&bucket[1], prev);
+    for (b = &bucket[2]; b < bucket + BASE; b += 2)
+    {
+      prev += fetch_add(&b[0], prev);
+      prev += fetch_add(&b[1], prev);
+    }
+    // The loop above handles buckets in pairs.
+    static_assert(BASE % 2 == 0);
+
+    // Step 3: scatter from the back, four keys per iteration.  Each
+    // fetch_add(pb, -1) yields the bucket's total before the decrement, and
+    // the key is stored one slot below that position.
+    for (p = &arr[n-1]; p >= &arr[3]; p -= 4)
+    {
+      type a0 = p[-0];
+      type a1 = p[-1];
+      type a2 = p[-2];
+      type a3 = p[-3];
+      size_t* pb0 = &bucket[(a0 >> log_exp) % BASE];
+      size_t* pb1 = &bucket[(a1 >> log_exp) % BASE];
+      size_t* pb2 = &bucket[(a2 >> log_exp) % BASE];
+      size_t* pb3 = &bucket[(a3 >> log_exp) % BASE];
+      type* s0 = scratch + fetch_add(pb0, -1);
+      type* s1 = scratch + fetch_add(pb1, -1);
+      type* s2 = scratch + fetch_add(pb2, -1);
+      type* s3 = scratch + fetch_add(pb3, -1);
+      s0[-1] = a0;
+      s1[-1] = a1;
+      s2[-1] = a2;
+      s3[-1] = a3;
+    }
+    // Leftover keys at the front of the array.
+    for ( ; p >= &arr[0]; p--)
+      scratch[--bucket[(*p >> log_exp) % BASE]] = *p;
+
+    // The buffer just written becomes the input of the next pass.
+    type* tmp = arr;
+    arr = scratch;
+    scratch = tmp;
+
+    log_exp += LOG_BASE;
+  }
+
+  // arr now points at the buffer holding the final result.  If that is the
+  // scratch buffer, copy the result back into the caller's array.
+  if (arr != arrIn)
+    memcpy(arrIn, arr, n*sizeof(type));
+}
+
+//--------------------------------------------------------------------------
+// Main
+
+// Benchmark entry point: sort input_data between setStats(1) and
+// setStats(0), then return the result of comparing it with verify_data.
+int main( int argc, char* argv[] )
+{
+  // Working buffer handed to sort(); static, so it is not on the stack.
+  static type scratch[DATA_SIZE];
+
+#if PREALLOCATE
+  // Warm-up run on verify_data so everything is preallocated in the caches;
+  // a non-zero verify() result ends the run with status 1.
+  sort(DATA_SIZE, verify_data, scratch);
+  if (verify(DATA_SIZE, input_data, input_data) != 0)
+    return 1;
+#endif
+
+  // The sort itself, bracketed by setStats().
+  setStats(1);
+  sort(DATA_SIZE, input_data, scratch);
+  setStats(0);
+
+  // The verification result becomes the exit status.
+  const int status = verify( DATA_SIZE, input_data, verify_data );
+  return status;
+}
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/spmv/dataset1.h
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/spmv/dataset1.h
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/spmv/spmv_gendata.scala
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/spmv/spmv_gendata.scala
+#!/usr/bin/env scala
+!#
+
+// Arguments: number of rows, number of columns, approximate non-zero count.
+val m = args(0).toInt
+val n = args(1).toInt
+val approx_nnz = args(2).toInt
+
+// Probability with which each individual matrix entry is made non-zero.
+val pnnz = approx_nnz.toDouble/(m*n)
+// Column index of every non-zero entry, row after row.
+val idx = collection.mutable.ArrayBuffer[Int]()
+// Running length of idx after each row, starting with 0.
+val p = collection.mutable.ArrayBuffer(0)
+
+(0 until m).foreach { _ =>
+  idx ++= (0 until n).filter(_ => util.Random.nextDouble < pnnz)
+  p += idx.length
+}
+
+val nnz = idx.length
+// Random values in [0, 1000): one per column (v) and one per non-zero (d).
+val v = Array.fill(n)(util.Random.nextInt(1000))
+val d = Array.fill(nnz)(util.Random.nextInt(1000))
+
+// Print `data` as a C array definition "const <t> <name>[<length>] = {...};"
+// with one element per line.
+//
+// mkString replaces the previous map/reduceLeft chain: the output is the
+// same for non-empty data, and empty data (for example when no non-zero
+// entries were generated) no longer throws from reduceLeft.
+def printVec(t: String, name: String, data: Seq[Int]) = {
+  println("const " + t + " " + name + "[" + data.length + "] = {")
+  println("  "+data.mkString(",\n  "))
+  println("};")
+}
+
+// Reference sparse matrix-vector product.  For each row i, sums
+// d(k)*v(idx(k)) over k in [p(i), p(i+1)) and returns the sums in row order.
+def spmv(p: Seq[Int], d: Seq[Int], idx: Seq[Int], v: Seq[Int]) = {
+  val y = collection.mutable.ArrayBuffer[Int]()
+  for (row <- 0 until p.length-1)
+    y += (p(row) until p(row+1)).foldLeft(0)((acc, k) => acc + d(k)*v(idx(k)))
+  y
+}
+
+// Emit the generated data set on stdout as C source: the dimensions as
+// #defines, then the matrix values, column indices, input vector, row
+// offsets, and the expected product computed by spmv().
+println("#define R " + m)
+println("#define C " + n)
+println("#define NNZ " + nnz)
+printVec("double", "val", d)
+printVec("int", "idx", idx)
+printVec("double", "x", v)
+printVec("int", "ptr", p)
+printVec("double", "verify_data", spmv(p, d, idx, v))
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/spmv/spmv_main.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/spmv/spmv_main.c
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/towers/towers_main.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/towers/towers_main.c
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/vvadd/dataset1-large.h
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/vvadd/dataset1-large.h
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/vvadd/dataset1.h
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/vvadd/dataset1.h
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/vvadd/vvadd_gendata.pl
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/vvadd/vvadd_gendata.pl
--- a/examples/hdl4se_riscv/riscv-tests/benchmarks/vvadd/vvadd_main.c
+++ b/examples/hdl4se_riscv/riscv-tests/benchmarks/vvadd/vvadd_main.c
--- a/examples/hdl4se_riscv/riscv-tests/configure
+++ b/examples/hdl4se_riscv/riscv-tests/configure
--- a/examples/hdl4se_riscv/riscv-tests/configure.ac
+++ b/examples/hdl4se_riscv/riscv-tests/configure.ac
--- a/examples/hdl4se_riscv/riscv-tests/isa/.gitignore
+++ b/examples/hdl4se_riscv/riscv-tests/isa/.gitignore
+rv*-*
--- a/examples/hdl4se_riscv/riscv-tests/isa/Makefile
+++ b/examples/hdl4se_riscv/riscv-tests/isa/Makefile
--- a/examples/hdl4se_riscv/riscv-tests/isa/macros/scalar/test_macros.h
+++ b/examples/hdl4se_riscv/riscv-tests/isa/macros/scalar/test_macros.h
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/breakpoint.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/breakpoint.S
+# See LICENSE for license details.
+
+# Wrapper only: this file contains no test code of its own. It redefines a
+# few macros and then includes the rv64mi breakpoint source.
+
+#include "riscv_test.h"
+# RVTEST_RV64M is redefined to expand to RVTEST_RV32M for the include below.
+#undef RVTEST_RV64M
+#define RVTEST_RV64M RVTEST_RV32M
+#define __MACHINE_MODE
+
+#include "../rv64mi/breakpoint.S"
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/csr.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/csr.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/illegal.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/illegal.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/ma_addr.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/ma_addr.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/ma_fetch.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/ma_fetch.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/mcsr.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/mcsr.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/sbreak.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/sbreak.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/scall.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/scall.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/shamt.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32mi/shamt.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32si/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32si/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32si/csr.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32si/csr.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32si/dirty.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32si/dirty.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32si/ma_fetch.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32si/ma_fetch.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32si/sbreak.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32si/sbreak.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32si/scall.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32si/scall.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32si/wfi.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32si/wfi.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amoadd_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amoadd_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amoand_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amoand_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amomax_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amomax_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amomaxu_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amomaxu_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amomin_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amomin_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amominu_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amominu_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amoor_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amoor_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amoswap_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amoswap_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amoxor_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/amoxor_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/lrsc.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ua/lrsc.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uc/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uc/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uc/rvc.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uc/rvc.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/fadd.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/fadd.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/fclass.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/fclass.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/fcmp.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/fcmp.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/fcvt.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/fcvt.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/fcvt_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/fcvt_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/fdiv.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/fdiv.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/fmadd.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/fmadd.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/fmin.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/fmin.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/ldst.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/ldst.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/move.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/move.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/recoding.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ud/recoding.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/fadd.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/fadd.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/fclass.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/fclass.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/fcmp.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/fcmp.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/fcvt.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/fcvt.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/fcvt_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/fcvt_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/fdiv.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/fdiv.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/fmadd.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/fmadd.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/fmin.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/fmin.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/ldst.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/ldst.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/move.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/move.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/recoding.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uf/recoding.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/add.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/add.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/addi.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/addi.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/and.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/and.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/andi.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/andi.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/auipc.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/auipc.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/beq.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/beq.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/bge.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/bge.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/bgeu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/bgeu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/blt.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/blt.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/bltu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/bltu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/bne.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/bne.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/fence_i.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/fence_i.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/jal.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/jal.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/jalr.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/jalr.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/lb.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/lb.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/lbu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/lbu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/lh.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/lh.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/lhu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/lhu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/lui.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/lui.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/lw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/lw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/or.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/or.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/ori.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/ori.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/sb.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/sb.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/sh.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/sh.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/simple.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/simple.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/sll.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/sll.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/slli.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/slli.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/slt.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/slt.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/slti.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/slti.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/sltiu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/sltiu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/sltu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/sltu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/sra.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/sra.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/srai.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/srai.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/srl.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/srl.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/srli.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/srli.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/sub.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/sub.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/sw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/sw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/xor.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/xor.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/xori.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32ui/xori.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32um/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32um/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32um/div.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32um/div.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32um/divu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32um/divu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32um/mul.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32um/mul.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32um/mulh.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32um/mulh.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32um/mulhsu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32um/mulhsu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32um/mulhu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32um/mulhu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32um/rem.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32um/rem.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32um/remu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32um/remu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/fadd.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/fadd.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/fclass.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/fclass.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/fcmp.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/fcmp.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/fcvt.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/fcvt.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/fcvt_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/fcvt_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/fdiv.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/fdiv.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/fmadd.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/fmadd.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/fmin.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/fmin.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/ldst.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/ldst.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/move.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/move.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/recoding.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv32uzfh/recoding.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/access.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/access.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/breakpoint.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/breakpoint.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/csr.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/csr.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/illegal.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/illegal.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/ma_addr.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/ma_addr.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/ma_fetch.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/ma_fetch.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/mcsr.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/mcsr.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/sbreak.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/sbreak.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/scall.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64mi/scall.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64si/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64si/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64si/csr.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64si/csr.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64si/dirty.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64si/dirty.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64si/icache-alias.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64si/icache-alias.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64si/ma_fetch.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64si/ma_fetch.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64si/sbreak.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64si/sbreak.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64si/scall.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64si/scall.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64si/wfi.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64si/wfi.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ssvnapot/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ssvnapot/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ssvnapot/napot.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ssvnapot/napot.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoadd_d.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoadd_d.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoadd_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoadd_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoand_d.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoand_d.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoand_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoand_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amomax_d.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amomax_d.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amomax_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amomax_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amomaxu_d.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amomaxu_d.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amomaxu_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amomaxu_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amomin_d.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amomin_d.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amomin_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amomin_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amominu_d.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amominu_d.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amominu_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amominu_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoor_d.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoor_d.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoor_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoor_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoswap_d.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoswap_d.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoswap_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoswap_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoxor_d.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoxor_d.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoxor_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/amoxor_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/lrsc.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ua/lrsc.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uc/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uc/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uc/rvc.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uc/rvc.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/fadd.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/fadd.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/fclass.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/fclass.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/fcmp.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/fcmp.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/fcvt.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/fcvt.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/fcvt_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/fcvt_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/fdiv.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/fdiv.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/fmadd.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/fmadd.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/fmin.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/fmin.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/ldst.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/ldst.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/move.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/move.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/recoding.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/recoding.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/structural.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ud/structural.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/fadd.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/fadd.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/fclass.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/fclass.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/fcmp.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/fcmp.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/fcvt.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/fcvt.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/fcvt_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/fcvt_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/fdiv.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/fdiv.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/fmadd.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/fmadd.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/fmin.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/fmin.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/ldst.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/ldst.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/move.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/move.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/recoding.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uf/recoding.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/add.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/add.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/addi.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/addi.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/addiw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/addiw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/addw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/addw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/and.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/and.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/andi.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/andi.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/auipc.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/auipc.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/beq.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/beq.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/bge.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/bge.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/bgeu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/bgeu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/blt.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/blt.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/bltu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/bltu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/bne.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/bne.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/fence_i.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/fence_i.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/jal.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/jal.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/jalr.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/jalr.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/lb.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/lb.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/lbu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/lbu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/ld.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/ld.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/lh.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/lh.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/lhu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/lhu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/lui.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/lui.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/lw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/lw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/lwu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/lwu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/or.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/or.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/ori.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/ori.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sb.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sb.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sd.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sd.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sh.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sh.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/simple.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/simple.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sll.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sll.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/slli.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/slli.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/slliw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/slliw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sllw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sllw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/slt.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/slt.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/slti.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/slti.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sltiu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sltiu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sltu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sltu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sra.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sra.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/srai.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/srai.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sraiw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sraiw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sraw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sraw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/srl.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/srl.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/srli.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/srli.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/srliw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/srliw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/srlw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/srlw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sub.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sub.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/subw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/subw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/sw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/xor.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/xor.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/xori.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64ui/xori.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64um/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64um/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64um/div.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64um/div.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64um/divu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64um/divu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64um/divuw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64um/divuw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64um/divw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64um/divw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64um/mul.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64um/mul.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64um/mulh.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64um/mulh.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64um/mulhsu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64um/mulhsu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64um/mulhu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64um/mulhu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64um/mulw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64um/mulw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64um/rem.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64um/rem.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64um/remu.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64um/remu.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64um/remuw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64um/remuw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64um/remw.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64um/remw.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/Makefrag
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/Makefrag
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/fadd.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/fadd.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/fclass.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/fclass.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/fcmp.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/fcmp.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/fcvt.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/fcvt.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/fcvt_w.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/fcvt_w.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/fdiv.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/fdiv.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/fmadd.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/fmadd.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/fmin.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/fmin.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/ldst.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/ldst.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/move.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/move.S
--- a/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/recoding.S
+++ b/examples/hdl4se_riscv/riscv-tests/isa/rv64uzfh/recoding.S
--- a/examples/hdl4se_riscv/riscv-tests/mt/.gitignore
+++ b/examples/hdl4se_riscv/riscv-tests/mt/.gitignore
--- a/examples/hdl4se_riscv/riscv-tests/mt/Makefile
+++ b/examples/hdl4se_riscv/riscv-tests/mt/Makefile
--- a/examples/hdl4se_riscv/riscv-tests/mt/ad_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/ad_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/ae_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/ae_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/af_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/af_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/ag_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/ag_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/ai_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/ai_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/ak_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/ak_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/al_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/al_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/am_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/am_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/an_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/an_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/ap_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/ap_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/aq_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/aq_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/ar_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/ar_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/at_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/at_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/av_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/av_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/ay_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/ay_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/az_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/az_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/bb_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/bb_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/bc_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/bc_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/bf_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/bf_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/bh_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/bh_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/bj_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/bj_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/bk_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/bk_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/bm_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/bm_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/bo_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/bo_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/br_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/br_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/bs_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/bs_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/ce_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/ce_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/cf_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/cf_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/cg_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/cg_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/ci_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/ci_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/ck_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/ck_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/cl_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/cl_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/cm_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/cm_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/cs_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/cs_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/cv_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/cv_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/cy_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/cy_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/dc_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/dc_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/df_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/df_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/dm_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/dm_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/do_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/do_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/dr_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/dr_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/ds_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/ds_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/du_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/du_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/dv_matmul.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/dv_matmul.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/vvadd0.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/vvadd0.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/vvadd1.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/vvadd1.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/vvadd2.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/vvadd2.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/vvadd3.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/vvadd3.c
--- a/examples/hdl4se_riscv/riscv-tests/mt/vvadd4.c
+++ b/examples/hdl4se_riscv/riscv-tests/mt/vvadd4.c
--- a/examples/hdl4se_riscv/test_code/console.c
+++ b/examples/hdl4se_riscv/test_code/console.c
--- a/examples/hdl4se_riscv/test_code/console.s
+++ b/examples/hdl4se_riscv/test_code/console.s
--- a/examples/hdl4se_riscv/test_code/console.sh
+++ b/examples/hdl4se_riscv/test_code/console.sh
--- a/examples/hdl4se_riscv/test_code/csr.S
+++ b/examples/hdl4se_riscv/test_code/csr.S
--- a/examples/hdl4se_riscv/test_code/test.cod
+++ b/examples/hdl4se_riscv/test_code/test.cod
--- a/examples/hdl4se_riscv/test_code/test.elf
+++ b/examples/hdl4se_riscv/test_code/test.elf
--- a/examples/hdl4se_riscv/test_code/test.hex
+++ b/examples/hdl4se_riscv/test_code/test.hex
--- a/examples/hdl4se_riscv/test_code/test.info
+++ b/examples/hdl4se_riscv/test_code/test.info
--- a/examples/hdl4se_riscv/test_code/test.mif
+++ b/examples/hdl4se_riscv/test_code/test.mif
--- a/examples/hdl4se_riscv/test_code/test.txt
+++ b/examples/hdl4se_riscv/test_code/test.txt