|
Message
From: cvs at opencores.org<cvs@o...>
Date: Sat Apr 14 20:38:11 CEST 2007
Subject: [cvs-checkins] MODIFIED: jop ...
Date: 00/07/04 14:20:38
Modified: jop/vhdl/memory mem_sc.vhd
Log:
Hardware implementation of iaload and iastore
Revision Changes Path
1.7 jop/vhdl/memory/mem_sc.vhd
http://www.opencores.org/cvsweb.shtml/jop/vhdl/memory/mem_sc.vhd.diff?r1=1.6&r2=1.7
(In the diff below, changes in quantity of whitespace are not shown.)
Index: mem_sc.vhd
===================================================================
RCS file: /cvsroot/martin/jop/vhdl/memory/mem_sc.vhd,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -b -r1.6 -r1.7
--- mem_sc.vhd 13 Apr 2007 17:17:22 -0000 1.6
+++ mem_sc.vhd 14 Apr 2007 18:38:11 -0000 1.7
@@ -13,6 +13,7 @@
-- 2006-06-15 removed unnecessary state in BC load
-- len decrement in bc_rn and exit from bc_wr
-- 2007-04-13 Changed memory connection to records
+-- 2007-04-14 xaload and xastore in hardware
--
Library IEEE;
@@ -31,7 +32,13 @@
clk, reset : in std_logic;
- din : in std_logic_vector(31 downto 0);
+ ain : in std_logic_vector(31 downto 0); -- TOS
+ bin : in std_logic_vector(31 downto 0); -- NOS
+
+-- exceptions
+
+ np_exc : out std_logic;
+ ab_exc : out std_logic;
-- extension connection
mem_in : in mem_in_type;
@@ -103,16 +110,41 @@
--
type state_type is (
idl, rd1, wr1,
- bc_cc, bc_r1, bc_w, bc_rn, bc_wr, bc_wl
+ bc_cc, bc_r1, bc_w, bc_rn, bc_wr, bc_wl,
+ iald0, iald1, iald2, iald3, iald4,
+ iasrd, ialrb,
+ iast0, iaswb, iasrb, iasst, iasw,
+ npexc, abexc, excw
);
signal state : state_type;
signal next_state : state_type;
- signal mem_wr_addr : std_logic_vector(MEM_ADDR_SIZE-1 downto 0);
+ -- length should be 'real' RAM size and not RAM + Flash + NAND
+ -- should also be considered in the cacheable range
+
+ -- addr_reg used to 'store' the address for wr, bc load, and array access
+ signal addr_reg : unsigned(MEM_ADDR_SIZE-1 downto 0);
+
+ -- MUX for SimpCon address and write data
signal ram_addr : std_logic_vector(MEM_ADDR_SIZE-1 downto 0);
+ signal ram_wr_data : std_logic_vector(31 downto 0);
+
+ signal bcl_arr_bsy : std_logic;
+
+
+--
+-- signals for array access
+--
+ signal index : std_logic_vector(MEM_ADDR_SIZE-1 downto 0); -- array index
+ signal addr_calc : unsigned(MEM_ADDR_SIZE-1 downto 0); -- adder
+ signal value : std_logic_vector(31 downto 0); -- store value
- signal bcl_bsy : std_logic;
+ signal iastore_nxt : std_logic;
+ signal was_a_store : std_logic;
+ signal arr_wr : std_logic;
+ signal null_pointer : std_logic;
+ signal bounds_error : std_logic;
--
-- values for bytecode read/cache
@@ -121,14 +153,13 @@
-- start is address in external memory (rest of the word)
--
signal bc_len : unsigned(jpc_width-3 downto 0); -- length of method in words
- signal bc_mem_start : unsigned(17 downto 0); -- memory address of bytecode
- signal inc_mem_start : std_logic;
+ signal inc_addr_reg : std_logic;
signal dec_len : std_logic;
signal bc_wr_addr : unsigned(jpc_width-3 downto 0); -- address for jbc (in words!)
signal bc_wr_data : std_logic_vector(31 downto 0); -- write data for jbc
signal bc_wr_ena : std_logic;
- signal bc_rd : std_logic;
+ signal bc_arr_rd : std_logic;
--
-- signals for cache connection
@@ -139,10 +170,25 @@
begin
- mem_out.bsy <= '1' when sc_mem_in.rdy_cnt=3 or bcl_bsy='1' else '0';
+process(sc_mem_in, bcl_arr_bsy, state)
+begin
+ mem_out.bsy <= '0';
+ if sc_mem_in.rdy_cnt=3 then
+ mem_out.bsy <= '1';
+ else
+ if state/=ialrb and state/=iasw
+ and state/=iasst and bcl_arr_bsy='1' then
+ mem_out.bsy <= '1';
+ end if;
+ end if;
+end process;
mem_out.bcstart <= std_logic_vector(to_unsigned(0, 32-jpc_width)) & cache_bcstart & "00";
+
+ np_exc <= null_pointer;
+ ab_exc <= bounds_error;
+
-- change byte order for jbc memory (high byte first)
bc_wr_data <= sc_mem_in.rd_data(7 downto 0) &
sc_mem_in.rd_data(15 downto 8) &
@@ -152,7 +198,7 @@
cmp_cache: cache generic map (jpc_width, block_bits) port map(
clk, reset,
- std_logic_vector(bc_len), std_logic_vector(bc_mem_start),
+ std_logic_vector(bc_len), std_logic_vector(addr_reg(17 downto 0)),
mem_in.bc_rd,
cache_bcstart,
cache_rdy, cache_in_cache
@@ -175,9 +221,9 @@
sc_mem_out.address <= ram_addr;
- sc_mem_out.wr_data <= din;
- sc_mem_out.rd <= mem_in.rd or bc_rd;
- sc_mem_out.wr <= mem_in.wr;
+ sc_mem_out.wr_data <= ram_wr_data;
+ sc_mem_out.rd <= mem_in.rd or bc_arr_rd;
+ sc_mem_out.wr <= mem_in.wr or arr_wr;
mem_out.dout <= sc_mem_in.rd_data;
@@ -188,63 +234,101 @@
-- command?
-- - see jvm.asm...
--
+-- and array access stores
+--
process(clk, reset)
begin
if reset='1' then
- mem_wr_addr <= (others => '0');
+ addr_reg <= (others => '0');
+ index <= (others => '0');
+ value <= (others => '0');
+ iastore_nxt <= '0';
+ was_a_store <= '0';
+ bc_len <= (others => '0');
+
elsif rising_edge(clk) then
if mem_in.addr_wr='1' then
- mem_wr_addr <= din(MEM_ADDR_SIZE-1 downto 0); -- store write address
- end if;
+ addr_reg <= unsigned(ain(MEM_ADDR_SIZE-1 downto 0));
end if;
-end process;
-process(clk, reset)
-begin
- if reset='1' then
- bc_len <= (others => '0');
- bc_mem_start <= (others => '0');
- elsif rising_edge(clk) then
if mem_in.bc_rd='1' then
- bc_len <= unsigned(din(jpc_width-3 downto 0));
- bc_mem_start <= unsigned(din(27 downto 10));
+ bc_len <= unsigned(ain(jpc_width-3 downto 0));
+ addr_reg(17 downto 0) <= unsigned(ain(27 downto 10));
+
+ -- addr_bits is 17
+ if MEM_ADDR_SIZE>18 then
+ addr_reg(MEM_ADDR_SIZE-1 downto 18) <= (others => '0');
+ end if;
else
- if inc_mem_start='1' then
- bc_mem_start <= bc_mem_start+1;
+ if inc_addr_reg='1' then
+ addr_reg <= addr_reg+1;
end if;
if dec_len='1' then
bc_len <= bc_len-1;
end if;
end if;
+ iastore_nxt <= '0';
+ -- save array address and index
+ if mem_in.iaload='1' or iastore_nxt='1' then
+ addr_reg <= unsigned(bin(MEM_ADDR_SIZE-1 downto 0)); -- store address for store and np check
+ index <= ain(MEM_ADDR_SIZE-1 downto 0); -- store array index
+ end if;
+ if mem_in.iastore='1' then
+ value <= ain;
+ -- get reference and index in next cycle
+ iastore_nxt <= '1';
+ end if;
+
+ if mem_in.iaload='1' then
+ was_a_store <= '0';
+ elsif mem_in.iastore='1' then
+ was_a_store <= '1';
+ end if;
+
+ if state=iald3 then
+ addr_reg <= addr_calc;
+ end if;
end if;
end process;
+
--
-- RAM address MUX (combinational)
--
-process(din, mem_wr_addr, bc_mem_start, mem_in)
+process(ain, addr_reg, mem_in)
begin
if mem_in.rd='1' then
- ram_addr <= din(MEM_ADDR_SIZE-1 downto 0);
- elsif mem_in.wr='1' then
- ram_addr <= mem_wr_addr;
+ ram_addr <= ain(MEM_ADDR_SIZE-1 downto 0);
else
- -- default use the bc address (simpled MUX selection)
- ram_addr(17 downto 0) <= std_logic_vector(bc_mem_start);
- -- addr_bits is 17
- if MEM_ADDR_SIZE>18 then
- ram_addr(MEM_ADDR_SIZE-1 downto 18) <= (others => '0');
+ -- default is the registered address
+ -- for wr, bc load, and array access
+ ram_addr <= std_logic_vector(addr_reg(MEM_ADDR_SIZE-1 downto 0));
end if;
+end process;
+
+--
+-- RAM write data MUX (combinational)
+--
+process(ain, addr_reg, mem_in)
+begin
+ if mem_in.wr='1' then
+ ram_wr_data <= ain;
+ else
+ -- default is the registered value
+ ram_wr_data <= value;
end if;
end process;
+ addr_calc <= unsigned(sc_mem_in.rd_data(MEM_ADDR_SIZE-1 downto 0))+unsigned(index);
+
+
--
-- next state logic
--
process(state, mem_in, sc_mem_in.rdy_cnt,
- cache_rdy, cache_in_cache, bc_len)
+ cache_rdy, cache_in_cache, bc_len, addr_calc)
begin
next_state <= state;
@@ -258,6 +342,10 @@
next_state <= wr1;
elsif mem_in.bc_rd='1' then
next_state <= bc_cc;
+ elsif mem_in.iaload='1' then
+ next_state <= iald0;
+ elsif mem_in.iastore='1' then
+ next_state <= iast0;
end if;
-- after a read the idl state is the result cycle
@@ -330,10 +418,120 @@
next_state <= idl;
end if;
+--
+-- array access
+--
+ when iast0 =>
+ -- just one cycle wait to store the value
+ next_state <= iald0;
+
+ --
+ -- iald0 to iald3 are shared with iastore
+ --
+ when iald0 =>
+ if addr_reg=0 then
+ next_state <= npexc;
+ elsif index(MEM_ADDR_SIZE-1)='1' then
+ next_state <= abexc;
+ else
+ next_state <= iald1;
+ end if;
+
+ when iald1 =>
+ -- w. pipeline level 2
+ -- would waste one cycle in a single cycle memory (similar
+ -- to bc load) - SimpCon rd comes from registered bc_arr_rd.
+ if sc_mem_in.rdy_cnt/=3 then
+ next_state <= iald2;
+ end if;
+
+ when iald2 =>
+ next_state <= iald3;
+
+ when iald3 =>
+ next_state <= iald4;
+------ that's now load specific!
+-- we start loading before we know the upper bound exception!
+-- is there an issue with read peripherals????
+ if was_a_store='1' then
+ next_state <= iaswb;
+ -- w. pipeline level 2
+ elsif sc_mem_in.rdy_cnt/=3 then
+ next_state <= iasrd;
+ end if;
+
+ when iald4 =>
+ if sc_mem_in.rdy_cnt/=3 then
+ next_state <= iasrd;
+ end if;
+
+ -- rdy_cnt is less than 3 we can move on
+ when iasrd =>
+ next_state <= ialrb;
+
+ when ialrb =>
+ -- can we optimize this when we increment index at some state?
+ if unsigned(index) >= unsigned(sc_mem_in.rd_data(MEM_ADDR_SIZE-1 downto 0)) then
+ next_state <= abexc;
+ -- either 1 or 0
+ elsif sc_mem_in.rdy_cnt(1)='0' then
+ next_state <= idl;
+ end if;
+
+ when iaswb =>
+ if sc_mem_in.rdy_cnt(1)='0' then
+ next_state <= iasrb;
+ end if;
+
+ when iasrb =>
+ next_state <= iasst;
+ -- can we optimize this when we increment index at some state?
+ if unsigned(index) >= unsigned(sc_mem_in.rd_data(MEM_ADDR_SIZE-1 downto 0)) then
+ next_state <= abexc;
+ end if;
+
+ when iasst =>
+ next_state <= iasw;
+ if sc_mem_in.rdy_cnt(1)='0' then
+ next_state <= idl;
+ end if;
+
+ when iasw =>
+ -- either 1 or 0
+ if sc_mem_in.rdy_cnt(1)='0' then
+ next_state <= idl;
+ end if;
+
+ when npexc =>
+ next_state <= excw;
+
+ when abexc =>
+ next_state <= excw;
+
+ when excw =>
+ if sc_mem_in.rdy_cnt="00" then
+ next_state <= idl;
+ end if;
+
end case;
end process;
--
+-- state machine combinatorial output
+-- from next_state
+-- read for single cycle memory could be
+-- speed up
+--
+process(next_state)
+begin
+ arr_wr <= '0';
+ if next_state=iasst then
+ arr_wr <= '1';
+ end if;
+
+end process;
+
+--
-- state machine register
-- output register
--
@@ -343,47 +541,52 @@
if (reset='1') then
state <= idl;
bc_wr_ena <= '0';
- inc_mem_start <= '0';
+ inc_addr_reg <= '0';
dec_len <= '0';
- bc_rd <= '0';
- bcl_bsy <= '0';
+ bc_arr_rd <= '0';
+ bcl_arr_bsy <= '0';
+ null_pointer <= '0';
+ bounds_error <= '0';
+
elsif rising_edge(clk) then
state <= next_state;
bc_wr_ena <= '0';
- inc_mem_start <= '0';
+ inc_addr_reg <= '0';
dec_len <= '0';
- bc_rd <= '0';
+ bc_arr_rd <= '0';
+ null_pointer <= '0';
+ bounds_error <= '0';
case next_state is
when idl =>
- bcl_bsy <= '0';
+ bcl_arr_bsy <= '0';
when rd1 =>
when wr1 =>
when bc_cc =>
- bcl_bsy <= '1';
+ bcl_arr_bsy <= '1';
-- cache check
when bc_r1 =>
-- setup data
bc_wr_addr <= unsigned(cache_bcstart);
-- first memory read
- inc_mem_start <= '1';
- bc_rd <= '1';
+ inc_addr_reg <= '1';
+ bc_arr_rd <= '1';
when bc_w =>
-- wait
when bc_rn =>
-- following memory reads
- inc_mem_start <= '1';
+ inc_addr_reg <= '1';
dec_len <= '1';
- bc_rd <= '1';
+ bc_arr_rd <= '1';
when bc_wr =>
-- BC write
@@ -392,6 +595,44 @@
when bc_wl =>
-- wait for last (unnecessary read)
+ when iast0 =>
+ bcl_arr_bsy <= '1';
+
+ when iald0 =>
+ bc_arr_rd <= '1';
+ bcl_arr_bsy <= '1';
+ inc_addr_reg <= '1';
+
+ when iald1 =>
+
+ when iald2 =>
+ bc_arr_rd <= '1';
+
+ when iald3 =>
+
+ when iald4 =>
+
+ when iasrd =>
+ bc_arr_rd <= '1';
+
+ when ialrb =>
+
+ when iaswb =>
+
+ when iasrb =>
+
+ when iasst =>
+
+ when iasw =>
+
+ when npexc =>
+ null_pointer <= '1';
+
+ when abexc =>
+ bounds_error <= '1';
+
+ when excw =>
+
end case;
-- increment in state write
|
 |