|
Message
From: cvs at opencores.org<cvs@o...>
Date: Thu Nov 24 21:51:40 CET 2005
Subject: [cvs-checkins] MODIFIED: jop ...
Date: 00/05/11 24:21:51
Added: jop/vhdl/memory jbc_generic.vhd mem_sc.vhd sc_sram32.vhd
Log:
Use mem_sc and sc_sram32 with block cache for S3.
No more Spartan specific main memory module.
Revision Changes Path
1.1 jop/vhdl/memory/jbc_generic.vhd
http://www.opencores.org/cvsweb.shtml/jop/vhdl/memory/jbc_generic.vhd?rev=1.1&content-type=text/x-cvsweb-markup
Index: jbc_generic.vhd
===================================================================
--
-- Template the inferred RAM below is modelled on (kept for reference):
--
--LIBRARY ieee;
--USE ieee.std_logic_1164.ALL;
--ENTITY ram IS
--PORT (
--clock: IN STD_LOGIC;
--data: IN STD_LOGIC_VECTOR (7 DOWNTO 0);
--write_address: IN INTEGER RANGE 0 to 31;
--read_address: IN INTEGER RANGE 0 to 31;
--we: IN STD_LOGIC;
--q: OUT STD_LOGIC_VECTOR (7 DOWNTO 0)
--);
--END ram;
--ARCHITECTURE rtl OF ram IS
--TYPE MEM IS ARRAY(0 TO 31) OF STD_LOGIC_VECTOR(7 DOWNTO 0);
--SIGNAL ram_block: MEM;
--BEGIN
--PROCESS (clock)
--BEGIN
--IF (clock'event AND clock = '1') THEN
--IF (we = '1') THEN
--ram_block(write_address) <= data;
--END IF;
--q <= ram_block(read_address);
---- VHDL semantics imply that q doesn't get data
---- in this clock cycle
--END IF;
--END PROCESS;
--END rtl;

--
--	jbc_generic.vhd (derived from cyc_jbc.vhd)
--
--	bytecode memory/cache for JOP3
--	Technology-independent version: the memory is an inferred array.
--	The Altera Cyclone altsyncram instantiation of cyc_jbc.vhd is kept,
--	commented out, at the end of the architecture.
--
--	write port: 32 bit wide, word address (wr_addr)
--	read port:   8 bit wide, byte address (rd_addr)
--
--	read address is registered
--	data out is unregistered
--
--
--	Changes:
--		2003-08-14	load start address with jpc_wr and do autoincrement
--					load 32 bit data and do the 4 byte writes serial
--		2005-02-17	extracted again from mem32.vhd
--		2005-05-03	address width is jpc_width
--
--

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

entity jbc is
generic (jpc_width : integer := 10);
port (
	clk			: in std_logic;
	data		: in std_logic_vector(31 downto 0);				-- word to write
	rd_addr		: in std_logic_vector(jpc_width-1 downto 0);	-- byte address
	wr_addr		: in std_logic_vector(jpc_width-3 downto 0);	-- word address
	wr_en		: in std_logic;
	q			: out std_logic_vector(7 downto 0)				-- selected byte
);
end jbc;

--
--	synchronous write of a whole 32-bit word
--	registered rdaddress
--	unregistered dout
--
architecture rtl of jbc is

	-- 2**jpc_width bytes organised as 32-bit words
	constant nwords : integer := 2**(jpc_width-2);
	type mem is array(0 to nwords-1) of std_logic_vector(31 downto 0);

	signal ram_block	: mem;
	signal d			: std_logic_vector(31 downto 0);			-- word at registered read address
	signal rda_reg		: std_logic_vector(jpc_width-1 downto 0);	-- registered rd_addr

begin

	-- word lookup uses the registered read address without its two byte-select bits
	d <= ram_block(to_integer(unsigned(rda_reg(jpc_width-1 downto 2))));

	--
	--	write port and read address register
	--
	process(clk)
	begin
		if rising_edge(clk) then
			if wr_en='1' then
				-- the complete word is written in a single cycle
				ram_block(to_integer(unsigned(wr_addr))) <= data;
			end if;
			-- only the address is registered: q follows combinationally
			-- in the cycle after rd_addr was applied
			rda_reg <= rd_addr;
		end if;
	end process;

	--
	--	byte select: the two low bits of the registered read address
	--	pick one byte lane of the word
	--
	process(rda_reg, d)
	begin
		case rda_reg(1 downto 0) is
			when "11" =>
				q <= d(31 downto 24);
			when "10" =>
				q <= d(23 downto 16);
			when "01" =>
				q <= d(15 downto 8);
			when "00" =>
				q <= d(7 downto 0);
			when others =>
				-- metavalue on the select bits: drive unknown instead of
				-- holding the old value (no latch semantics on q)
				q <= (others => 'X');
		end case;
	end process;

----
----	generated with Quartus wizard:
----
--	COMPONENT altsyncram
--	GENERIC (
--		intended_device_family		: STRING;
--		operation_mode		: STRING;
--		width_a		: NATURAL;
--		widthad_a		: NATURAL;
--		numwords_a		: NATURAL;
--		width_b		: NATURAL;
--		widthad_b		: NATURAL;
--		numwords_b		: NATURAL;
--		lpm_type		: STRING;
--		width_byteena_a		: NATURAL;
--		outdata_reg_b		: STRING;
--		indata_aclr_a		: STRING;
--		wrcontrol_aclr_a		: STRING;
--		address_aclr_a		: STRING;
--		address_reg_b		: STRING;
--		address_aclr_b		: STRING;
--		outdata_aclr_b		: STRING;
--		read_during_write_mode_mixed_ports		: STRING
--	);
--	PORT (
--			wren_a	: IN STD_LOGIC ;
--			clock0	: IN STD_LOGIC ;
--			address_a	: IN STD_LOGIC_VECTOR (jpc_width-3 DOWNTO 0);
--			address_b	: IN STD_LOGIC_VECTOR (jpc_width-1 DOWNTO 0);
--			q_b	: OUT STD_LOGIC_VECTOR (7 DOWNTO 0);
--			data_a	: IN STD_LOGIC_VECTOR (31 DOWNTO 0)
--	);
--	END COMPONENT;
--
--begin
--
--	alt_jbc : altsyncram
--	GENERIC MAP (
--		intended_device_family => "Cyclone",
--		operation_mode => "DUAL_PORT",
--		width_a => 32,
--		widthad_a => jpc_width-2,
--		numwords_a => 2**(jpc_width-2),
--		width_b => 8,
--		widthad_b => jpc_width,
--		numwords_b => 2**jpc_width,
--		lpm_type => "altsyncram",
--		width_byteena_a => 1,
--		outdata_reg_b => "UNREGISTERED",
--		indata_aclr_a => "NONE",
--		wrcontrol_aclr_a => "NONE",
--		address_aclr_a => "NONE",
--		address_reg_b => "CLOCK0",
--		address_aclr_b => "NONE",
--		outdata_aclr_b => "NONE",
--		read_during_write_mode_mixed_ports => "DONT_CARE"
--	)
--	PORT MAP (
--		wren_a => wr_en,
--		clock0 => clk,
--		address_a => wr_addr,
--		address_b => rd_addr,
--		data_a => data,
--		q_b => q
--	);
--
--

end rtl;
1.1 jop/vhdl/memory/mem_sc.vhd
http://www.opencores.org/cvsweb.shtml/jop/vhdl/memory/mem_sc.vhd?rev=1.1&content-type=text/x-cvsweb-markup
Index: mem_sc.vhd
===================================================================
--
-- mem_sc.vhd
--
-- external memory interface with SimpCon
--
--
-- todo:
--
-- 2005-11-22 first version adapted from mem(_wb)
--
Library IEEE;
use IEEE.std_logic_1164.all;
use ieee.numeric_std.all;
use work.jop_types.all;
entity mem_sc is
generic (
	jpc_width	: integer;		-- width of the bytecode (jbc) byte address
	block_bits	: integer;		-- passed on to the cache component
	addr_bits	: integer		-- width of the SimpCon address
);
port (

	--
	--	jop interface
	--
	clk			: in std_logic;
	reset		: in std_logic;

	din			: in std_logic_vector(31 downto 0);

	mem_rd		: in std_logic;
	mem_wr		: in std_logic;
	mem_addr_wr	: in std_logic;
	mem_bc_rd	: in std_logic;
	dout		: out std_logic_vector(31 downto 0);
	bcstart		: out std_logic_vector(31 downto 0); 	-- start of method in bc cache

	bsy			: out std_logic;

	--
	--	jbc connections
	--
	jbc_addr	: in std_logic_vector(jpc_width-1 downto 0);
	jbc_data	: out std_logic_vector(7 downto 0);

	--
	--	SimpCon interface
	--
	addr		: out std_logic_vector(addr_bits-1 downto 0);
	wr_data		: out std_logic_vector(31 downto 0);
	rd			: out std_logic;
	wr			: out std_logic;
	rd_data		: in std_logic_vector(31 downto 0);
	bsy_cnt		: in unsigned(1 downto 0)

);
end mem_sc;
architecture rtl of mem_sc is

	--
	--	method cache lookup
	--
	component cache is
	generic (
		jpc_width	: integer;
		block_bits	: integer
	);
	port (
		clk			: in std_logic;
		reset		: in std_logic;

		bc_len		: in std_logic_vector(jpc_width-3 downto 0);	-- length of method in words
		bc_addr		: in std_logic_vector(17 downto 0);				-- memory address of bytecode
		find		: in std_logic;									-- start lookup

		bcstart		: out std_logic_vector(jpc_width-3 downto 0); 	-- start of method in bc cache

		rdy			: out std_logic;								-- lookup finished
		in_cache	: out std_logic									-- method is in cache
	);
	end component;

	--
	--	jbc component (use technology specific vhdl-file cyc_jbc,...)
	--
	--	ajbc,xjbc are OLD!
	--	check if ajbc.vhd can still be used (multicycle write!)
	--
	--	dual port ram
	--	wraddr and wrena registered
	--	rdaddr is registered
	--	indata registered
	--	outdata is unregistered
	--
	component jbc is
	generic (jpc_width : integer);
	port (
		clk			: in std_logic;
		data		: in std_logic_vector(31 downto 0);
		rd_addr		: in std_logic_vector(jpc_width-1 downto 0);
		wr_addr		: in std_logic_vector(jpc_width-3 downto 0);
		wr_en		: in std_logic;
		q			: out std_logic_vector(7 downto 0)
	);
	end component;

	--
	--	state machine of the memory interface:
	--	idl/rd1/wr1 handle single accesses, the bc_* states the bytecode load
	--
	type state_type is (
		idl, rd1, wr1,
		bc_cc, bc_sa, bc_r1, bc_w, bc_rn, bc_wr, bc_wl
	);
	signal state			: state_type;
	signal next_state		: state_type;

	signal mem_wr_addr		: std_logic_vector(addr_bits-1 downto 0);	-- stored write address
	signal ram_addr			: std_logic_vector(addr_bits-1 downto 0);	-- drives addr
	signal mem_bsy			: std_logic;
	signal bcl_bsy			: std_logic;								-- bytecode load in progress

	--
	--	values for bytecode read/cache
	--
	--	len is in words, 10 bits range is 'hardcoded' in JOPWriter.java
	--	start is address in external memory (rest of the word)
	--
	signal bc_len			: unsigned(jpc_width-3 downto 0);	-- length of method in words
	signal bc_mem_start		: unsigned(17 downto 0);			-- memory address of bytecode
	signal inc_mem_start	: std_logic;
	signal dec_len			: std_logic;

	signal bc_wr_addr		: unsigned(jpc_width-3 downto 0);		-- address for jbc (in words!)
	signal bc_wr_data		: std_logic_vector(31 downto 0);		-- write data for jbc
	signal bc_wr_ena		: std_logic;
	signal bc_rd			: std_logic;

	--
	--	signals for cache connection
	--
	signal cache_rdy		: std_logic;
	signal cache_in_cache	: std_logic;
	signal cache_bcstart	: std_logic_vector(jpc_width-3 downto 0);
begin
mem_bsy <= '1' when bsy_cnt=3 or bcl_bsy='1' else '0';
bsy <= mem_bsy;
bcstart <= std_logic_vector(to_unsigned(0, 32-jpc_width)) & cache_bcstart & "00";
-- change byte order for jbc memory (high byte first)
bc_wr_data <= rd_data(7 downto 0) &
rd_data(15 downto 8) &
rd_data(23 downto 16) &
rd_data(31 downto 24);
cmp_cache: cache generic map (jpc_width, block_bits) port map(
clk, reset,
std_logic_vector(bc_len), std_logic_vector(bc_mem_start),
mem_bc_rd,
cache_bcstart,
cache_rdy, cache_in_cache
);
cmp_jbc: jbc generic map (jpc_width)
port map(
clk => clk,
data => bc_wr_data,
wr_en => bc_wr_ena,
wr_addr => std_logic_vector(bc_wr_addr),
rd_addr => jbc_addr,
q => jbc_data
);
--
-- SimpCon connections
--
addr <= ram_addr;
wr <= mem_wr;
rd <= mem_rd or bc_rd;
wr_data <= din;
dout <= rd_data;
--
--	Write address register
--
--	The address of a write is latched from din when mem_addr_wr is
--	asserted; the following mem_wr uses it.
--
--	TODO: wouldn't it be easier to use A and B
--	for data and address with a single write
--	command?
--		- see jvm.asm...
--
wr_addr_reg: process(clk, reset)
begin
	if reset='1' then
		mem_wr_addr <= (others => '0');
	elsif rising_edge(clk) then
		if mem_addr_wr='1' then
			-- only the low addr_bits of din form the address
			mem_wr_addr <= din(addr_bits-1 downto 0);
		end if;
	end if;
end process;
--
--	Bytecode load bookkeeping: remaining length (words) and
--	current external memory address.
--
bc_regs: process(clk, reset)
begin
	if reset='1' then
		bc_mem_start <= (others => '0');
		bc_len <= (others => '0');
	elsif rising_edge(clk) then

		-- step address/length as requested by the state machine
		if inc_mem_start='1' then
			bc_mem_start <= bc_mem_start+1;
		end if;
		if dec_len='1' then
			bc_len <= bc_len-1;
		end if;

		-- a new bytecode read request has priority: din carries the
		-- length in the low bits and the start address in bits 27..10
		if mem_bc_rd='1' then
			bc_len <= unsigned(din(jpc_width-3 downto 0));
			bc_mem_start <= unsigned(din(27 downto 10));
		end if;

	end if;
end process;
--
--	RAM address MUX (combinational)
--
--	mem_rd:		address comes directly from din
--	mem_wr:		address stored earlier with mem_addr_wr
--	otherwise:	bytecode load address (simple MUX selection)
--
process(din, mem_wr_addr, bc_mem_start, mem_rd, mem_wr)
begin
	if mem_rd='1' then
		ram_addr <= din(addr_bits-1 downto 0);
	elsif mem_wr='1' then
		ram_addr <= mem_wr_addr;
	else
		-- Drive ALL bits of ram_addr for any addr_bits: resize zero-extends
		-- the 18-bit bytecode address when addr_bits > 18 (no latch on the
		-- upper bits) and drops the upper bits when addr_bits < 18 (no
		-- out-of-range slice). Identical to the old code for addr_bits = 18.
		ram_addr <= std_logic_vector(resize(bc_mem_start, addr_bits));
	end if;
end process;
--
-- next state logic
--
-- Combinational: computes next_state from the current state, the request
-- strobes from JOP, the SimpCon busy counter and the cache lookup result.
-- next_state defaults to state, so every branch without an assignment
-- simply stays where it is.
--
process(state, mem_rd, mem_wr, mem_bc_rd, bsy_cnt,
cache_rdy, cache_in_cache, bc_len)
begin
next_state <= state;
case state is
-- idle: priority is read, then write, then bytecode load
when idl =>
if mem_rd='1' then
next_state <= rd1;
elsif mem_wr='1' then
next_state <= wr1;
elsif mem_bc_rd='1' then
next_state <= bc_cc;
end if;
-- after a read the idl state is the result cycle
-- where the data is available
when rd1 =>
-- either 1 or 0
if bsy_cnt(1)='0' then
next_state <= idl;
end if;
-- We could avoid the idl state after wr1 to
-- get back to back wr/wr or wr/rd.
-- However, it is not used in JOP (at the moment).
when wr1 =>
-- either 1 or 0
if bsy_cnt(1)='0' then
next_state <= idl;
end if;
--
-- bytecode read
--
-- cache lookup: wait for cache_rdy, then either finish (hit)
-- or start loading the method (miss)
when bc_cc =>
if cache_rdy = '1' then
if cache_in_cache = '1' then
next_state <= idl;
else
next_state <= bc_sa;
end if;
end if;
-- not in cache
when bc_sa =>
next_state <= bc_r1;
-- start first read
when bc_r1 =>
next_state <= bc_w;
-- wait
-- NOTE(review): the zero literal is one bit narrower than bc_len;
-- numeric_std "=" accepts operands of different length
when bc_w =>
if bc_len=to_unsigned(0, jpc_width-3) then
next_state <= bc_wl;
-- this works with pipeline level 1
-- elsif bsy_cnt(1)='0' then
-- we need a pipeline level of 2 in
-- the memory interface for this to work!
elsif bsy_cnt/=3 then
next_state <= bc_rn;
end if;
-- start read 2 to n
when bc_rn =>
if bc_len=to_unsigned(0, jpc_width-3) then
next_state <= bc_wl;
else
next_state <= bc_wr;
end if;
-- write the received word to the jbc; continue with the next read
-- unless the memory reports the maximum busy count
when bc_wr =>
-- w. pipeline level 2
if bsy_cnt/=3 then
next_state <= bc_rn;
else
next_state <= bc_w;
end if;
-- wait for the last ack
when bc_wl =>
if bsy_cnt(1)='0' then
next_state <= idl;
end if;
end case;
end process;
--
--	state machine register
--	output register
--
--	The control outputs are registered and decoded from next_state, so
--	they are active in the same cycle in which 'state' holds that value.
--	All strobes default to '0' each cycle; bcl_bsy keeps its value between
--	bc_cc (set) and idl (cleared).
--
process(clk, reset)
begin
	if (reset='1') then
		state <= idl;
		bc_wr_ena <= '0';
		inc_mem_start <= '0';
		dec_len <= '0';
		bc_rd <= '0';
		bcl_bsy <= '0';
		-- every register written in this process gets a reset value;
		-- an un-reset register here would need reset as a hold enable
		-- and would start as 'U' in simulation
		bc_wr_addr <= (others => '0');
	elsif rising_edge(clk) then

		state <= next_state;

		-- single-cycle strobes
		bc_wr_ena <= '0';
		inc_mem_start <= '0';
		dec_len <= '0';
		bc_rd <= '0';

		case next_state is

			when idl =>
				bcl_bsy <= '0';

			when rd1 =>
				null;

			when wr1 =>
				null;

			when bc_cc =>
				-- cache check
				bcl_bsy <= '1';

			when bc_sa =>
				-- setup data: jbc write address starts at the cache block
				bc_wr_addr <= unsigned(cache_bcstart);

			when bc_r1 =>
				-- first memory read
				inc_mem_start <= '1';
				bc_rd <= '1';

			when bc_w =>
				-- wait
				null;

			when bc_rn =>
				-- following memory reads
				inc_mem_start <= '1';
				bc_rd <= '1';

			when bc_wr =>
				-- BC write
				bc_wr_ena <= '1';
				dec_len <= '1';

			when bc_wl =>
				-- wait for last (unnecessary read)
				null;

		end case;

		-- increment in state write (uses the CURRENT state, i.e. the
		-- cycle in which bc_wr_ena is active)
		if state=bc_wr then
			bc_wr_addr <= bc_wr_addr+1;		-- next jbc address
		end if;

	end if;
end process;
end rtl;
1.1 jop/vhdl/memory/sc_sram32.vhd
http://www.opencores.org/cvsweb.shtml/jop/vhdl/memory/sc_sram32.vhd?rev=1.1&content-type=text/x-cvsweb-markup
Index: sc_sram32.vhd
===================================================================
--
-- sc_sram32.vhd
--
-- SimpCon compliant external memory interface
-- for 32-bit SRAM (e.g. Cyclone board)
--
-- Connection between mem_sc and the external memory bus
--
-- memory mapping
--
-- 000000-x7ffff external SRAM (w mirror) max. 512 kW (4*4 MBit)
-- 080000-xfffff external Flash (w mirror) max. 512 kB (4 MBit)
-- 100000-xfffff external NAND flash
--
-- RAM: 32 bit word
-- ROM: 8 bit word (for flash programming)
--
-- todo:
-- make a version with Flash interface
--
--
-- 2005-11-22 first version
--
Library IEEE;
use IEEE.std_logic_1164.all;
use ieee.numeric_std.all;
use work.jop_types.all;
entity sc_mem_if is
generic (
	ram_ws		: integer;		-- wait states for the SRAM
	rom_cnt		: integer;		-- not referenced by the architecture in this file
	addr_bits	: integer		-- width of the SimpCon address
);
port (

	clk			: in std_logic;
	reset		: in std_logic;

	--
	--	SimpCon interface
	--
	addr		: in std_logic_vector(addr_bits-1 downto 0);
	wr_data		: in std_logic_vector(31 downto 0);
	rd			: in std_logic;
	wr			: in std_logic;
	rd_data		: out std_logic_vector(31 downto 0);
	bsy_cnt		: out unsigned(1 downto 0);

	--
	--	memory interface
	--
	ram_addr	: out std_logic_vector(17 downto 0);
	ram_dout	: out std_logic_vector(31 downto 0);
	ram_din		: in std_logic_vector(31 downto 0);
	ram_dout_en	: out std_logic;
	ram_ncs		: out std_logic;
	ram_noe		: out std_logic;
	ram_nwe		: out std_logic;

	--
	--	config/program flash and big nand flash interface
	--
	fl_a		: out std_logic_vector(18 downto 0);
	fl_d		: inout std_logic_vector(7 downto 0);
	fl_ncs		: out std_logic;
	fl_ncsb		: out std_logic;
	fl_noe		: out std_logic;
	fl_nwe		: out std_logic;
	fl_rdy		: in std_logic

);
end sc_mem_if;
architecture rtl of sc_mem_if is

	--
	--	signals for mem interface
	--
	--	idl: idle, rd1: read wait states, rd2: last read cycle, wr1: write
	--
	type state_type is (idl, rd1, rd2, wr1);
	signal state		: state_type;
	signal next_state	: state_type;

	signal nwr_int		: std_logic;				-- write strobe before the half-cycle delay
	signal wait_state	: unsigned(3 downto 0);		-- down counter for the access
	signal cnt			: unsigned(1 downto 0);		-- drives bsy_cnt

	signal dout_ena		: std_logic;				-- drives ram_dout_en
	signal rd_data_ena	: std_logic;				-- capture ram_din into rd_data

begin

	-- internal copies of the output ports
	bsy_cnt <= cnt;
	ram_dout_en <= dout_ena;
--
--	Register memory address, write data and read data
--
--	ram_addr is captured on every rd or wr request, ram_dout on wr,
--	rd_data when the state machine enables it (rd_data_ena).
--
process(clk, reset)
begin
	if reset='1' then
		ram_addr <= (others => '0');
		ram_dout <= (others => '0');
		rd_data <= (others => '0');
	elsif rising_edge(clk) then

		if rd='1' or wr='1' then
			-- Fit the SimpCon address to the RAM address width for any
			-- addr_bits: truncates when addr_bits > 18 (as before) and
			-- zero-extends when addr_bits < 18 instead of slicing out
			-- of range.
			ram_addr <= std_logic_vector(resize(unsigned(addr), ram_addr'length));
		end if;

		if wr='1' then
			ram_dout <= wr_data;
		end if;

		if rd_data_ena='1' then
			rd_data <= ram_din;
		end if;

	end if;
end process;
--
--	'delay' nwe 1/2 cycle -> change on falling edge
--
--	ram_nwe is the only output clocked on the falling edge;
--	ram_noe is generated in the rising-edge output register.
--
nwe_reg: process(clk, reset)
begin
	if (reset='1') then
		ram_nwe <= '1';
	elsif falling_edge(clk) then
		ram_nwe <= nwr_int;
	end if;
end process;
--
-- next state logic
--
-- Combinational: computes next_state from the current state, the SimpCon
-- rd/wr strobes and the wait_state counter. ram_ws is a generic, so the
-- ram_ws=0 tests are constant.
--
process(state, rd, wr, wait_state)
begin
next_state <= state;
case state is
-- idle: a read has priority over a write
when idl =>
if rd='1' then
if ram_ws=0 then
-- then we omit state rd1!
next_state <= rd2;
else
next_state <= rd1;
end if;
elsif wr='1' then
next_state <= wr1;
end if;
-- the WS state
when rd1 =>
if wait_state=2 then
next_state <= rd2;
end if;
-- last read state
-- default is back to idl; a new request in this cycle overrides it
-- (later assignment wins)
when rd2 =>
next_state <= idl;
-- This should do to give us a pipeline
-- level of 1 for read
if rd='1' then
if ram_ws=0 then
-- then we omit state rd1!
next_state <= rd2;
else
next_state <= rd1;
end if;
elsif wr='1' then
next_state <= wr1;
end if;
-- the WS state
when wr1 =>
-- TODO: check what happens on ram_ws=0
-- TODO: do we need a write pipelining?
-- not at the moment, but perhaps later when
-- we write the stack content to main memory
if wait_state=1 then
next_state <= idl;
end if;
end case;
end process;
--
--	state machine register
--	output register
--
--	The SRAM control outputs are registered and decoded from next_state.
--	Every output returns to its inactive level unless the upcoming state
--	activates it.
--
process(clk, reset)
begin
	if (reset='1') then
		state <= idl;
		dout_ena <= '0';
		ram_ncs <= '1';
		ram_noe <= '1';
		rd_data_ena <= '0';
	elsif rising_edge(clk) then

		state <= next_state;

		-- inactive defaults (also the complete output set for idl)
		dout_ena <= '0';
		ram_ncs <= '1';
		ram_noe <= '1';
		rd_data_ena <= '0';

		-- both read states select the RAM and enable its outputs
		if next_state=rd1 or next_state=rd2 then
			ram_ncs <= '0';
			ram_noe <= '0';
		end if;

		-- last read state: capture the read data
		if next_state=rd2 then
			rd_data_ena <= '1';
		end if;

		-- write state: select the RAM and drive the data bus
		if next_state=wr1 then
			ram_ncs <= '0';
			dout_ena <= '1';
		end if;

	end if;
end process;
--
--	nwr combinational processing
--	for the negative edge
--
--	Active (low) exactly when the upcoming state is the write state;
--	sampled by the falling-edge ram_nwe register.
--
nwr_int <= '0' when next_state=wr1 else '1';
--
--	wait_state processing
--	cs delay, dout enable
--
--	wait_state is a free-running down counter that is reloaded with
--	ram_ws+1 on every request. cnt (bsy_cnt) is "00" when the next state
--	is idl, follows wait_state-1 once wait_state is below 4 and is
--	saturated at "11" otherwise.
--
process(clk, reset)
begin
	if (reset='1') then
		wait_state <= (others => '1');
		cnt <= "00";
	elsif rising_edge(clk) then

		-- running access: count down and derive the busy counter
		wait_state <= wait_state-1;

		cnt <= "11";
		if next_state=idl then
			cnt <= "00";
		elsif wait_state(3 downto 2)="00" then
			-- wait_state < 4
			cnt <= wait_state(1 downto 0)-1;
		end if;

		-- new request: read and write start with the same values, so
		-- both are handled by one branch (this overrides the above)
		--
		-- one more cycle for the write
		-- But in original mem32 this was only true
		-- for ram_cnt=2!
		if rd='1' or wr='1' then
			wait_state <= to_unsigned(ram_ws+1, 4);
			if ram_ws<3 then
				cnt <= to_unsigned(ram_ws+1, 2);
			else
				cnt <= "11";
			end if;
		end if;

		-- wait_state is not held at its maximum in idl:
		-- we don't care about wait_state in state idle

	end if;
end process;
--
--	Flash interface is not implemented in this version: all flash
--	control outputs are held inactive and the data bus is released.
--	fl_rdy (input) is unused.
--
--	TODO: move Flash interface to a second WB interface
--
fl_ncs <= '1';
fl_ncsb <= '1';
fl_noe <= '1';
fl_nwe <= '1';
fl_a <= (others => '0');
fl_d <= (others => 'Z');
end rtl;
|
 |