feat(ip): add AXI CDMA IP

This commit is contained in:
2026-04-12 22:25:00 +08:00
parent 14c45b9886
commit 8bad8c21e5
7 changed files with 1300 additions and 0 deletions

View File

@@ -0,0 +1,112 @@
// ============================================================================
// snix_async_fifo.sv
//
// SystemVerilog implementation of an async FIFO. If targeting FPGAs, this
// implementation is intended to infer BRAM.
//
// Largely based on ZipCPU's async FIFO work:
// https://zipcpu.com/blog/2018/07/06/afifo.html
// ============================================================================
// ----------------------------------------------------------------------------
// snix_async_fifo
//
// Dual-clock FIFO with Gray-coded pointers crossed through NFF-stage
// synchronisers. The read side keeps a prefetched word in `rdata`: `rdata`
// is valid whenever `rempty` is low, and `rd_en` pops it.
//
// Parameters
//   DATA_WIDTH : width of wdata / rdata.
//   FIFO_DEPTH : number of storage entries. The pointer arithmetic below
//                (ADDR_W+1-bit wrap, Gray full/empty compare) requires a
//                power of two, and ADDR_W-1 must be >= 0, i.e. FIFO_DEPTH >= 2.
//
// Write domain (wclk / wrst_n): wr_en, wdata, wfull
// Read  domain (rclk / rrst_n): rd_en, rdata, rempty, rd_n_items
// ----------------------------------------------------------------------------
module snix_async_fifo #(parameter DATA_WIDTH = 8,
                         parameter FIFO_DEPTH = 4)
   (input  logic                        wclk,
    input  logic                        wrst_n,
    input  logic                        wr_en,
    input  logic [DATA_WIDTH-1:0]       wdata,
    output logic                        wfull,
    input  logic                        rclk,
    input  logic                        rrst_n,
    input  logic                        rd_en,
    output logic [DATA_WIDTH-1:0]       rdata,
    output logic [$clog2(FIFO_DEPTH):0] rd_n_items,
    output logic                        rempty);

   localparam ADDR_W = $clog2(FIFO_DEPTH);
   localparam NFF    = 2;   // synchroniser depth (first stage lives in *_cross)

   // Binary pointers carry one extra MSB so full and empty can be told apart.
   logic [ADDR_W:0] rd_addr, next_rd_addr;
   logic [ADDR_W:0] wr_addr, next_wr_addr;
   logic [ADDR_W:0] rgray, wgray;
   // rd_wgray / wr_rgray : opposite-domain Gray pointer after synchronisation
   // rd_wbin             : write pointer converted back to binary (read domain)
   logic [ADDR_W:0] rd_wgray, wr_rgray, rd_wbin;

   (* ASYNC_REG = "TRUE" *) logic [(ADDR_W+1)*(NFF-1)-1:0] rgray_cross, wgray_cross;
   (* ram_style = "block" *) logic [DATA_WIDTH-1:0] mem [0:FIFO_DEPTH-1];

   logic [DATA_WIDTH-1:0] lcl_rd_data;
   logic                  lcl_read, lcl_rd_empty;
   logic                  wr_accept;

   // A write is accepted only when requested and the FIFO is not full.
   assign wr_accept    = wr_en && !wfull;
   assign next_wr_addr = wr_addr + 1'b1;
   assign next_rd_addr = rd_addr + 1'b1;

   // ------------------------------------------------------------------------
   // Write pointer (binary + Gray)
   // ------------------------------------------------------------------------
   always_ff @(posedge wclk or negedge wrst_n)
      if (!wrst_n) begin
         wr_addr <= (ADDR_W+1)'(0);
         wgray   <= (ADDR_W+1)'(0);
      end
      else if (wr_accept) begin
         wr_addr <= next_wr_addr;
         wgray   <= next_wr_addr ^ (next_wr_addr >> 1);
      end

   // ------------------------------------------------------------------------
   // Read pointer (binary + Gray)
   // ------------------------------------------------------------------------
   always_ff @(posedge rclk or negedge rrst_n)
      if (!rrst_n) begin
         rd_addr <= (ADDR_W+1)'(0);
         rgray   <= (ADDR_W+1)'(0);
      end
      else if (lcl_read && !lcl_rd_empty) begin
         rd_addr <= next_rd_addr;
         rgray   <= next_rd_addr ^ (next_rd_addr >> 1);
      end

   // ------------------------------------------------------------------------
   // Storage
   //
   // The write MUST be gated by wr_accept: when the FIFO is full the low bits
   // of wr_addr equal the low bits of the read pointer, so an ungated write
   // would overwrite the oldest entry that has not been read yet.
   // ------------------------------------------------------------------------
   always_ff @(posedge wclk)
      if (wr_accept)
         mem[wr_addr[ADDR_W-1:0]] <= wdata;

   assign lcl_rd_data = mem[rd_addr[ADDR_W-1:0]];

   // ------------------------------------------------------------------------
   // Read Gray pointer -> write domain
   // ------------------------------------------------------------------------
   always_ff @(posedge wclk or negedge wrst_n)
      if (!wrst_n) begin
         wr_rgray    <= (ADDR_W+1)'(0);
         rgray_cross <= (ADDR_W+1)'(0);
      end
      else begin
         {wr_rgray, rgray_cross} <= {rgray_cross, rgray};
      end

   // ------------------------------------------------------------------------
   // Write Gray pointer -> read domain, plus registered occupancy estimate
   // ------------------------------------------------------------------------
   always_ff @(posedge rclk or negedge rrst_n)
      if (!rrst_n) begin
         rd_wgray    <= (ADDR_W+1)'(0);
         wgray_cross <= (ADDR_W+1)'(0);
         rd_n_items  <= (ADDR_W+1)'(0);
      end
      else begin
         {rd_wgray, wgray_cross} <= {wgray_cross, wgray};
         // Entries in memory as seen from the read domain (synchronised
         // write pointer minus read pointer, modulo 2^(ADDR_W+1)).
         rd_n_items <= (rd_wbin - rd_addr) & ((1 << (ADDR_W + 1)) - 1);
      end

   // Gray -> binary conversion of the synchronised write pointer.
   always_comb begin
      rd_wbin[ADDR_W] = rd_wgray[ADDR_W];
      for (int i = ADDR_W-1; i >= 0; i--) begin
         rd_wbin[i] = rd_wbin[i+1] ^ rd_wgray[i];
      end
   end

   // ------------------------------------------------------------------------
   // Flags
   // ------------------------------------------------------------------------
   assign wfull        = wr_rgray == {~wgray[ADDR_W], wgray[ADDR_W-1:0]};
   assign lcl_rd_empty = rd_wgray == rgray;
   // Fetch from memory when the output register is empty or is being popped.
   assign lcl_read     = rempty || rd_en;

   always_ff @(posedge rclk or negedge rrst_n)
      if (!rrst_n) begin
         rempty <= 1'b1;
      end
      else if (lcl_read) begin
         rempty <= lcl_rd_empty;
      end

   // Output register (no reset, so it can be absorbed into the RAM output).
   always_ff @(posedge rclk)
      if (lcl_read)
         rdata <= lcl_rd_data;

endmodule: snix_async_fifo

View File

@@ -0,0 +1,219 @@
// ============================================================================
// snix_axi_cdma.sv
// Central DMA — memory-to-memory transfers via snix_axi_mm2mm
//
// Instantiates:
// snix_axi_cdma_csr — AXI-Lite register interface
// snix_axi_mm2mm — read-then-write AXI4 engine
//
// Software register map (see snix_axi_cdma_csr.sv for full detail):
// 0x00 CDMA_CTRL [0]=start [1]=stop [5:3]=size [13:6]=len
// 0x04 CDMA_NUM_BYTES [31:0]=transfer_len
// 0x08 CDMA_SRC_ADDR [31:0]=source base address
// 0x0C CDMA_DST_ADDR [31:0]=destination base address
// 0x10 STATUS [0]=done (sticky, read-only)
// ============================================================================
// ----------------------------------------------------------------------------
// snix_axi_cdma
//
// Top-level wrapper: an AXI-Lite register block (snix_axi_cdma_csr) whose
// register file is decoded here into control signals for the AXI4
// read-then-write engine (snix_axi_mm2mm). The engine's ctrl_done is fed
// back to the CSR as status bit 0 and also exported as dma_finish.
// ----------------------------------------------------------------------------
module snix_axi_cdma #(
    parameter int ADDR_WIDTH      = 32,
    parameter int DATA_WIDTH      = 64,
    parameter int AXIL_ADDR_WIDTH = 32,
    parameter int AXIL_DATA_WIDTH = 32,
    parameter int ID_WIDTH        = 4,
    parameter int USER_WIDTH      = 1)
   (// Global signals
    input  logic                         clk,
    input  logic                         rst_n,
    // AXI-Lite CSR interface
    input  logic [AXIL_ADDR_WIDTH-1:0]   s_axil_awaddr,
    input  logic                         s_axil_awvalid,
    output logic                         s_axil_awready,
    input  logic [AXIL_DATA_WIDTH-1:0]   s_axil_wdata,
    input  logic [AXIL_DATA_WIDTH/8-1:0] s_axil_wstrb,
    input  logic                         s_axil_wvalid,
    output logic                         s_axil_wready,
    output logic [1:0]                   s_axil_bresp,
    output logic                         s_axil_bvalid,
    input  logic                         s_axil_bready,
    input  logic [AXIL_ADDR_WIDTH-1:0]   s_axil_araddr,
    input  logic                         s_axil_arvalid,
    output logic                         s_axil_arready,
    output logic [AXIL_DATA_WIDTH-1:0]   s_axil_rdata,
    output logic [1:0]                   s_axil_rresp,
    output logic                         s_axil_rvalid,
    input  logic                         s_axil_rready,
    // AXI4 memory port — AW channel
    output logic [ID_WIDTH-1:0]          mm2mm_awid,
    output logic [ADDR_WIDTH-1:0]        mm2mm_awaddr,
    output logic [7:0]                   mm2mm_awlen,
    output logic [2:0]                   mm2mm_awsize,
    output logic [1:0]                   mm2mm_awburst,
    output logic                         mm2mm_awlock,
    output logic [3:0]                   mm2mm_awcache,
    output logic [2:0]                   mm2mm_awprot,
    output logic [3:0]                   mm2mm_awqos,
    output logic [USER_WIDTH-1:0]        mm2mm_awuser,
    output logic                         mm2mm_awvalid,
    input  logic                         mm2mm_awready,
    // W channel
    output logic [DATA_WIDTH-1:0]        mm2mm_wdata,
    output logic [DATA_WIDTH/8-1:0]      mm2mm_wstrb,
    output logic                         mm2mm_wlast,
    output logic [USER_WIDTH-1:0]        mm2mm_wuser,
    output logic                         mm2mm_wvalid,
    input  logic                         mm2mm_wready,
    // B channel
    input  logic [ID_WIDTH-1:0]          mm2mm_bid,
    input  logic [1:0]                   mm2mm_bresp,
    input  logic [USER_WIDTH-1:0]        mm2mm_buser,
    input  logic                         mm2mm_bvalid,
    output logic                         mm2mm_bready,
    // AR channel
    output logic [ID_WIDTH-1:0]          mm2mm_arid,
    output logic [ADDR_WIDTH-1:0]        mm2mm_araddr,
    output logic [7:0]                   mm2mm_arlen,
    output logic [2:0]                   mm2mm_arsize,
    output logic [1:0]                   mm2mm_arburst,
    output logic                         mm2mm_arlock,
    output logic [3:0]                   mm2mm_arcache,
    output logic [2:0]                   mm2mm_arprot,
    output logic [3:0]                   mm2mm_arqos,
    output logic [USER_WIDTH-1:0]        mm2mm_aruser,
    output logic                         mm2mm_arvalid,
    input  logic                         mm2mm_arready,
    // R channel
    input  logic [ID_WIDTH-1:0]          mm2mm_rid,
    input  logic [DATA_WIDTH-1:0]        mm2mm_rdata,
    input  logic [1:0]                   mm2mm_rresp,
    input  logic                         mm2mm_rlast,
    input  logic [USER_WIDTH-1:0]        mm2mm_ruser,
    input  logic                         mm2mm_rvalid,
    output logic                         mm2mm_rready,
    output logic                         dma_finish);

   // -------------------------------------------------------------------------
   // Constants
   // -------------------------------------------------------------------------
   localparam int NUM_REGS   = 8;    // size of the CSR register file
   localparam int FIFO_DEPTH = 16;   // handed to the mm2mm engine

   // Register-file word indices (index 4 is STATUS, handled inside the CSR).
   localparam int CDMA_CTRL_IDX      = 0;
   localparam int CDMA_NUM_BYTES_IDX = 1;
   localparam int CDMA_SRC_ADDR_IDX  = 2;
   localparam int CDMA_DST_ADDR_IDX  = 3;

   // -------------------------------------------------------------------------
   // CSR <-> engine glue signals
   // -------------------------------------------------------------------------
   logic [NUM_REGS-1:0][AXIL_DATA_WIDTH-1:0] config_status_reg;
   logic [AXIL_DATA_WIDTH-1:0]               read_status_reg;
   logic [AXIL_DATA_WIDTH-1:0]               ctrl_word;

   logic                  ctrl_start, ctrl_stop;
   logic [2:0]            ctrl_size;
   logic [7:0]            ctrl_len;
   logic [31:0]           ctrl_transfer_len;
   logic [ADDR_WIDTH-1:0] ctrl_src_addr, ctrl_dst_addr;
   logic                  ctrl_done;

   // CDMA_CTRL field extraction: [0]=start [1]=stop [5:3]=size [13:6]=len
   assign ctrl_word  = config_status_reg[CDMA_CTRL_IDX];
   assign ctrl_start = ctrl_word[0];
   assign ctrl_stop  = ctrl_word[1];
   assign ctrl_size  = ctrl_word[5:3];
   assign ctrl_len   = ctrl_word[13:6];

   // Whole-word registers
   assign ctrl_transfer_len = config_status_reg[CDMA_NUM_BYTES_IDX];
   assign ctrl_src_addr     = config_status_reg[CDMA_SRC_ADDR_IDX][ADDR_WIDTH-1:0];
   assign ctrl_dst_addr     = config_status_reg[CDMA_DST_ADDR_IDX][ADDR_WIDTH-1:0];

   // Status word presented to the CSR: only bit 0 is used.
   always_comb begin
      read_status_reg    = '0;
      read_status_reg[0] = ctrl_done;
   end

   assign dma_finish = ctrl_done;

   // -------------------------------------------------------------------------
   // AXI-Lite register block
   // -------------------------------------------------------------------------
   snix_axi_cdma_csr #(
      .DATA_WIDTH (AXIL_DATA_WIDTH),
      .ADDR_WIDTH (AXIL_ADDR_WIDTH),
      .NUM_REGS   (NUM_REGS))
   cdma_csr (
      .clk               (clk),
      .rst_n             (rst_n),
      // write address / data / response
      .s_axil_awaddr     (s_axil_awaddr),
      .s_axil_awvalid    (s_axil_awvalid),
      .s_axil_awready    (s_axil_awready),
      .s_axil_wdata      (s_axil_wdata),
      .s_axil_wstrb      (s_axil_wstrb),
      .s_axil_wvalid     (s_axil_wvalid),
      .s_axil_wready     (s_axil_wready),
      .s_axil_bresp      (s_axil_bresp),
      .s_axil_bvalid     (s_axil_bvalid),
      .s_axil_bready     (s_axil_bready),
      // read address / data
      .s_axil_araddr     (s_axil_araddr),
      .s_axil_arvalid    (s_axil_arvalid),
      .s_axil_arready    (s_axil_arready),
      .s_axil_rdata      (s_axil_rdata),
      .s_axil_rresp      (s_axil_rresp),
      .s_axil_rvalid     (s_axil_rvalid),
      .s_axil_rready     (s_axil_rready),
      // register file
      .read_status_reg   (read_status_reg),
      .config_status_reg (config_status_reg));

   // -------------------------------------------------------------------------
   // Memory-to-memory copy engine
   // -------------------------------------------------------------------------
   snix_axi_mm2mm #(
      .ADDR_WIDTH (ADDR_WIDTH),
      .DATA_WIDTH (DATA_WIDTH),
      .ID_WIDTH   (ID_WIDTH),
      .USER_WIDTH (USER_WIDTH),
      .FIFO_DEPTH (FIFO_DEPTH))
   axi_mm2mm (
      .clk               (clk),
      .rst_n             (rst_n),
      // control
      .ctrl_start        (ctrl_start),
      .ctrl_stop         (ctrl_stop),
      .ctrl_src_addr     (ctrl_src_addr),
      .ctrl_dst_addr     (ctrl_dst_addr),
      .ctrl_len          (ctrl_len),
      .ctrl_size         (ctrl_size),
      .ctrl_transfer_len (ctrl_transfer_len),
      .ctrl_done         (ctrl_done),
      // AW
      .mm2mm_awid        (mm2mm_awid),
      .mm2mm_awaddr      (mm2mm_awaddr),
      .mm2mm_awlen       (mm2mm_awlen),
      .mm2mm_awsize      (mm2mm_awsize),
      .mm2mm_awburst     (mm2mm_awburst),
      .mm2mm_awlock      (mm2mm_awlock),
      .mm2mm_awcache     (mm2mm_awcache),
      .mm2mm_awprot      (mm2mm_awprot),
      .mm2mm_awqos       (mm2mm_awqos),
      .mm2mm_awuser      (mm2mm_awuser),
      .mm2mm_awvalid     (mm2mm_awvalid),
      .mm2mm_awready     (mm2mm_awready),
      // W
      .mm2mm_wdata       (mm2mm_wdata),
      .mm2mm_wstrb       (mm2mm_wstrb),
      .mm2mm_wlast       (mm2mm_wlast),
      .mm2mm_wuser       (mm2mm_wuser),
      .mm2mm_wvalid      (mm2mm_wvalid),
      .mm2mm_wready      (mm2mm_wready),
      // B
      .mm2mm_bid         (mm2mm_bid),
      .mm2mm_bresp       (mm2mm_bresp),
      .mm2mm_buser       (mm2mm_buser),
      .mm2mm_bvalid      (mm2mm_bvalid),
      .mm2mm_bready      (mm2mm_bready),
      // AR
      .mm2mm_arid        (mm2mm_arid),
      .mm2mm_araddr      (mm2mm_araddr),
      .mm2mm_arlen       (mm2mm_arlen),
      .mm2mm_arsize      (mm2mm_arsize),
      .mm2mm_arburst     (mm2mm_arburst),
      .mm2mm_arlock      (mm2mm_arlock),
      .mm2mm_arcache     (mm2mm_arcache),
      .mm2mm_arprot      (mm2mm_arprot),
      .mm2mm_arqos       (mm2mm_arqos),
      .mm2mm_aruser      (mm2mm_aruser),
      .mm2mm_arvalid     (mm2mm_arvalid),
      .mm2mm_arready     (mm2mm_arready),
      // R
      .mm2mm_rid         (mm2mm_rid),
      .mm2mm_rdata       (mm2mm_rdata),
      .mm2mm_rresp       (mm2mm_rresp),
      .mm2mm_rlast       (mm2mm_rlast),
      .mm2mm_ruser       (mm2mm_ruser),
      .mm2mm_rvalid      (mm2mm_rvalid),
      .mm2mm_rready      (mm2mm_rready));

endmodule : snix_axi_cdma

View File

@@ -0,0 +1,192 @@
// ============================================================================
// snix_axi_cdma_csr.sv
// AXI-Lite CSR for snix_axi_cdma (memory-to-memory central DMA)
//
// Register Map (word-addressed, 32-bit registers):
//
// Offset Index Name Bits
// 0x00 0 CDMA_CTRL [0] = start (write-1 pulse)
// [1] = stop (write-1 pulse)
// [5:3] = size (AXI AxSIZE)
// [13:6] = len (AXI AxLEN)
// [31:14] = reserved
// 0x04 1 CDMA_NUM_BYTES [31:0] = transfer_len (byte count)
// 0x08 2 CDMA_SRC_ADDR [31:0] = source base address
// 0x0C 3 CDMA_DST_ADDR [31:0] = destination base address
// 0x10 4 STATUS (read-only) [0] = done (sticky; cleared on start)
// [31:1] = reserved
// 0x14-0x1C 5-7 Reserved
//
// Differences from snix_axi_dma_csr:
// - Single CTRL register (no separate WR/RD paths)
// - Separate SRC_ADDR and DST_ADDR registers
// - STATUS[0] is write-protected; hardware sets it, start clears it
// ============================================================================
// ----------------------------------------------------------------------------
// snix_axi_cdma_csr
//
// AXI-Lite slave holding a NUM_REGS x DATA_WIDTH register file.
//   - Word 0 (CDMA_CTRL): bits [0] (start) and [1] (stop) self-clear the
//     cycle after they are observed high.
//   - Word 4 (STATUS): not writable from AXI-Lite; bit [0] is set when
//     read_status_reg[0] is high and cleared when start is observed.
//   - All other words are plain read/write storage.
//
// Address decode uses only the low $clog2(NUM_REGS) bits of the word address,
// so the register file aliases across the rest of the address space.
//
// NOTE: ADDR_WIDTH must be at least $clog2(DATA_WIDTH/8) + $clog2(NUM_REGS)
// for the index part-selects below to be in range. The default ADDR_WIDTH = 4
// does not satisfy this with the default NUM_REGS = 8 (5 bits needed), so
// instantiations should override ADDR_WIDTH (snix_axi_cdma passes 32).
// ----------------------------------------------------------------------------
module snix_axi_cdma_csr #(
    parameter int DATA_WIDTH = 32,
    parameter int ADDR_WIDTH = 4,
    parameter int NUM_REGS   = 8)
   (input  logic                                  clk,
    input  logic                                  rst_n,
    // AXI-Lite interface
    input  logic [ADDR_WIDTH-1:0]                 s_axil_awaddr,
    input  logic                                  s_axil_awvalid,
    output logic                                  s_axil_awready,
    input  logic [DATA_WIDTH-1:0]                 s_axil_wdata,
    input  logic [DATA_WIDTH/8-1:0]               s_axil_wstrb,
    input  logic                                  s_axil_wvalid,
    output logic                                  s_axil_wready,
    output logic [1:0]                            s_axil_bresp,
    output logic                                  s_axil_bvalid,
    input  logic                                  s_axil_bready,
    input  logic [ADDR_WIDTH-1:0]                 s_axil_araddr,
    input  logic                                  s_axil_arvalid,
    output logic                                  s_axil_arready,
    output logic [DATA_WIDTH-1:0]                 s_axil_rdata,
    output logic [1:0]                            s_axil_rresp,
    output logic                                  s_axil_rvalid,
    input  logic                                  s_axil_rready,
    // Status from mm2mm engine: [0] = ctrl_done (single-cycle pulse)
    input  logic [DATA_WIDTH-1:0]                 read_status_reg,
    // Register file
    output logic [NUM_REGS-1:0][DATA_WIDTH-1:0]   config_status_reg);

   localparam int AXIL_DATA_WIDTH = DATA_WIDTH;
   localparam int AXIL_ADDR_WIDTH = ADDR_WIDTH;
   localparam int ADDRLSB         = $clog2(AXIL_DATA_WIDTH / 8);  // byte -> word address shift
   localparam int REG_INDEX_WIDTH = $clog2(NUM_REGS);

   localparam int CDMA_CTRL_IDX = 0;   // 0x00: start/stop self-clearing bits
   localparam int STATUS_IDX    = 4;   // 0x10: write-protected, done latch
   // Words 1..3 (NUM_BYTES / SRC_ADDR / DST_ADDR) are plain storage, decoded
   // by the parent module.

   // -------------------------------------------------------------------------
   // Registered copies of the AW / W / AR channels (outputs of the slices)
   // -------------------------------------------------------------------------
   logic                               s_axil_awvalid_reg;
   logic [AXIL_ADDR_WIDTH-ADDRLSB-1:0] s_axil_awaddr_reg;
   logic                               s_axil_arvalid_reg;
   logic [AXIL_ADDR_WIDTH-ADDRLSB-1:0] s_axil_araddr_reg;
   logic                               s_axil_wvalid_reg;
   logic [AXIL_DATA_WIDTH-1:0]         s_axil_wdata_reg;
   logic [AXIL_DATA_WIDTH/8-1:0]       s_axil_wstrb_reg;

   logic        s_axil_write_ready, s_axil_read_ready;
   logic [31:0] awaddr_index, araddr_index;

   // A write executes when both address and data are buffered and the B
   // channel can take a new response; a read when the address is buffered
   // and the R channel can take new data.
   assign s_axil_write_ready = s_axil_awvalid_reg & s_axil_wvalid_reg &
                               (!s_axil_bvalid | s_axil_bready);
   assign s_axil_read_ready  = s_axil_arvalid_reg & (!s_axil_rvalid | s_axil_rready);

   // Register index = low REG_INDEX_WIDTH bits of the word address.
   assign awaddr_index = {{(32-REG_INDEX_WIDTH){1'b0}}, s_axil_awaddr_reg[REG_INDEX_WIDTH-1:0]};
   assign araddr_index = {{(32-REG_INDEX_WIDTH){1'b0}}, s_axil_araddr_reg[REG_INDEX_WIDTH-1:0]};

   snix_register_slice #(.DATA_WIDTH(AXIL_ADDR_WIDTH - ADDRLSB)) reg_slice_u0 (
      .clk          (clk),
      .rst_n        (rst_n),
      .s_axis_tdata (s_axil_awaddr[AXIL_ADDR_WIDTH-1:ADDRLSB]),
      .s_axis_tvalid(s_axil_awvalid),
      .s_axis_tready(s_axil_awready),
      .m_axis_tdata (s_axil_awaddr_reg),
      .m_axis_tvalid(s_axil_awvalid_reg),
      .m_axis_tready(s_axil_write_ready));

   snix_register_slice #(.DATA_WIDTH(AXIL_DATA_WIDTH + AXIL_DATA_WIDTH/8)) reg_slice_u1 (
      .clk          (clk),
      .rst_n        (rst_n),
      .s_axis_tdata ({s_axil_wdata, s_axil_wstrb}),
      .s_axis_tvalid(s_axil_wvalid),
      .s_axis_tready(s_axil_wready),
      .m_axis_tdata ({s_axil_wdata_reg, s_axil_wstrb_reg}),
      .m_axis_tvalid(s_axil_wvalid_reg),
      .m_axis_tready(s_axil_write_ready));

   snix_register_slice #(.DATA_WIDTH(AXIL_ADDR_WIDTH - ADDRLSB)) reg_slice_u2 (
      .clk          (clk),
      .rst_n        (rst_n),
      .s_axis_tdata (s_axil_araddr[AXIL_ADDR_WIDTH-1:ADDRLSB]),
      .s_axis_tvalid(s_axil_arvalid),
      .s_axis_tready(s_axil_arready),
      .m_axis_tdata (s_axil_araddr_reg),
      .m_axis_tvalid(s_axil_arvalid_reg),
      .m_axis_tready(s_axil_read_ready));

   // -------------------------------------------------------------------------
   // B channel
   // -------------------------------------------------------------------------
   always_ff @(posedge clk or negedge rst_n)
      if (!rst_n)                  s_axil_bvalid <= 1'b0;
      else if (s_axil_write_ready) s_axil_bvalid <= 1'b1;
      else if (s_axil_bready)      s_axil_bvalid <= 1'b0;

   // -------------------------------------------------------------------------
   // R channel
   // -------------------------------------------------------------------------
   always_ff @(posedge clk or negedge rst_n)
      if (!rst_n)                 s_axil_rvalid <= 1'b0;
      else if (s_axil_read_ready) s_axil_rvalid <= 1'b1;
      else if (s_axil_rready)     s_axil_rvalid <= 1'b0;

   // -------------------------------------------------------------------------
   // Control / status aliases (combinational views of the register file)
   // -------------------------------------------------------------------------
   logic ctrl_start, ctrl_stop, ctrl_done;
   logic csr_write;

   assign ctrl_start = config_status_reg[CDMA_CTRL_IDX][0];
   assign ctrl_stop  = config_status_reg[CDMA_CTRL_IDX][1];
   assign ctrl_done  = read_status_reg[0];

   // An AXI-Lite write that actually lands in the register file.
   assign csr_write = s_axil_write_ready &&
                      awaddr_index < NUM_REGS &&
                      awaddr_index != STATUS_IDX;     // STATUS is read-only

   // -------------------------------------------------------------------------
   // Register file
   //
   // Hardware-driven updates run on EVERY cycle, independent of bus activity.
   // (Previously they were skipped in any cycle that carried an AXI-Lite
   // write, so a single-cycle ctrl_done pulse coinciding with a write was
   // lost and STATUS.done never set.)
   //
   // Non-blocking assignments later in program order win, giving this
   // priority for a bit touched by several sources in the same cycle:
   //   lowest : pulse-clear of start/stop, STATUS clear on start
   //            STATUS set on ctrl_done (beats the clear)
   //   highest: AXI-Lite write, strobed bytes only (STATUS_IDX excluded)
   // -------------------------------------------------------------------------
   always_ff @(posedge clk or negedge rst_n) begin
      if (!rst_n) begin
         config_status_reg <= '0;
      end else begin
         // start / stop are one-cycle strobes; start also clears sticky done
         if (ctrl_start) begin
            config_status_reg[CDMA_CTRL_IDX][0] <= 1'b0;
            config_status_reg[STATUS_IDX][0]    <= 1'b0;
         end
         if (ctrl_stop)
            config_status_reg[CDMA_CTRL_IDX][1] <= 1'b0;

         // sticky done latch
         if (ctrl_done)
            config_status_reg[STATUS_IDX][0] <= 1'b1;

         // software write: only strobed bytes are modified
         if (csr_write) begin
            for (int i = 0; i < AXIL_DATA_WIDTH/8; i++) begin
               if (s_axil_wstrb_reg[i])
                  config_status_reg[awaddr_index][8*i +: 8] <= s_axil_wdata_reg[8*i +: 8];
            end
         end
      end
   end

   // -------------------------------------------------------------------------
   // Read data mux (registered; holds its value when no read executes)
   // -------------------------------------------------------------------------
   always_ff @(posedge clk or negedge rst_n) begin
      if (!rst_n)
         s_axil_rdata <= '0;
      else if (s_axil_read_ready && araddr_index < NUM_REGS)
         s_axil_rdata <= config_status_reg[araddr_index];
   end

   // Every access responds OKAY.
   assign s_axil_bresp = 2'b00;
   assign s_axil_rresp = 2'b00;

endmodule : snix_axi_cdma_csr

View File

@@ -0,0 +1,455 @@
module snix_axi_mm2mm #(parameter int ADDR_WIDTH = 32,
parameter int DATA_WIDTH = 64,
parameter int ID_WIDTH = 4,
parameter int USER_WIDTH = 1,
parameter int FIFO_DEPTH = 16)
(// Global signals
input logic clk,
input logic rst_n,
// Control interface
input logic ctrl_start,
input logic ctrl_stop,
input logic [ADDR_WIDTH-1:0] ctrl_src_addr,
input logic [ADDR_WIDTH-1:0] ctrl_dst_addr,
input logic [7:0] ctrl_len,
input logic [2:0] ctrl_size,
input logic [31:0] ctrl_transfer_len,
output logic ctrl_done,
// AW Channel
output logic [ID_WIDTH-1:0] mm2mm_awid,
output logic [ADDR_WIDTH-1:0] mm2mm_awaddr,
output logic [7:0] mm2mm_awlen,
output logic [2:0] mm2mm_awsize,
output logic [1:0] mm2mm_awburst,
output logic mm2mm_awlock,
output logic [3:0] mm2mm_awcache,
output logic [2:0] mm2mm_awprot,
output logic [3:0] mm2mm_awqos,
output logic [USER_WIDTH-1:0] mm2mm_awuser,
output logic mm2mm_awvalid,
input logic mm2mm_awready,
// W Channel
output logic [DATA_WIDTH-1:0] mm2mm_wdata,
output logic [DATA_WIDTH/8-1:0] mm2mm_wstrb,
output logic mm2mm_wlast,
output logic [USER_WIDTH-1:0] mm2mm_wuser,
output logic mm2mm_wvalid,
input logic mm2mm_wready,
// B Channel
input logic [ID_WIDTH-1:0] mm2mm_bid,
input logic [1:0] mm2mm_bresp,
input logic [USER_WIDTH-1:0] mm2mm_buser,
input logic mm2mm_bvalid,
output logic mm2mm_bready,
// AR Channel
output logic [ID_WIDTH-1:0] mm2mm_arid,
output logic [ADDR_WIDTH-1:0] mm2mm_araddr,
output logic [7:0] mm2mm_arlen,
output logic [2:0] mm2mm_arsize,
output logic [1:0] mm2mm_arburst,
output logic mm2mm_arlock,
output logic [3:0] mm2mm_arcache,
output logic [2:0] mm2mm_arprot,
output logic [3:0] mm2mm_arqos,
output logic [USER_WIDTH-1:0] mm2mm_aruser,
output logic mm2mm_arvalid,
input logic mm2mm_arready,
// R Channel
input logic [ID_WIDTH-1:0] mm2mm_rid,
input logic [DATA_WIDTH-1:0] mm2mm_rdata,
input logic [1:0] mm2mm_rresp,
input logic mm2mm_rlast,
input logic [USER_WIDTH-1:0] mm2mm_ruser,
input logic mm2mm_rvalid,
output logic mm2mm_rready
);
// -------------------------------------------------------------------------
// Local parameters
// -------------------------------------------------------------------------
localparam int STRB_WIDTH = DATA_WIDTH / 8;
localparam int STRB_IDX_WIDTH = $clog2(STRB_WIDTH) + 1; // +1 to hold full-width value
// -------------------------------------------------------------------------
// Start / stop edge detection
// -------------------------------------------------------------------------
logic ctrl_start_r, wr_start_edge;
logic ctrl_stop_r, wr_stop_edge;
always_ff @(posedge clk or negedge rst_n)
if (!rst_n) begin
ctrl_start_r <= 1'b0;
ctrl_stop_r <= 1'b0;
end else begin
ctrl_start_r <= ctrl_start;
ctrl_stop_r <= ctrl_stop;
end
assign wr_start_edge = ctrl_start & ~ctrl_start_r;
assign wr_stop_edge = ctrl_stop & ~ctrl_stop_r;
// -------------------------------------------------------------------------
// Abort latch — set on stop edge, cleared on start edge
// -------------------------------------------------------------------------
logic wr_abort;
always_ff @(posedge clk or negedge rst_n)
if (!rst_n) wr_abort <= 1'b0;
else if (wr_stop_edge) wr_abort <= 1'b1;
else if (wr_start_edge) wr_abort <= 1'b0;
// -------------------------------------------------------------------------
// FSM
// -------------------------------------------------------------------------
typedef enum logic [2:0] {IDLE, PREP1, PREP2, AR, READ, AW, WRITE, WAIT_BRESP} state_t;
state_t state, next_state;
logic transfer_done;
always_ff @(posedge clk or negedge rst_n)
if (!rst_n) state <= IDLE;
else state <= next_state;
always_comb begin
next_state = state; // default holds state; prevents latches
case (state)
IDLE: begin
next_state = (ctrl_start && !wr_abort) ? PREP1 : IDLE;
end
PREP1: begin
next_state = wr_abort ? IDLE : PREP2;
end
PREP2: begin
next_state = AR;
end
AR: begin
if (wr_abort)
next_state = IDLE;
else
next_state = (mm2mm_arvalid && mm2mm_arready) ? READ : AR;
end
READ: begin
next_state = (mm2mm_rvalid && mm2mm_rready && mm2mm_rlast) ? AW : READ;
end
AW: begin
if (wr_abort)
next_state = IDLE;
else
next_state = (mm2mm_awvalid && mm2mm_awready) ? WRITE : AW;
end
WRITE: begin
next_state = (mm2mm_wvalid && mm2mm_wready && mm2mm_wlast) ? WAIT_BRESP : WRITE;
end
WAIT_BRESP: begin
if (mm2mm_bvalid && mm2mm_bready) begin
if (wr_abort || transfer_done)
next_state = IDLE;
else
next_state = PREP1;
end
end
default: ;
endcase
end
// -------------------------------------------------------------------------
// 4K boundary & burst-length computation — pipelined across PREP1 / PREP2
//
// Identical two-stage pipeline to s2mm / mm2s. The src address is used
// for the 4K check; burst_actual_bytes is applied to both pointers so
// they stay in lock-step.
//
// PREP1 [Stage 1]: max_len, next_size, src_axi_addr (all regs)
// → next_bytes → cross_4k → bytes_to_4k
// → register bytes_to_4k_r
//
// PREP2 [Stage 2]: bytes_to_4k_r, pending_bytes, next_size (all regs)
// → num_bytes_comb → next_len_o
// → register next_arlen / next_awlen | burst_actual_bytes
// -------------------------------------------------------------------------
logic [7:0] max_len;
logic [2:0] next_size;
logic [ADDR_WIDTH-1:0] src_axi_addr;
logic [ADDR_WIDTH-1:0] dst_axi_addr;
logic [31:0] pending_bytes;
// Stage 1 wires
logic [14:0] next_bytes;
logic cross_4k;
logic [14:0] bytes_to_4k;
// Stage 1 → Stage 2 pipeline register
logic [14:0] bytes_to_4k_r;
// Stage 2 wires
logic [14:0] num_bytes_comb;
logic [7:0] next_len_o;
// Stage 1 combinatorial
assign next_bytes = compute_num_bytes(max_len, next_size);
assign cross_4k = ({1'b0, next_bytes} + {4'b0, src_axi_addr[11:0]}) >= 16'd4096;
assign bytes_to_4k = cross_4k ? (15'd4096 - {3'b0, src_axi_addr[11:0]}) : next_bytes;
// Stage 2 combinatorial
assign num_bytes_comb = ({{17{1'b0}}, bytes_to_4k_r} <= pending_bytes)
? bytes_to_4k_r
: pending_bytes[14:0];
assign next_len_o = compute_next_len(num_bytes_comb, next_size);
// -------------------------------------------------------------------------
// Transfer-state registers
// -------------------------------------------------------------------------
logic [31:0] transfer_len;
logic [31:0] copied_bytes;
logic [7:0] next_arlen;
logic [7:0] next_awlen;
logic [14:0] burst_actual_bytes;
always_ff @(posedge clk or negedge rst_n)
if (!rst_n) begin
src_axi_addr <= '0;
dst_axi_addr <= '0;
next_arlen <= '0;
next_awlen <= '0;
max_len <= '0;
next_size <= '0;
copied_bytes <= '0;
transfer_len <= '0;
bytes_to_4k_r <= '0;
burst_actual_bytes <= '0;
end else if (wr_start_edge) begin
src_axi_addr <= ctrl_src_addr;
dst_axi_addr <= ctrl_dst_addr;
next_arlen <= ctrl_len;
next_awlen <= ctrl_len;
max_len <= ctrl_len;
next_size <= ctrl_size;
copied_bytes <= '0;
transfer_len <= ctrl_transfer_len;
burst_actual_bytes <= '0;
end else if (state == PREP1) begin
bytes_to_4k_r <= bytes_to_4k;
end else if (state == PREP2) begin
next_arlen <= next_len_o;
next_awlen <= next_len_o;
burst_actual_bytes <= num_bytes_comb;
end else if (state == AR && mm2mm_arready) begin
src_axi_addr <= src_axi_addr + {{(ADDR_WIDTH-15){1'b0}}, burst_actual_bytes};
end else if (state == AW && mm2mm_awready) begin
dst_axi_addr <= dst_axi_addr + {{(ADDR_WIDTH-15){1'b0}}, burst_actual_bytes};
copied_bytes <= copied_bytes + {17'b0, burst_actual_bytes};
end
// pending_bytes — decremented in AR state after burst_actual_bytes is registered.
// TIMING FIX: reg(burst_actual_bytes) → subtractor → reg(pending_bytes)
always_ff @(posedge clk or negedge rst_n)
if (!rst_n)
pending_bytes <= '0;
else if (wr_start_edge)
pending_bytes <= ctrl_transfer_len;
else if (state == AR && mm2mm_arready)
pending_bytes <= pending_bytes - {17'b0, burst_actual_bytes};
// -------------------------------------------------------------------------
// Transfer-done flag
// -------------------------------------------------------------------------
always_ff @(posedge clk or negedge rst_n)
if (!rst_n)
transfer_done <= 1'b0;
else if (wr_start_edge || state == IDLE || state == AR ||
state == PREP1 || state == PREP2)
transfer_done <= 1'b0;
else
transfer_done <= (copied_bytes == transfer_len) && (copied_bytes != '0);
// -------------------------------------------------------------------------
// ctrl_done — single-cycle pulse when FSM transitions into IDLE
// -------------------------------------------------------------------------
always_ff @(posedge clk or negedge rst_n)
if (!rst_n) ctrl_done <= 1'b0;
else ctrl_done <= (next_state == IDLE) && (state != IDLE);
// -------------------------------------------------------------------------
// Beat counter — drives wlast
// -------------------------------------------------------------------------
logic [7:0] beat_cnt;
always_ff @(posedge clk or negedge rst_n)
if (!rst_n)
beat_cnt <= '0;
else if (state == AW && mm2mm_awready)
beat_cnt <= '0;
else if (mm2mm_wvalid && mm2mm_wready)
beat_cnt <= beat_cnt + 1'b1;
// -------------------------------------------------------------------------
// Write strobe generation for partial last beat
//
// partial_strb_mask uses a per-bit comparator loop rather than a hardcoded
// case statement, making it correct for any DATA_WIDTH (including 1024-bit).
// Synthesises as a parallel comparator tree with no barrel shifter.
// -------------------------------------------------------------------------
logic [14:0] bytes_in_burst;
logic [STRB_WIDTH-1:0] wstrb_mask;
logic [STRB_IDX_WIDTH-1:0] valid_bytes;
always_ff @(posedge clk or negedge rst_n)
if (!rst_n)
bytes_in_burst <= '0;
else if (state == AW && mm2mm_awready)
bytes_in_burst <= burst_actual_bytes;
else if (mm2mm_wvalid && mm2mm_wready)
bytes_in_burst <= (bytes_in_burst > STRB_WIDTH[14:0])
? (bytes_in_burst - STRB_WIDTH[14:0])
: '0;
assign valid_bytes = (bytes_in_burst <= STRB_WIDTH[14:0])
? bytes_in_burst[STRB_IDX_WIDTH-1:0]
: STRB_WIDTH[STRB_IDX_WIDTH-1:0];
logic [STRB_WIDTH-1:0] partial_strb_mask;
always_comb begin
for (int i = 0; i < STRB_WIDTH; i++)
partial_strb_mask[i] = (i < valid_bytes);
end
always_comb begin
if (mm2mm_wlast && (bytes_in_burst < STRB_WIDTH[14:0]) && (bytes_in_burst != '0))
wstrb_mask = partial_strb_mask;
else if (bytes_in_burst == '0)
wstrb_mask = '0;
else
wstrb_mask = {STRB_WIDTH{1'b1}};
end
// -------------------------------------------------------------------------
// FIFO — bridges R channel (read path) to W channel (write path)
//
// R-channel data is accepted only in READ state to prevent beats arriving
// before READ is entered from being silently dropped (same fix as mm2s).
// The write side drains only in WRITE state for symmetric gating.
// -------------------------------------------------------------------------
logic [DATA_WIDTH-1:0] fifo_tdata;
logic fifo_tvalid;
logic fifo_tlast;
logic fifo_tuser;
logic fifo_s_tready;
assign mm2mm_rready = fifo_s_tready && (state == READ);
snix_axis_fifo #(
.DATA_WIDTH(DATA_WIDTH),
.FIFO_DEPTH(FIFO_DEPTH)
) axis_fifo_u0 (
.clk (clk),
.rst_n (rst_n),
.s_axis_tdata (mm2mm_rdata),
.s_axis_tlast (mm2mm_rlast),
.s_axis_tuser (1'b0),
.s_axis_tvalid (mm2mm_rvalid && (state == READ)),
.s_axis_tready (fifo_s_tready),
.m_axis_tdata (fifo_tdata),
.m_axis_tlast (fifo_tlast),
.m_axis_tuser (fifo_tuser),
.m_axis_tvalid (fifo_tvalid),
.m_axis_tready (mm2mm_wready && (state == WRITE))
);
// -------------------------------------------------------------------------
// AXI output assignments — AR channel
// -------------------------------------------------------------------------
assign mm2mm_arvalid = (state == AR);
assign mm2mm_araddr = src_axi_addr;
assign mm2mm_arlen = next_arlen;
assign mm2mm_arsize = next_size;
assign mm2mm_arburst = 2'b01;
assign mm2mm_arlock = 1'b0;
assign mm2mm_arcache = 4'b0;
assign mm2mm_arprot = 3'b0;
assign mm2mm_arqos = 4'b0;
assign mm2mm_arid = '0;
assign mm2mm_aruser = '0;
// -------------------------------------------------------------------------
// AXI output assignments — AW channel
// -------------------------------------------------------------------------
assign mm2mm_awvalid = (state == AW);
assign mm2mm_awaddr = dst_axi_addr;
assign mm2mm_awlen = next_awlen;
assign mm2mm_awsize = next_size;
assign mm2mm_awburst = 2'b01;
assign mm2mm_awlock = 1'b0;
assign mm2mm_awcache = 4'b0;
assign mm2mm_awprot = 3'b0;
assign mm2mm_awqos = 4'b0;
assign mm2mm_awid = '0;
assign mm2mm_awuser = '0;
// -------------------------------------------------------------------------
// AXI output assignments — W channel
// -------------------------------------------------------------------------
generate
for (genvar i = 0; i < STRB_WIDTH; i++) begin : gen_wdata_mask
assign mm2mm_wdata[i*8 +: 8] = wstrb_mask[i] ? fifo_tdata[i*8 +: 8] : 8'h00;
end
endgenerate
assign mm2mm_wvalid = (state == WRITE) && fifo_tvalid;
assign mm2mm_wlast = (beat_cnt == next_awlen) && mm2mm_wvalid;
assign mm2mm_wstrb = wstrb_mask;
assign mm2mm_wuser = '0;
// -------------------------------------------------------------------------
// AXI output assignments — B channel
// -------------------------------------------------------------------------
assign mm2mm_bready = (state == WAIT_BRESP);
// -------------------------------------------------------------------------
// Functions (identical to s2mm / mm2s)
// -------------------------------------------------------------------------
// Returns total byte count for an AXI burst: (len+1) << size.
function automatic [14:0] compute_num_bytes(
input logic [7:0] len,
input logic [2:0] size
);
case (size)
3'b000: compute_num_bytes = {7'b0, len + 1'b1};
3'b001: compute_num_bytes = {6'b0, len + 1'b1, 1'b0};
3'b010: compute_num_bytes = {5'b0, len + 1'b1, 2'b0};
3'b011: compute_num_bytes = {4'b0, len + 1'b1, 3'b0};
3'b100: compute_num_bytes = {3'b0, len + 1'b1, 4'b0};
3'b101: compute_num_bytes = {2'b0, len + 1'b1, 5'b0};
3'b110: compute_num_bytes = {1'b0, len + 1'b1, 6'b0};
3'b111: compute_num_bytes = { len + 1'b1, 7'b0};
endcase
endfunction
// Returns arlen/awlen = ceil(bytes / beat_size) - 1.
// Converts a byte count into an AxLEN value: ceil(bytes_i / 2**size) - 1.
//
//   bytes_i : number of bytes to move in the burst
//   size    : AxSIZE encoding, i.e. log2(bytes per beat) (0..7)
//
// A zero byte count yields 0. Only the low 8 bits of the beat count are used
// for the result, matching the 8-bit AxLEN field.
function automatic [7:0] compute_next_len(
    input logic [14:0] bytes_i,
    input logic [2:0]  size
);
    logic [14:0] round_bias;   // (bytes per beat) - 1, used to round up
    logic [14:0] beat_total;   // ceil(bytes_i / bytes per beat)

    round_bias = (15'd1 << size) - 15'd1;
    beat_total = (bytes_i + round_bias) >> size;

    if (beat_total == 15'd0)
        compute_next_len = 8'd0;
    else
        compute_next_len = beat_total[7:0] - 8'd1;
endfunction
endmodule : snix_axi_mm2mm

View File

@@ -0,0 +1,113 @@
// ============================================================================
// snix_axis_fifo.sv
//
// AXI4-Stream FIFO with two operating modes selected by FRAME_FIFO:
//
// FRAME_FIFO = 0 (default) — streaming / cut-through
// Output valid asserts as soon as any data enters the FIFO.
// Downstream can begin reading before the full packet arrives.
//
// FRAME_FIFO = 1 — store-and-forward
// Output valid is suppressed until the complete packet (through tlast)
// has been written into the FIFO. Guarantees the downstream never
// sees a stalled mid-packet transfer.
// ============================================================================
module snix_axis_fifo #(
    parameter int DATA_WIDTH = 32,
    parameter int USER_WIDTH = 1,
    parameter int FIFO_DEPTH = 16,
    parameter bit FRAME_FIFO = 0  // 0 = streaming, 1 = store-and-forward
) (
    input  logic                  clk,
    input  logic                  rst_n,
    // AXI4-Stream slave (input)
    input  logic [DATA_WIDTH-1:0] s_axis_tdata,
    input  logic [USER_WIDTH-1:0] s_axis_tuser,
    input  logic                  s_axis_tvalid,
    input  logic                  s_axis_tlast,
    output logic                  s_axis_tready,
    // AXI4-Stream master (output)
    output logic [DATA_WIDTH-1:0] m_axis_tdata,
    output logic [USER_WIDTH-1:0] m_axis_tuser,
    output logic                  m_axis_tvalid,
    output logic                  m_axis_tlast,
    input  logic                  m_axis_tready
);
    localparam int AWIDTH = $clog2(FIFO_DEPTH);

    // Internal FIFO signals
    logic [DATA_WIDTH-1:0] fifo_tdata;
    logic [USER_WIDTH-1:0] fifo_tuser;
    logic                  fifo_tlast;
    logic                  wr_en, rd_en;
    logic                  fifo_full, fifo_empty;
    logic [AWIDTH:0]       fill_cnt;   // exposed by the FIFO, not used here

    // Handshake strobes: a beat is written when the slave side handshakes,
    // and popped when the master side handshakes.
    assign s_axis_tready = ~fifo_full;
    assign wr_en         = s_axis_tvalid & s_axis_tready;
    assign rd_en         = m_axis_tvalid & m_axis_tready;

    // Output mapping
    assign m_axis_tdata = fifo_tdata;
    assign m_axis_tuser = fifo_tuser;
    assign m_axis_tlast = fifo_tlast;

    // -------------------------------------------------------------------------
    // Output valid generation
    //
    // FRAME_FIFO = 1 : store-and-forward
    //   A counter tracks how many complete packets (beats with tlast) are
    //   held in the FIFO: +1 when a tlast beat is written, -1 when a tlast
    //   beat is read. Output valid is gated on that counter being non-zero.
    //
    //   A two-state IDLE/STREAM machine is not sufficient here: a tlast that
    //   is written while an earlier packet is still draining would go
    //   unnoticed, leaving the later packet stuck in the FIFO until yet
    //   another tlast arrives.
    //
    //   NOTE: a packet longer than the FIFO capacity can never complete
    //   inside the FIFO, so it will stall the stream in this mode; size
    //   FIFO_DEPTH for the largest expected packet.
    //
    // FRAME_FIFO = 0 : cut-through, valid tracks fifo_empty directly
    // -------------------------------------------------------------------------
    generate
        if (FRAME_FIFO) begin : gen_frame_mode
            // Worst case is one single-beat packet per FIFO entry, so the
            // counter needs the same range as the fill counter.
            logic [AWIDTH:0] pkt_cnt;

            wire last_in  = wr_en & s_axis_tlast;  // complete packet stored
            wire last_out = rd_en & fifo_tlast;    // complete packet drained

            always_ff @(posedge clk or negedge rst_n)
                if (!rst_n)
                    pkt_cnt <= '0;
                else
                    case ({last_in, last_out})
                        2'b10:   pkt_cnt <= pkt_cnt + 1'b1;
                        2'b01:   pkt_cnt <= pkt_cnt - 1'b1;
                        default: ;  // none or both: count unchanged
                    endcase

            // rd_en is derived from m_axis_tvalid, so last_out can only fire
            // while pkt_cnt is non-zero; the counter cannot underflow.
            assign m_axis_tvalid = ~fifo_empty & (pkt_cnt != '0);
        end else begin : gen_stream_mode
            assign m_axis_tvalid = ~fifo_empty;
        end
    endgenerate

    // -------------------------------------------------------------------------
    // Sync FIFO instance — packs {tdata, tuser, tlast} into one word
    // -------------------------------------------------------------------------
    snix_sync_fifo #(
        .DATA_WIDTH (DATA_WIDTH + USER_WIDTH + 1),
        .FIFO_DEPTH (FIFO_DEPTH)
    ) u_fifo (
        .clk        (clk),
        .rst_n      (rst_n),
        .data_i     ({s_axis_tdata, s_axis_tuser, s_axis_tlast}),
        .wr_en      (wr_en),
        .rd_en      (rd_en),
        .data_o     ({fifo_tdata, fifo_tuser, fifo_tlast}),
        .fifo_full  (fifo_full),
        .fifo_empty (fifo_empty),
        .fill_cnt   (fill_cnt)
    );
endmodule : snix_axis_fifo

View File

@@ -0,0 +1,79 @@
// ============================================================================
// snix_register_slice.sv
//
// Generic ready/valid register slice (skid buffer).
//
// Same architecture as snix_axis_register but without tuser/tlast —
// used internally by the DMA and CDMA engines for pipeline decoupling
// on raw data paths.
// ============================================================================
module snix_register_slice #(
    parameter DATA_WIDTH = 32
) (
    input  logic                  clk,
    input  logic                  rst_n,
    // Input interface
    input  logic [DATA_WIDTH-1:0] s_axis_tdata,
    input  logic                  s_axis_tvalid,
    output logic                  s_axis_tready,
    // Output interface
    output logic [DATA_WIDTH-1:0] m_axis_tdata,
    output logic                  m_axis_tvalid,
    input  logic                  m_axis_tready
);
    // Holding (skid) stage: catches the one beat that is accepted on the
    // input in a cycle where the output register cannot take it.
    logic                  hold_valid;
    logic [DATA_WIDTH-1:0] hold_data;

    // accept      : a beat is transferred on the input side this cycle
    // out_blocked : the output register holds a beat the consumer is not taking
    logic accept;
    logic out_blocked;

    assign accept      = s_axis_tvalid & s_axis_tready;
    assign out_blocked = m_axis_tvalid & ~m_axis_tready;

    // The input is ready whenever the holding stage is free.
    assign s_axis_tready = ~hold_valid;

    // -----------------------------------------------------------------
    // Holding stage
    // -----------------------------------------------------------------
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            hold_valid <= 1'b0;
            hold_data  <= '0;
        end else begin
            // Fill when a beat arrives while the output is blocked; drain
            // as soon as the consumer is ready again.
            if (accept & out_blocked)
                hold_valid <= 1'b1;
            else if (m_axis_tready)
                hold_valid <= 1'b0;

            // Data is captured on every accepted beat; it is only
            // meaningful while hold_valid is set.
            if (accept)
                hold_data <= s_axis_tdata;
        end
    end

    // -----------------------------------------------------------------
    // Output register — reloads whenever it is empty or being consumed;
    // the holding stage takes priority over fresh input data
    // -----------------------------------------------------------------
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            m_axis_tvalid <= 1'b0;
            m_axis_tdata  <= '0;
        end else if (~m_axis_tvalid | m_axis_tready) begin
            m_axis_tvalid <= s_axis_tvalid | hold_valid;

            if (hold_valid)
                m_axis_tdata <= hold_data;
            else if (s_axis_tvalid)
                m_axis_tdata <= s_axis_tdata;
        end
    end
endmodule : snix_register_slice

View File

@@ -0,0 +1,130 @@
// ============================================================================
// snix_sync_fifo.sv
//
// Synchronous FIFO with first-word-fall-through (FWFT) bypass.
//
// Architecture:
// - Dual-pointer circular buffer; full is flagged by the MSB of the fill counter
// - Write-through bypass path: when the FIFO is empty and a write arrives,
// data is forwarded directly to the output without a one-cycle read delay
// - Block-RAM inference hint for FPGA targets
// ============================================================================
module snix_sync_fifo #(
    parameter int DATA_WIDTH = 32,
    parameter int FIFO_DEPTH = 16
) (
    input  logic                        clk,
    input  logic                        rst_n,
    // Write port
    input  logic [DATA_WIDTH-1:0]       data_i,
    input  logic                        wr_en,
    // Read port
    input  logic                        rd_en,
    output logic [DATA_WIDTH-1:0]       data_o,
    // Status
    output logic [$clog2(FIFO_DEPTH):0] fill_cnt,
    output logic                        fifo_full,
    output logic                        fifo_empty
);
    localparam int AWIDTH = $clog2(FIFO_DEPTH);

    // Physical storage depth. The pointers wrap at 2**AWIDTH and fifo_full
    // asserts at a fill level of 2**AWIDTH, so the array must span that whole
    // range. For power-of-two FIFO_DEPTH this equals FIFO_DEPTH; for any
    // other value the capacity is rounded up, where an array of exactly
    // FIFO_DEPTH entries would be indexed out of bounds.
    // FIFO_DEPTH must be at least 2 (AWIDTH >= 1) for the address slices
    // below to be well-formed.
    localparam int MEM_DEPTH = 1 << AWIDTH;

    // Storage
    (* ram_style="block" *)
    logic [DATA_WIDTH-1:0] mem [0:MEM_DEPTH-1];

    // Pointers. Only bits [AWIDTH-1:0] address the memory; full and empty
    // are derived from fill_cnt, not from the pointers' extra MSB.
    logic [AWIDTH:0] wptr, rptr;

    // Bypass (FWFT) path
    logic [DATA_WIDTH-1:0] fwd_data;
    logic                  fwd_valid;

    // Qualified strobes: writes to a full FIFO and reads from an empty FIFO
    // are ignored.
    wire do_wr = wr_en & ~fifo_full;
    wire do_rd = rd_en & ~fifo_empty;

    // -----------------------------------------------------------------
    // Write pointer
    // -----------------------------------------------------------------
    always_ff @(posedge clk or negedge rst_n)
        if (!rst_n)
            wptr <= '0;
        else if (do_wr)
            wptr <= wptr + 1'b1;

    // -----------------------------------------------------------------
    // Read pointer
    // -----------------------------------------------------------------
    always_ff @(posedge clk or negedge rst_n)
        if (!rst_n)
            rptr <= '0;
        else if (do_rd)
            rptr <= rptr + 1'b1;

    // -----------------------------------------------------------------
    // Memory write
    // -----------------------------------------------------------------
    always_ff @(posedge clk)
        if (do_wr)
            mem[wptr[AWIDTH-1:0]] <= data_i;

    // -----------------------------------------------------------------
    // Memory read — pre-fetch next location for FWFT
    //
    // On a read, the entry *after* the current head is fetched so that it is
    // already on mem_rd when it becomes the new head. The index addition is
    // AWIDTH bits wide and therefore wraps together with the pointer.
    // -----------------------------------------------------------------
    logic [DATA_WIDTH-1:0] mem_rd;
    always_ff @(posedge clk)
        if (do_rd)
            mem_rd <= mem[rptr[AWIDTH-1:0] + 1'b1];

    // -----------------------------------------------------------------
    // Fill counter and empty flag
    // -----------------------------------------------------------------
    always_ff @(posedge clk or negedge rst_n)
        if (!rst_n) begin
            fifo_empty <= 1'b1;
            fill_cnt   <= '0;
        end else begin
            case ({do_wr, do_rd})
                2'b10: begin
                    fill_cnt   <= fill_cnt + 1'b1;
                    fifo_empty <= 1'b0;
                end
                2'b01: begin
                    fill_cnt   <= fill_cnt - 1'b1;
                    // Reading the last stored entry empties the FIFO.
                    fifo_empty <= (fill_cnt <= 1);
                end
                default: ;  // idle, or simultaneous read and write: level unchanged
            endcase
        end

    // Full flag — MSB of fill counter (set once the level reaches 2**AWIDTH)
    assign fifo_full = fill_cnt[AWIDTH];

    // -----------------------------------------------------------------
    // FWFT bypass — forward write data directly when FIFO is empty
    //
    // The bypass register becomes the output whenever the word being written
    // will be the new head: either the FIFO is empty, or the single stored
    // entry is being read in the same cycle. In the latter case the memory
    // pre-fetch cannot yet see the word that is being written.
    // -----------------------------------------------------------------
    always_ff @(posedge clk or negedge rst_n)
        if (!rst_n)
            fwd_valid <= 1'b0;
        else if (fifo_empty || rd_en) begin
            if (!wr_en)
                fwd_valid <= 1'b0;
            else if (fifo_empty || (rd_en && fill_cnt == 1))
                fwd_valid <= 1'b1;
            else
                fwd_valid <= 1'b0;
        end

    always_ff @(posedge clk)
        if (fifo_empty || rd_en)
            fwd_data <= data_i;

    // Output mux — bypass path has priority
    assign data_o = fwd_valid ? fwd_data : mem_rd;
endmodule : snix_sync_fifo