From 8bad8c21e5249dbf3d087f37bf7a35c75beaf58a Mon Sep 17 00:00:00 2001
From: FallenSigh
Date: Sun, 12 Apr 2026 22:25:00 +0800
Subject: [PATCH] feat(ip): add AXI CDMA IP

---
 rtl/ip/cdma/snix_async_fifo.sv     | 112 +++++++
 rtl/ip/cdma/snix_axi_cdma.sv       | 219 ++++++++++++++
 rtl/ip/cdma/snix_axi_cdma_csr.sv   | 192 ++++++++++++
 rtl/ip/cdma/snix_axi_mm2mm.sv      | 455 +++++++++++++++++++++++++++++
 rtl/ip/cdma/snix_axis_fifo.sv      | 113 +++++++
 rtl/ip/cdma/snix_register_slice.sv |  79 +++++
 rtl/ip/cdma/snix_sync_fifo.sv      | 130 +++++++++
 7 files changed, 1300 insertions(+)
 create mode 100644 rtl/ip/cdma/snix_async_fifo.sv
 create mode 100644 rtl/ip/cdma/snix_axi_cdma.sv
 create mode 100644 rtl/ip/cdma/snix_axi_cdma_csr.sv
 create mode 100644 rtl/ip/cdma/snix_axi_mm2mm.sv
 create mode 100644 rtl/ip/cdma/snix_axis_fifo.sv
 create mode 100644 rtl/ip/cdma/snix_register_slice.sv
 create mode 100644 rtl/ip/cdma/snix_sync_fifo.sv

diff --git a/rtl/ip/cdma/snix_async_fifo.sv b/rtl/ip/cdma/snix_async_fifo.sv
new file mode 100644
index 0000000..e05597b
--- /dev/null
+++ b/rtl/ip/cdma/snix_async_fifo.sv
@@ -0,0 +1,112 @@
+// ============================================================================
+// snix_async_fifo.sv
+//
+// SystemVerilog implementation of an async FIFO. If targeting FPGAs, this
+// implementation is intended to infer BRAM.
+//
+// Largely based on ZipCPU's async FIFO work:
+// https://zipcpu.com/blog/2018/07/06/afifo.html
+//
+// Structure:
+//   - Binary read/write pointers are (ADDR_W+1) bits wide; the extra MSB
+//     distinguishes "full" from "empty" when the low bits are equal.
+//   - Each pointer is Gray-coded and carried into the other clock domain
+//     through an NFF-stage register chain.
+//   - The read side is first-word-fall-through: whenever the output register
+//     is empty (rempty) or the consumer asserts rd_en, the next word is
+//     fetched into rdata.
+//
+// Ports:
+//   wclk/wrst_n  write-domain clock and active-low async reset
+//   wr_en/wdata  write request and data; a write is accepted when !wfull
+//   wfull        write-domain full flag
+//   rclk/rrst_n  read-domain clock and active-low async reset
+//   rd_en        consume the word currently presented on rdata
+//   rdata        registered read data, meaningful while !rempty
+//   rd_n_items   registered (synchronised write pointer - read pointer)
+//   rempty       high when rdata holds no valid word
+//
+// NOTE(review): the pointers wrap at 2**$clog2(FIFO_DEPTH) while mem has
+// FIFO_DEPTH entries, so FIFO_DEPTH is presumably expected to be a power of
+// two (and at least 2) — confirm against the instantiating code.
+// ============================================================================
+module snix_async_fifo #(parameter DATA_WIDTH = 8,
+                         parameter FIFO_DEPTH = 4)
+  (input  logic                        wclk,
+   input  logic                        wrst_n,
+   input  logic                        wr_en,
+   input  logic [DATA_WIDTH-1:0]       wdata,
+   output logic                        wfull,
+
+   input  logic                        rclk,
+   input  logic                        rrst_n,
+   input  logic                        rd_en,
+   output logic [DATA_WIDTH-1:0]       rdata,
+   output logic [$clog2(FIFO_DEPTH):0] rd_n_items,
+   output logic                        rempty);
+
+  localparam ADDR_W = $clog2(FIFO_DEPTH);
+  localparam NFF    = 2;
+
+  // In Gray code, toggling binary bit ADDR_W toggles Gray bits ADDR_W and
+  // ADDR_W-1. "Full" means the binary pointers differ only in bit ADDR_W, so
+  // the Gray pointers differ in exactly these two top bits.
+  localparam logic [ADDR_W:0] FULL_FLIP = (ADDR_W+1)'(2'b11) << (ADDR_W-1);
+
+  logic [ADDR_W:0] rd_addr, next_rd_addr;
+  logic [ADDR_W:0] wr_addr, next_wr_addr;
+  logic [ADDR_W:0] rgray, wgray;
+  logic [ADDR_W:0] rd_wgray, wr_rgray, rd_wbin;
+
+  (* ASYNC_REG = "TRUE" *) logic [(ADDR_W+1)*(NFF-1)-1:0] rgray_cross, wgray_cross;
+  (* ram_style = "block" *) logic [DATA_WIDTH-1:0] mem [0:FIFO_DEPTH-1];
+
+  logic [DATA_WIDTH-1:0] lcl_rd_data;
+  logic lcl_read, lcl_rd_empty;
+  logic wr_accept;
+
+  // A write is taken only when requested and there is room.
+  assign wr_accept = wr_en && !wfull;
+
+  // ------------------------------------------------------------------
+  // Write pointer (binary + Gray), write clock domain
+  // ------------------------------------------------------------------
+  assign next_wr_addr = wr_addr + 1'b1;
+  always_ff @(posedge wclk or negedge wrst_n)
+    if(!wrst_n) begin
+      wr_addr <= (ADDR_W+1)'(0);
+      wgray   <= (ADDR_W+1)'(0);
+    end
+    else if(wr_accept) begin
+      wr_addr <= next_wr_addr;
+      wgray   <= next_wr_addr ^ (next_wr_addr>>1);
+    end
+
+  // ------------------------------------------------------------------
+  // Read pointer (binary + Gray), read clock domain
+  // ------------------------------------------------------------------
+  always_ff @(posedge rclk or negedge rrst_n)
+    if(!rrst_n) begin
+      rd_addr <= (ADDR_W+1)'(0);
+      rgray   <= (ADDR_W+1)'(0);
+    end
+    else if(lcl_read && !lcl_rd_empty) begin
+      rd_addr <= next_rd_addr;
+      rgray   <= next_rd_addr ^ (next_rd_addr>>1);
+    end
+
+
+  // Storage write. Gated by wr_accept: when the FIFO is full the slot at
+  // wr_addr still holds the oldest unread word, so an ungated write would
+  // corrupt it.
+  always_ff @(posedge wclk)
+    if(wr_accept)
+      mem[wr_addr[ADDR_W-1:0]] <= wdata;
+
+
+  assign next_rd_addr = rd_addr + 1'b1;
+  assign lcl_rd_data  = mem[rd_addr[ADDR_W-1:0]];
+
+  // Read Gray pointer -> write clock domain
+  always_ff @(posedge wclk or negedge wrst_n)
+    if(!wrst_n) begin
+      wr_rgray    <= (ADDR_W+1)'(0);
+      rgray_cross <= (ADDR_W+1)'(0);
+    end
+    else begin
+      {wr_rgray, rgray_cross} <= {rgray_cross, rgray};
+    end
+
+  // Write Gray pointer -> read clock domain, plus registered item count
+  always_ff @(posedge rclk or negedge rrst_n)
+    if(!rrst_n) begin
+      rd_wgray    <= (ADDR_W+1)'(0);
+      wgray_cross <= (ADDR_W+1)'(0);
+      rd_n_items  <= (ADDR_W+1)'(0);
+    end
+    else begin
+      {rd_wgray, wgray_cross} <= {wgray_cross, wgray};
+      rd_n_items <= (rd_wbin - rd_addr) & ((1 << (ADDR_W + 1)) - 1);
+    end
+
+  // Gray -> binary conversion of the synchronised write pointer
+  always_comb begin
+    rd_wbin[ADDR_W] = rd_wgray[ADDR_W];
+    for (int i = ADDR_W-1; i >= 0; i--) begin
+      rd_wbin[i] = rd_wbin[i+1] ^ rd_wgray[i];
+    end
+  end
+
+  // Full: synchronised read pointer equals the write pointer with the two
+  // top Gray bits inverted (binary pointers exactly 2**ADDR_W apart).
+  assign wfull        = wr_rgray == (wgray ^ FULL_FLIP);
+  assign lcl_rd_empty = rd_wgray == rgray;
+  // Fetch when the output register is empty or is being consumed.
+  assign lcl_read     = rempty || rd_en;
+
+  always_ff @(posedge rclk or negedge rrst_n)
+    if(!rrst_n) begin
+      rempty <= 1'b1;
+    end
+    else if(lcl_read) begin
+      rempty <= lcl_rd_empty;
+    end
+
+  always_ff @(posedge rclk)
+    if(lcl_read)
+      rdata <= lcl_rd_data;
+
+
+endmodule: snix_async_fifo
diff --git a/rtl/ip/cdma/snix_axi_cdma.sv b/rtl/ip/cdma/snix_axi_cdma.sv
new file mode 100644
index 0000000..c09c6d0
--- /dev/null
+++ b/rtl/ip/cdma/snix_axi_cdma.sv
@@ -0,0 +1,219 @@
+// ============================================================================
+// snix_axi_cdma.sv
+// Central DMA — memory-to-memory transfers via snix_axi_mm2mm
+//
+// Instantiates:
+// snix_axi_cdma_csr — AXI-Lite register interface
+// snix_axi_mm2mm — read-then-write AXI4 engine
+//
+// Software register map (see snix_axi_cdma_csr.sv for full detail):
+// 0x00 CDMA_CTRL [0]=start [1]=stop [5:3]=size [13:6]=len
+// 0x04 CDMA_NUM_BYTES [31:0]=transfer_len
+// 0x08 CDMA_SRC_ADDR [31:0]=source base address
+// 0x0C CDMA_DST_ADDR [31:0]=destination base address
+// 0x10 STATUS [0]=done (sticky, read-only)
+// ============================================================================
+module snix_axi_cdma #(
+ parameter int ADDR_WIDTH = 32,
+ parameter int DATA_WIDTH = 64,
+ parameter int AXIL_ADDR_WIDTH = 32,
+ parameter int AXIL_DATA_WIDTH = 32,
+ parameter int ID_WIDTH = 4,
+ parameter int USER_WIDTH = 1)
+ (// Global signals
+ input logic clk,
+ input logic rst_n,
+ // AXI-Lite
CSR interface
+ input logic [AXIL_ADDR_WIDTH-1:0] s_axil_awaddr,
+ input logic s_axil_awvalid,
+ output logic s_axil_awready,
+ input logic [AXIL_DATA_WIDTH-1:0] s_axil_wdata,
+ input logic [AXIL_DATA_WIDTH/8-1:0] s_axil_wstrb,
+ input logic s_axil_wvalid,
+ output logic s_axil_wready,
+ output logic [1:0] s_axil_bresp,
+ output logic s_axil_bvalid,
+ input logic s_axil_bready,
+ input logic [AXIL_ADDR_WIDTH-1:0] s_axil_araddr,
+ input logic s_axil_arvalid,
+ output logic s_axil_arready,
+ output logic [AXIL_DATA_WIDTH-1:0] s_axil_rdata,
+ output logic [1:0] s_axil_rresp,
+ output logic s_axil_rvalid,
+ input logic s_axil_rready,
+ // AXI4 memory port — AW channel
+ output logic [ID_WIDTH-1:0] mm2mm_awid,
+ output logic [ADDR_WIDTH-1:0] mm2mm_awaddr,
+ output logic [7:0] mm2mm_awlen,
+ output logic [2:0] mm2mm_awsize,
+ output logic [1:0] mm2mm_awburst,
+ output logic mm2mm_awlock,
+ output logic [3:0] mm2mm_awcache,
+ output logic [2:0] mm2mm_awprot,
+ output logic [3:0] mm2mm_awqos,
+ output logic [USER_WIDTH-1:0] mm2mm_awuser,
+ output logic mm2mm_awvalid,
+ input logic mm2mm_awready,
+ // W channel
+ output logic [DATA_WIDTH-1:0] mm2mm_wdata,
+ output logic [DATA_WIDTH/8-1:0] mm2mm_wstrb,
+ output logic mm2mm_wlast,
+ output logic [USER_WIDTH-1:0] mm2mm_wuser,
+ output logic mm2mm_wvalid,
+ input logic mm2mm_wready,
+ // B channel
+ input logic [ID_WIDTH-1:0] mm2mm_bid,
+ input logic [1:0] mm2mm_bresp,
+ input logic [USER_WIDTH-1:0] mm2mm_buser,
+ input logic mm2mm_bvalid,
+ output logic mm2mm_bready,
+ // AR channel
+ output logic [ID_WIDTH-1:0] mm2mm_arid,
+ output logic [ADDR_WIDTH-1:0] mm2mm_araddr,
+ output logic [7:0] mm2mm_arlen,
+ output logic [2:0] mm2mm_arsize,
+ output logic [1:0] mm2mm_arburst,
+ output logic mm2mm_arlock,
+ output logic [3:0] mm2mm_arcache,
+ output logic [2:0] mm2mm_arprot,
+ output logic [3:0] mm2mm_arqos,
+ output logic [USER_WIDTH-1:0] mm2mm_aruser,
+ output logic mm2mm_arvalid,
+ input logic mm2mm_arready,
+ // R channel
+ input logic [ID_WIDTH-1:0] mm2mm_rid,
+ input logic [DATA_WIDTH-1:0] mm2mm_rdata,
+ input logic [1:0] mm2mm_rresp,
+ input logic mm2mm_rlast,
+ input logic [USER_WIDTH-1:0] mm2mm_ruser,
+ input logic mm2mm_rvalid,
+ output logic mm2mm_rready,
+
+ // Driven directly from the engine's ctrl_done output (see assign below).
+ output logic dma_finish);
+
+// NUM_REGS sizes the CSR register file; FIFO_DEPTH is forwarded to the
+// mm2mm engine's FIFO_DEPTH parameter.
+localparam int NUM_REGS = 8;
+localparam int FIFO_DEPTH = 16;
+
+// Word indices into config_status_reg (byte offset = index * 4 for the
+// 32-bit register map listed in the file header).
+localparam int CDMA_CTRL_IDX = 0;
+localparam int CDMA_NUM_BYTES_IDX = 1;
+localparam int CDMA_SRC_ADDR_IDX = 2;
+localparam int CDMA_DST_ADDR_IDX = 3;
+// Index 4 = STATUS — write-protected inside snix_axi_cdma_csr
+
+ // Register file exported by the CSR block, and the status word fed back
+ // into it.
+ logic [NUM_REGS-1:0][AXIL_DATA_WIDTH-1:0] config_status_reg;
+ logic [AXIL_DATA_WIDTH-1:0] read_status_reg;
+
+// -------------------------------------------------------------------------
+// Decode control registers
+// -------------------------------------------------------------------------
+ logic ctrl_start, ctrl_stop;
+ logic [2:0] ctrl_size;
+ logic [7:0] ctrl_len;
+ logic [31:0] ctrl_transfer_len;
+ logic [ADDR_WIDTH-1:0] ctrl_src_addr, ctrl_dst_addr;
+ logic ctrl_done;
+
+// Field extraction follows the CDMA_CTRL layout in the file header.
+assign ctrl_start = config_status_reg[CDMA_CTRL_IDX][0];
+assign ctrl_stop = config_status_reg[CDMA_CTRL_IDX][1];
+assign ctrl_size = config_status_reg[CDMA_CTRL_IDX][5:3];
+assign ctrl_len = config_status_reg[CDMA_CTRL_IDX][13:6];
+assign ctrl_transfer_len = config_status_reg[CDMA_NUM_BYTES_IDX];
+// NOTE(review): these part-selects index an AXIL_DATA_WIDTH-bit register, so
+// they appear to require ADDR_WIDTH <= AXIL_DATA_WIDTH — confirm before
+// overriding the defaults.
+assign ctrl_src_addr = config_status_reg[CDMA_SRC_ADDR_IDX][ADDR_WIDTH-1:0];
+assign ctrl_dst_addr = config_status_reg[CDMA_DST_ADDR_IDX][ADDR_WIDTH-1:0];
+
+// Only bit 0 of the status word is driven (by ctrl_done); the rest is zero.
+assign read_status_reg = {{(AXIL_DATA_WIDTH-1){1'b0}}, ctrl_done};
+assign dma_finish = ctrl_done;
+// -------------------------------------------------------------------------
+// CSR
+// -------------------------------------------------------------------------
+snix_axi_cdma_csr #(
+ .DATA_WIDTH(AXIL_DATA_WIDTH),
+ .ADDR_WIDTH(AXIL_ADDR_WIDTH),
+ .NUM_REGS (NUM_REGS))
+cdma_csr (
+ .clk (clk),
+ .rst_n (rst_n),
+ .s_axil_awaddr (s_axil_awaddr),
+ .s_axil_awvalid (s_axil_awvalid),
+ .s_axil_awready (s_axil_awready),
+ .s_axil_wdata (s_axil_wdata),
+ .s_axil_wstrb (s_axil_wstrb),
+ .s_axil_wvalid (s_axil_wvalid),
+ .s_axil_wready (s_axil_wready),
+ .s_axil_bresp (s_axil_bresp),
+ .s_axil_bvalid (s_axil_bvalid),
+ .s_axil_bready (s_axil_bready),
+ .s_axil_araddr (s_axil_araddr),
+ .s_axil_arvalid (s_axil_arvalid),
+ .s_axil_arready (s_axil_arready),
+ .s_axil_rdata (s_axil_rdata),
+ .s_axil_rresp (s_axil_rresp),
+ .s_axil_rvalid (s_axil_rvalid),
+ .s_axil_rready (s_axil_rready),
+ .read_status_reg (read_status_reg),
+ .config_status_reg (config_status_reg));
+
+// -------------------------------------------------------------------------
+// MM2MM engine
+// -------------------------------------------------------------------------
+snix_axi_mm2mm #(
+ .ADDR_WIDTH(ADDR_WIDTH),
+ .DATA_WIDTH(DATA_WIDTH),
+ .ID_WIDTH (ID_WIDTH),
+ .USER_WIDTH(USER_WIDTH),
+ .FIFO_DEPTH(FIFO_DEPTH))
+axi_mm2mm (
+ .clk (clk),
+ .rst_n (rst_n),
+ .ctrl_start (ctrl_start),
+ .ctrl_stop (ctrl_stop),
+ .ctrl_src_addr (ctrl_src_addr),
+ .ctrl_dst_addr (ctrl_dst_addr),
+ .ctrl_len (ctrl_len),
+ .ctrl_size (ctrl_size),
+ .ctrl_transfer_len(ctrl_transfer_len),
+ .ctrl_done (ctrl_done),
+ .mm2mm_awid (mm2mm_awid),
+ .mm2mm_awaddr (mm2mm_awaddr),
+ .mm2mm_awlen (mm2mm_awlen),
+ .mm2mm_awsize (mm2mm_awsize),
+ .mm2mm_awburst (mm2mm_awburst),
+ .mm2mm_awlock (mm2mm_awlock),
+ .mm2mm_awcache (mm2mm_awcache),
+ .mm2mm_awprot (mm2mm_awprot),
+ .mm2mm_awqos (mm2mm_awqos),
+ .mm2mm_awuser (mm2mm_awuser),
+ .mm2mm_awvalid (mm2mm_awvalid),
+ .mm2mm_awready (mm2mm_awready),
+ .mm2mm_wdata (mm2mm_wdata),
+ .mm2mm_wstrb (mm2mm_wstrb),
+ .mm2mm_wlast (mm2mm_wlast),
+ .mm2mm_wuser (mm2mm_wuser),
+ .mm2mm_wvalid (mm2mm_wvalid),
+ .mm2mm_wready (mm2mm_wready),
+ .mm2mm_bid (mm2mm_bid),
+ .mm2mm_bresp (mm2mm_bresp),
+ .mm2mm_buser (mm2mm_buser),
+ .mm2mm_bvalid (mm2mm_bvalid),
+ .mm2mm_bready (mm2mm_bready),
+ .mm2mm_arid (mm2mm_arid),
+ .mm2mm_araddr (mm2mm_araddr),
+ .mm2mm_arlen (mm2mm_arlen),
+ .mm2mm_arsize (mm2mm_arsize),
+ .mm2mm_arburst (mm2mm_arburst),
+ .mm2mm_arlock (mm2mm_arlock),
+ .mm2mm_arcache (mm2mm_arcache),
+ .mm2mm_arprot (mm2mm_arprot),
+ .mm2mm_arqos (mm2mm_arqos),
+ .mm2mm_aruser (mm2mm_aruser),
+ .mm2mm_arvalid (mm2mm_arvalid),
+ .mm2mm_arready (mm2mm_arready),
+ .mm2mm_rid (mm2mm_rid),
+ .mm2mm_rdata (mm2mm_rdata),
+ .mm2mm_rresp (mm2mm_rresp),
+ .mm2mm_rlast (mm2mm_rlast),
+ .mm2mm_ruser (mm2mm_ruser),
+ .mm2mm_rvalid (mm2mm_rvalid),
+ .mm2mm_rready (mm2mm_rready));
+
+endmodule : snix_axi_cdma
diff --git a/rtl/ip/cdma/snix_axi_cdma_csr.sv b/rtl/ip/cdma/snix_axi_cdma_csr.sv
new file mode 100644
index 0000000..880cf01
--- /dev/null
+++ b/rtl/ip/cdma/snix_axi_cdma_csr.sv
@@ -0,0 +1,192 @@
+// ============================================================================
+// snix_axi_cdma_csr.sv
+// AXI-Lite CSR for snix_axi_cdma (memory-to-memory central DMA)
+//
+// Register Map (word-addressed, 32-bit registers):
+//
+// Offset Index Name Bits
+// 0x00 0 CDMA_CTRL [0] = start (write-1 pulse)
+// [1] = stop (write-1 pulse)
+// [5:3] = size (AXI AxSIZE)
+// [13:6] = len (AXI AxLEN)
+// [31:14] = reserved
+// 0x04 1 CDMA_NUM_BYTES [31:0] = transfer_len (byte count)
+// 0x08 2 CDMA_SRC_ADDR [31:0] = source base address
+// 0x0C 3 CDMA_DST_ADDR [31:0] = destination base address
+// 0x10 4 STATUS (read-only) [0] = done (sticky; cleared on start)
+// [31:1] = reserved
+// 0x14– 5–7 Reserved
+//
+// Differences from snix_axi_dma_csr:
+// - Single CTRL register (no separate WR/RD paths)
+// - Separate SRC_ADDR and DST_ADDR registers
+// - STATUS[0] is write-protected; hardware sets it, start clears it
+// ============================================================================
+module snix_axi_cdma_csr #(
+ parameter int DATA_WIDTH = 32,
+ parameter int ADDR_WIDTH = 4,
+ parameter int NUM_REGS = 8)
+ (input logic clk,
+ input logic rst_n,
+ // AXI-Lite interface
+ input logic [ADDR_WIDTH-1:0] s_axil_awaddr,
+ input logic
s_axil_awvalid,
+ output logic s_axil_awready,
+ input logic [DATA_WIDTH-1:0] s_axil_wdata,
+ input logic [DATA_WIDTH/8-1:0] s_axil_wstrb,
+ input logic s_axil_wvalid,
+ output logic s_axil_wready,
+ output logic [1:0] s_axil_bresp,
+ output logic s_axil_bvalid,
+ input logic s_axil_bready,
+ input logic [ADDR_WIDTH-1:0] s_axil_araddr,
+ input logic s_axil_arvalid,
+ output logic s_axil_arready,
+ output logic [DATA_WIDTH-1:0] s_axil_rdata,
+ output logic [1:0] s_axil_rresp,
+ output logic s_axil_rvalid,
+ input logic s_axil_rready,
+ // Status from mm2mm engine: [0] = ctrl_done (single-cycle pulse)
+ input logic [DATA_WIDTH-1:0] read_status_reg,
+ // Register file
+ output logic [NUM_REGS-1:0][DATA_WIDTH-1:0] config_status_reg);
+
+localparam int AXIL_DATA_WIDTH = DATA_WIDTH;
+localparam int AXIL_ADDR_WIDTH = ADDR_WIDTH;
+localparam int ADDRLSB = $clog2(AXIL_DATA_WIDTH / 8); // byte-offset bits dropped from addresses
+localparam int REG_INDEX_WIDTH = $clog2(NUM_REGS);
+
+localparam int CDMA_CTRL_IDX = 0; // 0x00 used: pulse-clear start/stop bits
+//localparam int CDMA_NUM_BYTES_IDX = 1; // 0x04 decoded in snix_axi_cdma top
+//localparam int CDMA_SRC_ADDR_IDX = 2; // 0x08 decoded in snix_axi_cdma top
+//localparam int CDMA_DST_ADDR_IDX = 3; // 0x0C decoded in snix_axi_cdma top
+localparam int STATUS_IDX = 4; // 0x10 used: write-protect + done latch
+
+// -------------------------------------------------------------------------
+// AXI-Lite skid-buffer register slices (same topology as snix_axi_dma_csr)
+// -------------------------------------------------------------------------
+ logic s_axil_awvalid_reg;
+ logic [AXIL_ADDR_WIDTH-ADDRLSB-1:0] s_axil_awaddr_reg;
+ logic s_axil_arvalid_reg;
+ logic [AXIL_ADDR_WIDTH-ADDRLSB-1:0] s_axil_araddr_reg;
+ logic s_axil_wvalid_reg;
+ logic [AXIL_DATA_WIDTH-1:0] s_axil_wdata_reg;
+ logic [AXIL_DATA_WIDTH/8-1:0] s_axil_wstrb_reg;
+
+ logic s_axil_write_ready, s_axil_read_ready;
+ logic [31:0] awaddr_index, araddr_index;
+
+// A write is performed once both the buffered address and the buffered data
+// are valid and the B channel can take a new response; a read once the
+// buffered address is valid and the R channel can take new data.
+assign s_axil_write_ready = s_axil_awvalid_reg & s_axil_wvalid_reg &
+ (!s_axil_bvalid | s_axil_bready);
+assign s_axil_read_ready = s_axil_arvalid_reg & (!s_axil_rvalid | s_axil_rready);
+
+// Word index = low REG_INDEX_WIDTH bits of the word address, zero-extended.
+assign awaddr_index = {{(32-REG_INDEX_WIDTH){1'b0}}, s_axil_awaddr_reg[REG_INDEX_WIDTH-1:0]};
+assign araddr_index = {{(32-REG_INDEX_WIDTH){1'b0}}, s_axil_araddr_reg[REG_INDEX_WIDTH-1:0]};
+
+snix_register_slice #(.DATA_WIDTH(AXIL_ADDR_WIDTH - ADDRLSB)) reg_slice_u0 (
+ .clk (clk),
+ .rst_n (rst_n),
+ .s_axis_tdata (s_axil_awaddr[AXIL_ADDR_WIDTH-1:ADDRLSB]),
+ .s_axis_tvalid(s_axil_awvalid),
+ .s_axis_tready(s_axil_awready),
+ .m_axis_tdata (s_axil_awaddr_reg),
+ .m_axis_tvalid(s_axil_awvalid_reg),
+ .m_axis_tready(s_axil_write_ready));
+
+snix_register_slice #(.DATA_WIDTH(AXIL_DATA_WIDTH + AXIL_DATA_WIDTH/8)) reg_slice_u1 (
+ .clk (clk),
+ .rst_n (rst_n),
+ .s_axis_tdata ({s_axil_wdata, s_axil_wstrb}),
+ .s_axis_tvalid(s_axil_wvalid),
+ .s_axis_tready(s_axil_wready),
+ .m_axis_tdata ({s_axil_wdata_reg, s_axil_wstrb_reg}),
+ .m_axis_tvalid(s_axil_wvalid_reg),
+ .m_axis_tready(s_axil_write_ready));
+
+snix_register_slice #(.DATA_WIDTH(AXIL_ADDR_WIDTH - ADDRLSB)) reg_slice_u2 (
+ .clk (clk),
+ .rst_n (rst_n),
+ .s_axis_tdata (s_axil_araddr[AXIL_ADDR_WIDTH-1:ADDRLSB]),
+ .s_axis_tvalid(s_axil_arvalid),
+ .s_axis_tready(s_axil_arready),
+ .m_axis_tdata (s_axil_araddr_reg),
+ .m_axis_tvalid(s_axil_arvalid_reg),
+ .m_axis_tready(s_axil_read_ready));
+
+// -------------------------------------------------------------------------
+// B channel
+// -------------------------------------------------------------------------
+always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) s_axil_bvalid <= 1'b0;
+ else if (s_axil_write_ready) s_axil_bvalid <= 1'b1;
+ else if (s_axil_bready) s_axil_bvalid <= 1'b0;
+
+// -------------------------------------------------------------------------
+// R channel
+// -------------------------------------------------------------------------
+always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) s_axil_rvalid <= 1'b0;
+ else if (s_axil_read_ready) s_axil_rvalid <= 1'b1;
+ else if (s_axil_rready) s_axil_rvalid <= 1'b0;
+
+// -------------------------------------------------------------------------
+// Control / status signal aliases (combinatorial from register file)
+// -------------------------------------------------------------------------
+ logic ctrl_start, ctrl_stop, ctrl_done;
+
+assign ctrl_start = config_status_reg[CDMA_CTRL_IDX][0];
+assign ctrl_stop = config_status_reg[CDMA_CTRL_IDX][1];
+assign ctrl_done = read_status_reg[0];
+
+// -------------------------------------------------------------------------
+// Register file
+//
+// Every cycle, in program order (for non-blocking assignments to the same
+// bits the LAST one in program order wins):
+//   1. Pulse-clear: start[0] and stop[1] are cleared the cycle after they
+//      are seen set, making them single-cycle strobes.
+//   2. STATUS latch: done is cleared when start fires and set when the
+//      engine pulses ctrl_done (set wins over clear).
+//   3. AXI-Lite write: strobed byte lanes of the addressed register take the
+//      write data. STATUS_IDX is write-protected, so step 3 can never
+//      disturb step 2.
+//
+// Steps 1 and 2 must not be skipped on cycles with an AXI-Lite write:
+// ctrl_done is a single-cycle pulse, so skipping step 2 on such a cycle
+// would lose the completion event and STATUS[0] would never be set. A write
+// to CDMA_CTRL still overrides the pulse-clear on the byte lanes it strobes.
+// -------------------------------------------------------------------------
+always_ff @(posedge clk or negedge rst_n) begin
+ if (!rst_n) begin
+ config_status_reg <= '0;
+
+ end else begin
+ // 1. Pulse-clear: start and stop are one-cycle strobes
+ if (ctrl_start) config_status_reg[CDMA_CTRL_IDX][0] <= 1'b0;
+ if (ctrl_stop) config_status_reg[CDMA_CTRL_IDX][1] <= 1'b0;
+
+ // 2. STATUS[0]: cleared when a new transfer starts; set sticky on done
+ if (ctrl_start) config_status_reg[STATUS_IDX][0] <= 1'b0;
+ if (ctrl_done) config_status_reg[STATUS_IDX][0] <= 1'b1;
+
+ // 3. AXI-Lite write (byte-enable; STATUS is read-only)
+ if (s_axil_write_ready &&
+ awaddr_index < NUM_REGS &&
+ awaddr_index != STATUS_IDX) begin
+ for (int i = 0; i < AXIL_DATA_WIDTH/8; i++) begin
+ if (s_axil_wstrb_reg[i])
+ config_status_reg[awaddr_index][8*i +: 8] <= s_axil_wdata_reg[8*i +: 8];
+ end
+ end
+ end
+end
+
+// -------------------------------------------------------------------------
+// Read data mux
+// -------------------------------------------------------------------------
+always_ff @(posedge clk or negedge rst_n) begin
+ if (!rst_n)
+ s_axil_rdata <= '0;
+ else if (s_axil_read_ready && araddr_index < NUM_REGS)
+ s_axil_rdata <= config_status_reg[araddr_index];
+end
+
+// All accesses respond OKAY.
+assign s_axil_bresp = 2'b00;
+assign s_axil_rresp = 2'b00;
+
+endmodule : snix_axi_cdma_csr
diff --git a/rtl/ip/cdma/snix_axi_mm2mm.sv b/rtl/ip/cdma/snix_axi_mm2mm.sv
new file mode 100644
index 0000000..4be1b3e
--- /dev/null
+++ b/rtl/ip/cdma/snix_axi_mm2mm.sv
@@ -0,0 +1,455 @@
+module snix_axi_mm2mm #(parameter int ADDR_WIDTH = 32,
+ parameter int DATA_WIDTH = 64,
+ parameter int ID_WIDTH = 4,
+ parameter int USER_WIDTH = 1,
+ parameter int FIFO_DEPTH = 16)
+ (// Global signals
+ input logic clk,
+ input logic rst_n,
+
+ // Control interface
+ input logic ctrl_start,
+ input logic ctrl_stop,
+ input logic [ADDR_WIDTH-1:0] ctrl_src_addr,
+ input logic [ADDR_WIDTH-1:0] ctrl_dst_addr,
+ input logic [7:0] ctrl_len,
+ input logic [2:0] ctrl_size,
+ input logic [31:0] ctrl_transfer_len,
+ output logic ctrl_done,
+
+ // AW Channel
+ output logic [ID_WIDTH-1:0] mm2mm_awid,
+ output logic [ADDR_WIDTH-1:0] mm2mm_awaddr,
+ output logic [7:0] mm2mm_awlen,
+ output logic [2:0] mm2mm_awsize,
+ output logic [1:0] mm2mm_awburst,
+ output logic mm2mm_awlock,
+ output logic [3:0] mm2mm_awcache,
+
output logic [2:0] mm2mm_awprot, + output logic [3:0] mm2mm_awqos, + output logic [USER_WIDTH-1:0] mm2mm_awuser, + output logic mm2mm_awvalid, + input logic mm2mm_awready, + + // W Channel + output logic [DATA_WIDTH-1:0] mm2mm_wdata, + output logic [DATA_WIDTH/8-1:0] mm2mm_wstrb, + output logic mm2mm_wlast, + output logic [USER_WIDTH-1:0] mm2mm_wuser, + output logic mm2mm_wvalid, + input logic mm2mm_wready, + + // B Channel + input logic [ID_WIDTH-1:0] mm2mm_bid, + input logic [1:0] mm2mm_bresp, + input logic [USER_WIDTH-1:0] mm2mm_buser, + input logic mm2mm_bvalid, + output logic mm2mm_bready, + + // AR Channel + output logic [ID_WIDTH-1:0] mm2mm_arid, + output logic [ADDR_WIDTH-1:0] mm2mm_araddr, + output logic [7:0] mm2mm_arlen, + output logic [2:0] mm2mm_arsize, + output logic [1:0] mm2mm_arburst, + output logic mm2mm_arlock, + output logic [3:0] mm2mm_arcache, + output logic [2:0] mm2mm_arprot, + output logic [3:0] mm2mm_arqos, + output logic [USER_WIDTH-1:0] mm2mm_aruser, + output logic mm2mm_arvalid, + input logic mm2mm_arready, + + // R Channel + input logic [ID_WIDTH-1:0] mm2mm_rid, + input logic [DATA_WIDTH-1:0] mm2mm_rdata, + input logic [1:0] mm2mm_rresp, + input logic mm2mm_rlast, + input logic [USER_WIDTH-1:0] mm2mm_ruser, + input logic mm2mm_rvalid, + output logic mm2mm_rready + ); + + // ------------------------------------------------------------------------- + // Local parameters + // ------------------------------------------------------------------------- + localparam int STRB_WIDTH = DATA_WIDTH / 8; + localparam int STRB_IDX_WIDTH = $clog2(STRB_WIDTH) + 1; // +1 to hold full-width value + + // ------------------------------------------------------------------------- + // Start / stop edge detection + // ------------------------------------------------------------------------- + logic ctrl_start_r, wr_start_edge; + logic ctrl_stop_r, wr_stop_edge; + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) begin + ctrl_start_r <= 1'b0; + 
ctrl_stop_r <= 1'b0; + end else begin + ctrl_start_r <= ctrl_start; + ctrl_stop_r <= ctrl_stop; + end + + assign wr_start_edge = ctrl_start & ~ctrl_start_r; + assign wr_stop_edge = ctrl_stop & ~ctrl_stop_r; + + // ------------------------------------------------------------------------- + // Abort latch — set on stop edge, cleared on start edge + // ------------------------------------------------------------------------- + logic wr_abort; + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) wr_abort <= 1'b0; + else if (wr_stop_edge) wr_abort <= 1'b1; + else if (wr_start_edge) wr_abort <= 1'b0; + + // ------------------------------------------------------------------------- + // FSM + // ------------------------------------------------------------------------- + typedef enum logic [2:0] {IDLE, PREP1, PREP2, AR, READ, AW, WRITE, WAIT_BRESP} state_t; + state_t state, next_state; + + logic transfer_done; + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) state <= IDLE; + else state <= next_state; + + always_comb begin + next_state = state; // default holds state; prevents latches + case (state) + IDLE: begin + next_state = (ctrl_start && !wr_abort) ? PREP1 : IDLE; + end + PREP1: begin + next_state = wr_abort ? IDLE : PREP2; + end + PREP2: begin + next_state = AR; + end + AR: begin + if (wr_abort) + next_state = IDLE; + else + next_state = (mm2mm_arvalid && mm2mm_arready) ? READ : AR; + end + READ: begin + next_state = (mm2mm_rvalid && mm2mm_rready && mm2mm_rlast) ? AW : READ; + end + AW: begin + if (wr_abort) + next_state = IDLE; + else + next_state = (mm2mm_awvalid && mm2mm_awready) ? WRITE : AW; + end + WRITE: begin + next_state = (mm2mm_wvalid && mm2mm_wready && mm2mm_wlast) ? 
WAIT_BRESP : WRITE; + end + WAIT_BRESP: begin + if (mm2mm_bvalid && mm2mm_bready) begin + if (wr_abort || transfer_done) + next_state = IDLE; + else + next_state = PREP1; + end + end + default: ; + endcase + end + + // ------------------------------------------------------------------------- + // 4K boundary & burst-length computation — pipelined across PREP1 / PREP2 + // + // Identical two-stage pipeline to s2mm / mm2s. The src address is used + // for the 4K check; burst_actual_bytes is applied to both pointers so + // they stay in lock-step. + // + // PREP1 [Stage 1]: max_len, next_size, src_axi_addr (all regs) + // → next_bytes → cross_4k → bytes_to_4k + // → register bytes_to_4k_r + // + // PREP2 [Stage 2]: bytes_to_4k_r, pending_bytes, next_size (all regs) + // → num_bytes_comb → next_len_o + // → register next_arlen / next_awlen | burst_actual_bytes + // ------------------------------------------------------------------------- + logic [7:0] max_len; + logic [2:0] next_size; + logic [ADDR_WIDTH-1:0] src_axi_addr; + logic [ADDR_WIDTH-1:0] dst_axi_addr; + logic [31:0] pending_bytes; + + // Stage 1 wires + logic [14:0] next_bytes; + logic cross_4k; + logic [14:0] bytes_to_4k; + // Stage 1 → Stage 2 pipeline register + logic [14:0] bytes_to_4k_r; + // Stage 2 wires + logic [14:0] num_bytes_comb; + logic [7:0] next_len_o; + + // Stage 1 combinatorial + assign next_bytes = compute_num_bytes(max_len, next_size); + assign cross_4k = ({1'b0, next_bytes} + {4'b0, src_axi_addr[11:0]}) >= 16'd4096; + assign bytes_to_4k = cross_4k ? (15'd4096 - {3'b0, src_axi_addr[11:0]}) : next_bytes; + + // Stage 2 combinatorial + assign num_bytes_comb = ({{17{1'b0}}, bytes_to_4k_r} <= pending_bytes) + ? 
bytes_to_4k_r + : pending_bytes[14:0]; + assign next_len_o = compute_next_len(num_bytes_comb, next_size); + + // ------------------------------------------------------------------------- + // Transfer-state registers + // ------------------------------------------------------------------------- + logic [31:0] transfer_len; + logic [31:0] copied_bytes; + logic [7:0] next_arlen; + logic [7:0] next_awlen; + logic [14:0] burst_actual_bytes; + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) begin + src_axi_addr <= '0; + dst_axi_addr <= '0; + next_arlen <= '0; + next_awlen <= '0; + max_len <= '0; + next_size <= '0; + copied_bytes <= '0; + transfer_len <= '0; + bytes_to_4k_r <= '0; + burst_actual_bytes <= '0; + end else if (wr_start_edge) begin + src_axi_addr <= ctrl_src_addr; + dst_axi_addr <= ctrl_dst_addr; + next_arlen <= ctrl_len; + next_awlen <= ctrl_len; + max_len <= ctrl_len; + next_size <= ctrl_size; + copied_bytes <= '0; + transfer_len <= ctrl_transfer_len; + burst_actual_bytes <= '0; + end else if (state == PREP1) begin + bytes_to_4k_r <= bytes_to_4k; + end else if (state == PREP2) begin + next_arlen <= next_len_o; + next_awlen <= next_len_o; + burst_actual_bytes <= num_bytes_comb; + end else if (state == AR && mm2mm_arready) begin + src_axi_addr <= src_axi_addr + {{(ADDR_WIDTH-15){1'b0}}, burst_actual_bytes}; + end else if (state == AW && mm2mm_awready) begin + dst_axi_addr <= dst_axi_addr + {{(ADDR_WIDTH-15){1'b0}}, burst_actual_bytes}; + copied_bytes <= copied_bytes + {17'b0, burst_actual_bytes}; + end + + // pending_bytes — decremented in AR state after burst_actual_bytes is registered. 
+ // TIMING FIX: reg(burst_actual_bytes) → subtractor → reg(pending_bytes) + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) + pending_bytes <= '0; + else if (wr_start_edge) + pending_bytes <= ctrl_transfer_len; + else if (state == AR && mm2mm_arready) + pending_bytes <= pending_bytes - {17'b0, burst_actual_bytes}; + + // ------------------------------------------------------------------------- + // Transfer-done flag + // ------------------------------------------------------------------------- + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) + transfer_done <= 1'b0; + else if (wr_start_edge || state == IDLE || state == AR || + state == PREP1 || state == PREP2) + transfer_done <= 1'b0; + else + transfer_done <= (copied_bytes == transfer_len) && (copied_bytes != '0); + + // ------------------------------------------------------------------------- + // ctrl_done — single-cycle pulse when FSM transitions into IDLE + // ------------------------------------------------------------------------- + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) ctrl_done <= 1'b0; + else ctrl_done <= (next_state == IDLE) && (state != IDLE); + + // ------------------------------------------------------------------------- + // Beat counter — drives wlast + // ------------------------------------------------------------------------- + logic [7:0] beat_cnt; + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) + beat_cnt <= '0; + else if (state == AW && mm2mm_awready) + beat_cnt <= '0; + else if (mm2mm_wvalid && mm2mm_wready) + beat_cnt <= beat_cnt + 1'b1; + + // ------------------------------------------------------------------------- + // Write strobe generation for partial last beat + // + // partial_strb_mask uses a per-bit comparator loop rather than a hardcoded + // case statement, making it correct for any DATA_WIDTH (including 1024-bit). + // Synthesises as a parallel comparator tree with no barrel shifter. 
+ // ------------------------------------------------------------------------- + logic [14:0] bytes_in_burst; + logic [STRB_WIDTH-1:0] wstrb_mask; + logic [STRB_IDX_WIDTH-1:0] valid_bytes; + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) + bytes_in_burst <= '0; + else if (state == AW && mm2mm_awready) + bytes_in_burst <= burst_actual_bytes; + else if (mm2mm_wvalid && mm2mm_wready) + bytes_in_burst <= (bytes_in_burst > STRB_WIDTH[14:0]) + ? (bytes_in_burst - STRB_WIDTH[14:0]) + : '0; + + assign valid_bytes = (bytes_in_burst <= STRB_WIDTH[14:0]) + ? bytes_in_burst[STRB_IDX_WIDTH-1:0] + : STRB_WIDTH[STRB_IDX_WIDTH-1:0]; + + logic [STRB_WIDTH-1:0] partial_strb_mask; + + always_comb begin + for (int i = 0; i < STRB_WIDTH; i++) + partial_strb_mask[i] = (i < valid_bytes); + end + + always_comb begin + if (mm2mm_wlast && (bytes_in_burst < STRB_WIDTH[14:0]) && (bytes_in_burst != '0)) + wstrb_mask = partial_strb_mask; + else if (bytes_in_burst == '0) + wstrb_mask = '0; + else + wstrb_mask = {STRB_WIDTH{1'b1}}; + end + + // ------------------------------------------------------------------------- + // FIFO — bridges R channel (read path) to W channel (write path) + // + // R-channel data is accepted only in READ state to prevent beats arriving + // before READ is entered from being silently dropped (same fix as mm2s). + // The write side drains only in WRITE state for symmetric gating. 
+ // ------------------------------------------------------------------------- + logic [DATA_WIDTH-1:0] fifo_tdata; + logic fifo_tvalid; + logic fifo_tlast; + logic fifo_tuser; + logic fifo_s_tready; + + assign mm2mm_rready = fifo_s_tready && (state == READ); + + snix_axis_fifo #( + .DATA_WIDTH(DATA_WIDTH), + .FIFO_DEPTH(FIFO_DEPTH) + ) axis_fifo_u0 ( + .clk (clk), + .rst_n (rst_n), + .s_axis_tdata (mm2mm_rdata), + .s_axis_tlast (mm2mm_rlast), + .s_axis_tuser (1'b0), + .s_axis_tvalid (mm2mm_rvalid && (state == READ)), + .s_axis_tready (fifo_s_tready), + .m_axis_tdata (fifo_tdata), + .m_axis_tlast (fifo_tlast), + .m_axis_tuser (fifo_tuser), + .m_axis_tvalid (fifo_tvalid), + .m_axis_tready (mm2mm_wready && (state == WRITE)) + ); + + // ------------------------------------------------------------------------- + // AXI output assignments — AR channel + // ------------------------------------------------------------------------- + assign mm2mm_arvalid = (state == AR); + assign mm2mm_araddr = src_axi_addr; + assign mm2mm_arlen = next_arlen; + assign mm2mm_arsize = next_size; + assign mm2mm_arburst = 2'b01; + assign mm2mm_arlock = 1'b0; + assign mm2mm_arcache = 4'b0; + assign mm2mm_arprot = 3'b0; + assign mm2mm_arqos = 4'b0; + assign mm2mm_arid = '0; + assign mm2mm_aruser = '0; + + // ------------------------------------------------------------------------- + // AXI output assignments — AW channel + // ------------------------------------------------------------------------- + assign mm2mm_awvalid = (state == AW); + assign mm2mm_awaddr = dst_axi_addr; + assign mm2mm_awlen = next_awlen; + assign mm2mm_awsize = next_size; + assign mm2mm_awburst = 2'b01; + assign mm2mm_awlock = 1'b0; + assign mm2mm_awcache = 4'b0; + assign mm2mm_awprot = 3'b0; + assign mm2mm_awqos = 4'b0; + assign mm2mm_awid = '0; + assign mm2mm_awuser = '0; + + // ------------------------------------------------------------------------- + // AXI output assignments — W channel + // 
+    // -------------------------------------------------------------------------
+    // Write data: byte lanes whose strobe is low are driven as 8'h00 instead of
+    // passing the FIFO contents through.
+    generate
+        for (genvar i = 0; i < STRB_WIDTH; i++) begin : gen_wdata_mask
+            assign mm2mm_wdata[i*8 +: 8] = wstrb_mask[i] ? fifo_tdata[i*8 +: 8] : 8'h00;
+        end
+    endgenerate
+
+    assign mm2mm_wvalid = (state == WRITE) && fifo_tvalid;           // only while draining the FIFO
+    assign mm2mm_wlast  = (beat_cnt == next_awlen) && mm2mm_wvalid;  // final beat of the burst
+    assign mm2mm_wstrb  = wstrb_mask;
+    assign mm2mm_wuser  = '0;
+
+    // -------------------------------------------------------------------------
+    // AXI output assignments — B channel
+    // -------------------------------------------------------------------------
+    assign mm2mm_bready = (state == WAIT_BRESP);   // response accepted only in WAIT_BRESP
+
+    // -------------------------------------------------------------------------
+    // Functions (identical to s2mm / mm2s)
+    // -------------------------------------------------------------------------
+
+    // Returns total byte count for an AXI burst: (len+1) << size.
+    //
+    //   len  : AxLEN encoding, i.e. number of beats minus one (0..255)
+    //   size : AxSIZE encoding, i.e. log2 of the bytes per beat (0..7)
+    //
+    // The beat count is formed at the full 15-bit result width before it is
+    // shifted. Writing `len + 1'b1` as an operand of a concatenation makes the
+    // addition self-determined (8 bits wide), so len = 8'hFF (a 256-beat
+    // burst) wraps to 0 and the burst is reported as zero bytes long.
+    //
+    // The 15-bit return value cannot represent len = 8'hFF with size = 3'b111
+    // (32768 bytes); that single combination still truncates to 0.
+    function automatic [14:0] compute_num_bytes(
+        input logic [7:0] len,
+        input logic [2:0] size
+    );
+        logic [14:0] num_beats;
+        num_beats         = {7'b0, len} + 15'd1;   // 1..256, carry out of bit 7 is kept
+        compute_num_bytes = num_beats << size;
+    endfunction
+
+    // Returns arlen/awlen = ceil(bytes / beat_size) - 1.
+    function automatic [7:0] compute_next_len(
+        input logic [14:0] bytes_i,
+        input logic [2:0]  size
+    );
+        // bytes_i : number of bytes the burst has to cover
+        // size    : AxSIZE encoding, i.e. log2 of the bytes per beat
+        //
+        // Intermediates carry one bit more than bytes_i so that the round-up
+        // addition cannot wrap when bytes_i is close to 2**15.
+        logic [15:0] round_up;
+        logic [15:0] num_beats;
+
+        round_up  = (16'd1 << size) - 16'd1;                // beat_size - 1
+        num_beats = ({1'b0, bytes_i} + round_up) >> size;   // ceil(bytes_i / beat_size)
+
+        if (num_beats == 16'd0)
+            compute_next_len = 8'd0;                        // zero bytes still maps to a 1-beat length
+        else if (num_beats > 16'd256)
+            compute_next_len = 8'hFF;                       // saturate at 256 beats, never wrap
+        else
+            compute_next_len = num_beats[7:0] - 8'd1;       // 256 beats -> 8'h00 - 1 = 8'hFF
+    endfunction
+
+endmodule : snix_axi_mm2mm
diff --git a/rtl/ip/cdma/snix_axis_fifo.sv b/rtl/ip/cdma/snix_axis_fifo.sv
new file mode 100644
index 0000000..ba6d824
--- /dev/null
+++ b/rtl/ip/cdma/snix_axis_fifo.sv
@@ -0,0 +1,113 @@
+// ============================================================================
+// snix_axis_fifo.sv
+//
+// AXI4-Stream FIFO with two operating modes selected by FRAME_FIFO:
+//
+//   FRAME_FIFO = 0 (default) — streaming / cut-through
+//     Output valid asserts as soon as any data enters the FIFO.
+//     Downstream can begin reading before the full packet arrives.
+//
+//   FRAME_FIFO = 1 — store-and-forward
+//     Output valid is suppressed until the complete packet (through tlast)
+//     has been written into the FIFO, so the downstream never sees a stalled
+//     mid-packet transfer. A packet must therefore fit in the FIFO.
+// ============================================================================
+module snix_axis_fifo #(
+    parameter int DATA_WIDTH = 32,
+    parameter int USER_WIDTH = 1,
+    parameter int FIFO_DEPTH = 16,
+    parameter bit FRAME_FIFO = 0    // 0 = streaming, 1 = store-and-forward
+) (
+    input  logic                  clk,
+    input  logic                  rst_n,            // asynchronous, active low
+
+    // AXI4-Stream slave (input)
+    input  logic [DATA_WIDTH-1:0] s_axis_tdata,
+    input  logic [USER_WIDTH-1:0] s_axis_tuser,
+    input  logic                  s_axis_tvalid,
+    input  logic                  s_axis_tlast,
+    output logic                  s_axis_tready,
+
+    // AXI4-Stream master (output)
+    output logic [DATA_WIDTH-1:0] m_axis_tdata,
+    output logic [USER_WIDTH-1:0] m_axis_tuser,
+    output logic                  m_axis_tvalid,
+    output logic                  m_axis_tlast,
+    input  logic                  m_axis_tready
+);
+
+    localparam int AWIDTH = $clog2(FIFO_DEPTH);
+
+    // Internal FIFO signals
+    logic [DATA_WIDTH-1:0] fifo_tdata;
+    logic [USER_WIDTH-1:0] fifo_tuser;
+    logic                  fifo_tlast;
+    logic                  wr_en, rd_en;
+    logic                  fifo_full, fifo_empty;
+    logic [AWIDTH:0]       fill_cnt;    // u_fifo status output; not used further here
+
+    // Handshake strobes — a beat moves only when valid and ready are both high
+    assign s_axis_tready = ~fifo_full;
+    assign wr_en         = s_axis_tvalid & s_axis_tready;
+    assign rd_en         = m_axis_tvalid & m_axis_tready;
+
+    // Output mapping
+    assign m_axis_tdata = fifo_tdata;
+    assign m_axis_tuser = fifo_tuser;
+    assign m_axis_tlast = fifo_tlast;
+
+    // -------------------------------------------------------------------------
+    // Output valid generation
+    //
+    //   FRAME_FIFO = 1 : store-and-forward. frame_cnt counts complete frames held
+    //     (+1 on a written tlast beat, -1 on a read tlast beat); valid needs >= 1.
+    //     A counter, unlike a 2-state FSM, cannot lose a tlast written mid-drain.
+    //
+    //   FRAME_FIFO = 0 : cut-through, valid tracks fifo_empty directly
+    // -------------------------------------------------------------------------
+    generate
+        if (FRAME_FIFO) begin : gen_frame_mode
+
+            // Complete frames held: tlast beats written but not yet read out.
+            logic [AWIDTH:0] frame_cnt;
+            logic            frame_in, frame_out;
+
+            assign frame_in  = wr_en & s_axis_tlast;
+            assign frame_out = rd_en & fifo_tlast;
+
+            always_ff @(posedge clk or negedge rst_n)
+                if (!rst_n)
+                    frame_cnt <= '0;
+                else if (frame_in & ~frame_out)
+                    frame_cnt <= frame_cnt + 1'b1;
+                else if (frame_out & ~frame_in)
+                    frame_cnt <= frame_cnt - 1'b1;
+
+            assign m_axis_tvalid = ~fifo_empty & (frame_cnt != '0);
+
+        end else begin : gen_stream_mode
+
+            assign m_axis_tvalid = ~fifo_empty;
+
+        end
+    endgenerate
+
+    // -------------------------------------------------------------------------
+    // Sync FIFO instance — packs {tdata, tuser, tlast} into one word
+    // -------------------------------------------------------------------------
+    snix_sync_fifo #(
+        .DATA_WIDTH (DATA_WIDTH + USER_WIDTH + 1),
+        .FIFO_DEPTH (FIFO_DEPTH)
+    ) u_fifo (
+        .clk        (clk),
+        .rst_n      (rst_n),
+        .data_i     ({s_axis_tdata, s_axis_tuser, s_axis_tlast}),
+        .wr_en      (wr_en),
+        .rd_en      (rd_en),
+        .data_o     ({fifo_tdata, fifo_tuser, fifo_tlast}),
+        .fifo_full  (fifo_full),
+        .fifo_empty (fifo_empty),
+        .fill_cnt   (fill_cnt)
+    );
+
+endmodule : snix_axis_fifo
diff --git a/rtl/ip/cdma/snix_register_slice.sv b/rtl/ip/cdma/snix_register_slice.sv
new file mode 100644
index 0000000..dedb4ad
--- /dev/null
+++ b/rtl/ip/cdma/snix_register_slice.sv
@@ -0,0 +1,79 @@
+// ============================================================================
+// snix_register_slice.sv
+//
+// Generic ready/valid register slice (skid buffer).
+//
+// Same architecture as snix_axis_register but without tuser/tlast —
+// used internally by the DMA and CDMA engines for pipeline decoupling
+// on raw data paths.
+// ============================================================================
+module snix_register_slice #(
+    parameter DATA_WIDTH = 32
+) (
+    input  logic                  clk,
+    input  logic                  rst_n,            // asynchronous, active low
+
+    // Input interface
+    input  logic [DATA_WIDTH-1:0] s_axis_tdata,
+    input  logic                  s_axis_tvalid,
+    output logic                  s_axis_tready,
+
+    // Output interface
+    output logic [DATA_WIDTH-1:0] m_axis_tdata,
+    output logic                  m_axis_tvalid,
+    input  logic                  m_axis_tready
+);
+
+    // Skid register — holds the one beat accepted while the output is stalled
+    logic                  skid_valid;
+    logic [DATA_WIDTH-1:0] skid_data;
+
+    wire s_hsk   = s_axis_tvalid & s_axis_tready;   // input beat accepted this cycle
+    wire m_stall = m_axis_tvalid & ~m_axis_tready;  // output beat is being held
+
+    // -----------------------------------------------------------------
+    // Skid valid — set by an accepted beat during a stall, cleared on m_axis_tready
+    // -----------------------------------------------------------------
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n)
+            skid_valid <= 1'b0;
+        else if (s_hsk & m_stall)
+            skid_valid <= 1'b1;
+        else if (m_axis_tready)
+            skid_valid <= 1'b0;
+
+    // -----------------------------------------------------------------
+    // Skid data — captured on every accepted beat; meaningful only if skid_valid
+    // -----------------------------------------------------------------
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n)
+            skid_data <= '0;
+        else if (s_hsk)
+            skid_data <= s_axis_tdata;
+
+    // Ready when skid is empty
+    assign s_axis_tready = ~skid_valid;
+
+    // -----------------------------------------------------------------
+    // Output valid — reloaded whenever the output register is free or draining
+    // -----------------------------------------------------------------
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n)
+            m_axis_tvalid <= 1'b0;
+        else if (~m_axis_tvalid | m_axis_tready)
+            m_axis_tvalid <= s_axis_tvalid | skid_valid;
+
+    // -----------------------------------------------------------------
+    // Output data — skid has priority
+    // -----------------------------------------------------------------
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n)
+            m_axis_tdata <= '0;
+        else if (~m_axis_tvalid | m_axis_tready) begin
+            if (skid_valid)
+                m_axis_tdata <= skid_data;
+            else if (s_axis_tvalid)
+                m_axis_tdata <= s_axis_tdata;
+        end
+
+endmodule : snix_register_slice
diff --git a/rtl/ip/cdma/snix_sync_fifo.sv b/rtl/ip/cdma/snix_sync_fifo.sv
new file mode 100644
index 0000000..9a64927
--- /dev/null
+++ b/rtl/ip/cdma/snix_sync_fifo.sv
@@ -0,0 +1,130 @@
+// ============================================================================
+// snix_sync_fifo.sv
+//
+// Synchronous FIFO with first-word-fall-through (FWFT) bypass.
+//
+// Architecture:
+//   - Dual-pointer circular buffer; full/empty are derived from a fill counter
+//   - Write-through bypass path: when the FIFO is empty and a write arrives,
+//     data is forwarded directly to the output without a one-cycle read delay
+//   - Block-RAM inference hint for FPGA targets
+// ============================================================================
+module snix_sync_fifo #(
+    parameter int DATA_WIDTH = 32,
+    parameter int FIFO_DEPTH = 16    // >= 2; capacity rounds up to a power of two
+) (
+    input  logic                        clk,
+    input  logic                        rst_n,      // asynchronous, active low
+
+    // Write port
+    input  logic [DATA_WIDTH-1:0]       data_i,
+    input  logic                        wr_en,      // ignored while fifo_full
+
+    // Read port
+    input  logic                        rd_en,      // ignored while fifo_empty
+    output logic [DATA_WIDTH-1:0]       data_o,     // head word; meaningful when !fifo_empty
+
+    // Status
+    output logic [$clog2(FIFO_DEPTH):0] fill_cnt,
+    output logic                        fifo_full,
+    output logic                        fifo_empty
+);
+
+    localparam int AWIDTH = $clog2(FIFO_DEPTH);
+
+    // Storage — 2**AWIDTH words, so a non-power-of-two FIFO_DEPTH is never overrun
+    localparam int MEM_DEPTH = 1 << AWIDTH;
+    (* ram_style="block" *) logic [DATA_WIDTH-1:0] mem [0:MEM_DEPTH-1];
+
+    // Pointers — low AWIDTH bits address mem; the extra MSB is not used
+    logic [AWIDTH:0] wptr, rptr;
+
+    // Bypass (FWFT) path
+    logic [DATA_WIDTH-1:0] fwd_data;
+    logic                  fwd_valid;
+
+    // Qualified strobes
+    wire do_wr = wr_en & ~fifo_full;
+    wire do_rd = rd_en & ~fifo_empty;
+
+    // -----------------------------------------------------------------
+    // Write pointer
+    // -----------------------------------------------------------------
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n)
+            wptr <= '0;
+        else if (do_wr)
+            wptr <= wptr + 1'b1;
+
+    // -----------------------------------------------------------------
+    // Read pointer
+    // -----------------------------------------------------------------
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n)
+            rptr <= '0;
+        else if (do_rd)
+            rptr <= rptr + 1'b1;
+
+    // -----------------------------------------------------------------
+    // Memory write
+    // -----------------------------------------------------------------
+    always_ff @(posedge clk)
+        if (do_wr)
+            mem[wptr[AWIDTH-1:0]] <= data_i;
+
+    // -----------------------------------------------------------------
+    // Memory read — pre-fetch next location for FWFT
+    // -----------------------------------------------------------------
+    logic [DATA_WIDTH-1:0] mem_rd;
+
+    always_ff @(posedge clk)
+        if (do_rd)
+            mem_rd <= mem[rptr[AWIDTH-1:0] + 1'b1];
+
+    // -----------------------------------------------------------------
+    // Fill counter and empty flag
+    // -----------------------------------------------------------------
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n) begin
+            fifo_empty <= 1'b1;
+            fill_cnt   <= '0;
+        end else begin
+            case ({do_wr, do_rd})
+                2'b10: begin
+                    fill_cnt   <= fill_cnt + 1'b1;
+                    fifo_empty <= 1'b0;
+                end
+                2'b01: begin
+                    fill_cnt   <= fill_cnt - 1'b1;
+                    fifo_empty <= (fill_cnt <= 1);
+                end
+                default: ;  // idle, or simultaneous read and write: occupancy unchanged
+            endcase
+        end
+
+    // Full flag — MSB of fill counter, i.e. fill_cnt == 2**AWIDTH
+    assign fifo_full = fill_cnt[AWIDTH];
+
+    // -----------------------------------------------------------------
+    // FWFT bypass — forward write data directly when FIFO is empty
+    // -----------------------------------------------------------------
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n)
+            fwd_valid <= 1'b0;
+        else if (fifo_empty || rd_en) begin
+            if (!wr_en)
+                fwd_valid <= 1'b0;
+            else if (fifo_empty || (rd_en && fill_cnt == 1))
+                fwd_valid <= 1'b1;  // the written word becomes the new head
+            else
+                fwd_valid <= 1'b0;
+        end
+
+    always_ff @(posedge clk)
+        if (fifo_empty || rd_en)
+            fwd_data <= data_i;
+
+    // Output mux — bypass path has priority
+    assign data_o = fwd_valid ? fwd_data : mem_rd;
+
+endmodule : snix_sync_fifo