// ============================================================================
// snix_axil_cdma_mux.sv
// Multi-Channel AXI-Lite MUX Wrapper for snix_axi_mm2mm engine
//
// Register Map (one bank per channel; channel N starts at byte offset N * 0x40):
//   0x00: READ_ADDR  (Source Address)
//   0x04: WRITE_ADDR (Destination Address)
//   0x08: LENGTH     (Transfer length in bytes)
//   0x0C: TAG        (User tag, purely for software tracking)
//   0x10: CTRL       (Write 1 to bit 0 to trigger)
//           [5:3]  = AXI AxSIZE (0=1B, 1=2B, 2=4B, 3=8B...)
//           [13:6] = AXI AxLEN  (0=1 beat, 15=16 beats...)
//   0x14: STATUS     (Bit 0: Busy RO, Bit 1: Done W1C)
// ============================================================================
`timescale 1ns / 1ps

// ----------------------------------------------------------------------------
// snix_axil_cdma_mux
//
// Shares a single snix_axi_mm2mm copy engine between PORTS software-visible
// channels. Each channel owns a bank of 32-bit registers reachable over the
// AXI-Lite slave port; a round-robin arbiter hands one pending channel at a
// time to the engine and reports completion back into that channel's STATUS.
//
// Reset is synchronous and active-low (rst_n is only sampled on posedge clk).
//
// Limitations visible in this file:
//   * The register file is 32 bits wide. READ_ADDR / WRITE_ADDR therefore
//     only carry the low 32 address bits; with ADDR_WIDTH > 32 the upper
//     bits handed to the engine are always zero.
//   * Address bits above the channel-select field are not decoded, so the
//     register banks alias throughout the AXI-Lite address space.
//   * BRESP / RRESP are always OKAY, including for out-of-range channels
//     (writes are dropped, reads return 0).
// ----------------------------------------------------------------------------
module snix_axil_cdma_mux #(
    parameter int ADDR_WIDTH      = 32,
    parameter int DATA_WIDTH      = 32,  // Matches engine default
    parameter int AXIL_ADDR_WIDTH = 32,
    parameter int AXIL_DATA_WIDTH = 32,  // Must be 32 (checked at elaboration)
    parameter int ID_WIDTH        = 4,
    parameter int USER_WIDTH      = 1,
    parameter int PORTS           = 8,   // Number of DMA channels (>= 1)
    parameter int FIFO_DEPTH      = 16
) (
    input  logic                         clk,
    input  logic                         rst_n,

    // ==========================================
    // AXI-Lite Slave Interface (CPU CSR Access)
    // ==========================================
    input  logic [AXIL_ADDR_WIDTH-1:0]   s_axil_awaddr,
    input  logic                         s_axil_awvalid,
    output logic                         s_axil_awready,
    input  logic [AXIL_DATA_WIDTH-1:0]   s_axil_wdata,
    input  logic [AXIL_DATA_WIDTH/8-1:0] s_axil_wstrb,
    input  logic                         s_axil_wvalid,
    output logic                         s_axil_wready,
    output logic [1:0]                   s_axil_bresp,
    output logic                         s_axil_bvalid,
    input  logic                         s_axil_bready,
    input  logic [AXIL_ADDR_WIDTH-1:0]   s_axil_araddr,
    input  logic                         s_axil_arvalid,
    output logic                         s_axil_arready,
    output logic [AXIL_DATA_WIDTH-1:0]   s_axil_rdata,
    output logic [1:0]                   s_axil_rresp,
    output logic                         s_axil_rvalid,
    input  logic                         s_axil_rready,

    // ==========================================
    // AXI4 Master Interface (To Crossbar/Memory)
    // ==========================================
    output logic [ID_WIDTH-1:0]          mm2mm_awid,
    output logic [ADDR_WIDTH-1:0]        mm2mm_awaddr,
    output logic [7:0]                   mm2mm_awlen,
    output logic [2:0]                   mm2mm_awsize,
    output logic [1:0]                   mm2mm_awburst,
    output logic                         mm2mm_awlock,
    output logic [3:0]                   mm2mm_awcache,
    output logic [2:0]                   mm2mm_awprot,
    output logic [3:0]                   mm2mm_awqos,
    output logic [USER_WIDTH-1:0]        mm2mm_awuser,
    output logic                         mm2mm_awvalid,
    input  logic                         mm2mm_awready,
    output logic [DATA_WIDTH-1:0]        mm2mm_wdata,
    output logic [DATA_WIDTH/8-1:0]      mm2mm_wstrb,
    output logic                         mm2mm_wlast,
    output logic [USER_WIDTH-1:0]        mm2mm_wuser,
    output logic                         mm2mm_wvalid,
    input  logic                         mm2mm_wready,
    input  logic [ID_WIDTH-1:0]          mm2mm_bid,
    input  logic [1:0]                   mm2mm_bresp,
    input  logic [USER_WIDTH-1:0]        mm2mm_buser,
    input  logic                         mm2mm_bvalid,
    output logic                         mm2mm_bready,
    output logic [ID_WIDTH-1:0]          mm2mm_arid,
    output logic [ADDR_WIDTH-1:0]        mm2mm_araddr,
    output logic [7:0]                   mm2mm_arlen,
    output logic [2:0]                   mm2mm_arsize,
    output logic [1:0]                   mm2mm_arburst,
    output logic                         mm2mm_arlock,
    output logic [3:0]                   mm2mm_arcache,
    output logic [2:0]                   mm2mm_arprot,
    output logic [3:0]                   mm2mm_arqos,
    output logic [USER_WIDTH-1:0]        mm2mm_aruser,
    output logic                         mm2mm_arvalid,
    input  logic                         mm2mm_arready,
    input  logic [ID_WIDTH-1:0]          mm2mm_rid,
    input  logic [DATA_WIDTH-1:0]        mm2mm_rdata,
    input  logic [1:0]                   mm2mm_rresp,
    input  logic                         mm2mm_rlast,
    input  logic [USER_WIDTH-1:0]        mm2mm_ruser,
    input  logic                         mm2mm_rvalid,
    output logic                         mm2mm_rready,

    // Global Interrupt (OR'd from all channels)
    output logic                         dma_finish
);

    // ==========================================
    // Local Parameters & Utilities
    // ==========================================

    // Width of a channel index. Clamped to 1 so that PORTS == 1 does not
    // produce zero-width vectors ($clog2(1) is 0).
    localparam int CH_BITS = (PORTS > 1) ? $clog2(PORTS) : 1;

    // Each channel occupies 2**CH_SHIFT = 0x40 bytes of register space.
    localparam int CH_SHIFT = 6;

    // Byte offsets of the registers inside one channel bank.
    localparam logic [5:0] REG_READ_ADDR  = 6'h00;
    localparam logic [5:0] REG_WRITE_ADDR = 6'h04;
    localparam logic [5:0] REG_LENGTH     = 6'h08;
    localparam logic [5:0] REG_TAG        = 6'h0C;
    localparam logic [5:0] REG_CTRL       = 6'h10;
    localparam logic [5:0] REG_STATUS     = 6'h14;

    // Elaboration-time parameter checks. The register logic below is written
    // for a 32-bit data bus with four byte strobes; any other width would
    // look at the wrong byte lanes.
    if (AXIL_DATA_WIDTH != 32) begin : g_chk_axil_data_width
        $error("snix_axil_cdma_mux: AXIL_DATA_WIDTH must be 32 (got %0d)", AXIL_DATA_WIDTH);
    end
    if (PORTS < 1) begin : g_chk_ports
        $error("snix_axil_cdma_mux: PORTS must be >= 1 (got %0d)", PORTS);
    end
    if (AXIL_ADDR_WIDTH < CH_SHIFT + CH_BITS) begin : g_chk_axil_addr_width
        $error("snix_axil_cdma_mux: AXIL_ADDR_WIDTH (%0d) too small to address %0d channels",
               AXIL_ADDR_WIDTH, PORTS);
    end

    // Merge a strobed bus write into a 32-bit register: byte lanes whose
    // strobe is set take new_val, all other lanes keep old_val.
    function automatic logic [31:0] apply_wstrb(
        input logic [31:0] old_val,
        input logic [31:0] new_val,
        input logic [3:0]  wstrb
    );
        logic [31:0] res;
        res = old_val;
        for (int lane = 0; lane < 4; lane++) begin
            if (wstrb[lane]) res[8*lane +: 8] = new_val[8*lane +: 8];
        end
        return res;
    endfunction

    // Channel index reached by stepping `offset` positions on from `base`,
    // wrapping at PORTS. Written with a compare/subtract instead of a modulo
    // so non-power-of-two PORTS values are handled without a divider.
    // Requires base < PORTS and offset < PORTS.
    function automatic logic [CH_BITS-1:0] rr_wrap(
        input logic [CH_BITS-1:0] base,
        input int                 offset
    );
        logic [CH_BITS:0] sum;
        sum = {1'b0, base} + offset[CH_BITS:0];
        if (sum >= PORTS) sum = sum - PORTS;
        return sum[CH_BITS-1:0];
    endfunction

    // ==========================================
    // Internal Registers (Per Channel)
    // ==========================================
    logic [ADDR_WIDTH-1:0] ch_src_addr [PORTS];
    logic [ADDR_WIDTH-1:0] ch_dst_addr [PORTS];
    logic [31:0]           ch_len      [PORTS];
    logic [31:0]           ch_tag      [PORTS];
    logic [31:0]           ch_ctrl     [PORTS];

    logic [PORTS-1:0]      ch_req;        // Pending requests (Busy)
    logic [PORTS-1:0]      ch_done;       // Completion flags

    logic [PORTS-1:0]      arb_set_done;  // From Arbiter to CSR (one-cycle pulse per channel)

    // ==========================================
    // Address Decoding (0x40 stride per channel)
    // ==========================================
    logic [CH_BITS-1:0] wr_ch;
    logic [5:0]         wr_reg;
    logic [CH_BITS-1:0] rd_ch;
    logic [5:0]         rd_reg;

    assign wr_ch  = s_axil_awaddr[CH_SHIFT +: CH_BITS];
    assign wr_reg = s_axil_awaddr[CH_SHIFT-1:0];
    assign rd_ch  = s_axil_araddr[CH_SHIFT +: CH_BITS];
    assign rd_reg = s_axil_araddr[CH_SHIFT-1:0];

    // ==========================================
    // AXI-Lite Slave Logic (Robust Backpressure)
    // ==========================================
    assign s_axil_bresp = 2'b00;
    assign s_axil_rresp = 2'b00;

    // ---- Write path --------------------------------------------------------
    // AW and W are accepted together: both READYs pulse for one cycle, the
    // cycle after both VALIDs are seen, and only when the response channel is
    // free (no B pending, or the pending B is being taken this cycle).
    logic wr_accept;
    logic do_write;

    assign wr_accept = s_axil_awvalid && s_axil_wvalid && !s_axil_awready &&
                       (!s_axil_bvalid || s_axil_bready);

    always_ff @(posedge clk) begin
        if (!rst_n) begin
            s_axil_awready <= 1'b0;
            s_axil_wready  <= 1'b0;
        end else begin
            s_axil_awready <= wr_accept;
            s_axil_wready  <= wr_accept;
        end
    end

    // High for the single cycle in which the AW and W handshakes complete.
    assign do_write = s_axil_awready && s_axil_awvalid && s_axil_wready && s_axil_wvalid;

    always_ff @(posedge clk) begin
        if (!rst_n) begin
            s_axil_bvalid <= 1'b0;
        end else if (do_write) begin
            s_axil_bvalid <= 1'b1;
        end else if (s_axil_bready && s_axil_bvalid) begin
            s_axil_bvalid <= 1'b0;
        end
    end

    // ---- Read path ---------------------------------------------------------
    // Mirrors the write path: ARREADY pulses one cycle after ARVALID is seen,
    // provided the R channel is free or draining.
    logic rd_accept;
    logic do_read;

    assign rd_accept = s_axil_arvalid && !s_axil_arready &&
                       (!s_axil_rvalid || s_axil_rready);

    always_ff @(posedge clk) begin
        if (!rst_n) s_axil_arready <= 1'b0;
        else        s_axil_arready <= rd_accept;
    end

    assign do_read = s_axil_arready && s_axil_arvalid;

    always_ff @(posedge clk) begin
        if (!rst_n) begin
            s_axil_rvalid <= 1'b0;
            s_axil_rdata  <= '0;
        end else if (do_read) begin
            s_axil_rvalid <= 1'b1;
            // Unknown channels and unknown offsets read back as zero.
            s_axil_rdata  <= 32'd0;
            if (rd_ch < PORTS) begin
                case (rd_reg)
                    REG_READ_ADDR:  s_axil_rdata <= ch_src_addr[rd_ch];
                    REG_WRITE_ADDR: s_axil_rdata <= ch_dst_addr[rd_ch];
                    REG_LENGTH:     s_axil_rdata <= ch_len[rd_ch];
                    REG_TAG:        s_axil_rdata <= ch_tag[rd_ch];
                    REG_CTRL:       s_axil_rdata <= ch_ctrl[rd_ch];
                    REG_STATUS:     s_axil_rdata <= {30'd0, ch_done[rd_ch], ch_req[rd_ch]};
                    default:        s_axil_rdata <= 32'd0;
                endcase
            end
        end else if (s_axil_rready && s_axil_rvalid) begin
            s_axil_rvalid <= 1'b0;
        end
    end

    // ==========================================
    // Register File Write Logic
    // ==========================================

    // Level output: stays high while any channel's Done flag is set.
    assign dma_finish = |ch_done;

    always_ff @(posedge clk) begin
        if (!rst_n) begin
            ch_req  <= '0;
            ch_done <= '0;
            for (int ch = 0; ch < PORTS; ch++) begin
                ch_src_addr[ch] <= '0;
                ch_dst_addr[ch] <= '0;
                ch_len[ch]      <= '0;
                ch_tag[ch]      <= '0;
                ch_ctrl[ch]     <= '0;
            end
        end else begin
            // 1. Hardware completion: drop Busy, raise Done.
            for (int ch = 0; ch < PORTS; ch++) begin
                if (arb_set_done[ch]) begin
                    ch_req[ch]  <= 1'b0;
                    ch_done[ch] <= 1'b1;
                end
            end

            // 2. CPU writes. These come after step 1 in program order, so a
            //    non-blocking assignment made here to the same bit wins.
            if (do_write && wr_ch < PORTS) begin
                case (wr_reg)
                    // Configuration registers are frozen while the channel is
                    // busy: the engine inputs are muxed straight from them.
                    REG_READ_ADDR: begin
                        if (!ch_req[wr_ch])
                            ch_src_addr[wr_ch] <= apply_wstrb(ch_src_addr[wr_ch], s_axil_wdata, s_axil_wstrb);
                    end
                    REG_WRITE_ADDR: begin
                        if (!ch_req[wr_ch])
                            ch_dst_addr[wr_ch] <= apply_wstrb(ch_dst_addr[wr_ch], s_axil_wdata, s_axil_wstrb);
                    end
                    REG_LENGTH: begin
                        if (!ch_req[wr_ch])
                            ch_len[wr_ch] <= apply_wstrb(ch_len[wr_ch], s_axil_wdata, s_axil_wstrb);
                    end
                    REG_TAG: begin
                        if (!ch_req[wr_ch])
                            ch_tag[wr_ch] <= apply_wstrb(ch_tag[wr_ch], s_axil_wdata, s_axil_wstrb);
                    end
                    REG_CTRL: begin
                        // CTRL carries AxLEN/AxSIZE, which feed the engine
                        // combinationally, so it is frozen while busy like the
                        // other configuration registers. Ignoring the trigger
                        // while busy also removes a race: a trigger landing in
                        // the same cycle as arb_set_done would otherwise
                        // override the hardware clear of ch_req and silently
                        // re-run the transfer.
                        if (!ch_req[wr_ch]) begin
                            ch_ctrl[wr_ch] <= apply_wstrb(ch_ctrl[wr_ch], s_axil_wdata, s_axil_wstrb);
                            // Trigger: bit 0 written as 1 with its byte lane enabled.
                            if (s_axil_wstrb[0] && s_axil_wdata[0]) begin
                                ch_req[wr_ch] <= 1'b1;
                                // A new start discards the previous Done flag.
                                if (!arb_set_done[wr_ch]) ch_done[wr_ch] <= 1'b0;
                            end
                        end
                    end
                    REG_STATUS: begin
                        // Software W1C for Done (bit 1). Skipped when hardware
                        // sets Done in this very cycle so the completion is
                        // not lost.
                        if (s_axil_wstrb[0] && s_axil_wdata[1] && !arb_set_done[wr_ch])
                            ch_done[wr_ch] <= 1'b0;
                    end
                    default: ;  // Unmapped offset: write is acknowledged and dropped.
                endcase
            end
        end
    end

    // ==========================================
    // Round-Robin Arbiter & Engine Driver
    // ==========================================
    typedef enum logic [1:0] {IDLE, RUN} state_t;
    state_t state;

    logic [CH_BITS-1:0] cur_ch;   // Channel currently owning the engine
    logic [CH_BITS-1:0] rr_ptr;   // Channel with highest priority at the next grant

    // Interfaces to Engine
    logic                  engine_start;
    logic [ADDR_WIDTH-1:0] engine_src;
    logic [ADDR_WIDTH-1:0] engine_dst;
    logic [31:0]           engine_bytes;
    logic [7:0]            engine_len;
    logic [2:0]            engine_size;
    logic                  engine_done;

    // Dynamic routing to the engine based on current active channel
    assign engine_src   = ch_src_addr[cur_ch];
    assign engine_dst   = ch_dst_addr[cur_ch];
    assign engine_bytes = ch_len[cur_ch];
    assign engine_len   = ch_ctrl[cur_ch][13:6];
    assign engine_size  = ch_ctrl[cur_ch][5:3];

    // A channel is eligible when it is busy and is not being completed this
    // cycle (its ch_req bit only clears one cycle after arb_set_done pulses).
    logic [PORTS-1:0]   arb_pending;
    logic               grant_valid;
    logic [CH_BITS-1:0] grant_ch;

    assign arb_pending = ch_req & ~arb_set_done;

    // Priority scan starting at rr_ptr; the first eligible channel wins.
    always_comb begin
        logic [CH_BITS-1:0] cand;
        grant_valid = 1'b0;
        grant_ch    = '0;
        for (int ofs = 0; ofs < PORTS; ofs++) begin
            cand = rr_wrap(rr_ptr, ofs);
            if (!grant_valid && arb_pending[cand]) begin
                grant_valid = 1'b1;
                grant_ch    = cand;
            end
        end
    end

    // NOTE(review): RUN samples engine_done from its first cycle, which is
    // also the cycle engine_start is presented to the engine. This assumes
    // ctrl_done is not still held high from a previous transfer at that
    // point - confirm against snix_axi_mm2mm.
    always_ff @(posedge clk) begin
        if (!rst_n) begin
            state        <= IDLE;
            engine_start <= 1'b0;
            rr_ptr       <= '0;
            cur_ch       <= '0;
            arb_set_done <= '0;
        end else begin
            // Both are single-cycle pulses unless re-asserted below.
            arb_set_done <= '0;
            engine_start <= 1'b0;

            case (state)
                IDLE: begin
                    if (grant_valid) begin
                        cur_ch       <= grant_ch;
                        // Next search starts just past the granted channel.
                        rr_ptr       <= (grant_ch == (PORTS - 1)) ? '0 : (grant_ch + 1'b1);
                        engine_start <= 1'b1;
                        state        <= RUN;
                    end
                end

                RUN: begin
                    if (engine_done) begin
                        arb_set_done[cur_ch] <= 1'b1;
                        state                <= IDLE;
                    end
                    // Optional: Add a watchdog timeout counter here if dealing
                    // with untrusted PCIe/AXI endpoints
                end

                // Unused encodings of the 2-bit state: fall back to IDLE.
                default: state <= IDLE;
            endcase
        end
    end

    // ==========================================
    // Instantiate The Original Core Engine
    // ==========================================
    snix_axi_mm2mm #(
        .ADDR_WIDTH (ADDR_WIDTH),
        .DATA_WIDTH (DATA_WIDTH),
        .ID_WIDTH   (ID_WIDTH),
        .USER_WIDTH (USER_WIDTH),
        .FIFO_DEPTH (FIFO_DEPTH)
    ) u_core_engine (
        .clk               (clk),
        .rst_n             (rst_n),
        .ctrl_start        (engine_start),
        .ctrl_stop         (1'b0),  // Tied off; can be wired if global abort is needed
        .ctrl_src_addr     (engine_src),
        .ctrl_dst_addr     (engine_dst),
        .ctrl_len          (engine_len),
        .ctrl_size         (engine_size),
        .ctrl_transfer_len (engine_bytes),
        .ctrl_done         (engine_done),

        // AXI4 write address channel
        .mm2mm_awid        (mm2mm_awid),
        .mm2mm_awaddr      (mm2mm_awaddr),
        .mm2mm_awlen       (mm2mm_awlen),
        .mm2mm_awsize      (mm2mm_awsize),
        .mm2mm_awburst     (mm2mm_awburst),
        .mm2mm_awlock      (mm2mm_awlock),
        .mm2mm_awcache     (mm2mm_awcache),
        .mm2mm_awprot      (mm2mm_awprot),
        .mm2mm_awqos       (mm2mm_awqos),
        .mm2mm_awuser      (mm2mm_awuser),
        .mm2mm_awvalid     (mm2mm_awvalid),
        .mm2mm_awready     (mm2mm_awready),

        // AXI4 write data channel
        .mm2mm_wdata       (mm2mm_wdata),
        .mm2mm_wstrb       (mm2mm_wstrb),
        .mm2mm_wlast       (mm2mm_wlast),
        .mm2mm_wuser       (mm2mm_wuser),
        .mm2mm_wvalid      (mm2mm_wvalid),
        .mm2mm_wready      (mm2mm_wready),

        // AXI4 write response channel
        .mm2mm_bid         (mm2mm_bid),
        .mm2mm_bresp       (mm2mm_bresp),
        .mm2mm_buser       (mm2mm_buser),
        .mm2mm_bvalid      (mm2mm_bvalid),
        .mm2mm_bready      (mm2mm_bready),

        // AXI4 read address channel
        .mm2mm_arid        (mm2mm_arid),
        .mm2mm_araddr      (mm2mm_araddr),
        .mm2mm_arlen       (mm2mm_arlen),
        .mm2mm_arsize      (mm2mm_arsize),
        .mm2mm_arburst     (mm2mm_arburst),
        .mm2mm_arlock      (mm2mm_arlock),
        .mm2mm_arcache     (mm2mm_arcache),
        .mm2mm_arprot      (mm2mm_arprot),
        .mm2mm_arqos       (mm2mm_arqos),
        .mm2mm_aruser      (mm2mm_aruser),
        .mm2mm_arvalid     (mm2mm_arvalid),
        .mm2mm_arready     (mm2mm_arready),

        // AXI4 read data channel
        .mm2mm_rid         (mm2mm_rid),
        .mm2mm_rdata       (mm2mm_rdata),
        .mm2mm_rresp       (mm2mm_rresp),
        .mm2mm_rlast       (mm2mm_rlast),
        .mm2mm_ruser       (mm2mm_ruser),
        .mm2mm_rvalid      (mm2mm_rvalid),
        .mm2mm_rready      (mm2mm_rready)
    );

endmodule