Skip to content

Commit

Permalink
Added vector operands file and VPU interface
Browse files Browse the repository at this point in the history
  • Loading branch information
bkam002 committed Feb 13, 2025
1 parent b2ad2e5 commit 1d454d4
Show file tree
Hide file tree
Showing 4 changed files with 357 additions and 0 deletions.
4 changes: 4 additions & 0 deletions hw/rtl/VX_config.vh
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@
`define VLEN 256
`endif

// Default ELEN to 64 when the build does not override it.
// NOTE(review): presumably the RVV maximum element width in bits - confirm.
`ifndef ELEN
`define ELEN 64
`endif

// Default to a single cluster when NUM_CLUSTERS is not provided by the build.
`ifndef NUM_CLUSTERS
`define NUM_CLUSTERS 1
`endif
Expand Down
3 changes: 3 additions & 0 deletions hw/rtl/VX_define.vh
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@

// Number of bits needed to index NUM_REGS registers.
`define NR_BITS `CLOG2(`NUM_REGS)

// Number of 8-bit slots in one vector register (VLEN / 8) and the number of
// bits needed to index them. VLEN is a macro, so it must be referenced with a
// backtick; a bare `VLEN` would expand to an undeclared identifier at every
// use site of ELE_PER_VREG.
// NOTE(review): assumes VLEN is expressed in bits - confirm.
`define ELE_PER_VREG (`VLEN / 8)
`define ELE_PER_VREG_BITS `CLOG2(`ELE_PER_VREG)

// Stack sizing derived from the thread count, and the matching index width.
`define DV_STACK_SIZE `UP(`NUM_THREADS-1)
`define DV_STACK_SIZEW `UP(`CLOG2(`DV_STACK_SIZE))

Expand Down
300 changes: 300 additions & 0 deletions hw/rtl/core/VX_voperands.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,300 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

`include "VX_define.vh"

// Reset all GPRs in debug mode.
// GPR_RESET is defined only when SIMULATION is defined and NDEBUG is not;
// when set, the register-file RAMs in this file are instantiated with
// RESET_RAM = 1.
`ifdef SIMULATION
`ifndef NDEBUG
`define GPR_RESET
`endif
`endif

// Vector operand collector.
//
// Accepts an instruction on vpu_if, reads up to NUM_SRC_OPDS source registers
// from a register file split into NUM_BANKS banks, and forwards the
// instruction metadata together with the fetched operand data on operands_if.
//
// Flow as implemented below:
//   stage 0 : source operands are routed to their banks through a crossbar;
//             operands that target the same bank are flagged as a collision.
//   stage 1 : the per-bank read requests are registered (pipe_reg1) and the
//             bank RAMs are read when this stage fires.
//   stage 2 : read data is merged into the per-operand slots and handed to
//             the output buffer.
//
// While a collision is flagged, vpu_if.ready stays low so the same
// instruction is presented again; data_fetched_st1 remembers which operands
// were already granted and src_data_st2 keeps their data until the
// instruction finally leaves stage 2.
//
// Parameters:
//   INSTANCE_ID : instance name (unused here).
//   NUM_BANKS   : number of register-file banks.
//   OUT_BUF     : output buffer configuration passed to TO_OUT_BUF_SIZE/REG.
module VX_voperands import VX_gpu_pkg::*; #(
    parameter `STRING INSTANCE_ID = "",
    parameter NUM_BANKS = 4,
    parameter OUT_BUF = 3
) (
    input wire clk,
    input wire reset,

`ifdef PERF_ENABLE
    output wire [`PERF_CTR_BITS-1:0] perf_stalls,
`endif

    VX_writeback_if.slave writeback_if,
    VX_vpu_if.slave vpu_if,
    VX_operands_if.master operands_if
);
    `UNUSED_SPARAM (INSTANCE_ID)
    localparam NUM_SRC_OPDS = 3;
    localparam REQ_SEL_BITS = `CLOG2(NUM_SRC_OPDS);
    localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);
    localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
    localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
    localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS;
    // Width of the metadata bundle packed into pipe_data (see below).
    localparam META_DATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS + `UUID_WIDTH;
    localparam REGS_DATAW = `XLEN * `NUM_THREADS;
    localparam DATAW = META_DATAW + NUM_SRC_OPDS * REGS_DATAW;
    localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS);
    localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS;
    localparam XLEN_SIZE = `XLEN / 8;
    localparam BYTEENW = `NUM_THREADS * XLEN_SIZE;

    `UNUSED_VAR (writeback_if.data.sop)

    wire [NUM_SRC_OPDS-1:0] src_valid;
    wire [NUM_SRC_OPDS-1:0] req_valid_in, req_ready_in;
    wire [NUM_SRC_OPDS-1:0][PER_BANK_ADDRW-1:0] req_data_in;
    wire [NUM_SRC_OPDS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx;

    wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready;
    wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2;
    wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr, gpr_rd_addr_st1;
    wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st2;
    wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2;
    // The lane id belongs to the instruction, not to a bank: a single copy is
    // carried through pipe_reg1 and shared by every bank's read address.
    // (Declaring it per bank made pipe_reg1's data_in narrower than data_out,
    // misaligning every registered field, and over-widened each bank's raddr.)
    wire [`ELE_PER_VREG_BITS-1:0] gpr_rd_lane_id_st1;

    wire pipe_ready_in;
    wire pipe_valid_st1, pipe_ready_st1;
    wire pipe_valid_st2, pipe_ready_st2;
    wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2;

    reg [NUM_SRC_OPDS-1:0][(`NUM_THREADS * `XLEN)-1:0] src_data_st2, src_data_m_st2;

    reg [NUM_SRC_OPDS-1:0] data_fetched_st1;

    reg has_collision_n;
    wire has_collision_st1;

    // Source register indices, operand 0 = rs1, 1 = rs2, 2 = rs3.
    wire [NUM_SRC_OPDS-1:0][`NR_BITS-1:0] src_opds;
    assign src_opds = {vpu_if.data.rs3, vpu_if.data.rs2, vpu_if.data.rs1};

    // Per-bank address: register index without its bank-select bits,
    // extended with the warp issue slot when ISSUE_WIS is non-zero.
    for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_req_data_in
        if (ISSUE_WIS != 0) begin : g_wis
            assign req_data_in[i] = {src_opds[i][`NR_BITS-1:BANK_SEL_BITS], vpu_if.data.wis};
        end else begin : g_no_wis
            assign req_data_in[i] = src_opds[i][`NR_BITS-1:BANK_SEL_BITS];
        end
    end

    // Bank selection uses the low bits of the register index.
    for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_req_bank_idx
        if (NUM_BANKS != 1) begin : g_multibanks
            assign req_bank_idx[i] = src_opds[i][BANK_SEL_BITS-1:0];
        end else begin : g_singlebank
            assign req_bank_idx[i] = '0;
        end
    end

    // An operand needs a read when its index is non-zero and it has not
    // already been fetched during an earlier pass of the same instruction.
    // NOTE(review): register index 0 is never fetched here; confirm that this
    // is intended for vector registers.
    for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_src_valid
        assign src_valid[i] = (src_opds[i] != 0) && ~data_fetched_st1[i];
    end

    assign req_valid_in = {NUM_SRC_OPDS{vpu_if.valid}} & src_valid;

    // Route each pending operand request to the bank it addresses.
    VX_stream_xbar #(
        .NUM_INPUTS  (NUM_SRC_OPDS),
        .NUM_OUTPUTS (NUM_BANKS),
        .DATAW       (PER_BANK_ADDRW),
        .ARBITER     ("P"), // use priority arbiter
        .OUT_BUF     (0)    // no output buffering
    ) req_xbar (
        .clk       (clk),
        .reset     (reset),
        `UNUSED_PIN(collisions),
        .valid_in  (req_valid_in),
        .data_in   (req_data_in),
        .sel_in    (req_bank_idx),
        .ready_in  (req_ready_in),
        .valid_out (gpr_rd_valid),
        .data_out  (gpr_rd_addr),
        .sel_out   (gpr_rd_req_idx),
        .ready_out (gpr_rd_ready)
    );

    assign gpr_rd_ready = {NUM_BANKS{pipe_ready_in}};

    // Flag a collision when any two still-pending operands map to the same bank.
    always @(*) begin
        has_collision_n = 0;
        for (integer i = 0; i < NUM_SRC_OPDS; ++i) begin
            for (integer j = 1; j < (NUM_SRC_OPDS-i); ++j) begin
                has_collision_n |= src_valid[i]
                                && src_valid[j+i]
                                && (req_bank_idx[i] == req_bank_idx[j+i]);
            end
        end
    end

    wire [NUM_SRC_OPDS-1:0] req_fire_in = req_valid_in & req_ready_in;

    // Metadata bundle; field order must match the out_buf unpacking below.
    assign pipe_data = {
        vpu_if.data.wis,
        vpu_if.data.tmask,
        vpu_if.data.PC,
        vpu_if.data.wb,
        vpu_if.data.ex_type,
        vpu_if.data.op_type,
        vpu_if.data.op_args,
        vpu_if.data.rd,
        vpu_if.data.uuid
    };

    // The instruction is only consumed once no bank collision remains.
    assign vpu_if.ready = pipe_ready_in && ~has_collision_n;

    wire pipe_fire_st1 = pipe_valid_st1 && pipe_ready_st1;
    wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2;

    // Stage 0 -> stage 1 register. DATAW is the sum of the widths of the
    // fields concatenated in data_in / data_out, in the same order.
    VX_pipe_buffer #(
        .DATAW (NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH) + `ELE_PER_VREG_BITS)
    ) pipe_reg1 (
        .clk      (clk),
        .reset    (reset),
        .valid_in (vpu_if.valid),
        .ready_in (pipe_ready_in),
        .data_in  ({gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx, vpu_if.data.lane_id}),
        .data_out ({gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1, gpr_rd_lane_id_st1}),
        .valid_out(pipe_valid_st1),
        .ready_out(pipe_ready_st1)
    );

    // Track which operands were already granted for the current instruction;
    // cleared on reset or once the instruction is accepted.
    always @(posedge clk) begin
        if (reset || vpu_if.ready) begin
            data_fetched_st1 <= 0;
        end else begin
            data_fetched_st1 <= data_fetched_st1 | req_fire_in;
        end
    end

    // Only the pass without a collision advances to stage 2.
    wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1;

    // Stage 1 -> stage 2 register.
    VX_pipe_buffer #(
        .DATAW (NUM_BANKS * (1 + REQ_SEL_WIDTH) + META_DATAW)
    ) pipe_reg2 (
        .clk      (clk),
        .reset    (reset),
        .valid_in (pipe_valid2_st1),
        .ready_in (pipe_ready_st1),
        .data_in  ({gpr_rd_valid_st1, gpr_rd_req_idx_st1, pipe_data_st1}),
        .data_out ({gpr_rd_valid_st2, gpr_rd_req_idx_st2, pipe_data_st2}),
        .valid_out(pipe_valid_st2),
        .ready_out(pipe_ready_st2)
    );

    // Merge this cycle's bank read data into the operand slot that requested it,
    // on top of data retained from earlier passes.
    always @(*) begin
        src_data_m_st2 = src_data_st2;
        for (integer b = 0; b < NUM_BANKS; ++b) begin
            if (gpr_rd_valid_st2[b]) begin
                src_data_m_st2[gpr_rd_req_idx_st2[b]] = gpr_rd_data_st2[b];
            end
        end
    end

    // Retain merged operand data until the instruction leaves stage 2.
    always @(posedge clk) begin
        if (reset || pipe_fire_st2) begin
            src_data_st2 <= 0;
        end else begin
            src_data_st2 <= src_data_m_st2;
        end
    end

    // Output buffer: metadata followed by rs3/rs2/rs1 data.
    VX_elastic_buffer #(
        .DATAW   (DATAW),
        .SIZE    (`TO_OUT_BUF_SIZE(OUT_BUF)),
        .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
    ) out_buf (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (pipe_valid_st2),
        .ready_in  (pipe_ready_st2),
        .data_in   ({pipe_data_st2, src_data_m_st2}),
        .data_out  ({
            operands_if.data.wis,
            operands_if.data.tmask,
            operands_if.data.PC,
            operands_if.data.wb,
            operands_if.data.ex_type,
            operands_if.data.op_type,
            operands_if.data.op_args,
            operands_if.data.rd,
            operands_if.data.uuid,
            operands_if.data.rs3_data,
            operands_if.data.rs2_data,
            operands_if.data.rs1_data
        }),
        .valid_out (operands_if.valid),
        .ready_out (operands_if.ready)
    );

    // Writeback address/bank are derived from rd the same way as the read side.
    wire [PER_BANK_ADDRW-1:0] gpr_wr_addr;
    if (ISSUE_WIS != 0) begin : g_gpr_wr_addr
        assign gpr_wr_addr = {writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS], writeback_if.data.wis};
    end else begin : g_gpr_wr_addr_no_wis
        assign gpr_wr_addr = writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS];
    end

    wire [BANK_SEL_WIDTH-1:0] gpr_wr_bank_idx;
    if (NUM_BANKS != 1) begin : g_gpr_wr_bank_idx
        assign gpr_wr_bank_idx = writeback_if.data.rd[BANK_SEL_BITS-1:0];
    end else begin : g_gpr_wr_bank_idx_0
        assign gpr_wr_bank_idx = '0;
    end

    // One dual-port RAM per bank.
    for (genvar b = 0; b < NUM_BANKS; ++b) begin : g_gpr_rams
        wire gpr_wr_enabled;
        if (BANK_SEL_BITS != 0) begin : g_gpr_wr_enabled_multibanks
            assign gpr_wr_enabled = writeback_if.valid
                                 && (gpr_wr_bank_idx == BANK_SEL_BITS'(b));
        end else begin : g_gpr_wr_enabled
            assign gpr_wr_enabled = writeback_if.valid;
        end

        // Byte enables: every byte of a thread's word follows that thread's mask bit.
        wire [BYTEENW-1:0] wren;
        for (genvar i = 0; i < `NUM_THREADS; ++i) begin : g_wren
            assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}};
        end

        VX_dp_ram #(
            .DATAW (REGS_DATAW),
            .SIZE  (PER_BANK_REGS * PER_ISSUE_WARPS * `ELE_PER_VREG),
            .WRENW (BYTEENW),
        `ifdef GPR_RESET
            .RESET_RAM (1),
        `endif
            .OUT_REG (1),
            .RDW_MODE ("R")
        ) vrf_ram (
            .clk   (clk),
            .reset (reset),
            .read  (pipe_fire_st1),
            .wren  (wren),
            .write (gpr_wr_enabled),
            // NOTE(review): gpr_wr_addr carries no lane-select bits while the
            // read address below does; confirm how the writeback path is meant
            // to address lanes other than 0.
            .waddr (gpr_wr_addr),
            .wdata (writeback_if.data.data),
            // Lane id forms the upper address bits, per-bank register address the lower.
            .raddr ({gpr_rd_lane_id_st1, gpr_rd_addr_st1[b]}),
            .rdata (gpr_rd_data_st2[b])
        );
    end

`ifdef PERF_ENABLE
    // Count cycles in which a valid instruction could have been accepted
    // but was held back by a bank collision.
    reg [`PERF_CTR_BITS-1:0] collisions_r;
    always @(posedge clk) begin
        if (reset) begin
            collisions_r <= '0;
        end else begin
            collisions_r <= collisions_r + `PERF_CTR_BITS'(vpu_if.valid && pipe_ready_in && has_collision_n);
        end
    end
    assign perf_stalls = collisions_r;
`endif

endmodule
50 changes: 50 additions & 0 deletions hw/rtl/interfaces/VX_vpu_if.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

`include "VX_define.vh"

// Interface carrying one instruction (metadata plus register indices and a
// lane id) from a master to a slave, with valid/ready signalling.
interface VX_vpu_if import VX_gpu_pkg::*; ();

    // Instruction payload. This is a packed struct, so the field order below
    // defines the bit layout and must not be rearranged.
    typedef struct packed {
        logic [`UUID_WIDTH-1:0]        uuid;
        logic [ISSUE_WIS_W-1:0]        wis;
        logic [`NUM_THREADS-1:0]       tmask;    // one bit per thread
        logic [`PC_BITS-1:0]           PC;
        logic [`EX_BITS-1:0]           ex_type;
        logic [`INST_OP_BITS-1:0]      op_type;
        op_args_t                      op_args;
        logic                          wb;
        logic [`NR_BITS-1:0]           rd;       // register indices, NR_BITS wide each
        logic [`NR_BITS-1:0]           rs1;
        logic [`NR_BITS-1:0]           rs2;
        logic [`NR_BITS-1:0]           rs3;
        logic [`ELE_PER_VREG_BITS-1:0] lane_id;  // ELE_PER_VREG_BITS wide
    } data_t;

    logic  valid;
    data_t data;
    logic  ready;

    // Producer side: drives valid and data, observes ready.
    modport master (
        output valid, data,
        input  ready
    );

    // Consumer side: observes valid and data, drives ready.
    modport slave (
        input  valid, data,
        output ready
    );

endinterface

0 comments on commit 1d454d4

Please sign in to comment.