五级流水线CPU之低功耗设计 (二) :Clock Gating(门控)
2015-03-02 17:33
459 查看
五级流水线整体图示:
Verilog代码综合得到的总框图:
某些指令执行的流水线级数:
如上图所见,很多指令在某些stage只需保持原数据,并不需要刷新寄存器(相当于NOP),而刷新寄存器数据无疑会产生动态功率损耗。之前的CPU基本设计中,无论数据是否改变都会刷新寄存器数据,造成了不必要的动态功率损耗。基于此动机,我们可以尝试消除那些原本不需要刷新、实际却刷新了数据的情况,实现方法是:时钟门控(clock gating)。门控的基本原理是关闭芯片上暂时用不到的功能及其时钟,从而达到节省电流消耗的目的。一般而言,门控对象选择那些不经常使用或不经常变化的寄存器。观察可以发现,除了LOAD指令和STORE指令之外,流水线第四级DATA_MEMORY的三个信号d_addr、d_we、d_dataout都不必刷新,因此可以通过门控来减少功率损耗。
代码实现如下:
// ---- Control FSM state encoding (1 bit) ----
`define idle  1'd0   // pipeline frozen, waiting for enable && start
`define exec  1'd1   // pipeline running

// ---- Opcodes: compared against instruction bits [15:11] (5 bits) ----
// Misc / memory
`define NOP   5'd0   // no operation
`define HALT  5'd1   // FSM returns to idle once this reaches the WB stage
`define LOAD  5'd2   // load from data memory, address = r2 + val3
`define STORE 5'd3   // store to data memory, address = r2 + val3
`define LDIH  5'd4   // r1 + {imm8, 8'b0}
// Add / subtract / compare
`define ADD   5'd5   // r2 + r3
`define ADDI  5'd6   // add 8-bit immediate
`define ADDC  5'd7   // r2 + r3 + cf
`define SUB   5'd8   // r2 - r3
`define SUBI  5'd9   // subtract 8-bit immediate
`define SUBC  5'd10  // r2 - r3 - cf
`define CMP   5'd11  // r2 - r3, used to set the flags
// Bitwise logic
`define AND   5'd12
`define OR    5'd13
`define XOR   5'd14
// Shifts (amount comes from val3)
`define SLL   5'd15  // logical left
`define SRL   5'd16  // logical right
`define SLA   5'd17  // arithmetic left
`define SRA   5'd18  // arithmetic right
// Control transfer
`define JUMP  5'd19  // pc <- imm8
`define JMPR  5'd20  // pc <- r1 + imm8
`define BZ    5'd21  // branch if zf == 1
`define BNZ   5'd22  // branch if zf == 0
`define BN    5'd23  // branch if nf == 1
`define BNN   5'd24  // branch if nf == 0
`define BC    5'd25  // branch if cf == 1
`define BNC   5'd26  // branch if cf == 0
//-----------------------------------------------------------------------------
// clock_gating
//
// 16-bit, five-stage (IF / ID / EX / MEM / WB) pipelined CPU. The data-memory
// output registers d_addr, d_dataout and d_we are only updated while a LOAD or
// STORE occupies the MEM stage; the gating is implemented as a register
// enable, not as a gated clock net.
//
// Instruction word fields as decoded below:
//   [15:11] opcode      [10:8] r1        [7:0] imm8 = {val2, val3}
//   [6:4]   r2          [3:0]  val3      [2:0] r3
//
// Ports:
//   clk               rising-edge clock
//   reset             asynchronous reset, active low
//   enable, start     FSM leaves idle when both are 1; it returns to idle
//                     when enable is 0 or a HALT reaches the WB stage
//   i_addr, i_datain  instruction address (always equal to pc) / fetched word
//   d_addr, d_dataout, d_we, d_datain   data-memory interface
//
// Open review items (behaviour intentionally left as it was):
//   NOTE(review): cf is written by the combinational ALU block, is fed back
//     into ADDC/SUBC, and is overwritten by every ALU operation including
//     address computations. This infers a latch/combinational loop. The
//     intended flag semantics are not visible here - confirm, then register cf.
//   NOTE(review): on a taken branch the instructions already in IF/ID/EX are
//     not squashed (only some ID operands are zeroed). Confirm whether the ISA
//     defines delay slots or whether a real flush is required.
//   NOTE(review): d_addr is a register, so d_datain is sampled on the same
//     edge that d_addr is updated for a LOAD - confirm the memory timing.
//-----------------------------------------------------------------------------
module clock_gating(
    input             clk, reset, enable, start,
    input      [15:0] d_datain, i_datain,
    output wire [7:0] i_addr,
    output reg  [7:0] d_addr, pc,
    output reg [15:0] d_dataout,
    output reg        d_we
);

    reg        state, nextstate;
    reg [15:0] gr [0:7];                       // general-purpose register file
    reg [15:0] id_ir, ex_ir, mem_ir, wb_ir;    // instruction in each stage
    reg [15:0] reg_A, reg_B;                   // ALU operands (ID -> EX)
    reg [15:0] ALUo;                           // ALU result (combinational)
    reg [15:0] reg_C, reg_C1;                  // ALU result in MEM / WB
    reg [15:0] smdr, smdr1;                    // store data in EX / MEM
    reg        dw, zf, nf, cf;
    integer    k;

    assign i_addr = pc;

    //---------------------------------------------------------------------
    // Opcode classification helpers (pure functions of the opcode)
    //---------------------------------------------------------------------

    // 1 when the instruction writes gr[r1] in WB. The same predicate drives
    // both write-back and forwarding so the two can never disagree.
    // CMP is excluded: it only produces flags. STORE and the branches are
    // excluded: their r1 field is a source, not a destination.
    function writes_gr;
        input [4:0] op;
        begin
            case (op)
                `LOAD, `LDIH, `ADD, `ADDI, `ADDC, `SUB, `SUBI, `SUBC,
                `AND, `OR, `XOR, `SLL, `SRL, `SLA, `SRA: writes_gr = 1'b1;
                default:                                 writes_gr = 1'b0;
            endcase
        end
    endfunction

    // 1 when operand A is taken from r1 (immediate-format instructions).
    // SUBI belongs here: its imm8 overlaps the r2 field, so r2 is not a
    // register specifier for it.
    function a_from_r1;
        input [4:0] op;
        begin
            case (op)
                `LDIH, `ADDI, `SUBI, `JMPR,
                `BZ, `BNZ, `BN, `BNN, `BC, `BNC: a_from_r1 = 1'b1;
                default:                         a_from_r1 = 1'b0;
            endcase
        end
    endfunction

    // 1 when the instruction reads gr[r2] (operand A of register-format ops
    // and the base register of LOAD/STORE).
    function reads_r2;
        input [4:0] op;
        begin
            case (op)
                `LOAD, `STORE, `ADD, `ADDC, `SUB, `SUBC, `CMP,
                `AND, `OR, `XOR, `SLL, `SRL, `SLA, `SRA: reads_r2 = 1'b1;
                default:                                 reads_r2 = 1'b0;
            endcase
        end
    endfunction

    // 1 when the instruction reads gr[r3] as operand B.
    function reads_r3;
        input [4:0] op;
        begin
            case (op)
                `ADD, `ADDC, `SUB, `SUBC, `CMP,
                `AND, `OR, `XOR: reads_r3 = 1'b1;
                default:         reads_r3 = 1'b0;
            endcase
        end
    endfunction

    //---------------------------------------------------------------------
    // CPU control FSM
    //---------------------------------------------------------------------
    always @(posedge clk or negedge reset) begin
        if (!reset)
            state <= `idle;
        else
            state <= nextstate;
    end

    // Combinational next-state logic: blocking assignments plus a default,
    // so no latch is inferred.
    always @(*) begin
        nextstate = `idle;
        case (state)
            `idle: begin
                if ((enable == 1'b1) && (start == 1'b1))
                    nextstate = `exec;
            end
            `exec: begin
                if ((enable == 1'b0) || (wb_ir[15:11] == `HALT))
                    nextstate = `idle;
                else
                    nextstate = `exec;
            end
        endcase
    end

    //---------------------------------------------------------------------
    // Shared pipeline conditions
    //---------------------------------------------------------------------
    wire [4:0] if_op  = i_datain[15:11];
    wire [4:0] id_op  = id_ir[15:11];
    wire [4:0] mem_op = mem_ir[15:11];

    // A branch/JMPR in the MEM stage redirects the PC.
    wire branch_taken = (mem_op == `JMPR)
                     || ((mem_op == `BZ)  && (zf == 1'b1))
                     || ((mem_op == `BNZ) && (zf == 1'b0))
                     || ((mem_op == `BN)  && (nf == 1'b1))
                     || ((mem_op == `BNN) && (nf == 1'b0))
                     || ((mem_op == `BC)  && (cf == 1'b1))
                     || ((mem_op == `BNC) && (cf == 1'b0));

    // Load-use hazard: the instruction in ID is a LOAD and the instruction
    // being fetched reads the LOAD's destination register. The LOAD data
    // cannot be forwarded from EX, so one bubble is required.
    wire [2:0] ld_dst = id_ir[10:8];
    wire load_use_stall = (id_op == `LOAD) &&
        (  ((a_from_r1(if_op) || (if_op == `STORE)) && (ld_dst == i_datain[10:8]))
        || (reads_r2(if_op)                         && (ld_dst == i_datain[6:4]))
        || (reads_r3(if_op)                         && (ld_dst == i_datain[2:0])) );

    //---------------------------------------------------------------------
    // IF : instruction fetch
    //---------------------------------------------------------------------
    always @(posedge clk or negedge reset) begin
        if (!reset) begin
            id_ir <= 16'b0000_0000_0000_0000;
            pc    <= 8'b0000_0000;
        end
        else if (state == `exec) begin
            if (branch_taken) begin
                pc    <= reg_C[7:0];
                id_ir <= i_datain;
            end
            else if (if_op == `JUMP) begin
                pc    <= i_datain[7:0];
                id_ir <= i_datain;
            end
            else if (load_use_stall) begin
                // Hold pc and insert an explicit NOP bubble. An 'x' bubble
                // would be a don't-care for synthesis and could become any
                // instruction in hardware.
                id_ir <= {`NOP, 11'b0};
            end
            else begin
                // Normal advance. This also covers "previous instruction is a
                // LOAD but there is no register conflict", which must not
                // stall.
                pc    <= pc + 8'b1;
                id_ir <= i_datain;
            end
        end
        // idle: pc and id_ir hold
    end

    //---------------------------------------------------------------------
    // Operand bypass network (priority: EX result, MEM result, WB result,
    // register file). Only instructions that really write a register are
    // treated as producers; a LOAD in EX has no data yet.
    //---------------------------------------------------------------------
    wire ex_fwd  = writes_gr(ex_ir[15:11]) && (ex_ir[15:11] != `LOAD);
    wire mem_fwd = writes_gr(mem_op);
    wire wb_fwd  = writes_gr(wb_ir[15:11]);

    wire [15:0] mem_val = (mem_op == `LOAD) ? d_datain : reg_C;

    wire [2:0] r1 = id_ir[10:8];
    wire [2:0] r2 = id_ir[6:4];
    wire [2:0] r3 = id_ir[2:0];

    wire [15:0] r1_val = (ex_fwd  && (r1 == ex_ir[10:8]))  ? ALUo    :
                         (mem_fwd && (r1 == mem_ir[10:8])) ? mem_val :
                         (wb_fwd  && (r1 == wb_ir[10:8]))  ? reg_C1  :
                                                             gr[r1];

    wire [15:0] r2_val = (ex_fwd  && (r2 == ex_ir[10:8]))  ? ALUo    :
                         (mem_fwd && (r2 == mem_ir[10:8])) ? mem_val :
                         (wb_fwd  && (r2 == wb_ir[10:8]))  ? reg_C1  :
                                                             gr[r2];

    wire [15:0] r3_val = (ex_fwd  && (r3 == ex_ir[10:8]))  ? ALUo    :
                         (mem_fwd && (r3 == mem_ir[10:8])) ? mem_val :
                         (wb_fwd  && (r3 == wb_ir[10:8]))  ? reg_C1  :
                                                             gr[r3];

    //---------------------------------------------------------------------
    // ID : instruction decode / operand fetch
    //---------------------------------------------------------------------
    always @(posedge clk or negedge reset) begin
        if (!reset) begin
            ex_ir <= 16'b0000_0000_0000_0000;
            reg_A <= 16'b0000_0000_0000_0000;
            reg_B <= 16'b0000_0000_0000_0000;
            smdr  <= 16'b0000_0000_0000_0000;
        end
        else if (state == `exec) begin
            ex_ir <= id_ir;

            // Operand A: r1 for immediate-format instructions, otherwise r2.
            // Zeroed while a taken branch is in MEM.
            if (branch_taken)
                reg_A <= 16'b0;
            else if (a_from_r1(id_op))
                reg_A <= r1_val;
            else
                reg_A <= r2_val;

            // Operand B: immediate or r3 depending on the opcode.
            case (id_op)
                `LOAD, `STORE, `SLL, `SRL, `SLA, `SRA:
                    reg_B <= {12'b0000_0000_0000, id_ir[3:0]};   // val3
                `LDIH:
                    reg_B <= {id_ir[7:0], 8'b0000_0000};         // imm8 << 8
                `ADDI, `SUBI, `JMPR, `BZ, `BNZ, `BN, `BNN, `BC, `BNC:
                    reg_B <= {8'b0000_0000, id_ir[7:0]};         // imm8
                default: begin
                    // r3; zeroed only in this register case on a taken branch
                    if (branch_taken)
                        reg_B <= 16'b0;
                    else
                        reg_B <= r3_val;
                end
            endcase

            // Store data register: loaded from r1 for STORE, held otherwise.
            if (id_op == `STORE) begin
                if (branch_taken)
                    smdr <= 16'b0;
                else
                    smdr <= r1_val;
            end
        end
    end

    //---------------------------------------------------------------------
    // Gating enable for the data-memory output registers.
    // Derived combinationally from the instruction that is in MEM right now,
    // so it lines up with dw / smdr1 / reg_C of that same instruction. A
    // registered copy of this flag would lag by one instruction.
    //---------------------------------------------------------------------
    wire clock_gating = (mem_op == `LOAD) || (mem_op == `STORE);

    //---------------------------------------------------------------------
    // EX : execute
    //---------------------------------------------------------------------
    always @(posedge clk or negedge reset) begin
        if (!reset) begin
            mem_ir <= 0;
            zf     <= 0;
            nf     <= 0;
            reg_C  <= 0;
            dw     <= 0;
            smdr1  <= 0;
        end
        else if (state == `exec) begin
            mem_ir <= ex_ir;
            reg_C  <= ALUo;
            smdr1  <= smdr;

            // zf / nf are only updated by CMP. nf is refreshed even when the
            // result is zero so it cannot keep a stale 1 from an earlier CMP.
            if (ex_ir[15:11] == `CMP) begin
                zf <= (ALUo == 16'b0000_0000_0000_0000);
                nf <= ALUo[15];
            end

            // Write-enable request travels with a STORE into MEM.
            if (ex_ir[15:11] == `STORE)
                dw <= 1'b1;
            else
                dw <= 1'b0;
        end
    end

    //---------------------------------------------------------------------
    // ALU (combinational). {cf, ALUo} is evaluated in a 17-bit context so cf
    // receives the carry/borrow or the bit shifted out.
    // NOTE(review): the default branch holds cf/ALUo (latch) and ADDC/SUBC
    // read cf while this block also writes it; left as is, see header.
    //---------------------------------------------------------------------
    wire signed [15:0] A_reg = reg_A;   // signed view for arithmetic right shift

    always @(*) begin
        case (ex_ir[15:11])
            `LOAD, `STORE, `LDIH, `ADD, `ADDI,
            `JMPR, `BZ, `BNZ, `BN, `BNN, `BC, `BNC:
                     {cf, ALUo} <= reg_A + reg_B;
            `ADDC :  {cf, ALUo} <= reg_A + reg_B + cf;
            `SUB, `SUBI, `CMP:
                     {cf, ALUo} <= reg_A - reg_B;
            `SUBC :  {cf, ALUo} <= reg_A - reg_B - cf;
            `AND  :  {cf, ALUo} <= reg_A & reg_B;
            `OR   :  {cf, ALUo} <= reg_A | reg_B;
            `XOR  :  {cf, ALUo} <= reg_A ^ reg_B;
            `SLL  :  {cf, ALUo} <= reg_A << reg_B;    // logical left, zero fill
            `SRL  :  {cf, ALUo} <= reg_A >> reg_B;    // logical right, zero fill
            `SLA  :  {cf, ALUo} <= reg_A <<< reg_B;   // same as logical left
            `SRA  :  {cf, ALUo} <= A_reg >>> reg_B;   // sign fill
            default: begin
                cf   <= cf;
                ALUo <= ALUo;
            end
        endcase
    end

    //---------------------------------------------------------------------
    // MEM : data-memory access
    //---------------------------------------------------------------------
    always @(posedge clk or negedge reset) begin
        if (!reset) begin
            wb_ir     <= 0;
            reg_C1    <= 0;
            d_dataout <= 0;
            d_we      <= 0;
            d_addr    <= 0;     // give the address register a defined reset value
        end
        else if (state == `exec) begin
            wb_ir <= mem_ir;

            // Gated update: the memory-facing registers only change for
            // LOAD/STORE. After a STORE, d_we stays asserted with the same
            // address and data until the next memory instruction.
            // NOTE(review): consider leaving d_we ungated if the memory must
            // see a single-cycle write pulse.
            if (clock_gating) begin
                d_dataout <= smdr1;
                d_we      <= dw;
                d_addr    <= reg_C[7:0];
            end

            if (mem_op == `LOAD)
                reg_C1 <= d_datain;
            else
                reg_C1 <= reg_C;
        end
    end

    //---------------------------------------------------------------------
    // WB : register write-back
    //---------------------------------------------------------------------
    always @(posedge clk or negedge reset) begin
        if (!reset) begin
            // Clear every register; nothing may override the clear here.
            for (k = 0; k < 8; k = k + 1)
                gr[k] <= 16'b0000_0000_0000_0000;
        end
        else if (state == `exec) begin
            if (writes_gr(wb_ir[15:11]))
                gr[wb_ir[10:8]] <= reg_C1;
        end
    end

endmodule
优化前后的XPOWER测试结果比较:
Basic design:
With clock gating:
Verilog代码综合得到的总框图:
某些指令执行的流水线级数:
如上图所见,很多指令在某些stage只需保持原数据,并不需要刷新寄存器(相当于NOP),而刷新寄存器数据无疑会产生动态功率损耗。之前的CPU基本设计中,无论数据是否改变都会刷新寄存器数据,造成了不必要的动态功率损耗。基于此动机,我们可以尝试消除那些原本不需要刷新、实际却刷新了数据的情况,实现方法是:时钟门控(clock gating)。门控的基本原理是关闭芯片上暂时用不到的功能及其时钟,从而达到节省电流消耗的目的。一般而言,门控对象选择那些不经常使用或不经常变化的寄存器。观察可以发现,除了LOAD指令和STORE指令之外,流水线第四级DATA_MEMORY的三个信号d_addr、d_we、d_dataout都不必刷新,因此可以通过门控来减少功率损耗。
代码实现如下:
// ---- Control FSM state encoding (1 bit) ----
`define idle  1'd0   // pipeline frozen, waiting for enable && start
`define exec  1'd1   // pipeline running

// ---- Opcodes: compared against instruction bits [15:11] (5 bits) ----
// Misc / memory
`define NOP   5'd0   // no operation
`define HALT  5'd1   // FSM returns to idle once this reaches the WB stage
`define LOAD  5'd2   // load from data memory, address = r2 + val3
`define STORE 5'd3   // store to data memory, address = r2 + val3
`define LDIH  5'd4   // r1 + {imm8, 8'b0}
// Add / subtract / compare
`define ADD   5'd5   // r2 + r3
`define ADDI  5'd6   // add 8-bit immediate
`define ADDC  5'd7   // r2 + r3 + cf
`define SUB   5'd8   // r2 - r3
`define SUBI  5'd9   // subtract 8-bit immediate
`define SUBC  5'd10  // r2 - r3 - cf
`define CMP   5'd11  // r2 - r3, used to set the flags
// Bitwise logic
`define AND   5'd12
`define OR    5'd13
`define XOR   5'd14
// Shifts (amount comes from val3)
`define SLL   5'd15  // logical left
`define SRL   5'd16  // logical right
`define SLA   5'd17  // arithmetic left
`define SRA   5'd18  // arithmetic right
// Control transfer
`define JUMP  5'd19  // pc <- imm8
`define JMPR  5'd20  // pc <- r1 + imm8
`define BZ    5'd21  // branch if zf == 1
`define BNZ   5'd22  // branch if zf == 0
`define BN    5'd23  // branch if nf == 1
`define BNN   5'd24  // branch if nf == 0
`define BC    5'd25  // branch if cf == 1
`define BNC   5'd26  // branch if cf == 0
//-----------------------------------------------------------------------------
// clock_gating
//
// 16-bit, five-stage (IF / ID / EX / MEM / WB) pipelined CPU. The data-memory
// output registers d_addr, d_dataout and d_we are only updated while a LOAD or
// STORE occupies the MEM stage; the gating is implemented as a register
// enable, not as a gated clock net.
//
// Instruction word fields as decoded below:
//   [15:11] opcode      [10:8] r1        [7:0] imm8 = {val2, val3}
//   [6:4]   r2          [3:0]  val3      [2:0] r3
//
// Ports:
//   clk               rising-edge clock
//   reset             asynchronous reset, active low
//   enable, start     FSM leaves idle when both are 1; it returns to idle
//                     when enable is 0 or a HALT reaches the WB stage
//   i_addr, i_datain  instruction address (always equal to pc) / fetched word
//   d_addr, d_dataout, d_we, d_datain   data-memory interface
//
// Open review items (behaviour intentionally left as it was):
//   NOTE(review): cf is written by the combinational ALU block, is fed back
//     into ADDC/SUBC, and is overwritten by every ALU operation including
//     address computations. This infers a latch/combinational loop. The
//     intended flag semantics are not visible here - confirm, then register cf.
//   NOTE(review): on a taken branch the instructions already in IF/ID/EX are
//     not squashed (only some ID operands are zeroed). Confirm whether the ISA
//     defines delay slots or whether a real flush is required.
//   NOTE(review): d_addr is a register, so d_datain is sampled on the same
//     edge that d_addr is updated for a LOAD - confirm the memory timing.
//-----------------------------------------------------------------------------
module clock_gating(
    input             clk, reset, enable, start,
    input      [15:0] d_datain, i_datain,
    output wire [7:0] i_addr,
    output reg  [7:0] d_addr, pc,
    output reg [15:0] d_dataout,
    output reg        d_we
);

    reg        state, nextstate;
    reg [15:0] gr [0:7];                       // general-purpose register file
    reg [15:0] id_ir, ex_ir, mem_ir, wb_ir;    // instruction in each stage
    reg [15:0] reg_A, reg_B;                   // ALU operands (ID -> EX)
    reg [15:0] ALUo;                           // ALU result (combinational)
    reg [15:0] reg_C, reg_C1;                  // ALU result in MEM / WB
    reg [15:0] smdr, smdr1;                    // store data in EX / MEM
    reg        dw, zf, nf, cf;
    integer    k;

    assign i_addr = pc;

    //---------------------------------------------------------------------
    // Opcode classification helpers (pure functions of the opcode)
    //---------------------------------------------------------------------

    // 1 when the instruction writes gr[r1] in WB. The same predicate drives
    // both write-back and forwarding so the two can never disagree.
    // CMP is excluded: it only produces flags. STORE and the branches are
    // excluded: their r1 field is a source, not a destination.
    function writes_gr;
        input [4:0] op;
        begin
            case (op)
                `LOAD, `LDIH, `ADD, `ADDI, `ADDC, `SUB, `SUBI, `SUBC,
                `AND, `OR, `XOR, `SLL, `SRL, `SLA, `SRA: writes_gr = 1'b1;
                default:                                 writes_gr = 1'b0;
            endcase
        end
    endfunction

    // 1 when operand A is taken from r1 (immediate-format instructions).
    // SUBI belongs here: its imm8 overlaps the r2 field, so r2 is not a
    // register specifier for it.
    function a_from_r1;
        input [4:0] op;
        begin
            case (op)
                `LDIH, `ADDI, `SUBI, `JMPR,
                `BZ, `BNZ, `BN, `BNN, `BC, `BNC: a_from_r1 = 1'b1;
                default:                         a_from_r1 = 1'b0;
            endcase
        end
    endfunction

    // 1 when the instruction reads gr[r2] (operand A of register-format ops
    // and the base register of LOAD/STORE).
    function reads_r2;
        input [4:0] op;
        begin
            case (op)
                `LOAD, `STORE, `ADD, `ADDC, `SUB, `SUBC, `CMP,
                `AND, `OR, `XOR, `SLL, `SRL, `SLA, `SRA: reads_r2 = 1'b1;
                default:                                 reads_r2 = 1'b0;
            endcase
        end
    endfunction

    // 1 when the instruction reads gr[r3] as operand B.
    function reads_r3;
        input [4:0] op;
        begin
            case (op)
                `ADD, `ADDC, `SUB, `SUBC, `CMP,
                `AND, `OR, `XOR: reads_r3 = 1'b1;
                default:         reads_r3 = 1'b0;
            endcase
        end
    endfunction

    //---------------------------------------------------------------------
    // CPU control FSM
    //---------------------------------------------------------------------
    always @(posedge clk or negedge reset) begin
        if (!reset)
            state <= `idle;
        else
            state <= nextstate;
    end

    // Combinational next-state logic: blocking assignments plus a default,
    // so no latch is inferred.
    always @(*) begin
        nextstate = `idle;
        case (state)
            `idle: begin
                if ((enable == 1'b1) && (start == 1'b1))
                    nextstate = `exec;
            end
            `exec: begin
                if ((enable == 1'b0) || (wb_ir[15:11] == `HALT))
                    nextstate = `idle;
                else
                    nextstate = `exec;
            end
        endcase
    end

    //---------------------------------------------------------------------
    // Shared pipeline conditions
    //---------------------------------------------------------------------
    wire [4:0] if_op  = i_datain[15:11];
    wire [4:0] id_op  = id_ir[15:11];
    wire [4:0] mem_op = mem_ir[15:11];

    // A branch/JMPR in the MEM stage redirects the PC.
    wire branch_taken = (mem_op == `JMPR)
                     || ((mem_op == `BZ)  && (zf == 1'b1))
                     || ((mem_op == `BNZ) && (zf == 1'b0))
                     || ((mem_op == `BN)  && (nf == 1'b1))
                     || ((mem_op == `BNN) && (nf == 1'b0))
                     || ((mem_op == `BC)  && (cf == 1'b1))
                     || ((mem_op == `BNC) && (cf == 1'b0));

    // Load-use hazard: the instruction in ID is a LOAD and the instruction
    // being fetched reads the LOAD's destination register. The LOAD data
    // cannot be forwarded from EX, so one bubble is required.
    wire [2:0] ld_dst = id_ir[10:8];
    wire load_use_stall = (id_op == `LOAD) &&
        (  ((a_from_r1(if_op) || (if_op == `STORE)) && (ld_dst == i_datain[10:8]))
        || (reads_r2(if_op)                         && (ld_dst == i_datain[6:4]))
        || (reads_r3(if_op)                         && (ld_dst == i_datain[2:0])) );

    //---------------------------------------------------------------------
    // IF : instruction fetch
    //---------------------------------------------------------------------
    always @(posedge clk or negedge reset) begin
        if (!reset) begin
            id_ir <= 16'b0000_0000_0000_0000;
            pc    <= 8'b0000_0000;
        end
        else if (state == `exec) begin
            if (branch_taken) begin
                pc    <= reg_C[7:0];
                id_ir <= i_datain;
            end
            else if (if_op == `JUMP) begin
                pc    <= i_datain[7:0];
                id_ir <= i_datain;
            end
            else if (load_use_stall) begin
                // Hold pc and insert an explicit NOP bubble. An 'x' bubble
                // would be a don't-care for synthesis and could become any
                // instruction in hardware.
                id_ir <= {`NOP, 11'b0};
            end
            else begin
                // Normal advance. This also covers "previous instruction is a
                // LOAD but there is no register conflict", which must not
                // stall.
                pc    <= pc + 8'b1;
                id_ir <= i_datain;
            end
        end
        // idle: pc and id_ir hold
    end

    //---------------------------------------------------------------------
    // Operand bypass network (priority: EX result, MEM result, WB result,
    // register file). Only instructions that really write a register are
    // treated as producers; a LOAD in EX has no data yet.
    //---------------------------------------------------------------------
    wire ex_fwd  = writes_gr(ex_ir[15:11]) && (ex_ir[15:11] != `LOAD);
    wire mem_fwd = writes_gr(mem_op);
    wire wb_fwd  = writes_gr(wb_ir[15:11]);

    wire [15:0] mem_val = (mem_op == `LOAD) ? d_datain : reg_C;

    wire [2:0] r1 = id_ir[10:8];
    wire [2:0] r2 = id_ir[6:4];
    wire [2:0] r3 = id_ir[2:0];

    wire [15:0] r1_val = (ex_fwd  && (r1 == ex_ir[10:8]))  ? ALUo    :
                         (mem_fwd && (r1 == mem_ir[10:8])) ? mem_val :
                         (wb_fwd  && (r1 == wb_ir[10:8]))  ? reg_C1  :
                                                             gr[r1];

    wire [15:0] r2_val = (ex_fwd  && (r2 == ex_ir[10:8]))  ? ALUo    :
                         (mem_fwd && (r2 == mem_ir[10:8])) ? mem_val :
                         (wb_fwd  && (r2 == wb_ir[10:8]))  ? reg_C1  :
                                                             gr[r2];

    wire [15:0] r3_val = (ex_fwd  && (r3 == ex_ir[10:8]))  ? ALUo    :
                         (mem_fwd && (r3 == mem_ir[10:8])) ? mem_val :
                         (wb_fwd  && (r3 == wb_ir[10:8]))  ? reg_C1  :
                                                             gr[r3];

    //---------------------------------------------------------------------
    // ID : instruction decode / operand fetch
    //---------------------------------------------------------------------
    always @(posedge clk or negedge reset) begin
        if (!reset) begin
            ex_ir <= 16'b0000_0000_0000_0000;
            reg_A <= 16'b0000_0000_0000_0000;
            reg_B <= 16'b0000_0000_0000_0000;
            smdr  <= 16'b0000_0000_0000_0000;
        end
        else if (state == `exec) begin
            ex_ir <= id_ir;

            // Operand A: r1 for immediate-format instructions, otherwise r2.
            // Zeroed while a taken branch is in MEM.
            if (branch_taken)
                reg_A <= 16'b0;
            else if (a_from_r1(id_op))
                reg_A <= r1_val;
            else
                reg_A <= r2_val;

            // Operand B: immediate or r3 depending on the opcode.
            case (id_op)
                `LOAD, `STORE, `SLL, `SRL, `SLA, `SRA:
                    reg_B <= {12'b0000_0000_0000, id_ir[3:0]};   // val3
                `LDIH:
                    reg_B <= {id_ir[7:0], 8'b0000_0000};         // imm8 << 8
                `ADDI, `SUBI, `JMPR, `BZ, `BNZ, `BN, `BNN, `BC, `BNC:
                    reg_B <= {8'b0000_0000, id_ir[7:0]};         // imm8
                default: begin
                    // r3; zeroed only in this register case on a taken branch
                    if (branch_taken)
                        reg_B <= 16'b0;
                    else
                        reg_B <= r3_val;
                end
            endcase

            // Store data register: loaded from r1 for STORE, held otherwise.
            if (id_op == `STORE) begin
                if (branch_taken)
                    smdr <= 16'b0;
                else
                    smdr <= r1_val;
            end
        end
    end

    //---------------------------------------------------------------------
    // Gating enable for the data-memory output registers.
    // Derived combinationally from the instruction that is in MEM right now,
    // so it lines up with dw / smdr1 / reg_C of that same instruction. A
    // registered copy of this flag would lag by one instruction.
    //---------------------------------------------------------------------
    wire clock_gating = (mem_op == `LOAD) || (mem_op == `STORE);

    //---------------------------------------------------------------------
    // EX : execute
    //---------------------------------------------------------------------
    always @(posedge clk or negedge reset) begin
        if (!reset) begin
            mem_ir <= 0;
            zf     <= 0;
            nf     <= 0;
            reg_C  <= 0;
            dw     <= 0;
            smdr1  <= 0;
        end
        else if (state == `exec) begin
            mem_ir <= ex_ir;
            reg_C  <= ALUo;
            smdr1  <= smdr;

            // zf / nf are only updated by CMP. nf is refreshed even when the
            // result is zero so it cannot keep a stale 1 from an earlier CMP.
            if (ex_ir[15:11] == `CMP) begin
                zf <= (ALUo == 16'b0000_0000_0000_0000);
                nf <= ALUo[15];
            end

            // Write-enable request travels with a STORE into MEM.
            if (ex_ir[15:11] == `STORE)
                dw <= 1'b1;
            else
                dw <= 1'b0;
        end
    end

    //---------------------------------------------------------------------
    // ALU (combinational). {cf, ALUo} is evaluated in a 17-bit context so cf
    // receives the carry/borrow or the bit shifted out.
    // NOTE(review): the default branch holds cf/ALUo (latch) and ADDC/SUBC
    // read cf while this block also writes it; left as is, see header.
    //---------------------------------------------------------------------
    wire signed [15:0] A_reg = reg_A;   // signed view for arithmetic right shift

    always @(*) begin
        case (ex_ir[15:11])
            `LOAD, `STORE, `LDIH, `ADD, `ADDI,
            `JMPR, `BZ, `BNZ, `BN, `BNN, `BC, `BNC:
                     {cf, ALUo} <= reg_A + reg_B;
            `ADDC :  {cf, ALUo} <= reg_A + reg_B + cf;
            `SUB, `SUBI, `CMP:
                     {cf, ALUo} <= reg_A - reg_B;
            `SUBC :  {cf, ALUo} <= reg_A - reg_B - cf;
            `AND  :  {cf, ALUo} <= reg_A & reg_B;
            `OR   :  {cf, ALUo} <= reg_A | reg_B;
            `XOR  :  {cf, ALUo} <= reg_A ^ reg_B;
            `SLL  :  {cf, ALUo} <= reg_A << reg_B;    // logical left, zero fill
            `SRL  :  {cf, ALUo} <= reg_A >> reg_B;    // logical right, zero fill
            `SLA  :  {cf, ALUo} <= reg_A <<< reg_B;   // same as logical left
            `SRA  :  {cf, ALUo} <= A_reg >>> reg_B;   // sign fill
            default: begin
                cf   <= cf;
                ALUo <= ALUo;
            end
        endcase
    end

    //---------------------------------------------------------------------
    // MEM : data-memory access
    //---------------------------------------------------------------------
    always @(posedge clk or negedge reset) begin
        if (!reset) begin
            wb_ir     <= 0;
            reg_C1    <= 0;
            d_dataout <= 0;
            d_we      <= 0;
            d_addr    <= 0;     // give the address register a defined reset value
        end
        else if (state == `exec) begin
            wb_ir <= mem_ir;

            // Gated update: the memory-facing registers only change for
            // LOAD/STORE. After a STORE, d_we stays asserted with the same
            // address and data until the next memory instruction.
            // NOTE(review): consider leaving d_we ungated if the memory must
            // see a single-cycle write pulse.
            if (clock_gating) begin
                d_dataout <= smdr1;
                d_we      <= dw;
                d_addr    <= reg_C[7:0];
            end

            if (mem_op == `LOAD)
                reg_C1 <= d_datain;
            else
                reg_C1 <= reg_C;
        end
    end

    //---------------------------------------------------------------------
    // WB : register write-back
    //---------------------------------------------------------------------
    always @(posedge clk or negedge reset) begin
        if (!reset) begin
            // Clear every register; nothing may override the clear here.
            for (k = 0; k < 8; k = k + 1)
                gr[k] <= 16'b0000_0000_0000_0000;
        end
        else if (state == `exec) begin
            if (writes_gr(wb_ir[15:11]))
                gr[wb_ir[10:8]] <= reg_C1;
        end
    end

endmodule
优化前后的XPOWER测试结果比较:
Basic design:
With clock gating:
相关文章推荐
- 五级流水线CPU之低功耗设计(2)——门控(CLOCK GATE)
- 五级流水线CPU之低功耗设计 (一) :Bypassing(旁路)
- cpu五级流水线设计优化之低功耗
- 五级流水线CPU之低功耗设计(1)——旁路(By Passing)
- 数字电路设计之五级流水线设计(CPU)
- 数字电路设计之五级流水线设计(CPU)
- cpu五级流水线基础设计
- 自己动手写CPU之第七阶段(5)——流水线暂停机制的设计与实现
- 请问谁那有计算机系统结构的课程设计—设计一个流水线CPU。。
- 单周期CPU及流水线CPU设计(1)---logisim部件设计
- Verilog 数字电路设计之带hazard的五级流水线CPU
- 五级流水线CPU
- 指令集并行流水线CPU设计
- 简单的MIPS5级流水线CPU设计
- 16位5级流水线CPU设计
- 自己动手写CPU之第七阶段(5)——流水线暂停机制的设计与实现
- CPU五级流水线工程(带Hazard)
- JavaCard CPU的设计与FPGA实现
- 软件低功耗设计方法
- 如何设计低功耗嵌入式系统