T3Q is the biggest fuck face

...on the face of Earth
This commit is contained in:
Scare Crowe 2021-10-04 22:41:35 +05:00
parent 53f3b81e5a
commit 48cc5c470f
23 changed files with 82281 additions and 0 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,351 @@
//-----------------------------------------------------------------------------
// The confidential and proprietary information contained in this file may
// only be used by a person authorised under and to the extent permitted
// by a subsisting licensing agreement from ARM Limited.
//
// (C) COPYRIGHT 2013-2014 ARM Limited.
// ALL RIGHTS RESERVED
//
// This entire notice must be reproduced on all copies of this file
// and copies of this file may only be made by a person if such person is
// permitted to do so under the terms of a subsisting license agreement
// from ARM Limited.
//
// Filename : $RCSfile: maia_cx_crypt2.v $
// Checked In : $Date: 2014-08-29 00:16:46 -0500 (Fri, 29 Aug 2014) $
// Revision : $Revision: 70482 $
// Release Information : Cortex-A72-r1p0-00rel0
//
//-----------------------------------------------------------------------------
// Verilog-2001 (IEEE Std 1364-2001)
//-----------------------------------------------------------------------------
//#
//# Overview
//# ========
//#
// This block does the following operations:
// - AES encrypt and decrypt operations: aesd, aese, aesmc, aesimc
// - SHA single-cycle operations: sha1h, sha1su1, sha256su0
//#
//# Module Declaration
//# ==================
//#
`include "maia_header.v"
module maia_cx_crypt2 (
//#
//# Interface Signals
//# =================
//#
// Global inputs
ck_gclkcx_crypt,
cx_reset3,
// Control inputs
ival_e1_q,
aesd_e1_q,
aese_e1_q,
aesmc_e1_q,
aesimc_e1_q,
aesdimc_e1_q,
aesemc_e1_q,
pmull_e1_q,
sha1h_e1_q,
sha1su1_e1_q,
sha256su0_e1_q,
// Data inputs
qd,
qn,
// Outputs
crypt2_out_e3_q,
crypt2_active
);
//#
//# Interface Signals
//# =================
//#
// Global inputs
input ck_gclkcx_crypt; // clock for every flop in this block
input cx_reset3; // asynchronous, active-high reset (only ival_e2_q is reset)
// Control inputs
input ival_e1_q; // instruction valid in E1; enables capture of the E2 flops
input aesd_e1_q; // aes decode (single round)
input aese_e1_q; // aes encode (single round)
input aesmc_e1_q; // aes mix columns
input aesimc_e1_q; // aes inverse mix columns
input aesdimc_e1_q; // aes decode superop
input aesemc_e1_q; // aes encode superop
input pmull_e1_q; // polynomial multiplication
input sha1h_e1_q; // sha1 fixed rotate
input sha1su1_e1_q; // sha1 schedule update 1
input sha256su0_e1_q; // sha256 schedule update 0
// Data inputs
input [127:0] qd; // first 128-bit source operand
input [127:0] qn; // second 128-bit source operand; passed through to E2 unchanged for non-AESE/AESD, non-SHA ops
// Outputs
output [127:0] crypt2_out_e3_q; // registered result, captured when ival_e2_q is set
output crypt2_active; // ival_e1_q | ival_e2_q, used for regional clock gating
//#
//# Internal Signals - Automatic Declarations
//# =========================================
//#
wire [ 15: 0] aes_shf_e1;
reg [ 15: 0] aes_shf_e2_q;
wire [127: 0] aesd_e1;
reg aesd_e2_q;
wire aesd_or_e_e1;
wire [127: 0] aesd_out;
wire [ 15: 0] aesd_shf_e1;
reg aesdimc_e2_q;
wire [127: 0] aesdimc_out;
wire [127: 0] aese_e1;
reg aese_e2_q;
wire [127: 0] aese_out;
wire [ 15: 0] aese_shf_e1;
reg aesemc_e2_q;
wire [127: 0] aesemc_out;
reg aesimc_e2_q;
wire [127: 0] aesimc_in;
wire [127: 0] aesimc_out;
reg aesmc_e2_q;
wire [127: 0] aesmc_in;
wire [127: 0] aesmc_out;
wire [127: 0] crypt2_d_e1;
reg [127: 0] crypt2_d_e2_q;
wire [127: 0] crypt2_out_e2;
reg [127: 0] crypt2_out_e3_q;
reg ival_e2_q;
reg pmull_e2_q;
wire [127: 0] pmull_out;
wire [127: 0] qx_e1;
wire [ 31: 0] sha1h_in_e1;
wire [ 31: 0] sha1h_out_e1;
wire [127: 0] sha1su1_out_e1;
wire [127: 0] sha1su1_qdin_e1;
wire [127: 0] sha1su1_qnin_e1;
wire [127: 0] sha256su0_out_e1;
wire sha_inst_e1;
reg sha_inst_e2_q;
//#
//# Main Code
//# =========
//#
//
// aes functions are all in the same block because of limited result bus bandwidth.
// Maia CX has 3x64-bit result buses, and each of these instructions produces
// a 128-bit result. Two instructions could be issued in a cycle, but there is
// no value in doing this because they could not both write results.
//
// The single-cycle 2-input SHA instructions are in the same block because they have the same inputs
// and latency as the aes instructions.
//
// Originally, all functions in this block had single-cycle latency, but CX is unable to make use
// of single-cycle latency. To reduce area, functionality is spread across E1 and E2
// In particular, the AES SBOX and ISBOX functions are split into LUT(mult inverse) -> affine transform
// & affine inverse transform -> LUT(mult inverse), so that they can share the same LUT.
// E1
// 38% of this cycle is used up to drive qd and qn from the issq block. Therefore, the relatively
// shallow SHA operations are performed in this cycle, along with some preliminary processing for AESE and AESD
// E1 datapath: produces the 128-bit value (crypt2_d_e1) and the 16-bit AES
// side-band (aes_shf_e1) that are captured into the E2 flops.
//
// AES front end: qd XOR qn, forced to zero unless an AESE or AESD is in E1 so
// that the two first-stage AES blocks do not toggle for other instructions.
assign qx_e1[127:0] = {128{aesd_or_e_e1}} & (qd[127:0] ^ qn[127:0]);
// First-stage encode processing (shares the XORed input with the decode block)
maia_cx_aese1 uaese1(
.q (qx_e1[127:0]),
.aese_out (aese_e1[127:0]),
.aese_shf (aese_shf_e1[15:0])
);
// First-stage decode processing
maia_cx_aesd1 uaesd1(
.q (qx_e1[127:0]),
.aesd_out (aesd_e1[127:0]),
.aesd_shf (aesd_shf_e1[15:0])
);
assign aesd_or_e_e1 = aesd_e1_q | aese_e1_q;
// Perform sha functions in E1 to save pipeline flops
// and reduce complexity of multiplexer in E2
// sha1h only consumes the low 32 bits of qn; input gated by its own select
assign sha1h_in_e1[31:0] = {32{sha1h_e1_q}} & qn[31:0];
maia_cx_sha1h usha1h(
.qn (sha1h_in_e1[31:0]),
.d (sha1h_out_e1[31:0])
);
// sha1su1 operands are gated by the sha1su1 select
assign sha1su1_qdin_e1[127:0] = {128{sha1su1_e1_q}} & qd[127:0];
assign sha1su1_qnin_e1[127:0] = {128{sha1su1_e1_q}} & qn[127:0];
maia_cx_sha1su1 usha1su1(
.qd (sha1su1_qdin_e1[127:0]),
.qn (sha1su1_qnin_e1[127:0]),
.d (sha1su1_out_e1[127:0])
);
// NOTE(review): unlike sha1h and sha1su1 above, the sha256su0 operands are not
// gated by sha256su0_e1_q. The result is still masked in the mux below, so this
// only affects switching activity - confirm the omission is intentional.
maia_cx_sha256su0 usha256su0(
.qd (qd[127:0]),
.qn (qn[127:0]),
.d (sha256su0_out_e1[127:0])
);
assign sha_inst_e1 = sha1h_e1_q | sha1su1_e1_q | sha256su0_e1_q;
// AND-OR result mux for E1. The sha1h result is zero-extended from 32 to 128
// bits. When neither an AESE/AESD nor a SHA op is selected, qn is forwarded
// unchanged so the E2 logic (mix-columns, pmull) operates on the raw operand.
// Presumably the SHA/AESE/AESD selects are mutually exclusive; the OR of the
// terms is only meaningful in that case - TODO confirm against the decoder.
assign crypt2_d_e1[127:0] = ({128{sha1h_e1_q}} & {{96{1'b0}}, sha1h_out_e1[31:0]})
| ({128{sha1su1_e1_q}} & sha1su1_out_e1[127:0])
| ({128{sha256su0_e1_q}} & sha256su0_out_e1[127:0])
| ({128{aese_e1_q}} & aese_e1[127:0])
| ({128{aesd_e1_q}} & aesd_e1[127:0])
| ({128{~(aesd_or_e_e1 | sha_inst_e1)}} & qn[127:0]);
// 16-bit side-band from whichever first-stage AES block is selected (zero otherwise)
assign aes_shf_e1[15:0] = {16{aese_e1_q}} & aese_shf_e1[15:0] |
{16{aesd_e1_q}} & aesd_shf_e1[15:0];
// reset flop(s) since feeds into active signal used for RCG
// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt or posedge cx_reset3)
begin: uival_e2_q
if (cx_reset3 == 1'b1)
ival_e2_q <= `MAIA_DFF_DELAY {1{1'b0}};
`ifdef MAIA_XPROP_FLOP
else if (cx_reset3==1'b0)
ival_e2_q <= `MAIA_DFF_DELAY ival_e1_q;
else
ival_e2_q <= `MAIA_DFF_DELAY {1{1'bx}};
`else
else
ival_e2_q <= `MAIA_DFF_DELAY ival_e1_q;
`endif
end
// verilint flop_checks on
// end of Macro DFF
// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt)
begin: ucrypt2_e2
if (ival_e1_q==1'b1) begin
crypt2_d_e2_q[127:0] <= `MAIA_DFF_DELAY crypt2_d_e1[127:0];
aes_shf_e2_q[15:0] <= `MAIA_DFF_DELAY aes_shf_e1[15:0];
aesd_e2_q <= `MAIA_DFF_DELAY aesd_e1_q;
aese_e2_q <= `MAIA_DFF_DELAY aese_e1_q;
aesmc_e2_q <= `MAIA_DFF_DELAY aesmc_e1_q;
aesimc_e2_q <= `MAIA_DFF_DELAY aesimc_e1_q;
aesemc_e2_q <= `MAIA_DFF_DELAY aesemc_e1_q;
aesdimc_e2_q <= `MAIA_DFF_DELAY aesdimc_e1_q;
pmull_e2_q <= `MAIA_DFF_DELAY pmull_e1_q;
sha_inst_e2_q <= `MAIA_DFF_DELAY sha_inst_e1;
end
`ifdef MAIA_XPROP_FLOP
else if ((ival_e1_q==1'b0));
else begin
crypt2_d_e2_q[127:0] <= `MAIA_DFF_DELAY {128{1'bx}};
aes_shf_e2_q[15:0] <= `MAIA_DFF_DELAY {16{1'bx}};
aesd_e2_q <= `MAIA_DFF_DELAY {1{1'bx}};
aese_e2_q <= `MAIA_DFF_DELAY {1{1'bx}};
aesmc_e2_q <= `MAIA_DFF_DELAY {1{1'bx}};
aesimc_e2_q <= `MAIA_DFF_DELAY {1{1'bx}};
aesemc_e2_q <= `MAIA_DFF_DELAY {1{1'bx}};
aesdimc_e2_q <= `MAIA_DFF_DELAY {1{1'bx}};
pmull_e2_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha_inst_e2_q <= `MAIA_DFF_DELAY {1{1'bx}};
end
`endif
end
// verilint flop_checks on
// end of Macro DFF
// Enable data inputs for selected operation (glitch suppression in unused datapaths)
// E2 datapath: operates on the value captured from E1 (crypt2_d_e2_q) and
// selects the result that is registered into crypt2_out_e3_q.
//
// Only the stand-alone mix-columns blocks have their inputs gated here; the
// shared AES block and the polynomial multiplier see crypt2_d_e2_q directly.
assign aesmc_in[127:0] = {128{aesmc_e2_q }} & crypt2_d_e2_q[127:0];
assign aesimc_in[127:0] = {128{aesimc_e2_q}} & crypt2_d_e2_q[127:0];
// Second-stage AES block: produces the plain encode/decode results and the
// corresponding superop (encode+mc / decode+imc) results in parallel.
maia_cx_aesed2 uaesed2(
.aes_din (crypt2_d_e2_q[127:0]),
.aes_shf (aes_shf_e2_q[15:0]),
.aesd_out (aesd_out[127:0]),
.aese_out (aese_out[127:0]),
.aesemc_out (aesemc_out[127:0]),
.aesdimc_out (aesdimc_out[127:0])
);
// Stand-alone mix columns
maia_cx_aesmc uaesmc(
.d_in (aesmc_in[127:0]),
.mc (aesmc_out[127:0])
);
// Stand-alone inverse mix columns
maia_cx_aesimc uaesimc(
.d_in (aesimc_in[127:0]),
.imc (aesimc_out[127:0])
);
// Polynomial multiply of the low and high 64-bit halves of the E2 data
// (for pmull, E1 forwards qn unchanged, so both halves come from qn).
maia_cx_pmull upmull(
.a_in (crypt2_d_e2_q[63:0]),
.b_in (crypt2_d_e2_q[127:64]),
.p_out (pmull_out[127:0])
);
// AND-OR result mux for E2.
// - The plain aesd/aese terms are suppressed when the matching superop flag is
//   set, so a superop contributes only its aesdimc/aesemc term.
// - SHA results were fully computed in E1 and simply pass through.
// If no select is set the result is all zeros.
assign crypt2_out_e2[127:0] = ({128{aesd_e2_q & ~aesdimc_e2_q}} & aesd_out[127:0])
| ({128{aese_e2_q & ~aesemc_e2_q}} & aese_out[127:0])
| ({128{aesmc_e2_q}} & aesmc_out[127:0])
| ({128{aesemc_e2_q}} & aesemc_out[127:0])
| ({128{aesimc_e2_q}} & aesimc_out[127:0])
| ({128{aesdimc_e2_q}} & aesdimc_out[127:0])
| ({128{sha_inst_e2_q}} & crypt2_d_e2_q[127:0])
| ({128{pmull_e2_q}} & pmull_out[127:0]);
// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt)
begin: ucrypt2_e3
if (ival_e2_q==1'b1) begin
crypt2_out_e3_q[127:0] <= `MAIA_DFF_DELAY crypt2_out_e2[127:0];
end
`ifdef MAIA_XPROP_FLOP
else if ((ival_e2_q==1'b0));
else begin
crypt2_out_e3_q[127:0] <= `MAIA_DFF_DELAY {128{1'bx}};
end
`endif
end
// verilint flop_checks on
// end of Macro DFF
//-----------------------------------------------------------------------------
// regional clock gating (RCG) terms
//-----------------------------------------------------------------------------
assign crypt2_active = (ival_e1_q | ival_e2_q);
endmodule
//ARMAUTO UNDEF START
`define MAIA_UNDEFINE
`include "maia_header.v"
`undef MAIA_UNDEFINE
//ARMAUTO UNDEF END

View File

@ -0,0 +1,713 @@
//-----------------------------------------------------------------------------
// The confidential and proprietary information contained in this file may
// only be used by a person authorised under and to the extent permitted
// by a subsisting licensing agreement from ARM Limited.
//
// (C) COPYRIGHT 2013-2014 ARM Limited.
// ALL RIGHTS RESERVED
//
// This entire notice must be reproduced on all copies of this file
// and copies of this file may only be made by a person if such person is
// permitted to do so under the terms of a subsisting license agreement
// from ARM Limited.
//
// Filename : $RCSfile: maia_cx_crypt3.v $
// Checked In : $Date: 2014-08-29 00:16:46 -0500 (Fri, 29 Aug 2014) $
// Revision : $Revision: 70482 $
// Release Information : Cortex-A72-r1p0-00rel0
//
//-----------------------------------------------------------------------------
// Verilog-2001 (IEEE Std 1364-2001)
//-----------------------------------------------------------------------------
//#
//# Overview
//# ========
//#
// This block does the following operations:
// - SHA 3-input operations: sha1cpm, sha1su0, sha256h, sha256h2, sha256su1
//#
//# Module Declaration
//# ==================
//#
`include "maia_header.v"
module maia_cx_crypt3 (
//#
//# Interface Signals
//# =================
//#
// Global inputs
ck_gclkcx_crypt,
cx_reset3,
// Control inputs
//
// This block has 3x128-bit inputs for each instruction, so it requires two cycles just to
// get its operands. In E1, we receive two of the operands (qn and qm) and ival_e1_q,
// which allows the operands to be stored in flops. We also get inputs indicating which
// instruction is to be computed.
//
// At some later cycle, we receive the 3rd operand, qd, and ival_e2_q, indicating that
// we should begin the computation.
//
// There are 4 execution stages, E2-E5.
ival_e1_q,
sha1c_e1_q,
sha1p_e1_q,
sha1m_e1_q,
sha256h_e1_q,
sha256h2_e1_q,
sha256su1_e1_q,
ival_e2_q,
// Data inputs
qn_e1_q,
qm_e1_q,
qd_e2_q,
// Outputs
crypt3_out_e6_q,
crypt3_active
);
//#
//# Interface Signals
//# =================
//#
// Global inputs
input ck_gclkcx_crypt;
input cx_reset3;
// Control inputs
//
// This block has 3x128-bit inputs for each instruction, so it requires two cycles just to
// get its operands. In E1, we receive two of the operands (qn and qm) and ival_e1_q,
// which allows the operands to be stored in flops. We also get inputs indicating which
// instruction is to be computed.
//
// At some later cycle, we receive the 3rd operand, qd, and ival_e2_q, indicating that
// we should begin the computation.
//
// There are 4 execution stages, E2-E5.
input ival_e1_q;
input sha1c_e1_q; // sha hash update (choose)
input sha1p_e1_q; // sha hash update (parity)
input sha1m_e1_q; // sha hash update (majority)
input sha256h_e1_q; // sha256 hash update
input sha256h2_e1_q; // sha256 hash update 2
input sha256su1_e1_q; // sha256 schedule update 1
input ival_e2_q;
// Data inputs
input [127:0] qn_e1_q; // qn arrives with first uop on {srcb,srca}
input [127:0] qm_e1_q; // qm arrives with first uop on {srcd,srcc}
input [127:0] qd_e2_q; // qd arrives with second uop on {srcb,srca}
// Outputs
output [127:0] crypt3_out_e6_q;
output crypt3_active;
//#
//# Internal Signals - Automatic Declarations
//# =========================================
//#
wire [127: 0] crypt3_out_e5;
reg [127: 0] crypt3_out_e6_q;
wire firstop_recvd_e1;
reg firstop_recvd_e2_q;
reg ival_e3_q;
reg ival_e4_q;
reg ival_e5_q;
wire [127: 0] newx_e2;
wire [127: 0] newx_e3;
wire [127: 0] newx_e4;
wire [127: 0] newy_e2;
wire [127: 0] newy_e3;
wire [127: 0] newy_e4;
reg [127: 0] qm_e2_q;
reg [127: 0] qn_e2_q;
wire [127: 0] sha1_xin_e2;
wire [ 31: 0] sha1_yin_e2;
wire [ 31: 0] sha1_zin_e2;
wire sha1c_e2;
reg sha1c_e2_q;
reg sha1c_e3_q;
reg sha1c_e4_q;
reg sha1c_e5_q;
wire sha1cpm_e2;
wire sha1cpm_e3;
wire sha1cpm_e4;
wire sha1cpm_e5;
wire [127: 0] sha1cpm_x_e2;
wire [127: 0] sha1cpm_x_e3;
wire [127: 0] sha1cpm_x_e4;
wire [127: 0] sha1cpm_x_e5;
wire [127: 0] sha1cpm_y_e2;
wire [127: 0] sha1cpm_y_e3;
wire [127: 0] sha1cpm_y_e4;
// verilint unused_sigs off
wire [ 31: 0] sha1cpm_y_e5;
// verilint unused_sigs on
wire sha1m_e2;
reg sha1m_e2_q;
reg sha1m_e3_q;
reg sha1m_e4_q;
reg sha1m_e5_q;
wire sha1p_e2;
reg sha1p_e2_q;
reg sha1p_e3_q;
reg sha1p_e4_q;
reg sha1p_e5_q;
wire [127: 0] sha256_xin_e2;
wire [127: 0] sha256_yin_e2;
wire [ 31: 0] sha256_zin_e2;
wire sha256h2_e2;
reg sha256h2_e2_q;
reg sha256h2_e3_q;
reg sha256h2_e4_q;
reg sha256h2_e5_q;
wire sha256h_e2;
reg sha256h_e2_q;
reg sha256h_e3_q;
reg sha256h_e4_q;
reg sha256h_e5_q;
wire [127: 0] sha256h_x_e2;
wire [127: 0] sha256h_x_e3;
wire [127: 0] sha256h_x_e4;
wire [127: 0] sha256h_x_e5;
wire [127: 0] sha256h_y_e2;
wire [127: 0] sha256h_y_e3;
wire [127: 0] sha256h_y_e4;
wire [127: 0] sha256h_y_e5;
wire sha256hh2_e2;
wire sha256hh2_e3;
wire sha256hh2_e4;
wire sha256su1_e2;
reg sha256su1_e2_q;
reg sha256su1_e3_q;
reg sha256su1_e4_q;
reg sha256su1_e5_q;
wire [ 63: 0] sha256su1_x_e3;
wire [ 63: 0] sha256su1_x_e4;
wire [127: 0] x_e2;
wire [127: 0] x_e3;
reg [127: 0] x_e3_q;
wire [127: 0] x_e4;
reg [127: 0] x_e4_q;
wire [127: 0] x_e5;
reg [127: 0] x_e5_q;
wire [127: 0] y_e2;
wire [127: 0] y_e3;
reg [127: 0] y_e3_q;
wire [127: 0] y_e4;
reg [127: 0] y_e4_q;
wire [127: 0] y_e5;
reg [127: 0] y_e5_q;
wire [127: 0] z_e2;
wire [ 95: 0] z_e3;
reg [ 95: 0] z_e3_q;
wire [ 63: 0] z_e4;
reg [ 63: 0] z_e4_q;
wire [ 31: 0] z_e5;
reg [ 31: 0] z_e5_q;
//#
//# Main Code
//# =========
//#
//
// set when ival_e1_q first received, and held until the 2nd uop (ival_e2_q) is received
assign firstop_recvd_e1 = (ival_e1_q | (firstop_recvd_e2_q & ~ival_e2_q));
// ival and instruction flops
// reset flop since 1st uop of crypto pair can be flushed due to SWDW nuke, thus might
// have received ival_e2_q without ever receiving ival_e1_q (since it was flushed). thus
// want firstop_recvd_e2_q to be 0 (not X) to stop X-prop
// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt or posedge cx_reset3)
begin: ufirstop_recvd_e2_q
if (cx_reset3 == 1'b1)
firstop_recvd_e2_q <= `MAIA_DFF_DELAY {1{1'b0}};
`ifdef MAIA_XPROP_FLOP
else if (cx_reset3==1'b0)
firstop_recvd_e2_q <= `MAIA_DFF_DELAY firstop_recvd_e1;
else
firstop_recvd_e2_q <= `MAIA_DFF_DELAY {1{1'bx}};
`else
else
firstop_recvd_e2_q <= `MAIA_DFF_DELAY firstop_recvd_e1;
`endif
end
// verilint flop_checks on
// end of Macro DFF
// reset flop(s) since feeds into active signal used for RCG
// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt or posedge cx_reset3)
begin: uival_e3_q
if (cx_reset3 == 1'b1)
ival_e3_q <= `MAIA_DFF_DELAY {1{1'b0}};
`ifdef MAIA_XPROP_FLOP
else if (cx_reset3==1'b0)
ival_e3_q <= `MAIA_DFF_DELAY ival_e2_q;
else
ival_e3_q <= `MAIA_DFF_DELAY {1{1'bx}};
`else
else
ival_e3_q <= `MAIA_DFF_DELAY ival_e2_q;
`endif
end
// verilint flop_checks on
// end of Macro DFF
// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt or posedge cx_reset3)
begin: uival_e4_q
if (cx_reset3 == 1'b1)
ival_e4_q <= `MAIA_DFF_DELAY {1{1'b0}};
`ifdef MAIA_XPROP_FLOP
else if (cx_reset3==1'b0)
ival_e4_q <= `MAIA_DFF_DELAY ival_e3_q;
else
ival_e4_q <= `MAIA_DFF_DELAY {1{1'bx}};
`else
else
ival_e4_q <= `MAIA_DFF_DELAY ival_e3_q;
`endif
end
// verilint flop_checks on
// end of Macro DFF
// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt or posedge cx_reset3)
begin: uival_e5_q
if (cx_reset3 == 1'b1)
ival_e5_q <= `MAIA_DFF_DELAY {1{1'b0}};
`ifdef MAIA_XPROP_FLOP
else if (cx_reset3==1'b0)
ival_e5_q <= `MAIA_DFF_DELAY ival_e4_q;
else
ival_e5_q <= `MAIA_DFF_DELAY {1{1'bx}};
`else
else
ival_e5_q <= `MAIA_DFF_DELAY ival_e4_q;
`endif
end
// verilint flop_checks on
// end of Macro DFF
// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt)
begin: uinst_e2
if (ival_e1_q==1'b1) begin
sha1c_e2_q <= `MAIA_DFF_DELAY sha1c_e1_q;
sha1p_e2_q <= `MAIA_DFF_DELAY sha1p_e1_q;
sha1m_e2_q <= `MAIA_DFF_DELAY sha1m_e1_q;
sha256h_e2_q <= `MAIA_DFF_DELAY sha256h_e1_q;
sha256h2_e2_q <= `MAIA_DFF_DELAY sha256h2_e1_q;
sha256su1_e2_q <= `MAIA_DFF_DELAY sha256su1_e1_q;
end
`ifdef MAIA_XPROP_FLOP
else if ((ival_e1_q==1'b0));
else begin
sha1c_e2_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha1p_e2_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha1m_e2_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha256h_e2_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha256h2_e2_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha256su1_e2_q <= `MAIA_DFF_DELAY {1{1'bx}};
end
`endif
end
// verilint flop_checks on
// end of Macro DFF
// stop X-prop if 1st uop was nuked due to swdw_nuke and 2nd was issued
assign sha1c_e2 = firstop_recvd_e2_q & sha1c_e2_q;
assign sha1p_e2 = firstop_recvd_e2_q & sha1p_e2_q;
assign sha1m_e2 = firstop_recvd_e2_q & sha1m_e2_q;
assign sha256h_e2 = firstop_recvd_e2_q & sha256h_e2_q;
assign sha256h2_e2 = firstop_recvd_e2_q & sha256h2_e2_q;
assign sha256su1_e2 = firstop_recvd_e2_q & sha256su1_e2_q;
// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt)
begin: uinst_e3
if (ival_e2_q==1'b1) begin
sha1c_e3_q <= `MAIA_DFF_DELAY sha1c_e2;
sha1p_e3_q <= `MAIA_DFF_DELAY sha1p_e2;
sha1m_e3_q <= `MAIA_DFF_DELAY sha1m_e2;
sha256h_e3_q <= `MAIA_DFF_DELAY sha256h_e2;
sha256h2_e3_q <= `MAIA_DFF_DELAY sha256h2_e2;
sha256su1_e3_q <= `MAIA_DFF_DELAY sha256su1_e2;
end
`ifdef MAIA_XPROP_FLOP
else if ((ival_e2_q==1'b0));
else begin
sha1c_e3_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha1p_e3_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha1m_e3_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha256h_e3_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha256h2_e3_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha256su1_e3_q <= `MAIA_DFF_DELAY {1{1'bx}};
end
`endif
end
// verilint flop_checks on
// end of Macro DFF
// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt)
begin: uinst_e4
if (ival_e3_q==1'b1) begin
sha1c_e4_q <= `MAIA_DFF_DELAY sha1c_e3_q;
sha1p_e4_q <= `MAIA_DFF_DELAY sha1p_e3_q;
sha1m_e4_q <= `MAIA_DFF_DELAY sha1m_e3_q;
sha256h_e4_q <= `MAIA_DFF_DELAY sha256h_e3_q;
sha256h2_e4_q <= `MAIA_DFF_DELAY sha256h2_e3_q;
sha256su1_e4_q <= `MAIA_DFF_DELAY sha256su1_e3_q;
end
`ifdef MAIA_XPROP_FLOP
else if ((ival_e3_q==1'b0));
else begin
sha1c_e4_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha1p_e4_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha1m_e4_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha256h_e4_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha256h2_e4_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha256su1_e4_q <= `MAIA_DFF_DELAY {1{1'bx}};
end
`endif
end
// verilint flop_checks on
// end of Macro DFF
// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt)
begin: uinst_e5
if (ival_e4_q==1'b1) begin
sha1c_e5_q <= `MAIA_DFF_DELAY sha1c_e4_q;
sha1p_e5_q <= `MAIA_DFF_DELAY sha1p_e4_q;
sha1m_e5_q <= `MAIA_DFF_DELAY sha1m_e4_q;
sha256h_e5_q <= `MAIA_DFF_DELAY sha256h_e4_q;
sha256h2_e5_q <= `MAIA_DFF_DELAY sha256h2_e4_q;
sha256su1_e5_q <= `MAIA_DFF_DELAY sha256su1_e4_q;
end
`ifdef MAIA_XPROP_FLOP
else if ((ival_e4_q==1'b0));
else begin
sha1c_e5_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha1p_e5_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha1m_e5_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha256h_e5_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha256h2_e5_q <= `MAIA_DFF_DELAY {1{1'bx}};
sha256su1_e5_q <= `MAIA_DFF_DELAY {1{1'bx}};
end
`endif
end
// verilint flop_checks on
// end of Macro DFF
// E1
// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt)
begin: uops_e2
if (ival_e1_q==1'b1) begin
qm_e2_q[127:0] <= `MAIA_DFF_DELAY qm_e1_q[127:0];
qn_e2_q[127:0] <= `MAIA_DFF_DELAY qn_e1_q[127:0];
end
`ifdef MAIA_XPROP_FLOP
else if ((ival_e1_q==1'b0));
else begin
qm_e2_q[127:0] <= `MAIA_DFF_DELAY {128{1'bx}};
qn_e2_q[127:0] <= `MAIA_DFF_DELAY {128{1'bx}};
end
`endif
end
// verilint flop_checks on
// end of Macro DFF
// E2
// E2 datapath: first of the four iteration stages (E2-E5).
// x = qd (arrives this cycle), y = qn and z = qm (both captured in E1).
assign x_e2[127:0] = qd_e2_q[127:0];
assign y_e2[127:0] = qn_e2_q[127:0];
assign z_e2[127:0] = qm_e2_q[127:0];
// Gate the sha1 operands so the sha1 logic only toggles for sha1c/p/m.
// Only the low 32 bits of y and z are consumed per stage.
assign sha1_xin_e2[127:0] = {128{sha1cpm_e2}} & x_e2[127:0];
assign sha1_yin_e2[ 31:0] = { 32{sha1cpm_e2}} & y_e2[ 31:0];
assign sha1_zin_e2[ 31:0] = { 32{sha1cpm_e2}} & z_e2[ 31:0];
// sha1 hash update
// Note: the function selects here are the raw E2 flops, not the
// firstop_recvd-qualified versions; the data inputs above and the result mux
// below are qualified via sha1cpm_e2.
maia_cx_sha1cpm usha1cpm_e2(
.choose (sha1c_e2_q),
.parity (sha1p_e2_q),
.majority (sha1m_e2_q),
.x (sha1_xin_e2[127:0]),
.y (sha1_yin_e2[31:0]),
.z (sha1_zin_e2[31:0]),
.newx (sha1cpm_x_e2[127:0]),
.newy (sha1cpm_y_e2[31:0])
);
// Upper 96 bits of y are not touched by the sha1 block; carry them along (gated)
assign sha1cpm_y_e2[127:32] = {96{sha1cpm_e2}} & y_e2[127:32];
// Gate the sha256 hash operands so the logic only toggles for sha256h/sha256h2
assign sha256_xin_e2[127:0] = {128{sha256hh2_e2}} & x_e2[127:0];
assign sha256_yin_e2[127:0] = {128{sha256hh2_e2}} & y_e2[127:0];
assign sha256_zin_e2[ 31:0] = { 32{sha256hh2_e2}} & z_e2[ 31:0];
// sha256 hash update (1 and 2)
maia_cx_sha256h32 usha256h32_e2(
.x (sha256_xin_e2[127:0]),
.y (sha256_yin_e2[127:0]),
.z (sha256_zin_e2[31:0]),
.newx (sha256h_x_e2[127:0]),
.newy (sha256h_y_e2[127:0])
);
// mux results
assign sha1cpm_e2 = sha1c_e2 | sha1p_e2 | sha1m_e2;
assign sha256hh2_e2 = sha256h_e2 | sha256h2_e2;
// sha256su1 does no computation in E2: x (qd) is forwarded unchanged ...
assign newx_e2[127:0] = ({128{sha1cpm_e2 }} & sha1cpm_x_e2[127:0])
| ({128{sha256hh2_e2}} & sha256h_x_e2[127:0])
| ({128{sha256su1_e2}} & x_e2[127:0]);
// ... and y is pre-arranged as {qm[31:0], qn[127:32]} for the E3/E4 sha256su1 blocks
assign newy_e2[127:0] = ({128{sha1cpm_e2 }} & sha1cpm_y_e2[127:0])
| ({128{sha256hh2_e2}} & sha256h_y_e2[127:0])
| ({128{sha256su1_e2}} & {z_e2[31:0], y_e2[127:32]});
// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt)
begin: uops_e3
if (ival_e2_q==1'b1) begin
x_e3_q[127:0] <= `MAIA_DFF_DELAY newx_e2[127:0];
y_e3_q[127:0] <= `MAIA_DFF_DELAY newy_e2[127:0];
z_e3_q[95:0] <= `MAIA_DFF_DELAY z_e2[127:32];
end
`ifdef MAIA_XPROP_FLOP
else if ((ival_e2_q==1'b0));
else begin
x_e3_q[127:0] <= `MAIA_DFF_DELAY {128{1'bx}};
y_e3_q[127:0] <= `MAIA_DFF_DELAY {128{1'bx}};
z_e3_q[95:0] <= `MAIA_DFF_DELAY {96{1'bx}};
end
`endif
end
// verilint flop_checks on
// end of Macro DFF
// E3
// E3 datapath: second iteration stage.
// z_e3 holds the upper 96 bits of the E2 z value, so z_e3[31:0] is the next
// 32-bit word to be consumed. Unlike E2, the operands are not gated per
// instruction here; selection happens only in the result muxes.
assign x_e3[127:0] = x_e3_q[127:0];
assign y_e3[127:0] = y_e3_q[127:0];
assign z_e3[95:0] = z_e3_q[95:0];
// sha1 hash update
maia_cx_sha1cpm usha1cpm_e3(
.choose (sha1c_e3_q),
.parity (sha1p_e3_q),
.majority (sha1m_e3_q),
.x (x_e3[127:0]),
.y (y_e3[31:0]),
.z (z_e3[31:0]),
.newx (sha1cpm_x_e3[127:0]),
.newy (sha1cpm_y_e3[31:0])
);
// Upper 96 bits of y pass through the sha1 stage unchanged
assign sha1cpm_y_e3[127:32] = y_e3[127:32];
// sha256 hash update (1 and 2)
maia_cx_sha256h32 usha256h32_e3(
.x (x_e3[127:0]),
.y (y_e3[127:0]),
.z (z_e3[31:0]),
.newx (sha256h_x_e3[127:0]),
.newy (sha256h_y_e3[127:0])
);
// sha256 schedule update 1, cycle 1
// Produces the low 64 bits of the sha256su1 result.
maia_cx_sha256su1 usha256su1_e3(
.sha256su1_op (sha256su1_e3_q),
.x (x_e3[63:0]), // qd[63:0]
.y (y_e3[63:0]), // qn[95:32]
.z (z_e3[95:32]), // qm[127:64]
.newx (sha256su1_x_e3[63:0])
);
// mux results
assign sha1cpm_e3 = sha1c_e3_q | sha1p_e3_q | sha1m_e3_q;
assign sha256hh2_e3 = sha256h_e3_q | sha256h2_e3_q;
// For sha256su1 the upper half of x (qd[127:64]) is kept for the E4 cycle and
// the lower half is replaced by this cycle's result.
assign newx_e3[127:0] = ({128{sha1cpm_e3 }} & sha1cpm_x_e3[127:0])
| ({128{sha256hh2_e3 }} & sha256h_x_e3[127:0])
| ({128{sha256su1_e3_q}} & {x_e3[127:64], sha256su1_x_e3[63:0]});
// For sha256su1, y is forwarded unchanged (its upper half is consumed in E4)
assign newy_e3[127:0] = ({128{sha1cpm_e3 }} & sha1cpm_y_e3[127:0])
| ({128{sha256hh2_e3 }} & sha256h_y_e3[127:0])
| ({128{sha256su1_e3_q}} & {y_e3[127:0]});
// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt)
begin: uops_e4
if (ival_e3_q==1'b1) begin
x_e4_q[127:0] <= `MAIA_DFF_DELAY newx_e3[127:0];
y_e4_q[127:0] <= `MAIA_DFF_DELAY newy_e3[127:0];
z_e4_q[63:0] <= `MAIA_DFF_DELAY z_e3[95:32];
end
`ifdef MAIA_XPROP_FLOP
else if ((ival_e3_q==1'b0));
else begin
x_e4_q[127:0] <= `MAIA_DFF_DELAY {128{1'bx}};
y_e4_q[127:0] <= `MAIA_DFF_DELAY {128{1'bx}};
z_e4_q[63:0] <= `MAIA_DFF_DELAY {64{1'bx}};
end
`endif
end
// verilint flop_checks on
// end of Macro DFF
// E4
// E4 datapath: third iteration stage.
// z_e4 holds the upper 64 bits of the E3 z value; z_e4[31:0] is consumed here.
assign x_e4[127:0] = x_e4_q[127:0];
assign y_e4[127:0] = y_e4_q[127:0];
assign z_e4[63:0] = z_e4_q[63:0];
// sha1 hash update
maia_cx_sha1cpm usha1cpm_e4(
.choose (sha1c_e4_q),
.parity (sha1p_e4_q),
.majority (sha1m_e4_q),
.x (x_e4[127:0]),
.y (y_e4[31:0]),
.z (z_e4[31:0]),
.newx (sha1cpm_x_e4[127:0]),
.newy (sha1cpm_y_e4[31:0])
);
// Upper 96 bits of y pass through the sha1 stage unchanged
assign sha1cpm_y_e4[127:32] = y_e4[127:32];
// sha256 hash update (1 and 2)
maia_cx_sha256h32 usha256h32_e4(
.x (x_e4[127:0]),
.y (y_e4[127:0]),
.z (z_e4[31:0]),
.newx (sha256h_x_e4[127:0]),
.newy (sha256h_y_e4[127:0])
);
// sha256 schedule update 1, cycle 2
// Produces the high 64 bits of the sha256su1 result; its z input is the low
// 64-bit result computed in E3 (carried in x_e4[63:0]).
maia_cx_sha256su1 usha256su1_e4(
.sha256su1_op (sha256su1_e4_q),
.x (x_e4[127:64]), // qd[127:64]
.y (y_e4[127:64]), // {qm[31:0], qn[127:96]}
.z (x_e4[63:0]), // sha256su1_x_e3[63:0]
.newx (sha256su1_x_e4[63:0])
);
// mux results
assign sha1cpm_e4 = sha1c_e4_q | sha1p_e4_q | sha1m_e4_q;
assign sha256hh2_e4 = sha256h_e4_q | sha256h2_e4_q;
// sha256su1 result is now complete: {E4 high half, E3 low half}
assign newx_e4[127:0] = ({128{sha1cpm_e4 }} & sha1cpm_x_e4[127:0])
| ({128{sha256hh2_e4 }} & sha256h_x_e4[127:0])
| ({128{sha256su1_e4_q}} & {sha256su1_x_e4[63:0], x_e4[63:0]});
// No sha256su1 term: y is not needed by sha256su1 after this stage, so it is
// driven to zero for that instruction.
assign newy_e4[127:0] = ({128{sha1cpm_e4 }} & sha1cpm_y_e4[127:0])
| ({128{sha256hh2_e4 }} & sha256h_y_e4[127:0]);
// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt)
begin: uops_e5
if (ival_e4_q==1'b1) begin
x_e5_q[127:0] <= `MAIA_DFF_DELAY newx_e4[127:0];
y_e5_q[127:0] <= `MAIA_DFF_DELAY newy_e4[127:0];
z_e5_q[31:0] <= `MAIA_DFF_DELAY z_e4[63:32];
end
`ifdef MAIA_XPROP_FLOP
else if ((ival_e4_q==1'b0));
else begin
x_e5_q[127:0] <= `MAIA_DFF_DELAY {128{1'bx}};
y_e5_q[127:0] <= `MAIA_DFF_DELAY {128{1'bx}};
z_e5_q[31:0] <= `MAIA_DFF_DELAY {32{1'bx}};
end
`endif
end
// verilint flop_checks on
// end of Macro DFF
// E5
// E5 datapath: fourth and final iteration stage, followed by the result mux.
// z_e5 is the last remaining 32-bit word of the original z operand.
assign x_e5[127:0] = x_e5_q[127:0];
assign y_e5[127:0] = y_e5_q[127:0];
assign z_e5[31:0] = z_e5_q[31:0];
// sha1 hash update
// newy of this final stage is not used (only newx forms the result).
maia_cx_sha1cpm usha1cpm_e5(
.choose (sha1c_e5_q),
.parity (sha1p_e5_q),
.majority (sha1m_e5_q),
.x (x_e5[127:0]),
.y (y_e5[31:0]),
.z (z_e5[31:0]),
.newx (sha1cpm_x_e5[127:0]),
.newy (sha1cpm_y_e5[31:0])
);
// sha256 hash update (1 and 2)
maia_cx_sha256h32 usha256h32_e5(
.x (x_e5[127:0]),
.y (y_e5[127:0]),
.z (z_e5[31:0]),
.newx (sha256h_x_e5[127:0]),
.newy (sha256h_y_e5[127:0])
);
// mux results
assign sha1cpm_e5 = sha1c_e5_q | sha1p_e5_q | sha1m_e5_q;
// Final AND-OR result mux:
// - sha1c/p/m and sha256h return the updated x value
// - sha256h2 returns the updated y value from the same sha256 block
// - sha256su1 finished in E4; its result is simply carried in x_e5
assign crypt3_out_e5[127:0] = ({128{sha1cpm_e5}} & sha1cpm_x_e5[127:0])
| ({128{sha256h_e5_q}} & sha256h_x_e5[127:0])
| ({128{sha256h2_e5_q}} & sha256h_y_e5[127:0])
| ({128{sha256su1_e5_q}} & x_e5[127:0]);
// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt)
begin: ures_e6
if (ival_e5_q==1'b1) begin
crypt3_out_e6_q[127:0] <= `MAIA_DFF_DELAY crypt3_out_e5[127:0];
end
`ifdef MAIA_XPROP_FLOP
else if ((ival_e5_q==1'b0));
else begin
crypt3_out_e6_q[127:0] <= `MAIA_DFF_DELAY {128{1'bx}};
end
`endif
end
// verilint flop_checks on
// end of Macro DFF
//-----------------------------------------------------------------------------
// regional clock gating (RCG) terms
//-----------------------------------------------------------------------------
assign crypt3_active = (ival_e1_q |
ival_e2_q |
ival_e3_q |
ival_e4_q |
ival_e5_q
);
endmodule
//ARMAUTO UNDEF START
`define MAIA_UNDEFINE
`include "maia_header.v"
`undef MAIA_UNDEFINE
//ARMAUTO UNDEF END

View File

@ -0,0 +1,41 @@
###############################################################################
# The confidential and proprietary information contained in this file may
# only be used by a person authorised under and to the extent permitted
# by a subsisting licensing agreement from ARM Limited.
#
# (C) COPYRIGHT 2011-2013 ARM Limited.
# ALL RIGHTS RESERVED
#
# This entire notice must be reproduced on all copies of this file
# and copies of this file may only be made by a person if such person is
# permitted to do so under the terms of a subsisting license agreement
# from ARM Limited.
#
###############################################################################
# Makefile.inc for crypto64
# setup source paths (crypto64)
crypto64_base = crypto64
crypto64_src = $(crypto64_base)/src
crypto64_obj = $(crypto64_base)/obj
crypto64_elf = $(crypto64_base)/elf
#rules for crypto64
crypto64_asm_obj = $(incl_obj)/benchmark_boot_a64.o $(incl_obj)/vectors.o $(incl_obj)/num_cpus_a64.o $(crypto64_obj)/cryptolib_asm64.o
crypto64_c_obj = $(incl_obj)/sys_a64.o $(incl_obj)/stackheap_a64.o $(crypto64_obj)/cryptodata.o $(crypto64_obj)/crypto_test.o
# 'crypto64' and 'clean_crypto64' are commands, not files. Declare them phony
# so that a file or directory with the same name (note that the source tree
# itself lives in a directory called 'crypto64') can never make them appear
# up to date.
.PHONY: crypto64 clean_crypto64

# Top-level target: always rebuild from scratch, then link the ELF.
# NOTE: the clean-before-build ordering relies on serial execution of the
# prerequisites; do not build this target with 'make -j'.
crypto64: clean_crypto64 $(crypto64_elf)/crypto64.elf

# Compile C sources of the benchmark.
$(crypto64_obj)/%.o: $(crypto64_src)/%.c
	$(CC_A64) $(CC_A64_OPTS) $< -o $@

# Assemble AArch64 assembly sources of the benchmark.
$(crypto64_obj)/%.o: $(crypto64_src)/%.s
	$(AS_A64) $(AS_A64_OPTS) $< -o $@

# Link all assembly and C objects into the final image.
$(crypto64_elf)/crypto64.elf: $(crypto64_asm_obj) $(crypto64_c_obj)
	$(LINK_A64) $(LINK_A64_OPTS) $(crypto64_asm_obj) $(crypto64_c_obj) -o $@

# Remove every object this benchmark links (including the shared ones under
# $(incl_obj)) and the ELF. '\rm' bypasses any shell alias for rm.
clean_crypto64:
	\rm -f $(crypto64_asm_obj) $(crypto64_c_obj) $(crypto64_elf)/crypto64.elf

View File

@ -0,0 +1,80 @@
//-----------------------------------------------------------------------------
// The confidential and proprietary information contained in this file may
// only be used by a person authorised under and to the extent permitted
// by a subsisting licensing agreement from ARM Limited.
//
// (C) COPYRIGHT 2012-2013 ARM Limited.
// ALL RIGHTS RESERVED
//
// This entire notice must be reproduced on all copies of this file
// and copies of this file may only be made by a person if such person is
// permitted to do so under the terms of a subsisting license agreement
// from ARM Limited.
//
// SVN Information
//
// Checked In : $Date: 2013-03-19 09:12:51 +0000 (Tue, 19 Mar 2013) $
//
// Revision : $Revision: 241584 $
//
// Release Information :
//
//-----------------------------------------------------------------------------
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include "cryptolib.h"
#include "cryptodata.h"
#include "benchmark.h"
#ifndef BLOCK_SIZE
#define BLOCK_SIZE 1024
#endif
#ifndef ITERATIONS
#define ITERATIONS 10
#endif
/*
 * Map a block size in bytes to a row index of the AES test-vector tables.
 *
 * Scans bits 4..12 of block_size from least to most significant and returns
 * the position of the first set bit, minus 4 (so 16 -> 0, 32 -> 1, ...,
 * 4096 -> 8). Returns 0 when none of those bits is set.
 */
uint8_t get_aes_index( int block_size)
{
    const uint8_t first_bit = 4;   /* bit 4  <=> 16-byte block   */
    const uint8_t last_bit  = 12;  /* bit 12 <=> 4096-byte block */
    uint8_t bit = first_bit;

    while (bit <= last_bit)
    {
        if (block_size & (1 << bit))
            return (uint8_t)(bit - first_bit);
        bit++;
    }

    /* No bit in range set: fall back to the first table row. */
    return 0;
}
/*
 * Benchmark driver for AES128-ECB encryption.
 *
 * Selects the test-vector row that matches BLOCK_SIZE, then runs ITERATIONS
 * passes of key expansion followed by encryption of block_size bytes.  Only
 * the encryption call sits between LOOPSTART and LOOPEND.  After the last
 * pass the output is compared with the reference ciphertext and a failure
 * message is printed on mismatch.
 */
int main()
{
    uint32_t block_size = BLOCK_SIZE;
    uint8_t index;
    uint32_t cmpres = 0;
    /* 32-bit counter: ITERATIONS can be overridden at build time, and an
     * 8-bit counter would wrap and never terminate for ITERATIONS >= 256. */
    uint32_t i;
    /* Expanded key schedule: 11 round keys of 16 bytes each. */
    uint8_t kv[176];

    printf("AES128-ECB encryption\n");
    index = get_aes_index(block_size);
    BENCHSTART
    for (i = 0; i < ITERATIONS; i++)
    {
        aes128_key_expand(aes128_ecb_encrypt_key[index], kv);
        LOOPSTART
        aes128_ecb_encrypt(kv, aes128_ecb_encrypt_input[index], aes128_ecb_encrypt_output[index], block_size);
        LOOPEND
    }
    /* Every pass writes the same ciphertext, so one comparison suffices. */
    cmpres |= memcmp(aes128_ecb_encrypt_output[index], aes128_ecb_encrypt_ref_output[index], block_size);
    if (cmpres)
        printf("AES128-ECB encryption failed\n");
    BENCHFINISHED
}

View File

@ -0,0 +1,5 @@
/*
 * Test vectors for the AES128-ECB benchmark.  The benchmark's main() indexes
 * all four tables with the same row number, obtained from get_aes_index().
 * The number of rows is left open here and fixed by the defining file.
 */
/* One 16-byte AES-128 cipher key per row. */
extern const unsigned char aes128_ecb_encrypt_key[][16];
/* Plaintext input; each row holds up to 4096 bytes. */
extern const unsigned char aes128_ecb_encrypt_input[][4096];
/* Reference ciphertext that the produced output is compared against. */
extern const unsigned char aes128_ecb_encrypt_ref_output[][4096];
/* Writable buffer that receives the ciphertext produced by the benchmark. */
extern unsigned char aes128_ecb_encrypt_output[][4096];

View File

@ -0,0 +1,26 @@
//-----------------------------------------------------------------------------
// The confidential and proprietary information contained in this file may
// only be used by a person authorised under and to the extent permitted
// by a subsisting licensing agreement from ARM Limited.
//
// (C) COPYRIGHT 2012-2013 ARM Limited.
// ALL RIGHTS RESERVED
//
// This entire notice must be reproduced on all copies of this file
// and copies of this file may only be made by a person if such person is
// permitted to do so under the terms of a subsisting license agreement
// from ARM Limited.
//
// SVN Information
//
// Checked In : $Date: 2013-03-19 09:12:51 +0000 (Tue, 19 Mar 2013) $
//
// Revision : $Revision: 241584 $
//
// Release Information :
//
//-----------------------------------------------------------------------------
/*
 * Expand a 16-byte AES-128 key.
 *   key_in  : 16-byte cipher key.
 *   key_out : receives the expanded key schedule; the assembly implementation
 *             stores 11 round keys of 16 bytes (176 bytes in total).
 */
extern void aes128_key_expand(const unsigned char *key_in, unsigned char *key_out);
/*
 * AES-128 ECB encryption.
 *   key      : expanded key schedule as written by aes128_key_expand().
 *   in_data  : plaintext.
 *   out_data : receives the ciphertext.
 *   size     : number of bytes to encrypt; the implementation counts it down
 *              in steps of 16, so it is expected to be a multiple of 16.
 */
extern void aes128_ecb_encrypt(const unsigned char *key, const unsigned char *in_data, unsigned char *out_data, unsigned int size);

View File

@ -0,0 +1,138 @@
;#-----------------------------------------------------------------------------
;# The confidential and proprietary information contained in this file may
;# only be used by a person authorised under and to the extent permitted
;# by a subsisting licensing agreement from ARM Limited.
;#
;# (C) COPYRIGHT 2012-2013 ARM Limited.
;# ALL RIGHTS RESERVED
;#
;# This entire notice must be reproduced on all copies of this file
;# and copies of this file may only be made by a person if such person is
;# permitted to do so under the terms of a subsisting license agreement
;# from ARM Limited.
;#
;# SVN Information
;#
;# Checked In : $Date: 2013-03-19 09:12:51 +0000 (Tue, 19 Mar 2013) $
;#
;# Revision : $Revision: 241584 $
;#
;# Release Information :
;#
;#-----------------------------------------------------------------------------
;# Code and the constant table below live together in one allocatable,
;# executable section, so the table can be addressed PC-relatively with ADR.
.section aes_code, "ax"
;# Entry points called from C.
.global aes128_key_expand
.global aes128_ecb_encrypt
;# Align the table to a 64-byte (2^6) boundary.
.align 6
;# AES-128 round constants, one 32-bit word per derived round key (10 in all).
;# aes128_key_expand consumes them in order with post-incremented LD1R loads.
rcon_array:
.word 0x00000001
.word 0x00000002
.word 0x00000004
.word 0x00000008
.word 0x00000010
.word 0x00000020
.word 0x00000040
.word 0x00000080
.word 0x0000001b
.word 0x00000036
;# Align the first function to a 64-byte boundary as well.
.align 6
;# void aes128_key_expand(const unsigned char *key_in, unsigned char *key_out)
;#
;# Expands a 16-byte AES-128 key into 11 round keys (176 bytes) stored
;# consecutively at key_out; the first 16 bytes stored are the input key.
;#   In   : x0 = key_in, x1 = key_out
;#   Uses : w2, x3, w4, v16-v22, condition flags (x1 and x3 are advanced)
;#
;# The loop body is unrolled twice and ping-pongs the current round key
;# between v16 and v21: five iterations store round keys 0..9, and the ST1
;# after the loop stores round key 10.
;#
;# NOTE(review): SHA1SU0 is used here purely as an XOR/shuffle primitive, not
;# for SHA-1.  Going by the Arm ARM pseudocode,
;#   Vd = (Vn<63:0> : Vd<127:64>) ^ Vd ^ Vm
;# confirm against the architecture manual before touching this sequence.
.type aes128_key_expand STT_FUNC
aes128_key_expand:
;# v16 = cipher key = round key 0
LD1 {v16.16B}, [x0]
;# w2 = 0x0c0f0e0d: TBL byte indices 13,14,15,12 (built with MOVZ + MOVK)
MOVZ w2, #0x0e0d
;# v17 = all zeroes (zero key for AESE, zero fill for EXT)
DUP v17.16B, wzr
MOVK w2, #0x0c0f, lsl #16
;# v19 = index pattern replicated into every 32-bit lane
DUP v19.4S, w2
;# x3 walks the round-constant table
ADR x3, rcon_array
;# w4 = number of round keys still to derive
MOV w4, #10
exp:
;# --- first half: current key in v16, next key built in v21 ---
;# every lane of v18 = last word of the current key rotated by one byte
TBL v18.16B, {v16.16B}, v19.16B
;# v21 = next round constant in all lanes; advance x3
LD1R {v21.4S}, [x3], #4
;# zero-key AESE: byte substitution of v18 (all four columns are equal, so the
;# row shift has no net effect)
AESE v18.16B, v17.16B
;# v20 = current key moved up by one word with a zero word shifted in
EXT v20.16B, v17.16B, v16.16B, #12
;# combine the substituted word with the round constant (see NOTE above)
SHA1SU0 v21.4S, v18.4S, v17.4S
;# v22 = current key XOR its one-word-shifted copy
EOR v22.16B, v16.16B, v20.16B
;# store the current round key
ST1 {v16.16B}, [x1], #16
;# v21 = next round key (see NOTE above)
SHA1SU0 v21.4S, v22.4S, v22.4S
;# --- second half: same steps with the roles of v16 and v21 swapped ---
TBL v18.16B, {v21.16B}, v19.16B
LD1R {v16.4S}, [x3], #4
AESE v18.16B, v17.16B
EXT v20.16B, v17.16B, v21.16B, #12
SHA1SU0 v16.4S, v18.4S, v17.4S
EOR v22.16B, v21.16B, v20.16B
ST1 {v21.16B}, [x1], #16
;# two round keys derived in this iteration; sets the flags tested by B.NE
SUBS w4, w4, #2
SHA1SU0 v16.4S, v22.4S, v22.4S
B.NE exp
;# store the final round key
ST1 {v16.16B}, [x1]
RET
;# One AESE + AESMC step on a single block register.
.macro aes_enc_one blk, key
AESE \blk, \key
AESMC \blk, \blk
.endm
;# One full (non-final) encryption round applied to the three blocks held in
;# v0, v1 and v2, all using the same round key \keyreg.
.macro aes_enc_round keyreg
aes_enc_one v0.16B, \keyreg
aes_enc_one v1.16B, \keyreg
aes_enc_one v2.16B, \keyreg
.endm
;# One AESD + AESIMC step on a single block register.
.macro aes_dec_one blk, key
AESD \blk, \key
AESIMC \blk, \blk
.endm
;# One full (non-final) decryption round applied to the three blocks held in
;# v0, v1 and v2, all using the same round key \keyreg.
.macro aes_dec_round keyreg
aes_dec_one v0.16B, \keyreg
aes_dec_one v1.16B, \keyreg
aes_dec_one v2.16B, \keyreg
.endm
;# void aes128_ecb_encrypt(const unsigned char *key, const unsigned char *in_data, unsigned char *out_data, unsigned int size)
;#
;# AES-128 ECB encryption of `size` bytes with an already expanded key.
;#   In   : x0 = key      (11 round keys, 176 bytes)
;#          x1 = in_data
;#          x2 = out_data
;#          w3 = size in bytes; expected to be a multiple of 16
;#   Uses : x0-x3, v0-v2, v16-v26, condition flags
;#
;# Three 16-byte blocks are pushed through rounds 1-9 together.  The final
;# round, the store and the remaining-byte check are then done per block, so
;# the routine stops after exactly size/16 blocks.
;#
;# NOTE(review): every pass loads 48 input bytes regardless of how many are
;# left, so up to 32 bytes beyond in_data + size may be read on the last pass.
.type aes128_ecb_encrypt STT_FUNC
aes128_ecb_encrypt:
    ;# `size` is a 32-bit argument, so bits [63:32] of x3 are not guaranteed to
    ;# be zero on entry.  Zero-extend it before x3 is used as the byte counter.
    MOV w3, w3
    ;# Nothing to encrypt: return without touching out_data.
    CBZ x3, end_enc
    ;# Load the key: v16-v24 drive rounds 1-9, v25 and v26 the final round.
    LD1 {v16.16B-v19.16B}, [x0], #64
    LD1 {v20.16B-v23.16B}, [x0], #64
    LD1 {v24.16B-v26.16B}, [x0]
load_ip:
    ;# Load three blocks of input data
    LD1 {v0.16B-v2.16B}, [x1], #48
    ;# Rounds 1-9
    aes_enc_round v16.16B
    aes_enc_round v17.16B
    aes_enc_round v18.16B
    aes_enc_round v19.16B
    aes_enc_round v20.16B
    aes_enc_round v21.16B
    aes_enc_round v22.16B
    aes_enc_round v23.16B
    aes_enc_round v24.16B
    ;# Round 10 (no AESMC), block 0: AESE with v25, then XOR with the last key
    AESE v0.16B, v25.16B
    ;# Prefetch input ahead of the next pass
    PRFM PLDL1KEEP, [x1, #64]
    EOR v0.16B, v0.16B, v26.16B
    SUBS x3, x3, #16
    ST1 {v0.16B}, [x2], #16
    ;# Stop if that was the last block
    B.EQ end_enc
    ;# Round 10, block 1
    AESE v1.16B, v25.16B
    EOR v1.16B, v1.16B, v26.16B
    SUBS x3, x3, #16
    ST1 {v1.16B}, [x2], #16
    B.EQ end_enc
    ;# Round 10, block 2
    AESE v2.16B, v25.16B
    EOR v2.16B, v2.16B, v26.16B
    SUBS x3, x3, #16
    ST1 {v2.16B}, [x2], #16
    ;# More data left: process the next three blocks
    B.GT load_ip
end_enc:
    RET
.end