mirror of
https://github.com/Qortal/Brooklyn.git
synced 2025-02-07 14:54:17 +00:00
408 lines
6.7 KiB
ArmAsm
408 lines
6.7 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com> All Rights Reserved.
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
|
#define MSB 0
|
|
#define LSB 3
|
|
#else
|
|
#define MSB 3
|
|
#define LSB 0
|
|
#endif
|
|
|
|
#define POLY1305_BLOCK_SIZE 16
|
|
.text
|
|
#define H0 $t0
|
|
#define H1 $t1
|
|
#define H2 $t2
|
|
#define H3 $t3
|
|
#define H4 $t4
|
|
|
|
#define R0 $t5
|
|
#define R1 $t6
|
|
#define R2 $t7
|
|
#define R3 $t8
|
|
|
|
#define O0 $s0
|
|
#define O1 $s4
|
|
#define O2 $v1
|
|
#define O3 $t9
|
|
#define O4 $s5
|
|
|
|
#define S1 $s1
|
|
#define S2 $s2
|
|
#define S3 $s3
|
|
|
|
#define SC $at
|
|
#define CA $v0
|
|
|
|
/* Input arguments */
|
|
#define poly $a0
|
|
#define src $a1
|
|
#define srclen $a2
|
|
#define hibit $a3
|
|
|
|
/* Location in the opaque buffer
|
|
* R[0..3], CA, H[0..4]
|
|
*/
|
|
#define PTR_POLY1305_R(n) ( 0 + (n*4)) ## ($a0)
|
|
#define PTR_POLY1305_CA (16 ) ## ($a0)
|
|
#define PTR_POLY1305_H(n) (20 + (n*4)) ## ($a0)
|
|
|
|
#define POLY1305_BLOCK_SIZE 16
|
|
#define POLY1305_STACK_SIZE 32
|
|
|
|
.set noat
|
|
.align 4
|
|
.globl poly1305_blocks_mips
|
|
.ent poly1305_blocks_mips
|
|
poly1305_blocks_mips:
|
|
.frame $sp, POLY1305_STACK_SIZE, $ra
|
|
/* srclen &= 0xFFFFFFF0 */
|
|
ins srclen, $zero, 0, 4
|
|
|
|
addiu $sp, -(POLY1305_STACK_SIZE)
|
|
|
|
/* check srclen >= 16 bytes */
|
|
beqz srclen, .Lpoly1305_blocks_mips_end
|
|
|
|
/* Calculate last round based on src address pointer.
|
|
* last round src ptr (srclen) = src + (srclen & 0xFFFFFFF0)
|
|
*/
|
|
addu srclen, src
|
|
|
|
lw R0, PTR_POLY1305_R(0)
|
|
lw R1, PTR_POLY1305_R(1)
|
|
lw R2, PTR_POLY1305_R(2)
|
|
lw R3, PTR_POLY1305_R(3)
|
|
|
|
/* store the used save registers. */
|
|
sw $s0, 0($sp)
|
|
sw $s1, 4($sp)
|
|
sw $s2, 8($sp)
|
|
sw $s3, 12($sp)
|
|
sw $s4, 16($sp)
|
|
sw $s5, 20($sp)
|
|
|
|
/* load Hx and Carry */
|
|
lw CA, PTR_POLY1305_CA
|
|
lw H0, PTR_POLY1305_H(0)
|
|
lw H1, PTR_POLY1305_H(1)
|
|
lw H2, PTR_POLY1305_H(2)
|
|
lw H3, PTR_POLY1305_H(3)
|
|
lw H4, PTR_POLY1305_H(4)
|
|
|
|
/* Sx = Rx + (Rx >> 2) */
|
|
srl S1, R1, 2
|
|
srl S2, R2, 2
|
|
srl S3, R3, 2
|
|
addu S1, R1
|
|
addu S2, R2
|
|
addu S3, R3
|
|
|
|
addiu SC, $zero, 1
|
|
|
|
.Lpoly1305_loop:
|
|
lwl O0, 0+MSB(src)
|
|
lwl O1, 4+MSB(src)
|
|
lwl O2, 8+MSB(src)
|
|
lwl O3,12+MSB(src)
|
|
lwr O0, 0+LSB(src)
|
|
lwr O1, 4+LSB(src)
|
|
lwr O2, 8+LSB(src)
|
|
lwr O3,12+LSB(src)
|
|
|
|
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
|
wsbh O0
|
|
wsbh O1
|
|
wsbh O2
|
|
wsbh O3
|
|
rotr O0, 16
|
|
rotr O1, 16
|
|
rotr O2, 16
|
|
rotr O3, 16
|
|
#endif
|
|
|
|
/* h0 = (u32)(d0 = (u64)h0 + inp[0] + c 'Carry_previous cycle'); */
|
|
addu H0, CA
|
|
sltu CA, H0, CA
|
|
addu O0, H0
|
|
sltu H0, O0, H0
|
|
addu CA, H0
|
|
|
|
/* h1 = (u32)(d1 = (u64)h1 + (d0 >> 32) + inp[4]); */
|
|
addu H1, CA
|
|
sltu CA, H1, CA
|
|
addu O1, H1
|
|
sltu H1, O1, H1
|
|
addu CA, H1
|
|
|
|
/* h2 = (u32)(d2 = (u64)h2 + (d1 >> 32) + inp[8]); */
|
|
addu H2, CA
|
|
sltu CA, H2, CA
|
|
addu O2, H2
|
|
sltu H2, O2, H2
|
|
addu CA, H2
|
|
|
|
/* h3 = (u32)(d3 = (u64)h3 + (d2 >> 32) + inp[12]); */
|
|
addu H3, CA
|
|
sltu CA, H3, CA
|
|
addu O3, H3
|
|
sltu H3, O3, H3
|
|
addu CA, H3
|
|
|
|
/* h4 += (u32)(d3 >> 32) + padbit; */
|
|
addu H4, hibit
|
|
addu O4, H4, CA
|
|
|
|
/* D0 */
|
|
multu O0, R0
|
|
maddu O1, S3
|
|
maddu O2, S2
|
|
maddu O3, S1
|
|
mfhi CA
|
|
mflo H0
|
|
|
|
/* D1 */
|
|
multu O0, R1
|
|
maddu O1, R0
|
|
maddu O2, S3
|
|
maddu O3, S2
|
|
maddu O4, S1
|
|
maddu CA, SC
|
|
mfhi CA
|
|
mflo H1
|
|
|
|
/* D2 */
|
|
multu O0, R2
|
|
maddu O1, R1
|
|
maddu O2, R0
|
|
maddu O3, S3
|
|
maddu O4, S2
|
|
maddu CA, SC
|
|
mfhi CA
|
|
mflo H2
|
|
|
|
/* D4 */
|
|
mul H4, O4, R0
|
|
|
|
/* D3 */
|
|
multu O0, R3
|
|
maddu O1, R2
|
|
maddu O2, R1
|
|
maddu O3, R0
|
|
maddu O4, S3
|
|
maddu CA, SC
|
|
mfhi CA
|
|
mflo H3
|
|
|
|
addiu src, POLY1305_BLOCK_SIZE
|
|
|
|
/* h4 += (u32)(d3 >> 32); */
|
|
addu O4, H4, CA
|
|
/* h4 &= 3 */
|
|
andi H4, O4, 3
|
|
/* c = (h4 >> 2) + (h4 & ~3U); */
|
|
srl CA, O4, 2
|
|
ins O4, $zero, 0, 2
|
|
|
|
addu CA, O4
|
|
|
|
/* able to do a 16 byte block. */
|
|
bne src, srclen, .Lpoly1305_loop
|
|
|
|
/* restore the used save registers. */
|
|
lw $s0, 0($sp)
|
|
lw $s1, 4($sp)
|
|
lw $s2, 8($sp)
|
|
lw $s3, 12($sp)
|
|
lw $s4, 16($sp)
|
|
lw $s5, 20($sp)
|
|
|
|
/* store Hx and Carry */
|
|
sw CA, PTR_POLY1305_CA
|
|
sw H0, PTR_POLY1305_H(0)
|
|
sw H1, PTR_POLY1305_H(1)
|
|
sw H2, PTR_POLY1305_H(2)
|
|
sw H3, PTR_POLY1305_H(3)
|
|
sw H4, PTR_POLY1305_H(4)
|
|
|
|
.Lpoly1305_blocks_mips_end:
|
|
addiu $sp, POLY1305_STACK_SIZE
|
|
|
|
/* Jump Back */
|
|
jr $ra
|
|
.end poly1305_blocks_mips
|
|
.set at
|
|
|
|
/* Input arguments CTX=$a0, MAC=$a1, NONCE=$a2 */
|
|
#define MAC $a1
|
|
#define NONCE $a2
|
|
|
|
#define G0 $t5
|
|
#define G1 $t6
|
|
#define G2 $t7
|
|
#define G3 $t8
|
|
#define G4 $t9
|
|
|
|
.set noat
|
|
.align 4
|
|
.globl poly1305_emit_mips
|
|
.ent poly1305_emit_mips
|
|
poly1305_emit_mips:
|
|
/* load Hx and Carry */
|
|
lw CA, PTR_POLY1305_CA
|
|
lw H0, PTR_POLY1305_H(0)
|
|
lw H1, PTR_POLY1305_H(1)
|
|
lw H2, PTR_POLY1305_H(2)
|
|
lw H3, PTR_POLY1305_H(3)
|
|
lw H4, PTR_POLY1305_H(4)
|
|
|
|
/* Add left over carry */
|
|
addu H0, CA
|
|
sltu CA, H0, CA
|
|
addu H1, CA
|
|
sltu CA, H1, CA
|
|
addu H2, CA
|
|
sltu CA, H2, CA
|
|
addu H3, CA
|
|
sltu CA, H3, CA
|
|
addu H4, CA
|
|
|
|
/* compare to modulus by computing h + -p */
|
|
addiu G0, H0, 5
|
|
sltu CA, G0, H0
|
|
addu G1, H1, CA
|
|
sltu CA, G1, H1
|
|
addu G2, H2, CA
|
|
sltu CA, G2, H2
|
|
addu G3, H3, CA
|
|
sltu CA, G3, H3
|
|
addu G4, H4, CA
|
|
|
|
srl SC, G4, 2
|
|
|
|
/* if there was carry into 131st bit, h3:h0 = g3:g0 */
|
|
movn H0, G0, SC
|
|
movn H1, G1, SC
|
|
movn H2, G2, SC
|
|
movn H3, G3, SC
|
|
|
|
lwl G0, 0+MSB(NONCE)
|
|
lwl G1, 4+MSB(NONCE)
|
|
lwl G2, 8+MSB(NONCE)
|
|
lwl G3,12+MSB(NONCE)
|
|
lwr G0, 0+LSB(NONCE)
|
|
lwr G1, 4+LSB(NONCE)
|
|
lwr G2, 8+LSB(NONCE)
|
|
lwr G3,12+LSB(NONCE)
|
|
|
|
/* mac = (h + nonce) % (2^128) */
|
|
addu H0, G0
|
|
sltu CA, H0, G0
|
|
|
|
/* H1 */
|
|
addu H1, CA
|
|
sltu CA, H1, CA
|
|
addu H1, G1
|
|
sltu G1, H1, G1
|
|
addu CA, G1
|
|
|
|
/* H2 */
|
|
addu H2, CA
|
|
sltu CA, H2, CA
|
|
addu H2, G2
|
|
sltu G2, H2, G2
|
|
addu CA, G2
|
|
|
|
/* H3 */
|
|
addu H3, CA
|
|
addu H3, G3
|
|
|
|
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
|
wsbh H0
|
|
wsbh H1
|
|
wsbh H2
|
|
wsbh H3
|
|
rotr H0, 16
|
|
rotr H1, 16
|
|
rotr H2, 16
|
|
rotr H3, 16
|
|
#endif
|
|
|
|
/* store MAC */
|
|
swl H0, 0+MSB(MAC)
|
|
swl H1, 4+MSB(MAC)
|
|
swl H2, 8+MSB(MAC)
|
|
swl H3,12+MSB(MAC)
|
|
swr H0, 0+LSB(MAC)
|
|
swr H1, 4+LSB(MAC)
|
|
swr H2, 8+LSB(MAC)
|
|
swr H3,12+LSB(MAC)
|
|
|
|
jr $ra
|
|
.end poly1305_emit_mips
|
|
|
|
#define PR0 $t0
|
|
#define PR1 $t1
|
|
#define PR2 $t2
|
|
#define PR3 $t3
|
|
#define PT0 $t4
|
|
|
|
/* Input arguments CTX=$a0, KEY=$a1 */
|
|
|
|
.align 4
|
|
.globl poly1305_init_mips
|
|
.ent poly1305_init_mips
|
|
poly1305_init_mips:
|
|
lwl PR0, 0+MSB($a1)
|
|
lwl PR1, 4+MSB($a1)
|
|
lwl PR2, 8+MSB($a1)
|
|
lwl PR3,12+MSB($a1)
|
|
lwr PR0, 0+LSB($a1)
|
|
lwr PR1, 4+LSB($a1)
|
|
lwr PR2, 8+LSB($a1)
|
|
lwr PR3,12+LSB($a1)
|
|
|
|
/* store Hx and Carry */
|
|
sw $zero, PTR_POLY1305_CA
|
|
sw $zero, PTR_POLY1305_H(0)
|
|
sw $zero, PTR_POLY1305_H(1)
|
|
sw $zero, PTR_POLY1305_H(2)
|
|
sw $zero, PTR_POLY1305_H(3)
|
|
sw $zero, PTR_POLY1305_H(4)
|
|
|
|
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
|
wsbh PR0
|
|
wsbh PR1
|
|
wsbh PR2
|
|
wsbh PR3
|
|
rotr PR0, 16
|
|
rotr PR1, 16
|
|
rotr PR2, 16
|
|
rotr PR3, 16
|
|
#endif
|
|
|
|
lui PT0, 0x0FFF
|
|
ori PT0, 0xFFFC
|
|
|
|
/* AND 0x0fffffff; */
|
|
ext PR0, PR0, 0, (32-4)
|
|
|
|
/* AND 0x0ffffffc; */
|
|
and PR1, PT0
|
|
and PR2, PT0
|
|
and PR3, PT0
|
|
|
|
/* store Rx */
|
|
sw PR0, PTR_POLY1305_R(0)
|
|
sw PR1, PTR_POLY1305_R(1)
|
|
sw PR2, PTR_POLY1305_R(2)
|
|
sw PR3, PTR_POLY1305_R(3)
|
|
|
|
/* Jump Back */
|
|
jr $ra
|
|
.end poly1305_init_mips
|