mirror of
https://github.com/Qortal/Brooklyn.git
synced 2025-02-07 06:44:18 +00:00
230 lines
6.7 KiB
ArmAsm
230 lines
6.7 KiB
ArmAsm
|
/*
|
||
|
Copyright (c) 2013, Raspberry Pi Foundation
|
||
|
Copyright (c) 2013, RISC OS Open Ltd
|
||
|
All rights reserved.
|
||
|
|
||
|
Redistribution and use in source and binary forms, with or without
|
||
|
modification, are permitted provided that the following conditions are met:
|
||
|
* Redistributions of source code must retain the above copyright
|
||
|
notice, this list of conditions and the following disclaimer.
|
||
|
* Redistributions in binary form must reproduce the above copyright
|
||
|
notice, this list of conditions and the following disclaimer in the
|
||
|
documentation and/or other materials provided with the distribution.
|
||
|
* Neither the name of the copyright holder nor the
|
||
|
names of its contributors may be used to endorse or promote products
|
||
|
derived from this software without specific prior written permission.
|
||
|
|
||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
|
*/
|
||
|
|
||
|
|
||
|
/* Prevent the stack from becoming executable */
/* NOTE(review): the bare "|" and "||" rows between statements here are not
 * assembler syntax; they look like residue from a table-based source viewer
 * and would need stripping before this file can be assembled -- confirm
 * against the upstream copy. */
|
||
|
#if defined(__linux__) && defined(__ELF__)
|
||
|
.section .note.GNU-stack,"",%progbits   /* empty section with no flags, emitted only for Linux ELF builds */
|
||
|
#endif
|
||
|
|
||
|
.text                                   /* everything below is assembled into the code section */
|
||
|
.arch armv6                             /* accept instructions up to ARMv6 */
|
||
|
.object_arch armv4                      /* but record armv4 as the object file's architecture */
|
||
|
.arm                                    /* ARM (not Thumb) instruction encoding */
|
||
|
.altmacro                               /* alternate macro syntax: the macros below name their parameters without a leading backslash */
|
||
|
.p2align 2                              /* align to 2^2 = 4 bytes */
|
||
|
|
||
|
/*
 * myfunc <fname>
 *
 * Open an exported function called <fname>: make the symbol global, emit
 * the .func marker and define the label at the current location.  When
 * building for ELF the symbol is also given hidden visibility and typed
 * as a function.  The matching .endfunc is written by the caller at the
 * end of the function body.
 */
.macro myfunc fname
        .global \fname
        .func   \fname
#if defined(__ELF__)
        /* ELF only: type the symbol as code and set its visibility to hidden */
        .type   \fname, %function
        .hidden \fname
#endif
\fname:
.endm
|
||
|
|
||
|
/*
 * hashmix A, B, C
 *
 * Stir the three 32-bit state registers A, B and C together, in place.
 * Six rounds, each of the shape
 *
 *     x -= z;   x ^= rol(z, k);   z += y
 *
 * where (x, y, z) cycles through (A,B,C), (B,C,A), (C,A,B) twice and
 * k = 4, 6, 8, 16, 19, 4.  A left-rotate by k is written "ror #(32-k)".
 * Only A, B and C are written; none of the instructions sets the flags.
 */
.macro hashmix A, B, C
        /* round 1: A -= C; A ^= rol(C, 4); C += B */
        sub     \A, \A, \C
        eor     \A, \A, \C, ror #(32-4)
        add     \C, \C, \B

        /* round 2: B -= A; B ^= rol(A, 6); A += C */
        sub     \B, \B, \A
        eor     \B, \B, \A, ror #(32-6)
        add     \A, \A, \C

        /* round 3: C -= B; C ^= rol(B, 8); B += A */
        sub     \C, \C, \B
        eor     \C, \C, \B, ror #(32-8)
        add     \B, \B, \A

        /* round 4: A -= C; A ^= rol(C, 16); C += B */
        sub     \A, \A, \C
        eor     \A, \A, \C, ror #(32-16)
        add     \C, \C, \B

        /* round 5: B -= A; B ^= rol(A, 19); A += C */
        sub     \B, \B, \A
        eor     \B, \B, \A, ror #(32-19)
        add     \A, \A, \C

        /* round 6: C -= B; C ^= rol(B, 4); B += A */
        sub     \C, \C, \B
        eor     \C, \C, \B, ror #(32-4)
        add     \B, \B, \A
.endm
|
||
|
|
||
|
/*
 * hashfinal A, B, C
 *
 * Final scramble of the three 32-bit state registers, in place.
 * Seven rounds, each of the shape
 *
 *     x ^= y;   x -= rol(y, k)
 *
 * with (x, y, k) = (C,B,14) (A,C,11) (B,A,25) (C,B,16) (A,C,4) (B,A,14)
 * (C,B,24).  A left-rotate by k is written "ror #(32-k)".
 * Only A, B and C are written; none of the instructions sets the flags.
 */
.macro hashfinal A, B, C
        /* C ^= B; C -= rol(B, 14) */
        eor     \C, \C, \B
        sub     \C, \C, \B, ror #(32-14)

        /* A ^= C; A -= rol(C, 11) */
        eor     \A, \A, \C
        sub     \A, \A, \C, ror #(32-11)

        /* B ^= A; B -= rol(A, 25) */
        eor     \B, \B, \A
        sub     \B, \B, \A, ror #(32-25)

        /* C ^= B; C -= rol(B, 16) */
        eor     \C, \C, \B
        sub     \C, \C, \B, ror #(32-16)

        /* A ^= C; A -= rol(C, 4) */
        eor     \A, \A, \C
        sub     \A, \A, \C, ror #(32-4)

        /* B ^= A; B -= rol(A, 14) */
        eor     \B, \B, \A
        sub     \B, \B, \A, ror #(32-14)

        /* C ^= B; C -= rol(B, 24) */
        eor     \C, \C, \B
        sub     \C, \C, \B, ror #(32-24)
.endm
|
||
|
|
||
|
/*
 * uint32_t khrn_hashword(const uint32_t *k, int length, uint32_t initval);
 *
 * Hash an array of 32-bit words into one 32-bit value.
 *
 * On entry:
 *   a1 = pointer to buffer (read with LDMIA/LDR, one word at a time)
 *   a2 = number of 32-bit words
 *   a3 = seed
 * On exit:
 *   a1 = hash value
 *
 * Register use:
 *   S    (a1)  read pointer, post-incremented by every load
 *   N    (a2)  word counter; biased differently in each phase, see below
 *   AA   (a3)  hash state
 *   BB   (a4)  hash state
 *   CC   (v1)  hash state; this is the value returned
 *   DAT0 (v2)  loaded word / scratch
 *   DAT1 (ip)  loaded word / scratch
 *   DAT2 (lr)  loaded word
 * v1, v2 and lr are pushed on entry and restored by the returning pop.
 *
 * Behaviour:
 *   - AA, BB and CC all start as initval + (length << 2) + 0xDEADBEEF.
 *   - While more than three words remain, three words are added to
 *     AA/BB/CC and hashmix is run.  The last one to three words are added
 *     and hashfinal is run.
 *   - length == 0 returns the initial CC without running hashfinal.
 *   - length is compared unsigned, so a negative length behaves like a
 *     very large count.
 *   - Buffers of at least (prefetch_distance+2)*32/4 words take the "long"
 *     path, which issues PLD preloads ahead of the read pointer; shorter
 *     buffers preload every 32-byte line they touch before hashing.
 *
 * NOTE(review): the constants and round structure look like hashword()
 * from Bob Jenkins' lookup3 -- confirm before relying on compatibility.
 */

.set prefetch_distance, 2

myfunc khrn_hashword
        S       .req    a1
        N       .req    a2
        AA      .req    a3
        BB      .req    a4
        CC      .req    v1
        DAT0    .req    v2
        DAT1    .req    ip
        DAT2    .req    lr

        /* AA = BB = CC = seed + length*4 + 0xDEADBEEF */
        ldr     BB, =0xDEADBEEF
        push    {CC, DAT0, lr}
        add     AA, AA, N, lsl #2
        add     AA, AA, BB
        mov     BB, AA
        mov     CC, AA

        /* To preload ahead as we go, we need at least (prefetch_distance+2) 32-byte blocks */
        cmp     N, #(prefetch_distance+2)*32/4
        blo     170f

        /* Long case */
        /* Adjust N to simplify inner loop termination. We want it to
         * stop when there are (prefetch_distance+1) complete cache
         * lines to go. */
        sub     N, N, #(prefetch_distance+2)*32/4
        /* Preload the line holding S and the following prefetch_distance lines */
        bic     DAT0, S, #31
        .set    OFFSET, 0
        .rept   prefetch_distance+1
        pld     [DAT0, #OFFSET]
        .set    OFFSET, OFFSET+32
        .endr
        /* DAT1 = byte offset of S within its 32-byte line; when it is
         * 0x0C or more, start at the second triple (156) */
        and     DAT1, S, #0x1C
        cmp     DAT1, #0x0C
        bhs     156f
154:    /* Now at first complete triple-word within cacheline, with at
         * least one prefetch to go (but no prefetch required until we
         * have processed at least 2 triple-words) */
        ldmia   S!, {DAT0, DAT1, DAT2}
        sub     N, N, #12/4             /* three words consumed */
        add     AA, AA, DAT0
        add     BB, BB, DAT1
        add     CC, CC, DAT2
        hashmix AA, BB, CC
156:    ldmia   S!, {DAT0, DAT1, DAT2}
        sub     N, N, #12/4             /* three words consumed */
        add     AA, AA, DAT0
        add     BB, BB, DAT1
        add     CC, CC, DAT2
        hashmix AA, BB, CC
        /* Repeat while S is still in the upper 16 bytes of a 32-byte line */
        tst     S, #0x10
        bne     156b
        /* DAT0 = base of the line S is now in, DAT1 = byte offset within it */
        bic     DAT0, S, #0x1F
        and     DAT1, S, #0x1F
        pld     [DAT0, #prefetch_distance*32]
        /* DAT1 = biased N + word offset within the line; loop while >= 0 */
        adds    DAT1, N, DAT1, lsr #2
        bpl     154b
        /* Just before the final (prefetch_distance+1) 32-byte blocks,
         * deal with final preload */
        /* DAT1 == -32/4 skips the extra preload below */
        cmp     DAT1, #-32/4
        beq     157f
        pld     [DAT0, #(prefetch_distance+1)*32]
        /* Remove the long-case bias from N and re-bias it to
         * (words remaining - 4), so that "subs #3 / bhs" below keeps
         * looping while more than three words remain */
157:    add     N, N, #(prefetch_distance+2)*32/4 - 1 - 3
158:    ldmia   S!, {DAT0, DAT1, DAT2}
        add     AA, AA, DAT0
        add     BB, BB, DAT1
        add     CC, CC, DAT2
        hashmix AA, BB, CC
        subs    N, N, #3
        bhs     158b
        /* N is now -3, -2 or -1 for 1, 2 or 3 words left:
         * HS (N >= -2) means at least two, HI (N == -1) means three */
        cmp     N, #-2
        ldr     DAT0, [S], #4
        ldrhs   DAT1, [S], #4
        ldrhi   DAT2, [S], #4
        add     AA, AA, DAT0
        addhs   BB, BB, DAT1
        addhi   CC, CC, DAT2
        hashfinal AA, BB, CC
        mov     a1, CC
        pop     {CC, DAT0, pc}

170:    /* Short case */
        /* Zero words: return the initial CC without hashfinal */
        cmp     N, #1
        blo     199f
        /* Preload every 32-byte line from the one holding the first byte
         * (DAT0) to the one holding the last byte (DAT1) */
        bic     DAT0, S, #31
        pld     [DAT0]
        add     DAT1, S, N, lsl #2
        sub     DAT1, DAT1, #1
        bic     DAT1, DAT1, #31
        cmp     DAT1, DAT0
        beq     92f
91:     add     DAT0, DAT0, #32
        cmp     DAT0, DAT1
        pld     [DAT0]                  /* pld leaves the flags from cmp intact */
        bne     91b
        /* Bias N by -1 so that "subs #3 / bhs" at 176 enters the triple
         * loop only while more than three words remain */
92:     sub     N, N, #1
        b       176f
175:    ldmia   S!, {DAT0, DAT1, DAT2}
        add     AA, AA, DAT0
        add     BB, BB, DAT1
        add     CC, CC, DAT2
        hashmix AA, BB, CC
176:    subs    N, N, #3
        bhs     175b
        /* N is now -3, -2 or -1 for 1, 2 or 3 words left:
         * HS (N >= -2) means at least two, HI (N == -1) means three */
        cmp     N, #-2
        ldr     DAT0, [S], #4
        ldrhs   DAT1, [S], #4
        ldrhi   DAT2, [S], #4
        add     AA, AA, DAT0
        addhs   BB, BB, DAT1
        addhi   CC, CC, DAT2
        hashfinal AA, BB, CC
199:    mov     a1, CC
        pop     {CC, DAT0, pc}

#ifdef __ELF__
        /* Record the code size in the ELF symbol (the literal pool emitted
         * by .ltorg below is not counted) */
        .size   khrn_hashword, . - khrn_hashword
#endif

        .unreq  S
        .unreq  N
        .unreq  AA
        .unreq  BB
        .unreq  CC
        .unreq  DAT0
        .unreq  DAT1
        .unreq  DAT2
.endfunc

        /* Literal pool for the "ldr BB, =0xDEADBEEF" above */
        .ltorg
|
||
|
|