/* $NetBSD: octeon_cop2var.h,v 1.2 2020/06/18 13:52:08 simonb Exp $ */
/*
* TODO:
*
* - Utilize prefetch.
*
 * - Implement a loop in the CBC operations, taking the number of
 * blocks as an argument.  Better still if combined with prefetch.
*
 * - In the AES and DES block loops, merge the encrypt / decrypt
 * variants: take a direction argument (int dir, 0 => encrypt,
 * 1 => decrypt) and branch on it.  A sketch follows this comment.
*/
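/*
 * A minimal sketch of the merge described above (hypothetical, not part
 * of the current API; the per-direction functions are defined below):
 *
 *	static inline void
 *	octeon_cop2_aes_cbc_block_unaligned(uint8_t *d, uint8_t *s, int n,
 *	    int dir)
 *	{
 *		if (dir == 0)
 *			octeon_cop2_aes_cbc_encrypt_block_unaligned(d, s, n);
 *		else
 *			octeon_cop2_aes_cbc_decrypt_block_unaligned(d, s, n);
 *	}
 */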
#ifndef _OCTEON_COP2VAR_H_
#define _OCTEON_COP2VAR_H_
#ifdef __OCTEON_USEUN__
#define CNASM_ULD(r, o, b) "uld %["#r"], "#o"(%["#b"]) \n\t"
#define CNASM_USD(r, o, b) "usd %["#r"], "#o"(%["#b"]) \n\t"
#define CNASM_ULW(r, o, b) "ulw %["#r"], "#o"(%["#b"]) \n\t"
#define CNASM_USW(r, o, b) "usw %["#r"], "#o"(%["#b"]) \n\t"
#else
#define __CNASM_ULH(i, r, o, x, b) i" %["#r"], ("#o" + "#x")(%["#b"]) \n\t"
#define __CNASM_ULS(p, r, o, l, h, b) __CNASM_ULH(#p"l", r, o, l, b) \
__CNASM_ULH(#p"r", r, o, h, b)
#define CNASM_ULD(r, o, b) __CNASM_ULS(ld, r, o, 0, 7, b)
#define CNASM_USD(r, o, b) __CNASM_ULS(sd, r, o, 0, 7, b)
#define CNASM_ULW(r, o, b) __CNASM_ULS(lw, r, o, 0, 3, b)
#define CNASM_USW(r, o, b) __CNASM_ULS(sw, r, o, 0, 3, b)
#endif
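/*
 * For reference: without __OCTEON_USEUN__ the unaligned macros expand to
 * the classic MIPS left/right instruction pairs, e.g. CNASM_ULD(tmp0, 8, src)
 * becomes
 *
 *	ldl %[tmp0], (8 + 0)(%[src])
 *	ldr %[tmp0], (8 + 7)(%[src])
 */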
#define CNASM_ALD(r, o, b) "ld %["#r"], "#o"(%["#b"]) \n\t"
#define CNASM_ASD(r, o, b) "sd %["#r"], "#o"(%["#b"]) \n\t"
#undef __s
#define __s(s) #s /* stringify */
#define CNASM_MT2(r, n, o) "dmtc2 %["#r"], ("__s(n)" + "#o") \n\t"
#define CNASM_MF2(r, n, o) "dmfc2 %["#r"], ("__s(n)" + "#o") \n\t"
#define CNASM_MT2ZERO(n, o) "dmtc2 $0, ("__s(n)" + "#o") \n\t"
#define CNASM_START() ".set push \n\t" \
".set mips64 \n\t" \
".set arch=octeon \n\t" \
".set noreorder \n\t"
#define CNASM_END() ".set pop"
#define __aligned_t uint64_t
#define __unaligned_t uint8_t
/* -------------------------------------------------------------------------- */
/* AES */
#define __octeon_cop2_aes_set_key_au_vaddr64(au, AU) \
static inline void \
octeon_cop2_aes_set_key_##au##_vaddr64(uint64_t key, uint32_t klen) \
{ \
	uint64_t tmp0, tmp1, tmp2, tmp3; \
	uint64_t cnt = klen >> 6; \
\
asm volatile ( \
CNASM_START() \
/* %[cnt] is either 4 (256), 3 (192), or 2 (128) */ \
	    /* Each operation also sets AESKEYLEN in cop2 */ \
/* >= 64 */ \
CNASM_##AU##LD(tmp0, 0, key) \
" subu %[cnt], %[cnt], 1 \n" \
" beqz %[cnt], 1f \n" \
CNASM_MT2(tmp0, CVM_MT_AES_KEY, 0) /* delay slot */ \
/* >= 128 */ \
CNASM_##AU##LD(tmp1, 8, key) \
" subu %[cnt], %[cnt], 1 \n" \
" beqz %[cnt], 1f \n" \
CNASM_MT2(tmp1, CVM_MT_AES_KEY, 1) /* delay slot */ \
/* >= 192 */ \
CNASM_##AU##LD(tmp2, 16, key) \
" subu %[cnt], %[cnt], 1 \n" \
" beqz %[cnt], 1f \n" \
CNASM_MT2(tmp2, CVM_MT_AES_KEY, 2) /* delay slot */ \
/* >= 256 */ \
CNASM_##AU##LD(tmp3, 24, key) \
CNASM_MT2(tmp3, CVM_MT_AES_KEY, 3) \
/* done */ \
"1: \n" \
CNASM_END() \
	    : [tmp0] "=&r" (tmp0), \
	      [tmp1] "=&r" (tmp1), \
	      [tmp2] "=&r" (tmp2), \
	      [tmp3] "=&r" (tmp3), \
	      [cnt] "+d" (cnt) \
	    : [key] "d" (key)); \
}
#define __octeon_cop2_aes_set_key_au_ptr(au, AU, ptr) \
static inline void \
octeon_cop2_aes_set_key_##au(ptr key, uint32_t klen) \
{ \
octeon_cop2_aes_set_key_##au##_vaddr64((intptr_t)key, klen); \
}
#define __octeon_cop2_aes_set_key_au(au, AU) \
__octeon_cop2_aes_set_key_au_vaddr64(au, AU) \
__octeon_cop2_aes_set_key_au_ptr(au, AU, __##au##_t *)
#define __octeon_cop2_aes_set_key \
__octeon_cop2_aes_set_key_au(aligned, A) \
__octeon_cop2_aes_set_key_au(unaligned, U)
__octeon_cop2_aes_set_key
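/*
 * Usage sketch (hypothetical caller).  klen is the key length in bits;
 * klen >> 6 gives the number of 64-bit key words (128 => 2, 192 => 3,
 * 256 => 4):
 *
 *	uint8_t key[32];
 *	octeon_cop2_aes_set_key_unaligned(key, 256);
 */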
static inline void
octeon_cop2_aes_set_iv_unaligned_vaddr64(uint64_t iv)
{
uint64_t tmp0, tmp1;
asm volatile (
CNASM_START()
/* Store the IV to cop2 */
CNASM_ULD(tmp0, 0, iv)
CNASM_ULD(tmp1, 8, iv)
CNASM_MT2(tmp0, CVM_MT_AES_IV, 0)
CNASM_MT2(tmp1, CVM_MT_AES_IV, 1)
CNASM_END()
: [tmp0] "=&r" (tmp0),
[tmp1] "=&r" (tmp1)
: [iv] "d" (iv));
}
static inline void
octeon_cop2_aes_set_iv_unaligned(uint8_t *iv)
{
octeon_cop2_aes_set_iv_unaligned_vaddr64((intptr_t)iv);
}
#define __octeon_cop2_aes_ed_16_au_vaddr64(ed, ED, au, AU) \
static inline void \
octeon_cop2_aes_##ed##_16_##au##_vaddr64(uint64_t d, uint64_t s) \
{ \
uint64_t tmp0, tmp1; \
\
asm volatile ( \
CNASM_START() \
CNASM_##AU##LD(tmp0, 0, s) \
CNASM_##AU##LD(tmp1, 8, s) \
CNASM_MT2(tmp0, CVM_MT_AES_##ED##0, 0) \
CNASM_MT2(tmp1, CVM_MT_AES_##ED##1, 0) \
CNASM_MF2(tmp0, CVM_MF_AES_RESINP, 0) \
CNASM_MF2(tmp1, CVM_MF_AES_RESINP, 1) \
CNASM_##AU##SD(tmp0, 0, d) \
CNASM_##AU##SD(tmp1, 8, d) \
CNASM_END() \
: [tmp0] "=&r" (tmp0), \
[tmp1] "=&r" (tmp1) \
: [d] "d" (d), \
[s] "d" (s)); \
}
#define __octeon_cop2_aes_ed_16_au_ptr(ed, ED, au, AU, ptr) \
static inline void \
octeon_cop2_aes_##ed##_16_##au(ptr d, ptr s) \
{ \
octeon_cop2_aes_##ed##_16_##au##_vaddr64((intptr_t)d, (intptr_t)s); \
}
#define __octeon_cop2_aes_ed_16_au(ed, ED, au, AU) \
__octeon_cop2_aes_ed_16_au_vaddr64(ed, ED, au, AU) \
__octeon_cop2_aes_ed_16_au_ptr(ed, ED, au, AU, __##au##_t *)
#define __octeon_cop2_aes_ed_16(ed, ED) \
__octeon_cop2_aes_ed_16_au(ed, ED, aligned, A) \
__octeon_cop2_aes_ed_16_au(ed, ED, unaligned, U)
#define __octeon_cop2_aes_16 \
__octeon_cop2_aes_ed_16(encrypt, ENC) \
__octeon_cop2_aes_ed_16(decrypt, DEC) \
__octeon_cop2_aes_ed_16(cbc_encrypt, ENC_CBC) \
__octeon_cop2_aes_ed_16(cbc_decrypt, DEC_CBC)
__octeon_cop2_aes_16
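/*
 * Usage sketch (hypothetical caller): ECB-encrypt a single 16-byte
 * block in place once the key is loaded (the loads complete before the
 * stores, so d == s is safe):
 *
 *	uint8_t blk[16];
 *	octeon_cop2_aes_encrypt_16_unaligned(blk, blk);
 */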
#define __octeon_cop2_aes_ed_block_au_vaddr64(ed, ED, au, AU) \
static inline void \
octeon_cop2_aes_##ed##_block_##au##_vaddr64(uint64_t d, uint64_t s, int n) \
{ \
uint64_t tmp0, tmp1; \
uint64_t x = d + 16 * n; \
\
asm volatile ( \
CNASM_START() \
"1: \n" \
CNASM_##AU##LD(tmp0, 0, s) \
CNASM_##AU##LD(tmp1, 8, s) \
CNASM_MT2(tmp0, CVM_MT_AES_##ED##0, 0) \
CNASM_MT2(tmp1, CVM_MT_AES_##ED##1, 0) \
CNASM_MF2(tmp0, CVM_MF_AES_RESINP, 0) \
CNASM_MF2(tmp1, CVM_MF_AES_RESINP, 1) \
CNASM_##AU##SD(tmp0, 0, d) \
CNASM_##AU##SD(tmp1, 8, d) \
" daddu %[d], %[d], 16 \n" \
" bne %[d], %[x], 1b \n" \
" daddu %[s], %[s], 16 \n" /* delay slot */ \
CNASM_END() \
: [d] "=d" (d), \
[s] "=d" (s), \
[tmp0] "=&r" (tmp0), \
[tmp1] "=&r" (tmp1) \
: "0" (d), \
"1" (s), \
[x] "d" (x)); \
}
#define __octeon_cop2_aes_ed_block_au_ptr(ed, ED, au, AU, ptr) \
static inline void \
octeon_cop2_aes_##ed##_block_##au(ptr d, ptr s, int n) \
{ \
octeon_cop2_aes_##ed##_block_##au##_vaddr64((intptr_t)d, (intptr_t)s, n); \
}
#define __octeon_cop2_aes_ed_block_au(ed, ED, au, AU) \
__octeon_cop2_aes_ed_block_au_vaddr64(ed, ED, au, AU) \
__octeon_cop2_aes_ed_block_au_ptr(ed, ED, au, AU, __##au##_t *)
#define __octeon_cop2_aes_ed_block(ed, ED) \
__octeon_cop2_aes_ed_block_au(ed, ED, aligned, A) \
__octeon_cop2_aes_ed_block_au(ed, ED, unaligned, U)
#define __octeon_cop2_aes_block \
/* __octeon_cop2_aes_ed_block(encrypt, ENC) */ \
/* __octeon_cop2_aes_ed_block(decrypt, DEC) */ \
__octeon_cop2_aes_ed_block(cbc_encrypt, ENC_CBC) \
__octeon_cop2_aes_ed_block(cbc_decrypt, DEC_CBC)
__octeon_cop2_aes_block
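/*
 * Usage sketch (hypothetical caller): CBC-encrypt n 16-byte blocks.
 * The loop body executes before the exit test, so n must be >= 1, and
 * the key and IV must already be loaded:
 *
 *	octeon_cop2_aes_set_key_unaligned(key, 128);
 *	octeon_cop2_aes_set_iv_unaligned(iv);
 *	octeon_cop2_aes_cbc_encrypt_block_unaligned(dst, src, n);
 */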
#define __octeon_cop2_aes_ed_64_au_vaddr64(ed, ED, au, AU) \
static inline void \
octeon_cop2_aes_##ed##_64_##au##_vaddr64(uint64_t d, uint64_t s) \
{ \
uint64_t tmp0, tmp1, tmp2, tmp3; \
\
asm volatile ( \
CNASM_START() \
CNASM_##AU##LD(tmp0, 0, s) \
CNASM_##AU##LD(tmp1, 8, s) \
CNASM_MT2(tmp0, CVM_MT_AES_##ED##0, 0) \
CNASM_MT2(tmp1, CVM_MT_AES_##ED##1, 0) \
CNASM_##AU##LD(tmp2, 16, s) \
CNASM_##AU##LD(tmp3, 24, s) \
CNASM_MF2(tmp0, CVM_MF_AES_RESINP, 0) \
CNASM_MF2(tmp1, CVM_MF_AES_RESINP, 1) \
CNASM_MT2(tmp2, CVM_MT_AES_##ED##0, 0) \
CNASM_MT2(tmp3, CVM_MT_AES_##ED##1, 0) \
CNASM_##AU##SD(tmp0, 0, d) \
CNASM_##AU##SD(tmp1, 8, d) \
CNASM_MF2(tmp2, CVM_MF_AES_RESINP, 0) \
CNASM_MF2(tmp3, CVM_MF_AES_RESINP, 1) \
CNASM_##AU##SD(tmp2, 16, d) \
CNASM_##AU##SD(tmp3, 24, d) \
CNASM_##AU##LD(tmp0, 32, s) \
CNASM_##AU##LD(tmp1, 40, s) \
CNASM_MT2(tmp0, CVM_MT_AES_##ED##0, 0) \
CNASM_MT2(tmp1, CVM_MT_AES_##ED##1, 0) \
CNASM_##AU##LD(tmp2, 48, s) \
CNASM_##AU##LD(tmp3, 56, s) \
CNASM_MF2(tmp0, CVM_MF_AES_RESINP, 0) \
CNASM_MF2(tmp1, CVM_MF_AES_RESINP, 1) \
CNASM_MT2(tmp2, CVM_MT_AES_##ED##0, 0) \
CNASM_MT2(tmp3, CVM_MT_AES_##ED##1, 0) \
CNASM_##AU##SD(tmp0, 32, d) \
CNASM_##AU##SD(tmp1, 40, d) \
CNASM_MF2(tmp2, CVM_MF_AES_RESINP, 0) \
CNASM_MF2(tmp3, CVM_MF_AES_RESINP, 1) \
CNASM_##AU##SD(tmp2, 48, d) \
CNASM_##AU##SD(tmp3, 56, d) \
CNASM_END() \
: [tmp0] "=&r" (tmp0), \
[tmp1] "=&r" (tmp1), \
[tmp2] "=&r" (tmp2), \
[tmp3] "=&r" (tmp3) \
: [d] "d" (d), \
[s] "d" (s)); \
}
#define __octeon_cop2_aes_ed_64_au_ptr(ed, ED, au, AU, ptr) \
static inline void \
octeon_cop2_aes_##ed##_64_##au(ptr d, ptr s) \
{ \
octeon_cop2_aes_##ed##_64_##au##_vaddr64((intptr_t)d, (intptr_t)s); \
}
#define __octeon_cop2_aes_ed_64_au(ed, ED, au, AU) \
__octeon_cop2_aes_ed_64_au_vaddr64(ed, ED, au, AU) \
__octeon_cop2_aes_ed_64_au_ptr(ed, ED, au, AU, __##au##_t *)
#define __octeon_cop2_aes_ed_64(ed, ED) \
__octeon_cop2_aes_ed_64_au(ed, ED, aligned, A) \
__octeon_cop2_aes_ed_64_au(ed, ED, unaligned, U)
#define __octeon_cop2_aes_64 \
/* __octeon_cop2_aes_ed_64(encrypt, ENC) */ \
/* __octeon_cop2_aes_ed_64(decrypt, DEC) */ \
__octeon_cop2_aes_ed_64(cbc_encrypt, ENC_CBC) \
__octeon_cop2_aes_ed_64(cbc_decrypt, DEC_CBC)
__octeon_cop2_aes_64
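/*
 * Usage sketch (hypothetical caller): the _64 variants process exactly
 * 64 bytes (four AES blocks), with loads and stores interleaved against
 * the cop2 operation to hide latency:
 *
 *	octeon_cop2_aes_cbc_encrypt_64_unaligned(dst, src);
 */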
/* -------------------------------------------------------------------------- */
/* DES */
static inline void
octeon_cop2_des_set_key_unaligned_vaddr64(uint64_t k1, uint64_t k2, uint64_t k3)
{
uint64_t tmp0, tmp1, tmp2;
asm volatile (
CNASM_START()
/* Set key */
CNASM_ULD(tmp0, 0, k1)
CNASM_ULD(tmp1, 0, k2)
CNASM_ULD(tmp2, 0, k3)
CNASM_MT2(tmp0, CVM_MT_3DES_KEY, 0)
CNASM_MT2(tmp1, CVM_MT_3DES_KEY, 1)
CNASM_MT2(tmp2, CVM_MT_3DES_KEY, 2)
CNASM_END()
: [tmp0] "=&r" (tmp0),
[tmp1] "=&r" (tmp1),
[tmp2] "=&r" (tmp2)
: [k1] "d" (k1),
[k2] "d" (k2),
[k3] "d" (k3));
}
static inline void
octeon_cop2_des_set_key_unaligned(uint64_t *k1, uint64_t *k2, uint64_t *k3)
{
octeon_cop2_des_set_key_unaligned_vaddr64((intptr_t)k1, (intptr_t)k2, (intptr_t)k3);
}
static inline void
octeon_cop2_des_set_iv_unaligned_vaddr64(uint64_t iv)
{
uint64_t tmp0;
asm volatile (
CNASM_START()
/* Load IV to a register */
CNASM_ULD(tmp0, 0, iv)
/* Store the IV to cop2 */
CNASM_MT2(tmp0, CVM_MT_3DES_IV, 0)
CNASM_END()
: [tmp0] "=&r" (tmp0)
: [iv] "d" (iv));
}
static inline void
octeon_cop2_des_set_iv_unaligned(uint8_t *iv)
{
octeon_cop2_des_set_iv_unaligned_vaddr64((intptr_t)iv);
}
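/*
 * Usage sketch (hypothetical caller): load the three 8-byte 3DES keys
 * and the 8-byte IV before any 3DES CBC operation:
 *
 *	uint64_t k[3];
 *	uint8_t iv[8];
 *	octeon_cop2_des_set_key_unaligned(&k[0], &k[1], &k[2]);
 *	octeon_cop2_des_set_iv_unaligned(iv);
 */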
#define __octeon_cop2_des_ed_8_au_vaddr64(ed, ED, au, AU) \
static inline void \
octeon_cop2_des_##ed##_8_##au##_vaddr64(uint64_t d, uint64_t s) \
{ \
uint64_t tmp0; \
\
asm volatile ( \
CNASM_START() \
CNASM_##AU##LD(tmp0, 0, s) \
CNASM_MT2(tmp0, CVM_MT_3DES_##ED, 0) \
CNASM_MF2(tmp0, CVM_MF_3DES_RESULT, 0) \
	    CNASM_##AU##SD(tmp0, 0, d) \
CNASM_END() \
: [tmp0] "=&r" (tmp0) \
: [d] "d" (d), \
[s] "d" (s)); \
}
#define __octeon_cop2_des_ed_8_au_ptr(ed, ED, au, AU, ptr) \
static inline void \
octeon_cop2_des_##ed##_8_##au(ptr d, ptr s) \
{ \
octeon_cop2_des_##ed##_8_##au##_vaddr64((intptr_t)d, (intptr_t)s); \
}
#define __octeon_cop2_des_ed_8_au(ed, ED, au, AU) \
__octeon_cop2_des_ed_8_au_vaddr64(ed, ED, au, AU) \
__octeon_cop2_des_ed_8_au_ptr(ed, ED, au, AU, __##au##_t *)
#define __octeon_cop2_des_ed_8(ed, ED) \
__octeon_cop2_des_ed_8_au(ed, ED, aligned, A) \
__octeon_cop2_des_ed_8_au(ed, ED, unaligned, U)
#define __octeon_cop2_des_8 \
__octeon_cop2_des_ed_8(encrypt, ENC) \
__octeon_cop2_des_ed_8(decrypt, DEC) \
__octeon_cop2_des_ed_8(cbc_encrypt, ENC_CBC) \
__octeon_cop2_des_ed_8(cbc_decrypt, DEC_CBC)
__octeon_cop2_des_8
#define __octeon_cop2_des_ed_block_au_vaddr64(ed, ED, au, AU) \
static inline void \
octeon_cop2_des_##ed##_block_##au##_vaddr64(uint64_t d, uint64_t s, int n) \
{ \
uint64_t tmp0; \
uint64_t x = d + 8 * n; \
\
asm volatile ( \
CNASM_START() \
"1: \n" \
CNASM_##AU##LD(tmp0, 0, s) \
CNASM_MT2(tmp0, CVM_MT_3DES_##ED, 0) \
CNASM_MF2(tmp0, CVM_MF_3DES_RESULT, 0) \
CNASM_##AU##SD(tmp0, 0, d) \
" daddu %[d], %[d], 8 \n" \
" bne %[d], %[x], 1b \n" \
	    "	daddu	%[s], %[s], 8		\n"	/* delay slot */ \
CNASM_END() \
: [d] "=d" (d), \
[s] "=d" (s), \
[tmp0] "=&r" (tmp0) \
: "0" (d), \
"1" (s), \
[x] "d" (x)); \
}
#define __octeon_cop2_des_ed_block_au_ptr(ed, ED, au, AU, ptr) \
static inline void \
octeon_cop2_des_##ed##_block_##au(ptr d, ptr s, int n) \
{ \
octeon_cop2_des_##ed##_block_##au##_vaddr64((intptr_t)d, (intptr_t)s, n); \
}
#define __octeon_cop2_des_ed_block_au(ed, ED, au, AU) \
__octeon_cop2_des_ed_block_au_vaddr64(ed, ED, au, AU) \
__octeon_cop2_des_ed_block_au_ptr(ed, ED, au, AU, __##au##_t *)
#define __octeon_cop2_des_ed_block(ed, ED) \
__octeon_cop2_des_ed_block_au(ed, ED, aligned, A) \
__octeon_cop2_des_ed_block_au(ed, ED, unaligned, U)
#define __octeon_cop2_des_block \
/* __octeon_cop2_des_ed_block(encrypt, ENC) */ \
/* __octeon_cop2_des_ed_block(decrypt, DEC) */ \
__octeon_cop2_des_ed_block(cbc_encrypt, ENC_CBC) \
__octeon_cop2_des_ed_block(cbc_decrypt, DEC_CBC)
__octeon_cop2_des_block
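/*
 * Usage sketch (hypothetical caller): CBC-encrypt n 8-byte blocks with
 * the key and IV already loaded; as with the AES loop, n must be >= 1:
 *
 *	octeon_cop2_des_cbc_encrypt_block_unaligned(dst, src, n);
 */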
#define __octeon_cop2_des_ed_64_au_vaddr64(ed, ED, au, AU) \
static inline void \
octeon_cop2_des_##ed##_64_##au##_vaddr64(uint64_t d, uint64_t s) \
{ \
uint64_t tmp0, tmp1, tmp2, tmp3; \
\
asm volatile ( \
CNASM_START() \
CNASM_##AU##LD(tmp0, 0, s) \
CNASM_##AU##LD(tmp1, 8, s) \
CNASM_MT2(tmp0, CVM_MT_3DES_##ED, 0) \
CNASM_##AU##LD(tmp2, 16, s) \
CNASM_MF2(tmp0, CVM_MF_3DES_RESULT, 0) \
CNASM_MT2(tmp1, CVM_MT_3DES_##ED, 0) \
CNASM_##AU##LD(tmp3, 24, s) \
CNASM_MF2(tmp1, CVM_MF_3DES_RESULT, 0) \
CNASM_MT2(tmp2, CVM_MT_3DES_##ED, 0) \
CNASM_##AU##SD(tmp0, 0, d) \
CNASM_MF2(tmp2, CVM_MF_3DES_RESULT, 0) \
CNASM_MT2(tmp3, CVM_MT_3DES_##ED, 0) \
CNASM_##AU##SD(tmp1, 8, d) \
CNASM_MF2(tmp3, CVM_MF_3DES_RESULT, 0) \
CNASM_##AU##SD(tmp2, 16, d) \
CNASM_##AU##SD(tmp3, 24, d) \
CNASM_##AU##LD(tmp0, 32, s) \
CNASM_##AU##LD(tmp1, 40, s) \
CNASM_MT2(tmp0, CVM_MT_3DES_##ED, 0) \
CNASM_##AU##LD(tmp2, 48, s) \
CNASM_MF2(tmp0, CVM_MF_3DES_RESULT, 0) \
CNASM_MT2(tmp1, CVM_MT_3DES_##ED, 0) \
CNASM_##AU##LD(tmp3, 56, s) \
CNASM_MF2(tmp1, CVM_MF_3DES_RESULT, 0) \
CNASM_MT2(tmp2, CVM_MT_3DES_##ED, 0) \
CNASM_##AU##SD(tmp0, 32, d) \
CNASM_MF2(tmp2, CVM_MF_3DES_RESULT, 0) \
CNASM_MT2(tmp3, CVM_MT_3DES_##ED, 0) \
CNASM_##AU##SD(tmp1, 40, d) \
CNASM_MF2(tmp3, CVM_MF_3DES_RESULT, 0) \
CNASM_##AU##SD(tmp2, 48, d) \
CNASM_##AU##SD(tmp3, 56, d) \
CNASM_END() \
: [tmp0] "=&r" (tmp0), \
[tmp1] "=&r" (tmp1), \
[tmp2] "=&r" (tmp2), \
[tmp3] "=&r" (tmp3) \
: [d] "d" (d), \
[s] "d" (s)); \
}
#define __octeon_cop2_des_ed_64_au_ptr(ed, ED, au, AU, ptr) \
static inline void \
octeon_cop2_des_##ed##_64_##au(ptr d, ptr s) \
{ \
octeon_cop2_des_##ed##_64_##au##_vaddr64((intptr_t)d, (intptr_t)s); \
}
#define __octeon_cop2_des_ed_64_au(ed, ED, au, AU) \
__octeon_cop2_des_ed_64_au_vaddr64(ed, ED, au, AU) \
__octeon_cop2_des_ed_64_au_ptr(ed, ED, au, AU, __##au##_t *)
#define __octeon_cop2_des_ed_64(ed, ED) \
__octeon_cop2_des_ed_64_au(ed, ED, aligned, A) \
__octeon_cop2_des_ed_64_au(ed, ED, unaligned, U)
#define __octeon_cop2_des_64 \
/* __octeon_cop2_des_ed_64(encrypt, ENC) */ \
/* __octeon_cop2_des_ed_64(decrypt, DEC) */ \
__octeon_cop2_des_ed_64(cbc_encrypt, ENC_CBC) \
__octeon_cop2_des_ed_64(cbc_decrypt, DEC_CBC)
__octeon_cop2_des_64
/* -------------------------------------------------------------------------- */
/* MD5 */
static inline void
octeon_cop2_md5_set_iv_unaligned_vaddr64(uint64_t iv)
{
uint64_t tmp0, tmp1;
asm volatile (
CNASM_START()
/* Load IV from context */
CNASM_ULD(tmp0, 0, iv)
CNASM_ULD(tmp1, 8, iv)
CNASM_MT2(tmp0, CVM_MT_HSH_IV, 0)
CNASM_MT2(tmp1, CVM_MT_HSH_IV, 1)
CNASM_MT2ZERO( CVM_MT_HSH_IV, 2)
CNASM_MT2ZERO( CVM_MT_HSH_IV, 3)
CNASM_END()
: [tmp0] "=&r" (tmp0),
[tmp1] "=&r" (tmp1)
: [iv] "d" (iv));
}
static inline void
octeon_cop2_md5_set_iv_unaligned(uint64_t *iv)
{
octeon_cop2_md5_set_iv_unaligned_vaddr64((intptr_t)iv);
}
static inline void
octeon_cop2_md5_get_iv_unaligned_vaddr64(uint64_t iv)
{
uint64_t tmp0, tmp1;
asm volatile (
CNASM_START()
/* Store IV to context */
CNASM_MF2(tmp0, CVM_MF_HSH_IV, 0)
CNASM_MF2(tmp1, CVM_MF_HSH_IV, 1)
CNASM_USD(tmp0, 0, iv)
CNASM_USD(tmp1, 8, iv)
CNASM_END()
: [tmp0] "=&r" (tmp0),
[tmp1] "=&r" (tmp1)
: [iv] "d" (iv));
}
static inline void
octeon_cop2_md5_get_iv_unaligned(uint64_t *iv)
{
octeon_cop2_md5_get_iv_unaligned_vaddr64((intptr_t)iv);
}
static inline void
octeon_cop2_md5_update_unaligned_vaddr64(uint64_t src)
{
uint64_t tmp0, tmp1, tmp2, tmp3;
asm volatile (
CNASM_START()
/* Update HASH */
CNASM_ULD(tmp0, 0, src)
CNASM_ULD(tmp1, 8, src)
CNASM_ULD(tmp2, 16, src)
CNASM_ULD(tmp3, 24, src)
CNASM_MT2(tmp0, CVM_MT_HSH_DAT, 0)
CNASM_MT2(tmp1, CVM_MT_HSH_DAT, 1)
CNASM_MT2(tmp2, CVM_MT_HSH_DAT, 2)
CNASM_MT2(tmp3, CVM_MT_HSH_DAT, 3)
CNASM_ULD(tmp0, 32, src)
CNASM_ULD(tmp1, 40, src)
CNASM_ULD(tmp2, 48, src)
CNASM_ULD(tmp3, 56, src)
CNASM_MT2(tmp0, CVM_MT_HSH_DAT, 4)
CNASM_MT2(tmp1, CVM_MT_HSH_DAT, 5)
CNASM_MT2(tmp2, CVM_MT_HSH_DAT, 6)
	    CNASM_MT2(tmp3, CVM_MT_HSH_STARTMD5, 0)
CNASM_END()
: [tmp0] "=&r" (tmp0),
[tmp1] "=&r" (tmp1),
[tmp2] "=&r" (tmp2),
[tmp3] "=&r" (tmp3)
: [src] "d" (src));
}
static inline void
octeon_cop2_md5_update_unaligned(uint64_t *src)
{
octeon_cop2_md5_update_unaligned_vaddr64((intptr_t)src);
}
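/*
 * Usage sketch (hypothetical caller): the IV is the 16-byte MD5
 * chaining state (A, B, C, D) and each update consumes one 64-byte
 * block:
 *
 *	uint64_t st[2], buf[8];
 *	octeon_cop2_md5_set_iv_unaligned(st);
 *	octeon_cop2_md5_update_unaligned(buf);
 *	octeon_cop2_md5_get_iv_unaligned(st);
 */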
/* -------------------------------------------------------------------------- */
/* SHA1 */
static inline void
octeon_cop2_sha1_set_iv_unaligned_vaddr64(uint64_t iv)
{
uint64_t tmp0, tmp1, tmp2;
asm volatile (
CNASM_START()
/* Load IV from context */
CNASM_ULD(tmp0, 0, iv)
CNASM_ULD(tmp1, 8, iv)
CNASM_ULW(tmp2, 16, iv)
"dsll %[tmp2], %[tmp2], 32 \n\t"
CNASM_MT2(tmp0, CVM_MT_HSH_IV, 0)
CNASM_MT2(tmp1, CVM_MT_HSH_IV, 1)
CNASM_MT2(tmp2, CVM_MT_HSH_IV, 2)
CNASM_MT2ZERO( CVM_MT_HSH_IV, 3)
CNASM_END()
: [tmp0] "=&r" (tmp0),
[tmp1] "=&r" (tmp1),
[tmp2] "=&r" (tmp2)
: [iv] "d" (iv));
}
static inline void
octeon_cop2_sha1_set_iv_unaligned(uint8_t *iv)
{
octeon_cop2_sha1_set_iv_unaligned_vaddr64((intptr_t)iv);
}
static inline void
octeon_cop2_sha1_get_iv_unaligned_vaddr64(uint64_t iv)
{
uint64_t tmp0, tmp1, tmp2;
asm volatile (
CNASM_START()
/* Store IV to context */
CNASM_MF2(tmp0, CVM_MF_HSH_IV, 0)
CNASM_MF2(tmp1, CVM_MF_HSH_IV, 1)
CNASM_MF2(tmp2, CVM_MF_HSH_IV, 2)
CNASM_USD(tmp0, 0, iv)
CNASM_USD(tmp1, 8, iv)
"dsrl %[tmp2], %[tmp2], 32 \n\t"
CNASM_USW(tmp2, 16, iv)
CNASM_END()
: [tmp0] "=&r" (tmp0),
[tmp1] "=&r" (tmp1),
[tmp2] "=&r" (tmp2)
: [iv] "d" (iv));
}
static inline void
octeon_cop2_sha1_get_iv_unaligned(uint8_t *iv)
{
octeon_cop2_sha1_get_iv_unaligned_vaddr64((intptr_t)iv);
}
static inline void
octeon_cop2_sha1_update_unaligned_vaddr64(uint64_t src)
{
uint64_t tmp0, tmp1, tmp2, tmp3;
asm volatile (
CNASM_START()
/* Update HASH */
CNASM_ULD(tmp0, 0, src)
CNASM_ULD(tmp1, 8, src)
CNASM_ULD(tmp2, 16, src)
CNASM_ULD(tmp3, 24, src)
CNASM_MT2(tmp0, CVM_MT_HSH_DAT, 0)
CNASM_MT2(tmp1, CVM_MT_HSH_DAT, 1)
CNASM_MT2(tmp2, CVM_MT_HSH_DAT, 2)
CNASM_MT2(tmp3, CVM_MT_HSH_DAT, 3)
CNASM_ULD(tmp0, 32, src)
CNASM_ULD(tmp1, 40, src)
CNASM_ULD(tmp2, 48, src)
CNASM_ULD(tmp3, 56, src)
CNASM_MT2(tmp0, CVM_MT_HSH_DAT, 4)
CNASM_MT2(tmp1, CVM_MT_HSH_DAT, 5)
CNASM_MT2(tmp2, CVM_MT_HSH_DAT, 6)
CNASM_MT2(tmp3, CVM_MT_HSH_STARTSHA, 0)
CNASM_END()
: [tmp0] "=&r" (tmp0),
[tmp1] "=&r" (tmp1),
[tmp2] "=&r" (tmp2),
[tmp3] "=&r" (tmp3)
: [src] "d" (src));
}
static inline void
octeon_cop2_sha1_update_unaligned(uint8_t *src)
{
octeon_cop2_sha1_update_unaligned_vaddr64((intptr_t)src);
}
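/*
 * Usage sketch (hypothetical caller): the SHA1 state is 20 bytes; the
 * fifth 32-bit word is carried in the upper half of HSH_IV 2:
 *
 *	uint8_t st[20], buf[64];
 *	octeon_cop2_sha1_set_iv_unaligned(st);
 *	octeon_cop2_sha1_update_unaligned(buf);
 *	octeon_cop2_sha1_get_iv_unaligned(st);
 */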
/* -------------------------------------------------------------------------- */
/* SHA256 */
static inline void
octeon_cop2_sha256_set_iv_unaligned_vaddr64(uint64_t iv)
{
uint64_t tmp0, tmp1, tmp2, tmp3;
asm volatile (
CNASM_START()
/* Load IV from context */
CNASM_ULD(tmp0, 0, iv)
CNASM_ULD(tmp1, 8, iv)
CNASM_ULD(tmp2, 16, iv)
CNASM_ULD(tmp3, 24, iv)
CNASM_MT2(tmp0, CVM_MT_HSH_IV, 0)
CNASM_MT2(tmp1, CVM_MT_HSH_IV, 1)
CNASM_MT2(tmp2, CVM_MT_HSH_IV, 2)
CNASM_MT2(tmp3, CVM_MT_HSH_IV, 3)
CNASM_END()
: [tmp0] "=&r" (tmp0),
[tmp1] "=&r" (tmp1),
[tmp2] "=&r" (tmp2),
[tmp3] "=&r" (tmp3)
: [iv] "d" (iv));
}
static inline void
octeon_cop2_sha256_set_iv_unaligned(uint8_t *iv)
{
octeon_cop2_sha256_set_iv_unaligned_vaddr64((intptr_t)iv);
}
static inline void
octeon_cop2_sha256_get_iv_unaligned_vaddr64(uint64_t iv)
{
uint64_t tmp0, tmp1, tmp2, tmp3;
asm volatile (
CNASM_START()
/* Store IV to context */
CNASM_MF2(tmp0, CVM_MF_HSH_IV, 0)
CNASM_MF2(tmp1, CVM_MF_HSH_IV, 1)
CNASM_MF2(tmp2, CVM_MF_HSH_IV, 2)
CNASM_MF2(tmp3, CVM_MF_HSH_IV, 3)
CNASM_USD(tmp0, 0, iv)
CNASM_USD(tmp1, 8, iv)
CNASM_USD(tmp2, 16, iv)
CNASM_USD(tmp3, 24, iv)
CNASM_END()
: [tmp0] "=&r" (tmp0),
[tmp1] "=&r" (tmp1),
[tmp2] "=&r" (tmp2),
[tmp3] "=&r" (tmp3)
: [iv] "d" (iv));
}
static inline void
octeon_cop2_sha256_get_iv_unaligned(uint8_t *iv)
{
octeon_cop2_sha256_get_iv_unaligned_vaddr64((intptr_t)iv);
}
static inline void
octeon_cop2_sha256_update_unaligned_vaddr64(uint64_t src)
{
uint64_t tmp0, tmp1, tmp2, tmp3;
asm volatile (
CNASM_START()
/* Update HASH */
CNASM_ULD(tmp0, 0, src)
CNASM_ULD(tmp1, 8, src)
CNASM_ULD(tmp2, 16, src)
CNASM_ULD(tmp3, 24, src)
CNASM_MT2(tmp0, CVM_MT_HSH_DAT, 0)
CNASM_MT2(tmp1, CVM_MT_HSH_DAT, 1)
CNASM_MT2(tmp2, CVM_MT_HSH_DAT, 2)
CNASM_MT2(tmp3, CVM_MT_HSH_DAT, 3)
CNASM_ULD(tmp0, 32, src)
CNASM_ULD(tmp1, 40, src)
CNASM_ULD(tmp2, 48, src)
CNASM_ULD(tmp3, 56, src)
CNASM_MT2(tmp0, CVM_MT_HSH_DAT, 4)
CNASM_MT2(tmp1, CVM_MT_HSH_DAT, 5)
CNASM_MT2(tmp2, CVM_MT_HSH_DAT, 6)
CNASM_MT2(tmp3, CVM_MT_HSH_STARTSHA256, 0)
CNASM_END()
: [tmp0] "=&r" (tmp0),
[tmp1] "=&r" (tmp1),
[tmp2] "=&r" (tmp2),
[tmp3] "=&r" (tmp3)
: [src] "d" (src));
}
static inline void
octeon_cop2_sha256_update_unaligned(uint8_t *src)
{
octeon_cop2_sha256_update_unaligned_vaddr64((intptr_t)src);
}
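/*
 * Usage sketch (hypothetical caller): 32-byte SHA256 state, 64-byte
 * blocks:
 *
 *	uint8_t st[32], buf[64];
 *	octeon_cop2_sha256_set_iv_unaligned(st);
 *	octeon_cop2_sha256_update_unaligned(buf);
 *	octeon_cop2_sha256_get_iv_unaligned(st);
 */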
/* -------------------------------------------------------------------------- */
/* SHA512 */
static inline void
octeon_cop2_sha512_set_iv_unaligned_vaddr64(uint64_t iv)
{
uint64_t tmp0, tmp1, tmp2, tmp3;
asm volatile (
CNASM_START()
/* Load IV from context */
CNASM_ULD(tmp0, 0, iv)
CNASM_ULD(tmp1, 8, iv)
CNASM_ULD(tmp2, 16, iv)
CNASM_ULD(tmp3, 24, iv)
CNASM_MT2(tmp0, CVM_MT_HSH_IVW, 0)
CNASM_MT2(tmp1, CVM_MT_HSH_IVW, 1)
CNASM_MT2(tmp2, CVM_MT_HSH_IVW, 2)
CNASM_MT2(tmp3, CVM_MT_HSH_IVW, 3)
CNASM_ULD(tmp0, 32, iv)
CNASM_ULD(tmp1, 40, iv)
CNASM_ULD(tmp2, 48, iv)
CNASM_ULD(tmp3, 56, iv)
CNASM_MT2(tmp0, CVM_MT_HSH_IVW, 4)
CNASM_MT2(tmp1, CVM_MT_HSH_IVW, 5)
CNASM_MT2(tmp2, CVM_MT_HSH_IVW, 6)
CNASM_MT2(tmp3, CVM_MT_HSH_IVW, 7)
CNASM_END()
: [tmp0] "=&r" (tmp0),
[tmp1] "=&r" (tmp1),
[tmp2] "=&r" (tmp2),
[tmp3] "=&r" (tmp3)
: [iv] "d" (iv));
}
static inline void
octeon_cop2_sha512_set_iv_unaligned(uint8_t *iv)
{
octeon_cop2_sha512_set_iv_unaligned_vaddr64((intptr_t)iv);
}
static inline void
octeon_cop2_sha512_get_iv_unaligned_vaddr64(uint64_t iv)
{
uint64_t tmp0, tmp1, tmp2, tmp3;
asm volatile (
CNASM_START()
/* Store IV to context */
CNASM_MF2(tmp0, CVM_MF_HSH_IVW, 0)
CNASM_MF2(tmp1, CVM_MF_HSH_IVW, 1)
CNASM_MF2(tmp2, CVM_MF_HSH_IVW, 2)
CNASM_MF2(tmp3, CVM_MF_HSH_IVW, 3)
CNASM_USD(tmp0, 0, iv)
CNASM_USD(tmp1, 8, iv)
CNASM_USD(tmp2, 16, iv)
CNASM_USD(tmp3, 24, iv)
CNASM_MF2(tmp0, CVM_MF_HSH_IVW, 4)
CNASM_MF2(tmp1, CVM_MF_HSH_IVW, 5)
CNASM_MF2(tmp2, CVM_MF_HSH_IVW, 6)
CNASM_MF2(tmp3, CVM_MF_HSH_IVW, 7)
CNASM_USD(tmp0, 32, iv)
CNASM_USD(tmp1, 40, iv)
CNASM_USD(tmp2, 48, iv)
CNASM_USD(tmp3, 56, iv)
CNASM_END()
: [tmp0] "=&r" (tmp0),
[tmp1] "=&r" (tmp1),
[tmp2] "=&r" (tmp2),
[tmp3] "=&r" (tmp3)
: [iv] "d" (iv));
}
static inline void
octeon_cop2_sha512_get_iv_unaligned(uint8_t *iv)
{
octeon_cop2_sha512_get_iv_unaligned_vaddr64((intptr_t)iv);
}
static inline void
octeon_cop2_sha512_update_unaligned_vaddr64(uint64_t src)
{
uint64_t tmp0, tmp1, tmp2, tmp3;
asm volatile (
CNASM_START()
/* Update HASH */
CNASM_ULD(tmp0, 0, src)
CNASM_ULD(tmp1, 8, src)
CNASM_ULD(tmp2, 16, src)
CNASM_ULD(tmp3, 24, src)
CNASM_MT2(tmp0, CVM_MT_HSH_DATW, 0)
CNASM_MT2(tmp1, CVM_MT_HSH_DATW, 1)
CNASM_MT2(tmp2, CVM_MT_HSH_DATW, 2)
CNASM_MT2(tmp3, CVM_MT_HSH_DATW, 3)
CNASM_ULD(tmp0, 32, src)
CNASM_ULD(tmp1, 40, src)
CNASM_ULD(tmp2, 48, src)
CNASM_ULD(tmp3, 56, src)
CNASM_MT2(tmp0, CVM_MT_HSH_DATW, 4)
CNASM_MT2(tmp1, CVM_MT_HSH_DATW, 5)
CNASM_MT2(tmp2, CVM_MT_HSH_DATW, 6)
CNASM_MT2(tmp3, CVM_MT_HSH_DATW, 7)
CNASM_ULD(tmp0, 64, src)
CNASM_ULD(tmp1, 72, src)
CNASM_ULD(tmp2, 80, src)
CNASM_ULD(tmp3, 88, src)
CNASM_MT2(tmp0, CVM_MT_HSH_DATW, 8)
CNASM_MT2(tmp1, CVM_MT_HSH_DATW, 9)
CNASM_MT2(tmp2, CVM_MT_HSH_DATW, 10)
CNASM_MT2(tmp3, CVM_MT_HSH_DATW, 11)
CNASM_ULD(tmp0, 96, src)
CNASM_ULD(tmp1, 104, src)
CNASM_ULD(tmp2, 112, src)
CNASM_ULD(tmp3, 120, src)
CNASM_MT2(tmp0, CVM_MT_HSH_DATW, 12)
CNASM_MT2(tmp1, CVM_MT_HSH_DATW, 13)
CNASM_MT2(tmp2, CVM_MT_HSH_DATW, 14)
CNASM_MT2(tmp3, CVM_MT_HSH_STARTSHA512, 0)
CNASM_END()
: [tmp0] "=&r" (tmp0),
[tmp1] "=&r" (tmp1),
[tmp2] "=&r" (tmp2),
[tmp3] "=&r" (tmp3)
: [src] "d" (src));
}
static inline void
octeon_cop2_sha512_update_unaligned(uint8_t *src)
{
octeon_cop2_sha512_update_unaligned_vaddr64((intptr_t)src);
}
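/*
 * Usage sketch (hypothetical caller): 64-byte SHA512 state in the wide
 * IVW registers, 128-byte blocks:
 *
 *	uint8_t st[64], buf[128];
 *	octeon_cop2_sha512_set_iv_unaligned(st);
 *	octeon_cop2_sha512_update_unaligned(buf);
 *	octeon_cop2_sha512_get_iv_unaligned(st);
 */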
/* -------------------------------------------------------------------------- */
/* CRC */
/* XXX */
#ifdef notyet
static inline void
octeon_cop2_crc_polynomial(uint64_t val)
{
	asm volatile (
	    CNASM_START()
	    "	dmtc2	%[val], 0x4200	\n\t"
	    CNASM_END()
	    :
	    : [val] "d" (val));
}
#define CVMX_MT_CRC_IV(val) \
	asm volatile (CNASM_START() "dmtc2 %0,0x0201 \n\t" CNASM_END() \
	    :: "d"(val))
#define CVMX_MT_CRC_BYTE_REFLECT(val) \
	asm volatile (CNASM_START() "dmtc2 %0,0x0214 \n\t" CNASM_END() \
	    :: "d"(val))
#define CVMX_MT_CRC_HALF_REFLECT(val) \
	asm volatile (CNASM_START() "dmtc2 %0,0x0215 \n\t" CNASM_END() \
	    :: "d"(val))
#define CVMX_MT_CRC_DWORD_REFLECT(val) \
	asm volatile (CNASM_START() "dmtc2 %0,0x1217 \n\t" CNASM_END() \
	    :: "d"(val))
#define CVMX_MF_CRC_IV_REFLECT(val) \
	asm volatile (CNASM_START() "dmfc2 %0,0x0203 \n\t" CNASM_END() \
	    : "=d"(val))
static inline void
octeon_cop2_crc_reflect(uint64_t buf, uint64_t len)	/* XXX signature inferred from asm operands */
{
	uint64_t val, tmp;

	asm volatile (
CNASM_START()
" and %[val], %[len], 15 \n"
" beq %[val], %[len], 2f \n"
" subu %[tmp], %[len], %[val] \n"
" move %[len], %[val] \n"
" addu %[tmp], %[buf] \n"
" .align 3 \n"
"1: \n"
CNASM_ULD(val, 0, buf)
" addu %[buf], 16 \n"
CNASM_MT2(val, CVM_MT_CRC_DWORD_REFLECT, 0)
CNASM_ULD(val, -8, buf)
" bne %[buf], %[tmp], 1b \n"
CNASM_MT2(val, CVM_MT_CRC_DWORD_REFLECT, 0)
" .align 3 \n"
"2: and %[val], %[len], 1 \n"
" beq %[val], %[len], 4f \n"
" subu %[tmp], %[len], %[val] \n"
" move %[len], %[val] \n"
" addu %[tmp], %[buf] \n"
" .align 3 \n"
"3: addu %[buf], 2 \n"
" lhu %[val], -2(%[buf]) \n"
" bne %[buf], %[tmp], 3b \n"
CNASM_MT2(val, CVM_MT_CRC_HALF_REFLECT, 0)
" .align 3 \n"
"4: beqz %[len], 5f \n"
" nop \n"
" lbu %[val], 0(%[buf]) \n"
CNASM_MT2(val, CVM_MT_CRC_BYTE_REFLECT, 0)
" .align 3 \n"
"5: \n"
CNASM_END()
: [len] "=d" (len),
[buf] "=d" (buf),
[val] "=d" (val),
[tmp] "=d" (tmp)
: "0" (len),
"1" (buf)
	    );
}
#endif
/* -------------------------------------------------------------------------- */
/* GFM */
/* XXX */
#endif /* _OCTEON_COP2VAR_H_ */