luckfox-pico-sdk/sysdrv/source/kernel/arch/x86/crypto/ghash-clmulni-intel_asm.S
luckfox-eng29 8f34c2760d project:build.sh: Added fastboot support; custom modifications to U-Boot and kernel implemented using patches.
project:cfg:BoardConfig_IPC: Added fastboot BoardConfig file and firmware post-scripts, distinguishing between
the BoardConfigs for Luckfox Pico Pro and Luckfox Pico Max. project:app: Added fastboot_client and rk_smart_door
for quick boot applications; updated rkipc app to adapt to the latest media library. media:samples: Added more
usage examples. media:rockit: Fixed bugs; removed support for retrieving data frames from VPSS. media:isp:
Updated rkaiq library and related tools to support connection to RKISP_Tuner. sysdrv:Makefile: Added support for
compiling drv_ko on Luckfox Pico Ultra W using Ubuntu; added support for custom root filesystem.
sysdrv:tools:board: Updated Buildroot optional mirror sources, updated some software versions, and stored device
tree files and configuration files that undergo multiple modifications for U-Boot and kernel separately.
sysdrv:source:mcu: Used RISC-V MCU SDK with RT-Thread system, mainly for initializing camera AE during quick
boot. sysdrv:source:uboot: Added support for fastboot; added high baud rate DDR bin for serial firmware upgrades.
sysdrv:source:kernel: Upgraded to version 5.10.160; increased NPU frequency for RV1106G3; added support for
fastboot.

Signed-off-by: luckfox-eng29 <eng29@luckfox.com>
2024-10-14 09:47:04 +08:00

133 lines
2.7 KiB
ArmAsm

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Accelerated GHASH implementation with Intel PCLMULQDQ-NI
* instructions. This file contains accelerated part of ghash
* implementation. More information about PCLMULQDQ can be found at:
*
* http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
*
* Copyright (c) 2009 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
* Vinodh Gopal
* Erdinc Ozturk
* Deniz Karakoyunlu
*/
#include <linux/linkage.h>
#include <asm/frame.h>
.section .rodata.cst16.bswap_mask, "aM", @progbits, 16
.align 16
.Lbswap_mask:
.octa 0x000102030405060708090a0b0c0d0e0f
#define DATA %xmm0
#define SHASH %xmm1
#define T1 %xmm2
#define T2 %xmm3
#define T3 %xmm4
#define BSWAP %xmm5
#define IN1 %xmm6
.text
/*
* __clmul_gf128mul_ble: internal ABI
* input:
* DATA: operand1
* SHASH: operand2, hash_key << 1 mod poly
* output:
* DATA: operand1 * operand2 mod poly
* changed:
* T1
* T2
* T3
*/
SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
movaps DATA, T1
pshufd $0b01001110, DATA, T2
pshufd $0b01001110, SHASH, T3
pxor DATA, T2
pxor SHASH, T3
pclmulqdq $0x00, SHASH, DATA # DATA = a0 * b0
pclmulqdq $0x11, SHASH, T1 # T1 = a1 * b1
pclmulqdq $0x00, T3, T2 # T2 = (a1 + a0) * (b1 + b0)
pxor DATA, T2
pxor T1, T2 # T2 = a0 * b1 + a1 * b0
movaps T2, T3
pslldq $8, T3
psrldq $8, T2
pxor T3, DATA
pxor T2, T1 # <T1:DATA> is result of
# carry-less multiplication
# first phase of the reduction
movaps DATA, T3
psllq $1, T3
pxor DATA, T3
psllq $5, T3
pxor DATA, T3
psllq $57, T3
movaps T3, T2
pslldq $8, T2
psrldq $8, T3
pxor T2, DATA
pxor T3, T1
# second phase of the reduction
movaps DATA, T2
psrlq $5, T2
pxor DATA, T2
psrlq $1, T2
pxor DATA, T2
psrlq $1, T2
pxor T2, T1
pxor T1, DATA
RET
SYM_FUNC_END(__clmul_gf128mul_ble)
/* void clmul_ghash_mul(char *dst, const u128 *shash) */
SYM_FUNC_START(clmul_ghash_mul)
FRAME_BEGIN
movups (%rdi), DATA
movups (%rsi), SHASH
movaps .Lbswap_mask, BSWAP
pshufb BSWAP, DATA
call __clmul_gf128mul_ble
pshufb BSWAP, DATA
movups DATA, (%rdi)
FRAME_END
RET
SYM_FUNC_END(clmul_ghash_mul)
/*
* void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
* const u128 *shash);
*/
SYM_FUNC_START(clmul_ghash_update)
FRAME_BEGIN
cmp $16, %rdx
jb .Lupdate_just_ret # check length
movaps .Lbswap_mask, BSWAP
movups (%rdi), DATA
movups (%rcx), SHASH
pshufb BSWAP, DATA
.align 4
.Lupdate_loop:
movups (%rsi), IN1
pshufb BSWAP, IN1
pxor IN1, DATA
call __clmul_gf128mul_ble
sub $16, %rdx
add $16, %rsi
cmp $16, %rdx
jge .Lupdate_loop
pshufb BSWAP, DATA
movups DATA, (%rdi)
.Lupdate_just_ret:
FRAME_END
RET
SYM_FUNC_END(clmul_ghash_update)