project:cfg:BoardConfig_IPC: Added fastboot BoardConfig file and firmware post-scripts, distinguishing between the BoardConfigs for Luckfox Pico Pro and Luckfox Pico Max. project:app: Added fastboot_client and rk_smart_door for quick boot applications; updated rkipc app to adapt to the latest media library. media:samples: Added more usage examples. media:rockit: Fixed bugs; removed support for retrieving data frames from VPSS. media:isp: Updated rkaiq library and related tools to support connection to RKISP_Tuner. sysdrv:Makefile: Added support for compiling drv_ko on Luckfox Pico Ultra W using Ubuntu; added support for custom root filesystem. sysdrv:tools:board: Updated Buildroot optional mirror sources, updated some software versions, and stored device tree files and configuration files that undergo multiple modifications for U-Boot and kernel separately. sysdrv:source:mcu: Used RISC-V MCU SDK with RT-Thread system, mainly for initializing camera AE during quick boot. sysdrv:source:uboot: Added support for fastboot; added high baud rate DDR bin for serial firmware upgrades. sysdrv:source:kernel: Upgraded to version 5.10.160; increased NPU frequency for RV1106G3; added support for fastboot. Signed-off-by: luckfox-eng29 <eng29@luckfox.com>
428 lines
15 KiB
C
428 lines
15 KiB
C
/*
|
|
* Copyright © 2011 Siarhei Siamashka <siarhei.siamashka@gmail.com>
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
* DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <ctype.h>
|
|
#include "asm-opt.h"
|
|
|
|
#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
|
|
|
|
#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
|
|
|
|
#if defined(__i386__) || defined(__amd64__)
|
|
#define FEATURES_ID "flags"
|
|
#elif defined(__arm__)
|
|
#define FEATURES_ID "Features"
|
|
#elif defined(__mips__)
|
|
#define FEATURES_ID "cpu model"
|
|
#else
|
|
#define FEATURES_ID "?"
|
|
#endif
|
|
|
|
static int check_feature (char *buffer, const char *feature)
|
|
{
|
|
char *p;
|
|
if (*feature == 0)
|
|
return 0;
|
|
if (strncmp(buffer, FEATURES_ID, strlen(FEATURES_ID)) != 0)
|
|
return 0;
|
|
buffer += strlen(FEATURES_ID);
|
|
while (isspace(*buffer))
|
|
buffer++;
|
|
|
|
/* Check if 'feature' is present in the buffer as a separate word */
|
|
while ((p = strstr(buffer, feature))) {
|
|
if (p > buffer && !isspace(*(p - 1))) {
|
|
buffer++;
|
|
continue;
|
|
}
|
|
p += strlen(feature);
|
|
if (*p != 0 && !isspace(*p)) {
|
|
buffer++;
|
|
continue;
|
|
}
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int parse_proc_cpuinfo(int bufsize, const char *feature)
|
|
{
|
|
char *buffer = (char *)malloc(bufsize);
|
|
FILE *fd;
|
|
int feature_support = 0;
|
|
|
|
if (!buffer)
|
|
return 0;
|
|
|
|
fd = fopen("/proc/cpuinfo", "r");
|
|
if (fd) {
|
|
while (fgets(buffer, bufsize, fd)) {
|
|
if (!strchr(buffer, '\n') && !feof(fd)) {
|
|
/* "impossible" happened - insufficient size of the buffer! */
|
|
fclose(fd);
|
|
free(buffer);
|
|
return -1;
|
|
}
|
|
if (check_feature(buffer, feature))
|
|
feature_support = 1;
|
|
}
|
|
fclose(fd);
|
|
}
|
|
free(buffer);
|
|
return feature_support;
|
|
}
|
|
|
|
#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
|
|
|
|
int check_cpu_feature(const char *feature)
|
|
{
|
|
int bufsize = 1024;
|
|
int result;
|
|
while ((result = parse_proc_cpuinfo(bufsize, feature)) == -1)
|
|
{
|
|
bufsize *= 2;
|
|
if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
|
|
return 0;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
#else
|
|
|
|
int check_cpu_feature(const char *feature)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
#endif
|
|
|
|
static bench_info empty[] = { { NULL, 0, NULL } };
|
|
|
|
#if defined(__i386__) || defined(__amd64__)
|
|
|
|
#include "x86-sse2.h"
|
|
|
|
static bench_info x86_sse2[] =
|
|
{
|
|
{ "MOVSB copy", 0, aligned_block_copy_movsb },
|
|
{ "MOVSD copy", 0, aligned_block_copy_movsd },
|
|
{ "SSE2 copy", 0, aligned_block_copy_sse2 },
|
|
{ "SSE2 nontemporal copy", 0, aligned_block_copy_nt_sse2 },
|
|
{ "SSE2 copy prefetched (32 bytes step)", 0, aligned_block_copy_pf32_sse2 },
|
|
{ "SSE2 copy prefetched (64 bytes step)", 0, aligned_block_copy_pf64_sse2 },
|
|
{ "SSE2 nontemporal copy prefetched (32 bytes step)", 0, aligned_block_copy_nt_pf32_sse2 },
|
|
{ "SSE2 nontemporal copy prefetched (64 bytes step)", 0, aligned_block_copy_nt_pf64_sse2 },
|
|
{ "SSE2 2-pass copy", 1, aligned_block_copy_sse2 },
|
|
{ "SSE2 2-pass copy prefetched (32 bytes step)", 1, aligned_block_copy_pf32_sse2 },
|
|
{ "SSE2 2-pass copy prefetched (64 bytes step)", 1, aligned_block_copy_pf64_sse2 },
|
|
{ "SSE2 2-pass nontemporal copy", 1, aligned_block_copy_nt_sse2 },
|
|
{ "SSE2 fill", 0, aligned_block_fill_sse2 },
|
|
{ "SSE2 nontemporal fill", 0, aligned_block_fill_nt_sse2 },
|
|
{ NULL, 0, NULL }
|
|
};
|
|
|
|
static bench_info x86_sse2_fb[] =
|
|
{
|
|
{ "MOVSD copy (from framebuffer)", 0, aligned_block_copy_movsd },
|
|
{ "MOVSD 2-pass copy (from framebuffer)", 1, aligned_block_copy_movsd },
|
|
{ "SSE2 copy (from framebuffer)", 0, aligned_block_copy_sse2 },
|
|
{ "SSE2 2-pass copy (from framebuffer)", 1, aligned_block_copy_sse2 },
|
|
{ NULL, 0, NULL }
|
|
};
|
|
|
|
static int check_sse2_support(void)
|
|
{
|
|
#ifdef __amd64__
|
|
return 1; /* We assume that all 64-bit processors have SSE2 support */
|
|
#else
|
|
int cpuid_feature_information;
|
|
__asm__ volatile (
|
|
/* According to Intel manual, CPUID instruction is supported
|
|
* if the value of ID bit (bit 21) in EFLAGS can be modified */
|
|
"pushf\n"
|
|
"movl (%%esp), %0\n"
|
|
"xorl $0x200000, (%%esp)\n" /* try to modify ID bit */
|
|
"popf\n"
|
|
"pushf\n"
|
|
"xorl (%%esp), %0\n" /* check if ID bit changed */
|
|
"jz 1f\n"
|
|
"push %%eax\n"
|
|
"push %%ebx\n"
|
|
"push %%ecx\n"
|
|
"mov $1, %%eax\n"
|
|
"cpuid\n"
|
|
"pop %%ecx\n"
|
|
"pop %%ebx\n"
|
|
"pop %%eax\n"
|
|
"1:\n"
|
|
"popf\n"
|
|
: "=d" (cpuid_feature_information)
|
|
:
|
|
: "cc");
|
|
return cpuid_feature_information & (1 << 26);
|
|
#endif
|
|
}
|
|
|
|
bench_info *get_asm_benchmarks(void)
|
|
{
|
|
if (check_sse2_support())
|
|
return x86_sse2;
|
|
else
|
|
return empty;
|
|
}
|
|
|
|
bench_info *get_asm_framebuffer_benchmarks(void)
|
|
{
|
|
if (check_sse2_support())
|
|
return x86_sse2_fb;
|
|
else
|
|
return empty;
|
|
}
|
|
|
|
#elif defined(__arm__neon__)
|
|
|
|
#include "arm-neon.h"
|
|
|
|
static bench_info arm_neon[] =
|
|
{
|
|
{ "NEON read", 0, aligned_block_read_neon },
|
|
{ "NEON read prefetched (32 bytes step)", 0, aligned_block_read_pf32_neon },
|
|
{ "NEON read prefetched (64 bytes step)", 0, aligned_block_read_pf64_neon },
|
|
{ "NEON read 2 data streams", 0, aligned_block_read2_neon },
|
|
{ "NEON read 2 data streams prefetched (32 bytes step)", 0, aligned_block_read2_pf32_neon },
|
|
{ "NEON read 2 data streams prefetched (64 bytes step)", 0, aligned_block_read2_pf64_neon },
|
|
{ "NEON copy", 0, aligned_block_copy_neon },
|
|
{ "NEON copy prefetched (32 bytes step)", 0, aligned_block_copy_pf32_neon },
|
|
{ "NEON copy prefetched (64 bytes step)", 0, aligned_block_copy_pf64_neon },
|
|
{ "NEON unrolled copy", 0, aligned_block_copy_unrolled_neon },
|
|
{ "NEON unrolled copy prefetched (32 bytes step)", 0, aligned_block_copy_unrolled_pf32_neon },
|
|
{ "NEON unrolled copy prefetched (64 bytes step)", 0, aligned_block_copy_unrolled_pf64_neon },
|
|
{ "NEON copy backwards", 0, aligned_block_copy_backwards_neon },
|
|
{ "NEON copy backwards prefetched (32 bytes step)", 0, aligned_block_copy_backwards_pf32_neon },
|
|
{ "NEON copy backwards prefetched (64 bytes step)", 0, aligned_block_copy_backwards_pf64_neon },
|
|
{ "NEON 2-pass copy", 1, aligned_block_copy_neon },
|
|
{ "NEON 2-pass copy prefetched (32 bytes step)", 1, aligned_block_copy_pf32_neon },
|
|
{ "NEON 2-pass copy prefetched (64 bytes step)", 1, aligned_block_copy_pf64_neon },
|
|
{ "NEON unrolled 2-pass copy", 1, aligned_block_copy_unrolled_neon },
|
|
{ "NEON unrolled 2-pass copy prefetched (32 bytes step)", 1, aligned_block_copy_unrolled_pf32_neon },
|
|
{ "NEON unrolled 2-pass copy prefetched (64 bytes step)", 1, aligned_block_copy_unrolled_pf64_neon },
|
|
{ "NEON fill", 0, aligned_block_fill_neon },
|
|
{ "NEON fill backwards", 0, aligned_block_fill_backwards_neon },
|
|
{ "VFP copy", 0, aligned_block_copy_vfp },
|
|
{ "VFP 2-pass copy", 1, aligned_block_copy_vfp },
|
|
{ "ARM fill (STRD)", 0, aligned_block_fill_strd_armv5te },
|
|
{ "ARM fill (STM with 8 registers)", 0, aligned_block_fill_stm8_armv4 },
|
|
{ "ARM fill (STM with 4 registers)", 0, aligned_block_fill_stm4_armv4 },
|
|
{ "ARM copy prefetched (incr pld)", 0, aligned_block_copy_incr_armv5te },
|
|
{ "ARM copy prefetched (wrap pld)", 0, aligned_block_copy_wrap_armv5te },
|
|
{ "ARM 2-pass copy prefetched (incr pld)", 1, aligned_block_copy_incr_armv5te },
|
|
{ "ARM 2-pass copy prefetched (wrap pld)", 1, aligned_block_copy_wrap_armv5te },
|
|
{ NULL, 0, NULL }
|
|
};
|
|
|
|
static bench_info arm_v5te_vfp[] =
|
|
{
|
|
{ "VFP copy", 0, aligned_block_copy_vfp },
|
|
{ "VFP 2-pass copy", 1, aligned_block_copy_vfp },
|
|
{ "ARM fill (STRD)", 0, aligned_block_fill_strd_armv5te },
|
|
{ "ARM fill (STM with 8 registers)", 0, aligned_block_fill_stm8_armv4 },
|
|
{ "ARM fill (STM with 4 registers)", 0, aligned_block_fill_stm4_armv4 },
|
|
{ "ARM copy prefetched (incr pld)", 0, aligned_block_copy_incr_armv5te },
|
|
{ "ARM copy prefetched (wrap pld)", 0, aligned_block_copy_wrap_armv5te },
|
|
{ "ARM 2-pass copy prefetched (incr pld)", 1, aligned_block_copy_incr_armv5te },
|
|
{ "ARM 2-pass copy prefetched (wrap pld)", 1, aligned_block_copy_wrap_armv5te },
|
|
{ NULL, 0, NULL }
|
|
};
|
|
|
|
static bench_info arm_v5te[] =
|
|
{
|
|
{ "ARM fill (STRD)", 0, aligned_block_fill_strd_armv5te },
|
|
{ "ARM fill (STM with 8 registers)", 0, aligned_block_fill_stm8_armv4 },
|
|
{ "ARM fill (STM with 4 registers)", 0, aligned_block_fill_stm4_armv4 },
|
|
{ "ARM copy prefetched (incr pld)", 0, aligned_block_copy_incr_armv5te },
|
|
{ "ARM copy prefetched (wrap pld)", 0, aligned_block_copy_wrap_armv5te },
|
|
{ "ARM 2-pass copy prefetched (incr pld)", 1, aligned_block_copy_incr_armv5te },
|
|
{ "ARM 2-pass copy prefetched (wrap pld)", 1, aligned_block_copy_wrap_armv5te },
|
|
{ NULL, 0, NULL }
|
|
};
|
|
|
|
static bench_info arm_v4[] =
|
|
{
|
|
{ "ARM fill (STM with 8 registers)", 0, aligned_block_fill_stm8_armv4 },
|
|
{ "ARM fill (STM with 4 registers)", 0, aligned_block_fill_stm4_armv4 },
|
|
{ NULL, 0, NULL }
|
|
};
|
|
|
|
bench_info *get_asm_benchmarks(void)
|
|
{
|
|
if (check_cpu_feature("neon"))
|
|
return arm_neon;
|
|
else if (check_cpu_feature("edsp") && check_cpu_feature("vfp"))
|
|
return arm_v5te_vfp;
|
|
else if (check_cpu_feature("edsp"))
|
|
return arm_v5te;
|
|
else
|
|
return arm_v4;
|
|
}
|
|
|
|
static bench_info arm_neon_fb[] =
|
|
{
|
|
{ "NEON read (from framebuffer)", 0, aligned_block_read_neon },
|
|
{ "NEON copy (from framebuffer)", 0, aligned_block_copy_neon },
|
|
{ "NEON 2-pass copy (from framebuffer)", 1, aligned_block_copy_neon },
|
|
{ "NEON unrolled copy (from framebuffer)", 0, aligned_block_copy_unrolled_neon },
|
|
{ "NEON 2-pass unrolled copy (from framebuffer)", 1, aligned_block_copy_unrolled_neon },
|
|
{ "VFP copy (from framebuffer)", 0, aligned_block_copy_vfp },
|
|
{ "VFP 2-pass copy (from framebuffer)", 1, aligned_block_copy_vfp },
|
|
{ "ARM copy (from framebuffer)", 0, aligned_block_copy_incr_armv5te },
|
|
{ "ARM 2-pass copy (from framebuffer)", 1, aligned_block_copy_incr_armv5te },
|
|
{ NULL, 0, NULL }
|
|
};
|
|
|
|
static bench_info arm_v5te_vfp_fb[] =
|
|
{
|
|
{ "VFP copy (from framebuffer)", 0, aligned_block_copy_vfp },
|
|
{ "VFP 2-pass copy (from framebuffer)", 1, aligned_block_copy_vfp },
|
|
{ "ARM copy (from framebuffer)", 0, aligned_block_copy_incr_armv5te },
|
|
{ "ARM 2-pass copy (from framebuffer)", 1, aligned_block_copy_incr_armv5te },
|
|
{ NULL, 0, NULL }
|
|
};
|
|
|
|
static bench_info arm_v5te_fb[] =
|
|
{
|
|
{ "ARM copy (from framebuffer)", 0, aligned_block_copy_incr_armv5te },
|
|
{ "ARM 2-pass copy (from framebuffer)", 1, aligned_block_copy_incr_armv5te },
|
|
{ NULL, 0, NULL }
|
|
};
|
|
|
|
bench_info *get_asm_framebuffer_benchmarks(void)
|
|
{
|
|
if (check_cpu_feature("neon"))
|
|
return arm_neon_fb;
|
|
else if (check_cpu_feature("edsp") && check_cpu_feature("vfp"))
|
|
return arm_v5te_vfp_fb;
|
|
else if (check_cpu_feature("edsp"))
|
|
return arm_v5te_fb;
|
|
else
|
|
return empty;
|
|
}
|
|
|
|
#elif defined(__aarch64__)
|
|
|
|
#include "aarch64-asm.h"
|
|
|
|
static bench_info aarch64_neon[] =
|
|
{
|
|
{ "NEON LDP/STP copy", 0, aligned_block_copy_ldpstp_q_aarch64 },
|
|
{ "NEON LDP/STP copy pldl2strm (32 bytes step)", 0, aligned_block_copy_ldpstp_q_pf32_l2strm_aarch64 },
|
|
{ "NEON LDP/STP copy pldl2strm (64 bytes step)", 0, aligned_block_copy_ldpstp_q_pf64_l2strm_aarch64 },
|
|
{ "NEON LDP/STP copy pldl1keep (32 bytes step)", 0, aligned_block_copy_ldpstp_q_pf32_l1keep_aarch64 },
|
|
{ "NEON LDP/STP copy pldl1keep (64 bytes step)", 0, aligned_block_copy_ldpstp_q_pf64_l1keep_aarch64 },
|
|
{ "NEON LD1/ST1 copy", 0, aligned_block_copy_ld1st1_aarch64 },
|
|
{ "NEON STP fill", 0, aligned_block_fill_stp_q_aarch64 },
|
|
{ "NEON STNP fill", 0, aligned_block_fill_stnp_q_aarch64 },
|
|
{ "ARM LDP/STP copy", 0, aligned_block_copy_ldpstp_x_aarch64 },
|
|
{ "ARM STP fill", 0, aligned_block_fill_stp_x_aarch64 },
|
|
{ "ARM STNP fill", 0, aligned_block_fill_stnp_x_aarch64 },
|
|
{ NULL, 0, NULL }
|
|
};
|
|
|
|
static bench_info aarch64_neon_fb[] =
|
|
{
|
|
{ "NEON LDP/STP copy (from framebuffer)", 0, aligned_block_copy_ldpstp_q_aarch64 },
|
|
{ "NEON LDP/STP 2-pass copy (from framebuffer)", 1, aligned_block_copy_ldpstp_q_aarch64 },
|
|
{ "NEON LD1/ST1 copy (from framebuffer)", 0, aligned_block_copy_ld1st1_aarch64 },
|
|
{ "NEON LD1/ST1 2-pass copy (from framebuffer)", 1, aligned_block_copy_ld1st1_aarch64 },
|
|
{ "ARM LDP/STP copy (from framebuffer)", 0, aligned_block_copy_ldpstp_x_aarch64 },
|
|
{ "ARM LDP/STP 2-pass copy (from framebuffer)", 1, aligned_block_copy_ldpstp_x_aarch64 },
|
|
{ NULL, 0, NULL }
|
|
};
|
|
|
|
bench_info *get_asm_benchmarks(void)
|
|
{
|
|
return aarch64_neon;
|
|
}
|
|
|
|
bench_info *get_asm_framebuffer_benchmarks(void)
|
|
{
|
|
return aarch64_neon_fb;
|
|
}
|
|
|
|
#elif defined(__mips__) && defined(_ABIO32)
|
|
|
|
#include "mips-32.h"
|
|
|
|
static bench_info mips_32[] =
|
|
{
|
|
{ "MIPS32 copy prefetched (32 bytes step)", 0, aligned_block_copy_pf32_mips32 },
|
|
{ "MIPS32 2-pass copy prefetched (32 bytes step)", 1, aligned_block_copy_pf32_mips32 },
|
|
{ "MIPS32 fill prefetched (32 bytes step)", 0, aligned_block_fill_pf32_mips32 },
|
|
{ NULL, 0, NULL }
|
|
};
|
|
|
|
bench_info *get_asm_benchmarks(void)
|
|
{
|
|
/* Enable only for MIPS32 processors which have 32 bytes cache line */
|
|
if (check_cpu_feature("MIPS 24K") ||
|
|
check_cpu_feature("MIPS 24Kc") ||
|
|
check_cpu_feature("MIPS 24Kf") ||
|
|
check_cpu_feature("MIPS 74K") ||
|
|
check_cpu_feature("MIPS 74Kc") ||
|
|
check_cpu_feature("MIPS 74Kf") ||
|
|
check_cpu_feature("MIPS 1004K") ||
|
|
check_cpu_feature("MIPS 1004Kc") ||
|
|
check_cpu_feature("MIPS 1004Kf") ||
|
|
check_cpu_feature("MIPS 1074K") ||
|
|
check_cpu_feature("MIPS 1074Kc") ||
|
|
check_cpu_feature("MIPS 1074Kf"))
|
|
{
|
|
return mips_32;
|
|
}
|
|
else
|
|
{
|
|
return empty;
|
|
}
|
|
}
|
|
|
|
bench_info *get_asm_framebuffer_benchmarks(void)
|
|
{
|
|
return empty;
|
|
}
|
|
|
|
#else
|
|
|
|
bench_info *get_asm_benchmarks(void)
|
|
{
|
|
return empty;
|
|
}
|
|
|
|
bench_info *get_asm_framebuffer_benchmarks(void)
|
|
{
|
|
return empty;
|
|
}
|
|
|
|
#endif
|