project:build.sh: Added fastboot support; custom modifications to U-Boot and kernel implemented using patches.

project:cfg:BoardConfig_IPC: Added fastboot BoardConfig file and firmware post-scripts, distinguishing between
the BoardConfigs for Luckfox Pico Pro and Luckfox Pico Max. project:app: Added fastboot_client and rk_smart_door
for quick boot applications; updated rkipc app to adapt to the latest media library. media:samples: Added more
usage examples. media:rockit: Fixed bugs; removed support for retrieving data frames from VPSS. media:isp:
Updated rkaiq library and related tools to support connection to RKISP_Tuner. sysdrv:Makefile: Added support for
compiling drv_ko on Luckfox Pico Ultra W using Ubuntu; added support for custom root filesystem.
sysdrv:tools:board: Updated Buildroot optional mirror sources, updated some software versions, and stored device
tree files and configuration files that undergo multiple modifications for U-Boot and kernel separately.
sysdrv:source:mcu: Used RISC-V MCU SDK with RT-Thread system, mainly for initializing camera AE during quick
boot. sysdrv:source:uboot: Added support for fastboot; added high baud rate DDR bin for serial firmware upgrades.
sysdrv:source:kernel: Upgraded to version 5.10.160; increased NPU frequency for RV1106G3; added support for
fastboot.

Signed-off-by: luckfox-eng29 <eng29@luckfox.com>
This commit is contained in:
luckfox-eng29
2024-08-21 10:05:47 +08:00
parent e79fd21975
commit 8f34c2760d
20902 changed files with 6567362 additions and 11248383 deletions

View File

@@ -15,7 +15,6 @@
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
#include <asm/text-patching.h>
#include <asm/asm-prototypes.h>
static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
@@ -213,6 +212,14 @@ static void jit_fill_hole(void *area, unsigned int size)
struct jit_context {
int cleanup_addr; /* Epilogue code offset */
/*
* Program specific offsets of labels in the code; these rely on the
* JIT doing at least 2 passes, recording the position on the first
* pass, only to generate the correct offset on the second pass.
*/
int tail_call_direct_label;
int tail_call_indirect_label;
};
/* Maximum number of bytes emitted while JITing one eBPF insn */
@@ -372,20 +379,40 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
}
static int get_pop_bytes(bool *callee_regs_used)
#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
{
int bytes = 0;
u8 *prog = *pprog;
int cnt = 0;
if (callee_regs_used[3])
bytes += 2;
if (callee_regs_used[2])
bytes += 2;
if (callee_regs_used[1])
bytes += 2;
if (callee_regs_used[0])
bytes += 1;
#ifdef CONFIG_RETPOLINE
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
EMIT_LFENCE();
EMIT2(0xFF, 0xE0 + reg);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
} else
#endif
EMIT2(0xFF, 0xE0 + reg);
return bytes;
*pprog = prog;
}
static void emit_return(u8 **pprog, u8 *ip)
{
u8 *prog = *pprog;
int cnt = 0;
if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
emit_jump(&prog, &__x86_return_thunk, ip);
} else {
EMIT1(0xC3); /* ret */
if (IS_ENABLED(CONFIG_SLS))
EMIT1(0xCC); /* int3 */
}
*pprog = prog;
}
/*
@@ -403,30 +430,12 @@ static int get_pop_bytes(bool *callee_regs_used)
* out:
*/
static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
u32 stack_depth)
u32 stack_depth, u8 *ip,
struct jit_context *ctx)
{
int tcc_off = -4 - round_up(stack_depth, 8);
u8 *prog = *pprog;
int pop_bytes = 0;
int off1 = 42;
int off2 = 31;
int off3 = 9;
int cnt = 0;
/* count the additional bytes used for popping callee regs from stack
* that need to be taken into account for each of the offsets that
* are used for bailing out of the tail call
*/
pop_bytes = get_pop_bytes(callee_regs_used);
off1 += pop_bytes;
off2 += pop_bytes;
off3 += pop_bytes;
if (stack_depth) {
off1 += 7;
off2 += 7;
off3 += 7;
}
u8 *prog = *pprog, *start = *pprog;
int cnt = 0, offset;
/*
* rdi - pointer to ctx
@@ -441,8 +450,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
offset = ctx->tail_call_indirect_label - (prog + 2 - start);
EMIT2(X86_JBE, offset); /* jbe out */
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
@@ -450,8 +460,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
*/
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE)
EMIT2(X86_JA, OFFSET2); /* ja out */
offset = ctx->tail_call_indirect_label - (prog + 2 - start);
EMIT2(X86_JA, offset); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
@@ -464,12 +475,11 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
* goto out;
*/
EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */
#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE)
EMIT2(X86_JE, OFFSET3); /* je out */
*pprog = prog;
pop_callee_regs(pprog, callee_regs_used);
prog = *pprog;
offset = ctx->tail_call_indirect_label - (prog + 2 - start);
EMIT2(X86_JE, offset); /* je out */
pop_callee_regs(&prog, callee_regs_used);
EMIT1(0x58); /* pop rax */
if (stack_depth)
@@ -486,42 +496,21 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
* rdi == ctx (1st arg)
* rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
*/
RETPOLINE_RCX_BPF_JIT();
emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
/* out: */
ctx->tail_call_indirect_label = prog - start;
*pprog = prog;
}
static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
u8 **pprog, int addr, u8 *image,
bool *callee_regs_used, u32 stack_depth)
u8 **pprog, u8 *ip,
bool *callee_regs_used, u32 stack_depth,
struct jit_context *ctx)
{
int tcc_off = -4 - round_up(stack_depth, 8);
u8 *prog = *pprog;
int pop_bytes = 0;
int off1 = 20;
int poke_off;
int cnt = 0;
/* count the additional bytes used for popping callee regs to stack
* that need to be taken into account for jump offset that is used for
* bailing out from of the tail call when limit is reached
*/
pop_bytes = get_pop_bytes(callee_regs_used);
off1 += pop_bytes;
/*
* total bytes for:
* - nop5/ jmpq $off
* - pop callee regs
* - sub rsp, $val if depth > 0
* - pop rax
*/
poke_off = X86_PATCH_SIZE + pop_bytes + 1;
if (stack_depth) {
poke_off += 7;
off1 += 7;
}
u8 *prog = *pprog, *start = *pprog;
int cnt = 0, offset;
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
@@ -529,28 +518,30 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
*/
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
EMIT2(X86_JA, off1); /* ja out */
offset = ctx->tail_call_direct_label - (prog + 2 - start);
EMIT2(X86_JA, offset); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
poke->tailcall_bypass = ip + (prog - start);
poke->adj_off = X86_TAIL_CALL_OFFSET;
poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE;
poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE;
emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
poke->tailcall_bypass);
*pprog = prog;
pop_callee_regs(pprog, callee_regs_used);
prog = *pprog;
pop_callee_regs(&prog, callee_regs_used);
EMIT1(0x58); /* pop rax */
if (stack_depth)
EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
prog += X86_PATCH_SIZE;
/* out: */
ctx->tail_call_direct_label = prog - start;
*pprog = prog;
}
@@ -1141,8 +1132,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
/* speculation barrier */
case BPF_ST | BPF_NOSPEC:
if (boot_cpu_has(X86_FEATURE_XMM2))
/* Emit 'lfence' */
EMIT3(0x0F, 0xAE, 0xE8);
EMIT_LFENCE();
break;
/* ST: *(u8*)(dst_reg + off) = imm */
@@ -1258,8 +1248,9 @@ xadd: if (is_imm8(insn->off))
case BPF_JMP | BPF_CALL:
func = (u8 *) __bpf_call_base + imm32;
if (tail_call_reachable) {
/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
EMIT3_off32(0x48, 0x8B, 0x85,
-(bpf_prog->aux->stack_depth + 8));
-round_up(bpf_prog->aux->stack_depth, 8) - 8);
if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7))
return -EINVAL;
} else {
@@ -1271,13 +1262,16 @@ xadd: if (is_imm8(insn->off))
case BPF_JMP | BPF_TAIL_CALL:
if (imm32)
emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
&prog, addrs[i], image,
&prog, image + addrs[i - 1],
callee_regs_used,
bpf_prog->aux->stack_depth);
bpf_prog->aux->stack_depth,
ctx);
else
emit_bpf_tail_call_indirect(&prog,
callee_regs_used,
bpf_prog->aux->stack_depth);
bpf_prog->aux->stack_depth,
image + addrs[i - 1],
ctx);
break;
/* cond jump */
@@ -1462,7 +1456,7 @@ emit_jmp:
ctx->cleanup_addr = proglen;
pop_callee_regs(&prog, callee_regs_used);
EMIT1(0xC9); /* leave */
EMIT1(0xC3); /* ret */
emit_return(&prog, image + addrs[i - 1] + (prog - temp));
break;
default:
@@ -1903,7 +1897,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (flags & BPF_TRAMP_F_SKIP_FRAME)
/* skip our return address and return to parent */
EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
EMIT1(0xC3); /* ret */
emit_return(&prog, prog);
/* Make sure the trampoline generation logic doesn't overflow */
if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
ret = -EFAULT;
@@ -1916,26 +1910,6 @@ cleanup:
return ret;
}
static int emit_fallback_jump(u8 **pprog)
{
u8 *prog = *pprog;
int err = 0;
#ifdef CONFIG_RETPOLINE
/* Note that this assumes the the compiler uses external
* thunks for indirect calls. Both clang and GCC use the same
* naming convention for external thunks.
*/
err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
#else
int cnt = 0;
EMIT2(0xFF, 0xE2); /* jmp rdx */
#endif
*pprog = prog;
return err;
}
static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
{
u8 *jg_reloc, *prog = *pprog;
@@ -1957,9 +1931,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
if (err)
return err;
err = emit_fallback_jump(&prog); /* jmp thunk/indirect */
if (err)
return err;
emit_indirect_jump(&prog, 2 /* rdx */, prog);
*pprog = prog;
return 0;

View File

@@ -15,6 +15,7 @@
#include <asm/cacheflush.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
#include <asm/asm-prototypes.h>
#include <linux/bpf.h>
/*
@@ -1267,6 +1268,21 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth)
*pprog = prog;
}
static int emit_jmp_edx(u8 **pprog, u8 *ip)
{
u8 *prog = *pprog;
int cnt = 0;
#ifdef CONFIG_RETPOLINE
EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5));
#else
EMIT2(0xFF, 0xE2);
#endif
*pprog = prog;
return cnt;
}
/*
* Generate the following code:
* ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
@@ -1280,7 +1296,7 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth)
* goto *(prog->bpf_func + prologue_size);
* out:
*/
static void emit_bpf_tail_call(u8 **pprog)
static void emit_bpf_tail_call(u8 **pprog, u8 *ip)
{
u8 *prog = *pprog;
int cnt = 0;
@@ -1362,7 +1378,7 @@ static void emit_bpf_tail_call(u8 **pprog)
* eax == ctx (1st arg)
* edx == prog->bpf_func + prologue_size
*/
RETPOLINE_EDX_BPF_JIT();
cnt += emit_jmp_edx(&prog, ip + cnt);
if (jmp_label1 == -1)
jmp_label1 = cnt;
@@ -1929,7 +1945,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
break;
}
case BPF_JMP | BPF_TAIL_CALL:
emit_bpf_tail_call(&prog);
emit_bpf_tail_call(&prog, image + addrs[i - 1]);
break;
/* cond jump */