Nios II crt0 简要分析
crt0实际上指的是C runtime,而“0”则代表它是最开始运行的部分。在各种CPU体系的GNU/gcc工具链中,它都是程序最先执行的地方。不同的CPU体系,甚至同一CPU体系下使用的不同版本的C库,其crt0的实现也各不相同。对于Altera的nios2-elf-gcc而言,它默认链接的C库是一个占用代码空间稍微大一些的库;当然,也可以自己指定crt0.o,即不使用默认的启动文件,之后的启动过程全部由自己编写的代码实现,在nios2-elf-gcc链接时可以指定参数 -msys-crt0=/path_to_your_crt0/crt0.o 。Altera的Nios II在生成BSP的时候,如果指定采用small C库,生成的BSP代码中就包含了crt0.S。
下面,笔者以一个简化版的crt0.S为例作简要分析。简化的前提是:没有MMU/MPU,没有数据缓存,使用内部中断控制器,无调试单元,并且不考虑仿真等因素。
#include "nios2.h"
/*************************************************************************\
| EXTERNAL REFERENCES |
\*************************************************************************/
/*
* The entry point for user code is either "main" in hosted mode, or
* "alt_main" in standalone mode. Both names are declared global here so
* that the user can build them into libraries, rather than supplying them
* in object files at link time.
*
* NOTE(review): in this simplified listing only "main" is actually called
* (from _start); "alt_main" is declared but not called by the visible code.
*/
.globl main
.globl alt_main
/*
* Declare the software multiply/divide and trap handlers as global,
* so that if they are provided, they will appear in the executable.
* Neither symbol is defined or called in this file.
*/
.globl alt_exception_muldiv
.globl alt_exception_trap
/*
* Linker defined symbols used to initialize bss: _start zeroes every word
* in the range [__bss_start, __bss_end).
*/
.globl __bss_start
.globl __bss_end
/*************************************************************************\
| RESET SECTION (.entry) |
\*************************************************************************/
/*
* This is the reset entry point for Nios II.
*
* At reset, only the cache line which contain the reset vector is
* initialized by the hardware. The code within the first cache line
* initializes the remainder of the instruction cache.
*/
/*
* Reset code lives in its own ".entry" section, flagged allocatable ("a")
* and executable ("x").
* NOTE(review): ".align 5" is presumably a power-of-two exponent on this
* target (32-byte alignment, i.e. one cache line) - confirm against the
* assembler documentation.
*/
.section .entry, "xa"
.align 5
/*
* Explicitly allow the use of r1 (the assembler temporary register)
* within this code. This register is normally reserved for the use of
* the assembler. It is used below to hold the address of _start.
*/
.set noat
/*
* Some tools want to know where the reset vector is.
* Code isn't always provided at the reset vector but at least the
* __reset label always contains the reset vector address because
* it is defined at the start of the .entry section.
*/
.globl __reset
.type __reset, @function
__reset:
/*
* Initialize the instruction cache if present (i.e. size > 0) and
* code is allowed at the reset address (ALT_ALLOW_CODE_AT_RESET).
* In this simplified listing there is no separate switch for skipping
* the loop when optimizing for RTL simulation.
*/
#if NIOS2_ICACHE_SIZE > 0 && defined(ALT_ALLOW_CODE_AT_RESET)
/*
* Assume the instruction cache size is always a power of two.
* Sizes above 0x8000 are loaded with movhi, which sets only the upper
* 16 bits; this relies on the power-of-two assumption so that the lower
* 16 bits of the size are zero. Smaller sizes fit in movui's immediate.
*/
#if NIOS2_ICACHE_SIZE > 0x8000
movhi r2, %hi(NIOS2_ICACHE_SIZE)
#else
movui r2, NIOS2_ICACHE_SIZE
#endif
/*
* r2 counts down from the cache size in steps of one cache line; initi is
* issued for each value. The loop exits once r2 is no longer greater than
* zero (signed compare).
*/
0:
initi r2
addi r2, r2, -NIOS2_ICACHE_LINE_SIZE
bgt r2, zero, 0b
1:
/*
* The following debug information tells the ISS not to run the loop above
* but to perform its actions using faster internal code.
* 0b and 1b are the start and end labels of that loop.
*/
.pushsection .debug_alt_sim_info
.int 1, 1, 0b, 1b
.popsection
#endif /* Initialize Instruction Cache */
/*
* Jump to the _start entry point in the .text section.
* The full 32-bit address is built in r1 (upper half with movhi, lower half
* OR-ed in) and reached with a register-indirect jump.
*/
movhi r1, %hi(_start)
ori r1, r1, %lo(_start)
jmp r1
.size __reset, . - __reset
/*
* When not using exit (ALT_NO_EXIT defined), provide an _exit symbol to
* prevent unresolved references to _exit from the linker script.
*
* _exit is only a bare global label at the current position in the .entry
* section (directly after the reset code); no instructions are emitted for
* it in this file.
*/
#ifdef ALT_NO_EXIT
.globl _exit
_exit:
#endif
/*************************************************************************\
| TEXT SECTION (.text) |
\*************************************************************************/
/*
* Start of the .text section, and also the code entry point when
* the code is executed by a bootloader rather than directly from reset.
*/
/*
* _start: C runtime start-up sequence. In order it
*   1. initializes the data cache (only if NIOS2_DCACHE_SIZE > 0),
*   2. loads the stack pointer and the global pointer,
*   3. zeroes the .bss region,
*   4. calls alt_load,
*   5. calls main, and spins forever if main returns.
*/
.section .text
.align 2
.globl _start
.type _start, @function
_start:
/*
* Initialize the data cache if present (i.e. size > 0).
* In this simplified listing there is no separate switch for skipping
* the loop when optimizing for RTL simulation.
*/
#if NIOS2_DCACHE_SIZE > 0
/*
* Assume the data cache size is always a power of two.
* Sizes above 0x8000 are loaded with movhi (upper 16 bits only), which
* relies on the power-of-two assumption; smaller sizes use movui.
*/
#if NIOS2_DCACHE_SIZE > 0x8000
movhi r2, %hi(NIOS2_DCACHE_SIZE)
#else
movui r2, NIOS2_DCACHE_SIZE
#endif
/*
* r2 counts down from the cache size in steps of one cache line; initd is
* issued for each value until r2 is no longer greater than zero.
*/
0:
initd 0(r2)
addi r2, r2, -NIOS2_DCACHE_LINE_SIZE
bgt r2, zero, 0b
1:
/*
* The following debug information tells the ISS not to run the loop above
* but to perform its actions using faster internal code.
*/
.pushsection .debug_alt_sim_info
.int 2, 1, 0b, 1b
.popsection
#endif /* Initialize Data Cache */
/*
* Now that the caches are initialized, set up the stack pointer.
* The value provided by the linker is assumed to be correctly aligned.
* The 32-bit address is built from its upper and lower 16-bit halves.
*/
movhi sp, %hi(__alt_stack_pointer)
ori sp, sp, %lo(__alt_stack_pointer)
/* Set up the global pointer from the linker symbol _gp. */
movhi gp, %hi(_gp)
ori gp, gp, %lo(_gp)
/*
* Clear the BSS (done unconditionally in this simplified listing).
*
* This uses the symbols: __bss_start and __bss_end, which are defined
* by the linker script. They mark the beginning and the end of the bss
* region. The linker script guarantees that these values are word aligned.
*
* r2 = current address, r3 = end address (exclusive).
*/
movhi r2, %hi(__bss_start)
ori r2, r2, %lo(__bss_start)
movhi r3, %hi(__bss_end)
ori r3, r3, %lo(__bss_end)
/* Empty region: skip the loop so that nothing is written. */
beq r2, r3, 1f
/* Store one zero word per iteration; addresses are compared unsigned. */
0:
stw zero, (r2)
addi r2, r2, 4
bltu r2, r3, 0b
1:
/*
* The following debug information tells the ISS not to run the loop above
* but to perform its actions using faster internal code.
*/
.pushsection .debug_alt_sim_info
.int 3, 1, 0b, 1b
.popsection
/*
* The alt_load() facility is normally used when there is no bootloader.
* It copies some sections into RAM so it acts like a mini-bootloader.
* It is called unconditionally here; alt_load is not defined in this file.
*/
call alt_load
/*
* NOTE(review): the full start-up file sets up the stack limit at this
* point (if required). This simplified listing contains no stack-limit
* set-up code here.
*/
/*
* Call the C entry point. It should never return.
* This listing calls main directly rather than alt_main.
*/
call main
/* Wait in infinite loop in case main does return. */
alt_after_alt_main:
br alt_after_alt_main
.size _start, . - _start
/*
* Information about the stack base, intended for stack overflow checking.
*
* alt_stack_limit_value is a global 4-byte object in the .sdata section
* whose initial value is the address of the symbol __alt_stack_limit.
*
* NOTE(review): this simplified listing emits the object unconditionally,
* not only when stack overflow checking is enabled, so __alt_stack_limit
* must be resolvable at link time - presumably it is provided by the
* linker script; confirm.
*/
.globl alt_stack_limit_value
.section .sdata,"aws",@progbits
.align 2
.type alt_stack_limit_value, @object
.size alt_stack_limit_value, 4
alt_stack_limit_value:
.long __alt_stack_limit
以下是反汇编得到的一些信息:

首先介绍一下我这个系统:链接脚本指定的几个section依次为.entry、.exceptions、.text,复位地址为0x0。复位后,先初始化指令缓存,然后跳转至_start入口;在_start中依次初始化数据缓存、设置栈指针寄存器(sp)、设置全局指针寄存器(gp)。需要注意的是,与MIPS32体系类似,Nios II本身并没有提供push、pop操作;这些寄存器虽然有约定的名字和固定的用途,但本质上仍是通用寄存器,至于怎么用,取决于编译器。接下来判断.bss段的大小,该段保存了一些未初始化的全局变量等,需要将其清零,这时候使用恒为0的r0寄存器(zero)就很方便。然后调用alt_load,这个函数的使命是:根据链接脚本的设定,判断需要搬移的数据,并把它们搬移到链接脚本指定的位置,主要包括可读可写数据段(rw-).rwdata、异常入口(exception handler)以及只读数据段(r--).rodata。最后,跳转至C语言主函数入口main。
