YangLyu

Nios II crt0 简要分析

0
阅读(3428)

    Crt0,实际上指的是C runtime,而“0”则代表它是最开始运行的部分。在不同CPU体系的GNU/gcc编译器中,它是程序最开始的地方。而根据不同CPU体系或者同一CPU体系也有使用不同版本C库的区别。对于Altera的nios2-elf-gcc而言,它默认连接的C库是一个占用代码空间稍微大一些的库,当然,也可以自己指定crt0.o,即不连接默认的C库,而后面所有的过程使用自己编写的代码实现,在nios2-elf-gcc连接时可以指定参数  -msys-crt0=/path_to_your_crt0/crt0.o 。 Altera的Nios II在生成BSP的时候,如果指定采用smallC库的话,Altera生成的BSP代码中就包含了crt0.S。

    下面,笔者考虑使用没有MMU/MPU,没有数据缓存,使用内嵌中断控制器,无调试单元,不考虑仿真等因素下crt0.S的简化版,并作简要分析。

#include "nios2.h"


/*************************************************************************\
|                         EXTERNAL REFERENCES                             |
\*************************************************************************/

/*
 * The entry point for user code is either "main" in hosted mode, or
 * "alt_main" in standalone mode. These are explicitly referenced here,
 * to ensure they are built into the executable. This allows the user
 * to build them into libraries, rather than supplying them in object
 * files at link time.
 */
.globl main
.globl alt_main

/*
 * Create a reference to the software multiply/divide and trap handers,
 * so that if they are provided, they will appear in the executable.
 */
.globl alt_exception_muldiv
.globl alt_exception_trap


/*
 * Linker defined symbols used to initialize bss.
 */
.globl __bss_start
.globl __bss_end

/*************************************************************************\
|                         RESET SECTION (.entry)                          |
\*************************************************************************/

/*
 * This is the reset entry point for Nios II.
 *
 * At reset, only the cache line which contain the reset vector is
 * initialized by the hardware. The code within the first cache line
 * initializes the remainder of the instruction cache.
 */

    .section .entry, "xa"
    .align 5

/*
 * Explicitly allow the use of r1 (the assembler temporary register)
 * within this code. This register is normally reserved for the use of
 * the assembler.
 */
    .set noat

/*
 * Some tools want to know where the reset vector is.
 * Code isn't always provided at the reset vector but at least the
 * __reset label always contains the reset vector address because
 * it is defined at the start of the .entry section.
 */

    .globl __reset
    .type __reset, @function
__reset:

/*
 * Initialize the instruction cache if present (i.e. size > 0) and
 * reset code is allowed unless optimizing for RTL simulation.
 * RTL simulations can ensure the instruction cache is already initialized
 * so skipping this loop speeds up RTL simulation.
 */

#if NIOS2_ICACHE_SIZE > 0 && defined(ALT_ALLOW_CODE_AT_RESET)
    /* Assume the instruction cache size is always a power of two. */
#if NIOS2_ICACHE_SIZE > 0x8000
    movhi r2, %hi(NIOS2_ICACHE_SIZE)
#else
    movui r2, NIOS2_ICACHE_SIZE
#endif

0:
    initi r2
    addi r2, r2, -NIOS2_ICACHE_LINE_SIZE
    bgt r2, zero, 0b
1:

    /*
     * The following debug information tells the ISS not to run the loop above
     * but to perform its actions using faster internal code.
     */
    .pushsection .debug_alt_sim_info
    .int 1, 1, 0b, 1b
    .popsection
#endif /* Initialize Instruction Cache */

    /* Jump to the _start entry point in the .text section. */
    movhi r1, %hi(_start)
    ori r1, r1, %lo(_start)
    jmp r1

    .size __reset, . - __reset

/*
 * When not using exit, provide an _exit symbol to prevent unresolved
 * references to _exit from the linker script.
 */
#ifdef ALT_NO_EXIT
    .globl _exit
_exit:
#endif

/*************************************************************************\
|                          TEXT SECTION (.text)                           |
\*************************************************************************/

/*
 * Start of the .text section, and also the code entry point when
 * the code is executed by a bootloader rather than directly from reset.
 */
    .section .text
    .align 2

    .globl _start
    .type _start, @function
_start:
/*
 * Initialize the data cache if present (i.e. size > 0) and not
 * optimizing for RTL simulation.
 * RTL simulations can ensure the data cache is already initialized
 * so skipping this loop speeds up RTL simulation.
 */

#if NIOS2_DCACHE_SIZE > 0

    /* Assume the data cache size is always a power of two. */
#if NIOS2_DCACHE_SIZE > 0x8000
    movhi r2, %hi(NIOS2_DCACHE_SIZE)
#else
    movui r2, NIOS2_DCACHE_SIZE
#endif

0:
    initd 0(r2)
    addi r2, r2, -NIOS2_DCACHE_LINE_SIZE
    bgt r2, zero, 0b
1:

    /*
     * The following debug information tells the ISS not to run the loop above
     * but to perform its actions using faster internal code.
     */
    .pushsection .debug_alt_sim_info
    .int 2, 1, 0b, 1b
    .popsection

#endif /* Initialize Data Cache */

    /*
     * Now that the caches are initialized, set up the stack pointer.
     * The value provided by the linker is assumed to be correctly aligned.
     *
  */
 
    movhi sp, %hi(__alt_stack_pointer)
    ori sp, sp, %lo(__alt_stack_pointer)

    /* Set up the global pointer. */
    movhi gp, %hi(_gp)
    ori gp, gp, %lo(_gp)


/*
 * Clear the BSS if not optimizing for RTL simulation.
 *
 * This uses the symbols: __bss_start and __bss_end, which are defined
 * by the linker script. They mark the begining and the end of the bss
 * region. The linker script guarantees that these values are word aligned.
 */
 
 movhi r2, %hi(__bss_start)
    ori r2, r2, %lo(__bss_start)

    movhi r3, %hi(__bss_end)
    ori r3, r3, %lo(__bss_end)

    beq r2, r3, 1f

0:
    stw zero, (r2)
    addi r2, r2, 4
    bltu r2, r3, 0b

1:
    /*
     * The following debug information tells the ISS not to run the loop above
     * but to perform its actions using faster internal code.
     */
    .pushsection .debug_alt_sim_info
    .int 3, 1, 0b, 1b
    .popsection

/*
 * The alt_load() facility is normally used when there is no bootloader.
 * It copies some sections into RAM so it acts like a mini-bootloader.
 */
 call alt_load

/*
 * Set up the stack limit (if required).  The linker has set up the
 * copy of the variable which is in memory.
 */

    /* Call the C entry point. It should never return. */
    call main

    /* Wait in infinite loop in case alt_main does return. */
alt_after_alt_main:
    br alt_after_alt_main

    .size _start, . - _start

/*
 * Add information about the stack base if stack overflow checking is enabled.
 */
    .globl  alt_stack_limit_value
    .section .sdata,"aws",@progbits
    .align  2
    .type   alt_stack_limit_value, @object
    .size   alt_stack_limit_value, 4
alt_stack_limit_value:
    .long   __alt_stack_limit


 

    以下是反汇编得到的一些信息:

    首先,先介绍一下对于我这个系统,连接脚本指定了几个section的分布为:.entry, .exceptions, .text, 复位地址为0x0,复位后,设置指令缓存,跳转至_start入口,然后设置数据缓存,设置栈顶寄存器,设置全局指针寄存器,需要注意的是,跟MIPS32体系类似,本身并没有提供push,pop操作,虽然这些寄存器有了约定的名字,作为固定的使用,但本质上来说还是通用寄存器,至于怎么用,取决于编译器。接下来,判断.bss段的大小,这里保存了一些未初始化的全局变量等,清零之,这时候就可以使用r0寄存器来方便操作。接下来,调用alt_load,这个函数需要完成的使命是,根据连接脚本的设定,判断需要搬移的数据,然后搬移到连接脚本指定的位置,主要是可读可写段(rw-).rwdata,异常入口exception handler,只读数据段(r--).rodata,接下来,跳转至C语言主函数入口main。