Есть вот такая функция для тестирования FPU:
float __attribute__ ((noinline)) testfpu(float a) {
return __builtin_sqrtf(__builtin_sqrtf(__builtin_sqrtf(a)));
}
/ (fcn) sym.testfpu (thumb) 80
| ; CALL XREF from 0x08001cb6 (sym.testfpu)
| 0x08001c50 b1eec07a vsqrt.f32 s14, s0 ; 1st root in hardware: s14 = sqrt(s0); s0 is not written before this, so it is the incoming argument
| 0x08001c54 08b5 push {r3, lr} ; save lr for the possible bl below; r3 presumably only pads the frame to 8 bytes - confirm
| 0x08001c56 b4ee477a vcmp.f32 s14, s14 ; compare the result with itself: only a NaN compares unequal
| 0x08001c5a f1ee10fa vmrs apsr_nzcv, fpscr ; copy the FP compare flags into APSR so bne can test them
| ,=< 0x08001c5e 0ed1 bne 0x8001c7e ; result is NaN -> out-of-line sqrtf call at 0x08001c7e
| | ; JMP XREF from 0x08001c86 (sym.testfpu)
| .--> 0x08001c60 f1eec77a vsqrt.f32 s15, s14 ; 2nd root in hardware: s15 = sqrt(s14)
| || 0x08001c64 f4ee677a vcmp.f32 s15, s15 ; same NaN self-compare on the 2nd result
| || 0x08001c68 f1ee10fa vmrs apsr_nzcv, fpscr ; FP flags -> APSR
| ,===< 0x08001c6c 11d1 bne 0x8001c92 ; result is NaN -> out-of-line sqrtf call at 0x08001c92
| ||| ; JMP XREF from 0x08001c9e (sym.testfpu)
| .----> 0x08001c6e b1eee70a vsqrt.f32 s0, s15 ; 3rd root in hardware, written straight to s0, which is still live at the return below
| |||| 0x08001c72 b4ee400a vcmp.f32 s0, s0 ; same NaN self-compare on the 3rd result
| |||| 0x08001c76 f1ee10fa vmrs apsr_nzcv, fpscr ; FP flags -> APSR
| ,=====< 0x08001c7a 05d1 bne 0x8001c88 ; result is NaN -> out-of-line sqrtf call at 0x08001c88
| ||||| ; JMP XREF from 0x08001c90 (sym.testfpu)
| .------> 0x08001c7c 08bd pop {r3, pc} ; return: pops the saved lr into pc
| |||||`-> 0x08001c7e 08f067f9 bl sym.sqrtf ; slow path for root 1: s0 still holds the input; presumably called so the library can report the domain error (errno) - confirm, e.g. by rebuilding with -fno-math-errno
| ||||| 0x08001c82 b0ee407a vmov.f32 s14, s0 ; carry on with the library's result in place of the vsqrt result
| ||||`==< 0x08001c86 ebe7 b 0x8001c60 ; rejoin the main path at the 2nd root
| |`-----> 0x08001c88 b0ee670a vmov.f32 s0, s15 ; slow path for root 3: put the 3rd root's input back into s0
| | || 0x08001c8c 08f060f9 bl sym.sqrtf ; library call; its result stays in s0
| `======< 0x08001c90 f4e7 b 0x8001c7c ; rejoin the main path at the return
| |`---> 0x08001c92 b0ee470a vmov.f32 s0, s14 ; slow path for root 2: put the 2nd root's input into s0
| | 0x08001c96 08f05bf9 bl sym.sqrtf ; library call on that input
| | 0x08001c9a f0ee407a vmov.f32 s15, s0 ; carry on with the library's result in place of the vsqrt result
\ `====< 0x08001c9e e6e7 b 0x8001c6e ; rejoin the main path at the 3rd root
Флаги компиляции для листинга выше: -O2 -mfpu=vfpv4-d16 -mfloat-abi=hard
PS: Забавно, что без использования FPU код получается куда проще:
/ (fcn) sym.testfpu (thumb) 18
| ; CALL XREF from 0x08001b30 (sym.testfpu)
| 0x08001b0c 08b5 push {r3, lr} ; save lr across the two bl calls below
| 0x08001b0e 08f0fff8 bl sym.sqrtf ; 1st root: the incoming argument is handed to sqrtf untouched
| 0x08001b12 08f0fdf8 bl sym.sqrtf ; 2nd root: no moves in between, so the previous result is passed on as-is
| 0x08001b16 bde80840 pop.w {r3, lr} ; restore lr before the tail call
\ ,=< 0x08001b1a 08f0f9b8 b.w sym.sqrtf ; 3rd root as a tail call (b.w, not bl): sqrtf returns directly to testfpu's caller