[BUG] listGET_OWNER_OF_HEAD_ENTRY in fucntion xTaskResumeAll is over-optimized on xtensa platform
superroc opened this issue · 8 comments
Describe the bug
When more than 2 items exist in xPendingReadyList, pxTCB dose not update after the first loop, but retain the first value.
Target
- Development board: [SoC in developing procedure]
- Instruction Set Architecture: [tensilica hifi3z]
- IDE and version: [Xtensa Development Environment Version 10.1.11.3000]
- Toolchain and version: [RI-2022.9-win32 14.09 patch 20220930767596]
Host
- Host OS: [windows]
- Version: [windows 7]
Screenshots
this screenshot shows compile options which does not work well.
Additional context
the following is the compilation result of xTaskResumeAll:
78402998 <xTaskResumeAll>:
{
78402998: 004136 entry a1, 32
configASSERT( uxSchedulerSuspended );
7840299b: 0020c0 memw
7840299e: b3fc599d394e { l32r a3, 78402684 (7821c3c0 <pxReadyTasksLists>); movi a10, 179 }
784029a4: 7bf8457dbf4e { l32r a11, 784025a0 (78204b50 <uxTopUsedPriority+0x4>); addmi a10, a10, 0x800 }
784029aa: 282322 l32i a2, a3, 160
784029ad: 42cc bnez.n a2, 784029b5 <xTaskResumeAll+0x1d>
784029af: 0020f0 nop
784029b2: 0234a5 call8 78404cfc <vAssertCalled>
784029b5 <xTaskResumeAll+0x1d>:
if( xSchedulerRunning != pdFALSE )
784029b5: 0020c0 memw
784029b8: 272322 l32i a2, a3, 156
784029bb: 026216 beqz a2, 784029e5 <xTaskResumeAll+0x4d>
uint32_t ps = XT_RSR_PS ();
784029be: 03e620 rsr.ps a2
784029c1: f9f541 l32r a4, 78401198 (70f30 <_memmap_cacheattr_wb_base+0x2caee>)
XT_WSR_PS ((ps & ~0xFU) | (level & 0xFU));
784029c4: 102240 and a2, a2, a4
784029c7: 02c222 addi a2, a2, 2
784029ca: 13e620 wsr.ps a2
XT_RSYNC ();
784029cd: 002010 rsync
( pxCurrentTCB->uxCriticalNesting )++;
784029d0: 0020c0 memw
784029d3: 232322 l32i a2, a3, 140
784029d6: 102242 l32i a4, a2, 64
784029d9: 01c442 addi a4, a4, 1
784029dc: 106242 s32i a4, a2, 64
if( pxCurrentTCB->uxCriticalNesting == 1 )
784029df: 0020c0 memw
784029e2: 232322 l32i a2, a3, 140
--uxSchedulerSuspended;
784029e5: 0020c0 memw
784029e8: 282322 l32i a2, a3, 160
784029eb: 220b addi.n a2, a2, -1
784029ed: 0020c0 memw
784029f0: 286322 s32i a2, a3, 160
if( uxSchedulerSuspended == ( UBaseType_t ) pdFALSE )
784029f3: 0020c0 memw
784029f6: 4ac182022e { movi.n a2, 0; l32i a4, a3, 160 }
784029fb: 348c beqz.n a4, 78402a02 <xTaskResumeAll+0x6a>
taskEXIT_CRITICAL();
784029fd: ffe4e5 call8 7840284c <vTaskExitCritical>
return xAlreadyYielded;
78402a00: f01d retw.n
78402a02 <xTaskResumeAll+0x6a>:
if( uxCurrentNumberOfTasks > ( UBaseType_t ) 0U )
78402a02: 0020c0 memw
78402a05: 262342 l32i a4, a3, 152
78402a08: ff1416 beqz a4, 784029fd <xTaskResumeAll+0x65>
while( listLIST_IS_EMPTY( &xPendingReadyList ) == pdFALSE )
78402a0b: 0020c0 memw
78402a0e: 958d09f3434e { l32i a4, a3, 248; l32i a6, a3, 0x104 }
78402a14: 40036bc30228010e { movi a5, 1; nop; beqz.w18 a4, 78402ae4 <xTaskResumeAll+0x14c> }
78402a1c: 3648 l32i.n a4, a6, 12
78402a1e: 41da65743e { l32i.n a7, a4, 48; l32i a11, a4, 44 }
78402a23: 05e234a42e { l32i.n a10, a4, 24; addi a8, a4, 28 }
78402a28: c481b3c3945e { addi a9, a4, 8; addx4 a6, a7, a7 }
78402a2e: a06630 addx4 a6, a6, a3
listREMOVE_ITEM( &( pxTCB->xEventListItem ) );
78402a31: 41c24ec43e { l32i.n a12, a4, 36; l32i a13, a4, 32 }
78402a36: 1be8 l32i.n a14, a11, 4
78402a38: 2dc9 s32i.n a12, a13, 8
78402a3a: 94c8 l32i.n a12, a4, 36
78402a3c: 00038681f7681c0e { s32i a13, a12, 4; nop; bne.w18 a14, a8, 78402a46 <xTaskResumeAll+0xae> }
78402a44: 1bc9 s32i.n a12, a11, 4
78402a46: 0020c0 memw
78402a49: 0bc8 l32i.n a12, a11, 0
78402a4b: cc0b addi.n a12, a12, -1
78402a4d: 0020c0 memw
78402a50: 0bc9 s32i.n a12, a11, 0
listREMOVE_ITEM( &( pxTCB->xStateListItem ) );
78402a52: 419a26b42e { l32i.n a11, a4, 16; l32i a12, a4, 12 }
78402a57: 1ad8 l32i.n a13, a10, 4
78402a59: 2cb9 s32i.n a11, a12, 8
78402a5b: 44b8 l32i.n a11, a4, 16
78402a5d: 00039681f6e01b0e { s32i a12, a11, 4; nop; bne.w18 a13, a9, 78402a67 <xTaskResumeAll+0xcf> }
78402a65: 1ab9 s32i.n a11, a10, 4
78402a67: 0020c0 memw
78402a6a: 0ab8 l32i.n a11, a10, 0
78402a6c: bb0b addi.n a11, a11, -1
78402a6e: 0020c0 memw
78402a71: 0ab9 s32i.n a11, a10, 0
prvAddTaskToReadyList( pxTCB );
78402a73: 0020c0 memw
78402a76: 2c23a2 l32i a10, a3, 176
78402a79: 05ba77 bgeu a10, a7, 78402a82 <xTaskResumeAll+0xea>
78402a7c: 0020c0 memw
78402a7f: 2c6372 s32i a7, a3, 176
78402a82: 16a8 l32i.n a10, a6, 4
78402a84: 34a9 s32i.n a10, a4, 12
78402a86: 2ab8 l32i.n a11, a10, 8
78402a88: 44b9 s32i.n a11, a4, 16
78402a8a: 2ab8 l32i.n a11, a10, 8
78402a8c: 2a99 s32i.n a9, a10, 8
78402a8e: 1b99 s32i.n a9, a11, 4
78402a90: 0020c0 memw
78402a93: 06a8 l32i.n a10, a6, 0
78402a95: aa1b addi.n a10, a10, 1
78402a97: 0020c0 memw
78402a9a: 06a9 s32i.n a10, a6, 0
if( pxTCB->uxPriority >= pxCurrentTCB->uxPriority )
78402a9c: 0020c0 memw
78402a9f: 2323a2 l32i a10, a3, 140
78402aa2: caa8 l32i.n a10, a10, 48
78402aa4: 0537a7 bltu a7, a10, 78402aad <xTaskResumeAll+0x115>
xYieldPending = pdTRUE;
78402aa7: 0020c0 memw
78402aaa: 2d6352 s32i a5, a3, 180
while( listLIST_IS_EMPTY( &xPendingReadyList ) == pdFALSE )
78402aad: 0020c0 memw
78402ab0: 4ef1860b2e { movi.n a11, 0; l32i a12, a3, 248 }
78402ab5: 06ad mov.n a10, a6
78402ab7: f76c56 bnez a12, 78402a31 <xTaskResumeAll+0x99>
listREMOVE_ITEM( &( pxTCB->xStateListItem ) );
78402aba: 820032643e { s32i.n a6, a4, 24; movi a5, 0 }
listREMOVE_ITEM( &( pxTCB->xEventListItem ) );
78402abf: b459 s32i.n a5, a4, 44
if( listLIST_IS_EMPTY( pxDelayedTaskList ) != pdFALSE )
78402ac1: 0020c0 memw
78402ac4: 2a2342 l32i a4, a3, 168
78402ac7: 0020c0 memw
78402aca: 0448 l32i.n a4, a4, 0
78402acc: 40036a8f8226ff0e { movi a4, -1; nop; beqz.w18 a4, 78402ade <xTaskResumeAll+0x146> }
xNextTaskUnblockTime = listGET_ITEM_VALUE_OF_HEAD_ENTRY( pxDelayedTaskList );
78402ad4: 0020c0 memw
78402ad7: 2a2342 l32i a4, a3, 168
78402ada: 3448 l32i.n a4, a4, 12
78402adc: 0448 l32i.n a4, a4, 0
78402ade: 0020c0 memw
78402ae1: 2f6342 s32i a4, a3, 188
TickType_t xPendedCounts = xPendedTicks; /* Non-volatile copy. */
78402ae4: 0020c0 memw
78402ae7: 4e8182152e { movi.n a5, 1; l32i a4, a3, 192 }
if( xPendedCounts > ( TickType_t ) 0U )
78402aec: 949c beqz.n a4, 78402b09 <xTaskResumeAll+0x171>
if( xTaskIncrementTick() != pdFALSE )
78402aee: 0020f0 nop
78402af1: 002ea5 call8 78402ddc <xTaskIncrementTick>
78402af4: 4a8c beqz.n a10, 78402afc <xTaskResumeAll+0x164>
xYieldPending = pdTRUE;
78402af6: 0020c0 memw
78402af9: 2d6352 s32i a5, a3, 180
--xPendedCounts;
78402afc: 440b addi.n a4, a4, -1
} while( xPendedCounts > ( TickType_t ) 0U );
78402afe: fec456 bnez a4, 78402aee <xTaskResumeAll+0x156>
xPendedTicks = 0;
78402b01: 0020c0 memw
78402b04: 040c movi.n a4, 0
78402b06: 306342 s32i a4, a3, 192
if( xYieldPending != pdFALSE )
78402b09: 0020c0 memw
78402b0c: 2d2332 l32i a3, a3, 180
78402b0f: eea316 beqz a3, 784029fd <xTaskResumeAll+0x65>
taskYIELD_IF_USING_PREEMPTION();
78402b12: fe8165 call8 78401328 <vPortYield>
78402b15: 1ec3f702216e { movi a2, 1; j 784029fd <xTaskResumeAll+0x65> }
78402b1b <xTaskResumeAll+0x183>:
...
@superroc
Thanks for taking time to report this.
Just wondering if you have checked if the same application code works well with lower optimization settings?
Hi tony,
The same application code works well with lower optimization settings. "optimization level" is O0, "optimization for size" is None.
This seems to be a compiler problem. The code works well after I insert a nop instruction following listGET_OWNER_OF_HEAD_ENTRY when optimization level is O2.
pxTCB = listGET_OWNER_OF_HEAD_ENTRY( ( &xPendingReadyList ) );
__asm__ __volatile__("nop" : : : "memory");
listREMOVE_ITEM( &( pxTCB->xEventListItem ) );
portMEMORY_BARRIER();
listREMOVE_ITEM( &( pxTCB->xStateListItem ) );
prvAddTaskToReadyList( pxTCB );
If this is the only problem, I could circumvent it in this way, but I'm still worried about similar risks elsewhere in the code.
Can you check if portMEMORY_BARRIER
is defined in your portmacro.h?
This macro is undefined in my project
According you tips, I append defination
#define portMEMORY_BARRIER() __asm__ __volatile__("nop" : : : "memory")
in portmacro.h, and I checked the assembly file, it seems work well. Now it's midnight in my country and I don't have EVM board in my hand, I will test the change tomorrow morning.
Thanks a lot!
I have test this solution with different optimization level (O2 and O3) on my project and EVM board, it works well. Thank you very much for your advice.
Is it a already known problem? Should this patch be merged into portable/ThirdParty/XCC/Xtensa/portmacro.h?
I have test this solution with different optimization level (O2 and O3) on my project and EVM board, it works well.
Glad that it works for you.
Should this patch be merged into portable/ThirdParty/XCC/Xtensa/portmacro.h?
Yes, please raise a PR for it.