FreeRTOS/FreeRTOS-Kernel

[BUG] listGET_OWNER_OF_HEAD_ENTRY in fucntion xTaskResumeAll is over-optimized on xtensa platform

superroc opened this issue · 8 comments

Describe the bug
When more than 2 items exist in xPendingReadyList, pxTCB dose not update after the first loop, but retain the first value.

Target

  • Development board: [SoC in developing procedure]
  • Instruction Set Architecture: [tensilica hifi3z]
  • IDE and version: [Xtensa Development Environment Version 10.1.11.3000]
  • Toolchain and version: [RI-2022.9-win32 14.09 patch 20220930767596]

Host

  • Host OS: [windows]
  • Version: [windows 7]

Screenshots
this screenshot shows compile options which does not work well.
1723220925465

Additional context
the following is the compilation result of xTaskResumeAll:

78402998 <xTaskResumeAll>:
{
78402998:	004136               	entry	a1, 32
    configASSERT( uxSchedulerSuspended );
7840299b:	0020c0               	memw
7840299e:	b3fc599d394e              	{ l32r	a3, 78402684 (7821c3c0 <pxReadyTasksLists>); movi	a10, 179 }
784029a4:	7bf8457dbf4e              	{ l32r	a11, 784025a0 (78204b50 <uxTopUsedPriority+0x4>); addmi	a10, a10, 0x800 }
784029aa:	282322               	l32i	a2, a3, 160
784029ad:	42cc                	bnez.n	a2, 784029b5 <xTaskResumeAll+0x1d>
784029af:	0020f0               	nop
784029b2:	0234a5               	call8	78404cfc <vAssertCalled>

784029b5 <xTaskResumeAll+0x1d>:
        if( xSchedulerRunning != pdFALSE )
784029b5:	0020c0               	memw
784029b8:	272322               	l32i	a2, a3, 156
784029bb:	026216               	beqz	a2, 784029e5 <xTaskResumeAll+0x4d>
    uint32_t ps = XT_RSR_PS ();
784029be:	03e620               	rsr.ps	a2
784029c1:	f9f541               	l32r	a4, 78401198 (70f30 <_memmap_cacheattr_wb_base+0x2caee>)
    XT_WSR_PS ((ps & ~0xFU) | (level & 0xFU));
784029c4:	102240               	and	a2, a2, a4
784029c7:	02c222               	addi	a2, a2, 2
784029ca:	13e620               	wsr.ps	a2
    XT_RSYNC ();
784029cd:	002010               	rsync
            ( pxCurrentTCB->uxCriticalNesting )++;
784029d0:	0020c0               	memw
784029d3:	232322               	l32i	a2, a3, 140
784029d6:	102242               	l32i	a4, a2, 64
784029d9:	01c442               	addi	a4, a4, 1
784029dc:	106242               	s32i	a4, a2, 64
            if( pxCurrentTCB->uxCriticalNesting == 1 )
784029df:	0020c0               	memw
784029e2:	232322               	l32i	a2, a3, 140
        --uxSchedulerSuspended;
784029e5:	0020c0               	memw
784029e8:	282322               	l32i	a2, a3, 160
784029eb:	220b                	addi.n	a2, a2, -1
784029ed:	0020c0               	memw
784029f0:	286322               	s32i	a2, a3, 160
        if( uxSchedulerSuspended == ( UBaseType_t ) pdFALSE )
784029f3:	0020c0               	memw
784029f6:	4ac182022e            	{ movi.n	a2, 0; l32i	a4, a3, 160 }
784029fb:	348c                	beqz.n	a4, 78402a02 <xTaskResumeAll+0x6a>
    taskEXIT_CRITICAL();
784029fd:	ffe4e5               	call8	7840284c <vTaskExitCritical>
    return xAlreadyYielded;
78402a00:	f01d                	retw.n

78402a02 <xTaskResumeAll+0x6a>:
            if( uxCurrentNumberOfTasks > ( UBaseType_t ) 0U )
78402a02:	0020c0               	memw
78402a05:	262342               	l32i	a4, a3, 152
78402a08:	ff1416               	beqz	a4, 784029fd <xTaskResumeAll+0x65>
                while( listLIST_IS_EMPTY( &xPendingReadyList ) == pdFALSE )
78402a0b:	0020c0               	memw
78402a0e:	958d09f3434e              	{ l32i	a4, a3, 248; l32i	a6, a3, 0x104 }
78402a14:	40036bc30228010e 	{ movi	a5, 1; nop; beqz.w18	a4, 78402ae4 <xTaskResumeAll+0x14c> }
78402a1c:	3648                	l32i.n	a4, a6, 12
78402a1e:	41da65743e            	{ l32i.n	a7, a4, 48; l32i	a11, a4, 44 }
78402a23:	05e234a42e            	{ l32i.n	a10, a4, 24; addi	a8, a4, 28 }
78402a28:	c481b3c3945e              	{ addi	a9, a4, 8; addx4	a6, a7, a7 }
78402a2e:	a06630               	addx4	a6, a6, a3
                    listREMOVE_ITEM( &( pxTCB->xEventListItem ) );
78402a31:	41c24ec43e            	{ l32i.n	a12, a4, 36; l32i	a13, a4, 32 }
78402a36:	1be8                	l32i.n	a14, a11, 4
78402a38:	2dc9                	s32i.n	a12, a13, 8
78402a3a:	94c8                	l32i.n	a12, a4, 36
78402a3c:	00038681f7681c0e 	{ s32i	a13, a12, 4; nop; bne.w18	a14, a8, 78402a46 <xTaskResumeAll+0xae> }
78402a44:	1bc9                	s32i.n	a12, a11, 4
78402a46:	0020c0               	memw
78402a49:	0bc8                	l32i.n	a12, a11, 0
78402a4b:	cc0b                	addi.n	a12, a12, -1
78402a4d:	0020c0               	memw
78402a50:	0bc9                	s32i.n	a12, a11, 0
                    listREMOVE_ITEM( &( pxTCB->xStateListItem ) );
78402a52:	419a26b42e            	{ l32i.n	a11, a4, 16; l32i	a12, a4, 12 }
78402a57:	1ad8                	l32i.n	a13, a10, 4
78402a59:	2cb9                	s32i.n	a11, a12, 8
78402a5b:	44b8                	l32i.n	a11, a4, 16
78402a5d:	00039681f6e01b0e 	{ s32i	a12, a11, 4; nop; bne.w18	a13, a9, 78402a67 <xTaskResumeAll+0xcf> }
78402a65:	1ab9                	s32i.n	a11, a10, 4
78402a67:	0020c0               	memw
78402a6a:	0ab8                	l32i.n	a11, a10, 0
78402a6c:	bb0b                	addi.n	a11, a11, -1
78402a6e:	0020c0               	memw
78402a71:	0ab9                	s32i.n	a11, a10, 0
                    prvAddTaskToReadyList( pxTCB );
78402a73:	0020c0               	memw
78402a76:	2c23a2               	l32i	a10, a3, 176
78402a79:	05ba77               	bgeu	a10, a7, 78402a82 <xTaskResumeAll+0xea>
78402a7c:	0020c0               	memw
78402a7f:	2c6372               	s32i	a7, a3, 176
78402a82:	16a8                	l32i.n	a10, a6, 4
78402a84:	34a9                	s32i.n	a10, a4, 12
78402a86:	2ab8                	l32i.n	a11, a10, 8
78402a88:	44b9                	s32i.n	a11, a4, 16
78402a8a:	2ab8                	l32i.n	a11, a10, 8
78402a8c:	2a99                	s32i.n	a9, a10, 8
78402a8e:	1b99                	s32i.n	a9, a11, 4
78402a90:	0020c0               	memw
78402a93:	06a8                	l32i.n	a10, a6, 0
78402a95:	aa1b                	addi.n	a10, a10, 1
78402a97:	0020c0               	memw
78402a9a:	06a9                	s32i.n	a10, a6, 0
                    if( pxTCB->uxPriority >= pxCurrentTCB->uxPriority )
78402a9c:	0020c0               	memw
78402a9f:	2323a2               	l32i	a10, a3, 140
78402aa2:	caa8                	l32i.n	a10, a10, 48
78402aa4:	0537a7               	bltu	a7, a10, 78402aad <xTaskResumeAll+0x115>
                        xYieldPending = pdTRUE;
78402aa7:	0020c0               	memw
78402aaa:	2d6352               	s32i	a5, a3, 180
                while( listLIST_IS_EMPTY( &xPendingReadyList ) == pdFALSE )
78402aad:	0020c0               	memw
78402ab0:	4ef1860b2e            	{ movi.n	a11, 0; l32i	a12, a3, 248 }
78402ab5:	06ad                	mov.n	a10, a6
78402ab7:	f76c56               	bnez	a12, 78402a31 <xTaskResumeAll+0x99>
                    listREMOVE_ITEM( &( pxTCB->xStateListItem ) );
78402aba:	820032643e            	{ s32i.n	a6, a4, 24; movi	a5, 0 }
                    listREMOVE_ITEM( &( pxTCB->xEventListItem ) );
78402abf:	b459                	s32i.n	a5, a4, 44
    if( listLIST_IS_EMPTY( pxDelayedTaskList ) != pdFALSE )
78402ac1:	0020c0               	memw
78402ac4:	2a2342               	l32i	a4, a3, 168
78402ac7:	0020c0               	memw
78402aca:	0448                	l32i.n	a4, a4, 0
78402acc:	40036a8f8226ff0e 	{ movi	a4, -1; nop; beqz.w18	a4, 78402ade <xTaskResumeAll+0x146> }
        xNextTaskUnblockTime = listGET_ITEM_VALUE_OF_HEAD_ENTRY( pxDelayedTaskList );
78402ad4:	0020c0               	memw
78402ad7:	2a2342               	l32i	a4, a3, 168
78402ada:	3448                	l32i.n	a4, a4, 12
78402adc:	0448                	l32i.n	a4, a4, 0
78402ade:	0020c0               	memw
78402ae1:	2f6342               	s32i	a4, a3, 188
                    TickType_t xPendedCounts = xPendedTicks; /* Non-volatile copy. */
78402ae4:	0020c0               	memw
78402ae7:	4e8182152e            	{ movi.n	a5, 1; l32i	a4, a3, 192 }
                    if( xPendedCounts > ( TickType_t ) 0U )
78402aec:	949c                	beqz.n	a4, 78402b09 <xTaskResumeAll+0x171>
                            if( xTaskIncrementTick() != pdFALSE )
78402aee:	0020f0               	nop
78402af1:	002ea5               	call8	78402ddc <xTaskIncrementTick>
78402af4:	4a8c                	beqz.n	a10, 78402afc <xTaskResumeAll+0x164>
                                xYieldPending = pdTRUE;
78402af6:	0020c0               	memw
78402af9:	2d6352               	s32i	a5, a3, 180
                            --xPendedCounts;
78402afc:	440b                	addi.n	a4, a4, -1
                        } while( xPendedCounts > ( TickType_t ) 0U );
78402afe:	fec456               	bnez	a4, 78402aee <xTaskResumeAll+0x156>
                        xPendedTicks = 0;
78402b01:	0020c0               	memw
78402b04:	040c                	movi.n	a4, 0
78402b06:	306342               	s32i	a4, a3, 192
                if( xYieldPending != pdFALSE )
78402b09:	0020c0               	memw
78402b0c:	2d2332               	l32i	a3, a3, 180
78402b0f:	eea316               	beqz	a3, 784029fd <xTaskResumeAll+0x65>
                    taskYIELD_IF_USING_PREEMPTION();
78402b12:	fe8165               	call8	78401328 <vPortYield>
78402b15:	1ec3f702216e              	{ movi	a2, 1; j	784029fd <xTaskResumeAll+0x65> }

78402b1b <xTaskResumeAll+0x183>:
	...

@superroc
Thanks for taking time to report this.
Just wondering if you have checked if the same application code works well with lower optimization settings?

Hi tony,

The same application code works well with lower optimization settings. "optimization level" is O0, "optimization for size" is None.

This seems to be a compiler problem. The code works well after I insert a nop instruction following listGET_OWNER_OF_HEAD_ENTRY when optimization level is O2.

pxTCB = listGET_OWNER_OF_HEAD_ENTRY( ( &xPendingReadyList ) );
__asm__ __volatile__("nop" : : : "memory");
listREMOVE_ITEM( &( pxTCB->xEventListItem ) );
portMEMORY_BARRIER();
listREMOVE_ITEM( &( pxTCB->xStateListItem ) );
prvAddTaskToReadyList( pxTCB );

If this is the only problem, I could circumvent it in this way, but I'm still worried about similar risks elsewhere in the code.

Can you check if portMEMORY_BARRIER is defined in your portmacro.h?

This macro is undefined in my project

According you tips, I append defination
#define portMEMORY_BARRIER() __asm__ __volatile__("nop" : : : "memory")
in portmacro.h, and I checked the assembly file, it seems work well. Now it's midnight in my country and I don't have EVM board in my hand, I will test the change tomorrow morning.
Thanks a lot!

I have test this solution with different optimization level (O2 and O3) on my project and EVM board, it works well. Thank you very much for your advice.

Is it a already known problem? Should this patch be merged into portable/ThirdParty/XCC/Xtensa/portmacro.h?

I have test this solution with different optimization level (O2 and O3) on my project and EVM board, it works well.

Glad that it works for you.

Should this patch be merged into portable/ThirdParty/XCC/Xtensa/portmacro.h?

Yes, please raise a PR for it.