sfwa/ukf

Improve X8 dynamics performance on C6657

Closed this issue · 0 comments

Currently the X8 dynamics model appears to compile to suboptimal code. It's called 4 times for every sigma point for every timestamp, i.e. 196 times per millisecond, so if it currently takes (say) 500 cycles that's about 25% of the overall CPU usage.

At the moment the output code shows a local stack frame size of 388 bytes, which seems high, as well as very little pipelining going on (particularly in the mul_quat_vec3 call and the acceleration/angular acceleration update step):

;******************************************************************************
;* FUNCTION NAME: _ukf_state_x8_dynamics                                      *
;*                                                                            *
;*   Regs Modified     : A0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,B0,*
;*                           B1,B2,B3,B4,B5,B6,B7,B8,B9,B10,B11,B12,B13,SP,   *
;*                           A16,A17,A18,A19,A20,A21,A22,A23,A24,A25,A26,A27, *
;*                           A28,A29,A30,A31,B16,B17,B18,B19,B20,B21,B22,B23, *
;*                           B24,B25,B26,B27,B28,B29,B30,B31                  *
;*   Regs Used         : A0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,B0,*
;*                           B1,B2,B3,B4,B5,B6,B7,B8,B9,B10,B11,B12,B13,DP,SP,*
;*                           A16,A17,A18,A19,A20,A21,A22,A23,A24,A25,A26,A27, *
;*                           A28,A29,A30,A31,B16,B17,B18,B19,B20,B21,B22,B23, *
;*                           B24,B25,B26,B27,B28,B29,B30,B31                  *
;*   Local Frame Size  : 0 Args + 388 Auto + 48 Save = 436 byte               *
;******************************************************************************
_ukf_state_x8_dynamics:
;** --------------------------------------------------------------------------*
;----------------------------------------------------------------------
; 447 | real_t *restrict const control) {                                      
; 448 | assert(in && control);                                                 
; 449 | _nassert((size_t)in % 8 == 0);                                         
; 450 | _nassert((size_t)control % 8 == 0);                                    
; 452 | real_t yaw_rate = in->angular_velocity[Z],                             
; 453 |        pitch_rate = in->angular_velocity[Y],                           
; 454 |        roll_rate = in->angular_velocity[X];                            
; 457 | real_t ned_airflow[3], airflow[3];                                     
; 458 | real_t v2, v_inv, horizontal_v2, vertical_v2, vertical_v, vertical_v_in
;     | v,                                                                     
; 459 |        airflow_x2, airflow_y2, airflow_z2;                             
; 461 | ned_airflow[X] = in->wind_velocity[X] - in->velocity[X];               
; 462 | ned_airflow[Y] = in->wind_velocity[Y] - in->velocity[Y];               
;----------------------------------------------------------------------
           STW     .D2T1   A14,*SP--(8)      ; |447| 
           STDW    .D2T2   B13:B12,*SP--     ; |447| 
           STDW    .D2T2   B11:B10,*SP--     ; |447| 
           STDW    .D2T1   A13:A12,*SP--     ; |447| 

           STDW    .D2T1   A11:A10,*SP--     ; |447| 
||         MV      .L1     A4,A10            ; |447| 

           STW     .D2T2   B3,*SP--(8)       ; |447| 
||         ADDAD   .D1     A10,9,A11         ; |464| 

;----------------------------------------------------------------------
; 463 | ned_airflow[Z] = in->wind_velocity[Z] - in->velocity[Z];               
;----------------------------------------------------------------------
           LDDW    .D1T1   *+A10(40),A19:A18 ; |463| 
           LDDW    .D1T1   *+A10(168),A21:A20 ; |463| 
           LDDW    .D1T1   *+A10(160),A17:A16 ; |462| 
           LDDW    .D1T1   *+A10(32),A7:A6   ; |462| 
           LDDW    .D1T1   *+A10(152),A9:A8  ; |461| 
           LDDW    .D1T1   *+A10(24),A5:A4   ; |461| 
           LDDW    .D1T1   *+A11(8),A25:A24  ; |177| 

           MV      .L1X    B4,A14            ; |447| 
||         LDDW    .D1T2   *+A11(16),B5:B4   ; |177| 

;----------------------------------------------------------------------
; 464 | _mul_quat_vec3(airflow, in->attitude, ned_airflow);                    
;----------------------------------------------------------------------
           FSUBDP  .L1     A21:A20,A19:A18,A19:A18 ; |463| 

           FSUBDP  .L1     A17:A16,A7:A6,A7:A6 ; |462| 
||         LDDW    .D1T1   *A11,A17:A16      ; |177| 

           FSUBDP  .L1     A9:A8,A5:A4,A5:A4 ; |461| 
           FMPYDP  .M1     A19:A18,A25:A24,A29:A28 ; |177| 
           FMPYDP  .M1X    A7:A6,B5:B4,A9:A8 ; |177| 
           FMPYDP  .M1     A5:A4,A25:A24,A21:A20 ; |177| 
           FMPYDP  .M1     A7:A6,A17:A16,A27:A26 ; |177| 
           FMPYDP  .M1     A19:A18,A17:A16,A23:A22 ; |178| 
           FMPYDP  .M2X    A5:A4,B5:B4,B7:B6 ; |178| 
           FSUBDP  .L1     A29:A28,A9:A8,A9:A8 ; |177| 
           FSUBDP  .L1     A27:A26,A21:A20,A21:A20 ; |177| 
           LDDW    .D1T1   *+A11(24),A27:A26 ; |177| 
           FSUBDP  .L2X    B7:B6,A23:A22,B7:B6 ; |178| 
           FADDDP  .L1     A9:A8,A9:A8,A9:A8 ; |177| 
           FADDDP  .L1     A21:A20,A21:A20,A23:A22 ; |177| 
           FADDDP  .L2     B7:B6,B7:B6,B7:B6 ; |178| 
           FMPYDP  .M1     A9:A8,A27:A26,A29:A28 ; |177| 
           FMPYDP  .M1     A23:A22,A27:A26,A21:A20 ; |181| 
           DADD    .L2X    0,A25:A24,B9:B8   ; |177| Define a twin register
           FMPYDP  .M1X    B7:B6,A27:A26,A27:A26 ; |178| 

           FADDDP  .L1     A29:A28,A5:A4,A29:A28 ; |177| 
||         FMPYDP  .M2X    A23:A22,B9:B8,B9:B8 ; |177| 

           FMPYDP  .M1     A9:A8,A25:A24,A25:A24 ; |181| 

           FMPYDP  .M1X    A9:A8,B5:B4,A9:A8 ; |178| 
||         FADDDP  .L1     A21:A20,A19:A18,A21:A20 ; |181| 

           FADDDP  .L1     A27:A26,A7:A6,A31:A30 ; |178| 
||         FMPYDP  .M2     B7:B6,B5:B4,B5:B4 ; |179| 

           FMPYDP  .M1X    B7:B6,A17:A16,A27:A26 ; |181| 
||         FADDDP  .L2X    B9:B8,A29:A28,B7:B6 ; |177| 

           FMPYDP  .M1     A23:A22,A17:A16,A17:A16 ; |180| 
||         FSUBDP  .L1     A21:A20,A25:A24,A21:A20 ; |181| 

           FADDDP  .L1     A9:A8,A31:A30,A9:A8 ; |178| 
           FSUBDP  .L2     B7:B6,B5:B4,B5:B4 ; |179| 

           FADDDP  .L1     A27:A26,A21:A20,A13:A12 ; |181| 
||         ADDK    .S2     -400,SP           ; |447| 

           FSUBDP  .L1     A9:A8,A17:A16,A9:A8 ; |180| 
||         ADDAW   .D2     SP,12,B6          ; |185| 

           STDW    .D2T2   B5:B4,*B6         ; |185| 
           STDW    .D2T1   A13:A12,*+B6(16)  ; |187| 
           STDW    .D2T1   A9:A8,*+B6(8)     ; |186| 
;----------------------------------------------------------------------
; 470 | airflow_x2 = airflow[X]*airflow[X];                                    
; 471 | airflow_y2 = airflow[Y]*airflow[Y];                                    
;----------------------------------------------------------------------
           LDDW    .D2T2   *+SP(48),B5:B4    ; |470| 
;----------------------------------------------------------------------
; 472 | airflow_z2 = airflow[Z]*airflow[Z];                                    
;----------------------------------------------------------------------
           FMPYDP  .M1     A13:A12,A13:A12,A9:A8 ; |472| 
           STDW    .D2T1   A5:A4,*+SP(16)    ; |461| 
           LDDW    .D1T1   *+A10(104),A5:A4  ; |454| 
           STDW    .D2T1   A19:A18,*+SP(32)  ; |463| 
           STDW    .D2T2   B5:B4,*+SP(184)   ; |470| 
;----------------------------------------------------------------------
; 474 | v2 = airflow_x2 + airflow_y2 + airflow_z2;                             
; 475 | v_inv = sqrt_inv(max(1.0, v2));                                        
; 477 | horizontal_v2 = airflow_y2 + airflow_x2;                               
; 478 | vertical_v2 = airflow_z2 + airflow_x2;                                 
;----------------------------------------------------------------------
           LDDW    .D2T2   *+SP(184),B7:B6   ; |472| 
           STDW    .D2T1   A7:A6,*+SP(24)    ; |462| 
           STDW    .D2T1   A5:A4,*+SP(152)   ; |454| 
           LDDW    .D1T1   *+A10(112),A5:A4  ; |453| 
           NOP             1
           FMPYDP  .M2     B7:B6,B5:B4,B5:B4 ; |470| 
           NOP             3
           STDW    .D2T2   B5:B4,*+SP(144)   ; |470| 

           FADDDP  .L2X    B5:B4,A9:A8,B11:B10 ; |479| 
||         LDDW    .D1T2   *+A10(120),B5:B4  ; |452| 

           STDW    .D2T1   A5:A4,*+SP(160)   ; |471| 
           NOP             3
           STDW    .D2T2   B5:B4,*+SP(168)   ; |452| 
           LDDW    .D2T2   *+SP(56),B5:B4    ; |471| 
;----------------------------------------------------------------------
; 479 | vertical_v = sqrt(vertical_v2);                                        
; 480 | vertical_v_inv = sqrt_inv(max(1.0, vertical_v2));                      
; 483 | real_t thrust, ve = 0.0025 * control[0];                               
; 484 | thrust = 0.5 * RHO * 0.025 * (ve * ve - airflow_x2);                   
; 485 | if (thrust < 0.0) {                                                    
; 486 |     thrust = 0.0;                                                      
; 490 | real_t alpha, sin_alpha, cos_alpha, sin_beta, cos_beta, a2, sin_cos_alp
;     | ha;                                                                    
;----------------------------------------------------------------------
           DADD    .L1X    0,B11:B10,A5:A4   ; |479| 
           NOP             3

           CALLP   .S2     sqrt,B3
||         STDW    .D2T2   B5:B4,*+SP(192)   ; |471| 

$C$RL14:   ; CALL OCCURS {sqrt} {0}          ; |479| 
;** --------------------------------------------------------------------------*
           STDW    .D2T1   A13:A12,*+SP(176) ; |475| 
           LDW     .D2T1   *+SP(176),A3      ; |475| 
;----------------------------------------------------------------------
; 491 | alpha = atan2(-airflow[Z], -airflow[X]);                               
;----------------------------------------------------------------------
           LDDW    .D2T1   *+SP(184),A13:A12 ; |491| 
           ZERO    .L2     B4                ; |491| 
           SET     .S2     B4,31,31,B4       ; |491| 
           STDW    .D2T1   A5:A4,*+SP(200)   ; |479| 

           STW     .D2T1   A3,*+SP(224)      ; |491| 
||         ZERO    .L1     A3                ; |491| 

           SET     .S1     A3,31,31,A4       ; |491| 
||         XOR     .L1X    A13,B4,A3         ; |491| 

;----------------------------------------------------------------------
; 493 | sin_alpha = -airflow[Z] * vertical_v_inv;                              
; 494 | cos_alpha = -airflow[X] * vertical_v_inv;                              
; 495 | sin_beta = airflow[Y] * v_inv;                                         
; 496 | cos_beta = vertical_v * v_inv;                                         
; 497 | sin_cos_alpha = sin_alpha * cos_alpha;                                 
; 499 | a2 = alpha * alpha;                                                    
; 502 | real_t lift, drag, side_force, pitch_moment, yaw_moment, roll_moment;  
; 504 | lift = -5 * a2 * alpha + a2 + 2.5 * alpha + 0.12;                      
; 505 | if (alpha < 0.0) {                                                     
; 506 |     lift = min(lift, 0.8 * sin_cos_alpha);                             
; 507 | } else {                                                               
; 508 |     lift = max(lift, 0.8 * sin_cos_alpha);                             
; 511 | drag = 0.05 + 0.7 * sin_alpha * sin_alpha;                             
; 512 | side_force = 0.3 * sin_beta * cos_beta;                                
; 514 | pitch_moment = 0.001 - 0.1 * sin_cos_alpha - 0.003 * pitch_rate -      
; 515 |                0.01 * (control[1] + control[2]);                       
; 516 | roll_moment = -0.03 * sin_beta - 0.015 * roll_rate +                   
; 517 |               0.025 * (control[1] - control[2]);                       
; 518 | yaw_moment = -0.02 * sin_beta - 0.05 * yaw_rate -                      
; 519 |              0.01 * (absval(control[1]) + absval(control[2]));         
; 522 | real_t qbar = RHO * horizontal_v2 * 0.5,                               
; 523 |        x_aero_f = qbar * (lift * sin_alpha - drag * cos_alpha -        
; 524 |                           side_force * sin_beta),                      
; 525 |        y_aero_f = qbar * side_force * cos_beta,                        
; 526 |        z_aero_f = qbar * (lift * cos_alpha + drag * sin_alpha);        
;----------------------------------------------------------------------
           STW     .D2T1   A3,*+SP(220)      ; |491| 
           LDW     .D2T1   *+SP(180),A3      ; |491| 
           MV      .L2X    A14,B12           ; |447| 
           STW     .D2T1   A12,*+SP(216)     ; |491| 
           LDDW    .D2T2   *+SP(216),B5:B4   ; |484| 
           NOP             1

           XOR     .L1     A3,A4,A3          ; |491| 
||         LDDW    .D2T1   *B12,A5:A4        ; |484| 

           STW     .D2T1   A3,*+SP(228)      ; |484| 
           NOP             3
           STDW    .D2T1   A5:A4,*+SP(208)   ; |484| 

           CALLP   .S2     atan2,B3
||         LDDW    .D2T1   *+SP(224),A5:A4   ; |484| 

$C$RL15:   ; CALL OCCURS {atan2} {0}         ; |491| 
;** --------------------------------------------------------------------------*
           LDDW    .D2T2   *+SP(192),B19:B18 ; |475| 
           LDDW    .D2T1   *+SP(176),A7:A6   ; |480| 
           LDDW    .D2T1   *+SP(176),A17:A16 ; |480| 
           FMPYDP  .M1     A13:A12,A13:A12,A19:A18 ; |475| 
           ZERO    .L2     B9
           FMPYDP  .M2     B19:B18,B19:B18,B7:B6 ; |475| 
           SET     .S2     B9,0x14,0x1d,B9
           FMPYDP  .M1     A17:A16,A7:A6,A7:A6 ; |475| 
           MV      .L1X    B9,A9             ; |480| 
           FADDDP  .L2X    B7:B6,A19:A18,B7:B6 ; |475| 
           ZERO    .L1     A8                ; |480| 
           CMPLTDP .S1X    B11:B10,A9:A8,A0  ; |480| 
           ZERO    .L1     A3
           FADDDP  .L1X    A7:A6,B7:B6,A7:A6 ; |475| 

           MV      .L2     B9,B5             ; |480| 
||         SET     .S1     A3,0x15,0x1d,A3
||         ZERO    .S2     B4                ; |480| 
||         ZERO    .D2     B8                ; |480| 
||         ZERO    .L1     A2                ; |480| 

   [!A0]   DADD    .L2     0,B11:B10,B5:B4   ; |480| 

           CMPLTDP .S1X    A7:A6,B9:B8,A0    ; |475| 
||         FMPYDP  .M2X    A3:A2,B5:B4,B17:B16 ; |21| 
||         STW     .D2T1   A3,*+SP(188)      ; |21| 
||         MV      .L1     A2,A3             ; |475| 

           STW     .D2T1   A3,*+SP(184)      ; |475| 

   [!A0]   DADD    .L1     0,A7:A6,A9:A8     ; |475| 
||         LDDW    .D2T1   *+SP(184),A7:A6   ; |475| 

           RSQRDP  .S2     B5:B4,B7:B6       ; |21| 
           RSQRDP  .S1     A9:A8,A19:A18     ; |21| 
           FMPYDP  .M2     B7:B6,B17:B16,B9:B8 ; |21| 
           ZERO    .L2     B27
           FMPYDP  .M1     A7:A6,A9:A8,A7:A6 ; |21| 
           SET     .S2     B27,0x13,0x1d,B27
           FMPYDP  .M2     B7:B6,B9:B8,B9:B8 ; |21| 
           ZERO    .L2     B26               ; |480| 
           FMPYDP  .M1     A19:A18,A7:A6,A17:A16 ; |21| 
;----------------------------------------------------------------------
; 531 | real_t g_accel[3], g[3] = { 0, 0, G_ACCEL };                           
;----------------------------------------------------------------------
           ADDAW   .D1X    DP,($P$T2$3),A3   ; |531| 
           FSUBDP  .L2     B27:B26,B9:B8,B9:B8 ; |21| 
           ZERO    .L2     B29:B28           ; |14| 
           FMPYDP  .M1     A19:A18,A17:A16,A17:A16 ; |21| 
           FMPYDP  .M2     B7:B6,B9:B8,B9:B8 ; |21| 
           CMPLTDP .S2     B5:B4,B29:B28,B0  ; |14| 
           NOP             2
           FSUBDP  .L2X    B27:B26,A17:A16,B7:B6 ; |21| 
           FMPYDP  .M2     B9:B8,B17:B16,B21:B20 ; |21| 
           LDDW    .D2T1   *+B12(8),A17:A16  ; |514| 
           FMPYDP  .M2X    A19:A18,B7:B6,B7:B6 ; |21| 
;----------------------------------------------------------------------
; 532 | _mul_quat_vec3(g_accel, in->attitude, g);                              
;----------------------------------------------------------------------
           MVK     .S2     45,B4             ; |531| 
           FMPYDP  .M2     B9:B8,B21:B20,B23:B22 ; |21| 
           MVKL    .S2     0x3fd33333,B13
           FMPYDP  .M2X    B7:B6,A7:A6,B21:B20 ; |21| 
           MVKL    .S2     0x3f999999,B31
           FSUBDP  .L2     B27:B26,B23:B22,B25:B24 ; |21| 
           MVKH    .S2     0x3fd33333,B13
           FMPYDP  .M2     B7:B6,B21:B20,B23:B22 ; |21| 
           FMPYDP  .M2     B9:B8,B25:B24,B21:B20 ; |21| 
           MVKH    .S2     0x3f999999,B31
           NOP             1
           FSUBDP  .L2     B27:B26,B23:B22,B9:B8 ; |21| 
           FMPYDP  .M2     B21:B20,B17:B16,B17:B16 ; |21| 
           MV      .S2     B27,B23           ; |21| 
           FMPYDP  .M2     B7:B6,B9:B8,B9:B8 ; |21| 
           MV      .L2     B26,B22           ; |21| 
           FMPYDP  .M2     B21:B20,B17:B16,B17:B16 ; |21| 
           ZERO    .L2     B26               ; |21| 
           FMPYDP  .M2X    B9:B8,A7:A6,B7:B6 ; |21| 
           MVKL    .S1     0x7fefffff,A7
           FSUBDP  .L2     B23:B22,B17:B16,B23:B22 ; |21| 
           MVK     .S2     34,B16            ; |531| 

           FMPYDP  .M2     B9:B8,B7:B6,B25:B24 ; |21| 
||         MVK     .S2     33,B6             ; |514| 

           STDW    .D2T1   A17:A16,*+SP[B6]  ; |514| 
           LDDW    .D2T1   *+B12(16),A17:A16 ; |514| 
           LDDW    .D1T2   *A3,B7:B6         ; |531| 
           FMPYDP  .M2     B21:B20,B23:B22,B3:B2 ; |21| 
           ADDAW   .D2     SP,28,B21         ; |531| 
           MVKH    .S1     0x7fefffff,A7
           STDW    .D2T1   A17:A16,*+SP[B16] ; |531| 
           LDDW    .D1T2   *+A3(16),B17:B16  ; |531| 

           STDW    .D2T2   B7:B6,*B21        ; |531| 
||         DADD    .L2X    0,A5:A4,B7:B6     ; |491| 

           LDDW    .D1T1   *+A3(8),A17:A16   ; |531| 
||         STDW    .D2T2   B7:B6,*+SP[B4]    ; |531| 

           LDW     .D2T2   *+SP(360),B6      ; |531| 
           LDW     .D2T2   *+SP(364),B7      ; |531| 
           MVK     .L1     0xffffffff,A6     ; |15| 
           FSUBDP  .L2     B27:B26,B25:B24,B23:B22 ; |21| 
   [ B0]   DADD    .L2X    0,A7:A6,B3:B2     ; |15| 
           MV      .S2     B6,B4             ; |531| 
           MV      .L2     B7,B5             ; |531| 

           FMPYDP  .M2     B7:B6,B5:B4,B25:B24 ; |504| 
||         ZERO    .L2     B5:B4             ; |14| 

           CMPLTDP .S2X    A9:A8,B5:B4,B0    ; |14| 
           STDW    .D2T2   B17:B16,*+B21(16) ; |531| 
           MVKL    .S2     0x47ae147b,B4

           MVKH    .S2     0x47ae147b,B4
||         STDW    .D2T1   A17:A16,*+B21(8)  ; |531| 

           STW     .D2T2   B4,*+SP(176)      ; |484| 
||         MVKL    .S2     0x3f647ae1,B4

           MVKH    .S2     0x3f647ae1,B4
||         LDDW    .D2T2   *+SP(192),B7:B6   ; |535| 

           STW     .D2T2   B4,*+SP(180)      ; |484| 
;----------------------------------------------------------------------
; 535 | in->acceleration[X] = (thrust + x_aero_f) * 0.26315789473684 + g_accel[
;     | X];                                                                    
;----------------------------------------------------------------------
           LDDW    .D2T2   *+SP(192),B5:B4   ; |535| 
           FMPYDP  .M2     B9:B8,B23:B22,B23:B22 ; |21| 
           ADDAW   .D2     SP,28,B8          ; |177| 
;----------------------------------------------------------------------
; 536 | in->acceleration[Y] = y_aero_f * 0.26315789473684 + g_accel[Y];        
; 537 | in->acceleration[Z] = -z_aero_f * 0.26315789473684 + g_accel[Z];       
;----------------------------------------------------------------------
           LDDW    .D2T1   *+SP(224),A5:A4   ; |535| 
           ZERO    .L1     A17

           FMPYDP  .M2     B7:B6,B5:B4,B11:B10 ; |471| 
||         LDDW    .D2T2   *+B8(8),B5:B4     ; |177| 

           LDDW    .D2T2   *B8,B7:B6         ; |177| 
           LDDW    .D1T1   *+A11(8),A19:A18  ; |177| 
           MVKH    .S1     0xc0140000,A17
           ZERO    .L1     A16               ; |504| 

           FMPYDP  .M2X    B3:B2,A5:A4,B17:B16 ; |493| 
||         LDDW    .D1T1   *A11,A5:A4        ; |177| 
||         STDW    .D2T2   B5:B4,*+SP(248)   ; |471| 
||         MVK     .S2     32,B4             ; |535| 

   [ B0]   DADD    .L2X    0,A7:A6,B23:B22   ; |15| 
||         FMPYDP  .M1X    A17:A16,B25:B24,A7:A6 ; |504| 
||         LDDW    .D1T1   *+A11(16),A17:A16 ; |177| 
||         STDW    .D2T2   B7:B6,*+SP[B4]    ; |535| 
||         MVKL    .S2     0x3f847ae1,B4

           MVKH    .S2     0x3f847ae1,B4
           STW     .D2T2   B4,*+SP(284)      ; |514| 
           LDDW    .D2T2   *+SP(208),B7:B6   ; |504| 
           LDDW    .D2T2   *+SP(176),B5:B4   ; |504| 
           STDW    .D2T1   A19:A18,*+SP(224) ; |471| 
           STDW    .D2T1   A5:A4,*+SP(232)   ; |177| 
           STDW    .D2T1   A17:A16,*+SP(240) ; |535| 
           LDDW    .D2T1   *+SP(240),A19:A18 ; |177| 

           FMPYDP  .M2     B5:B4,B7:B6,B27:B26 ; |484| 
||         LDDW    .D2T2   *+B8(16),B5:B4    ; |177| 

           LDDW    .D2T2   *+SP(248),B7:B6   ; |504| 
           LDW     .D2T2   *+SP(256),B28     ; |177| 
           LDW     .D2T2   *+SP(260),B29     ; |177| 
           MVKL    .S2     0x66666666,B20
           STDW    .D2T2   B5:B4,*+SP(208)   ; |484| 
           LDW     .D2T2   *+SP(360),B4      ; |484| 
           LDW     .D2T2   *+SP(364),B5      ; |484| 
           LDDW    .D2T1   *+SP(248),A17:A16 ; |177| 
           FMPYDP  .M2X    B29:B28,A19:A18,B29:B28 ; |178| 
           MVKH    .S2     0x66666666,B20
           MVKL    .S1     0x3fa99999,A3

           FMPYDP  .M2X    B5:B4,A7:A6,B5:B4 ; |504| 
||         LDDW    .D2T1   *+SP(232),A7:A6   ; |504| 

           MVKL    .S2     0x3fe66666,B21
           MVKH    .S1     0x3fa99999,A3
           MVKH    .S2     0x3fe66666,B21
           STW     .D2T1   A3,*+SP(316)      ; |518| 

           FMPYDP  .M2X    B7:B6,A7:A6,B9:B8 ; |177| 
||         LDW     .D2T2   *+SP(256),B6      ; |177| 

           LDW     .D2T2   *+SP(260),B7      ; |177| 
           LDDW    .D2T1   *+SP(224),A7:A6   ; |177| 
           FMPYDP  .M2     B21:B20,B17:B16,B1:B0 ; |535| 
           MVK     .S2     38,B21            ; |178| 
           LDW     .D2T1   *+SP(176),A3      ; |516| 
           STDW    .D2T2   B29:B28,*+SP[B21] ; |178| 

           FMPYDP  .M2X    B7:B6,A7:A6,B7:B6 ; |177| 
||         LDDW    .D2T1   *+SP(240),A7:A6   ; |177| 

           LDDW    .D2T2   *+SP(208),B29:B28 ; |178| 
           MVKL    .S1     0x9999999a,A4
           MVKH    .S1     0x9999999a,A4
           STW     .D2T1   A3,*+SP(288)      ; |514| 

           FMPYDP  .M1     A17:A16,A7:A6,A29:A28 ; |177| 
||         LDDW    .D2T1   *+SP(232),A7:A6   ; |178| 

           MV      .L1     A4,A3             ; |508| 
           STW     .D2T1   A3,*+SP(344)      ; |508| 
           LDW     .D2T1   *+SP(316),A3      ; |508| 
           DADD    .L1X    0,B23:B22,A21:A20 ; |24| 

           FMPYDP  .M2X    B29:B28,A7:A6,B23:B22 ; |178| 
||         LDW     .D2T1   *+SP(360),A6      ; |535| 

           LDW     .D2T1   *+SP(364),A7      ; |535| 
           ZERO    .L1     A9
           STW     .D2T1   A3,*+SP(348)      ; |535| 
           MVKH    .S1     0x40040000,A9

           ZERO    .L1     A8                ; |504| 
||         LDW     .D2T1   *+SP(176),A3      ; |516| 

           FMPYDP  .M1     A9:A8,A7:A6,A31:A30 ; |504| 
||         LDDW    .D2T1   *+SP(208),A9:A8   ; |508| 

           LDDW    .D2T1   *+SP(224),A7:A6   ; |508| 

           DADD    .L1X    0,B23:B22,A27:A26 ; |178| 
||         LDDW    .D2T2   *+SP(144),B23:B22 ; |508| 
||         MVKL    .S2     0x33333333,B12

           MVKH    .S2     0x33333333,B12

           FMPYDP  .M2X    A21:A20,B19:B18,B19:B18 ; |495| 
||         MVKL    .S2     0xeb851eb8,B20

           STW     .D2T1   A3,*+SP(320)      ; |514| 
||         MVKH    .S2     0xeb851eb8,B20

           FMPYDP  .M1     A9:A8,A7:A6,A25:A24 ; |177| 
||         LDDW    .D2T1   *+SP(216),A9:A8   ; |535| 
||         MVKL    .S2     0xbf947ae1,B21

           FADDDP  .L2     B23:B22,B11:B10,B23:B22 ; |477| 
||         FMPYDP  .M2     B17:B16,B1:B0,B11:B10 ; |535| 
||         LDW     .D2T1   *+SP(284),A3      ; |514| 
||         MVKH    .S2     0xbf947ae1,B21

           FMPYDP  .M2     B13:B12,B19:B18,B1:B0 ; |535| 
||         STW     .D2T2   B21,*+SP(292)     ; |518| 
||         MVKL    .S2     0x3fbeb851,B21

           MVKH    .S2     0x3fbeb851,B21
||         STW     .D2T2   B20,*+SP(296)     ; |514| 

           LDW     .D2T1   *+SP(268),A7      ; |535| 
||         MVKL    .S2     0x3ff39999,B29

           FMPYDP  .M1X    B3:B2,A9:A8,A9:A8 ; |494| 
||         MVK     .S2     42,B2             ; |535| 
||         LDW     .D2T1   *+SP(264),A6      ; |535| 

           STDW    .D2T2   B1:B0,*+SP[B2]    ; |535| 
||         MVKL    .S2     0x3f8eb851,B0
||         MVKL    .S1     0x3fe99999,A5

           MVKH    .S2     0x3f8eb851,B0
||         STW     .D2T1   A3,*+SP(324)      ; |518| 
||         MVKH    .S1     0x3fe99999,A5

           STW     .D2T2   B0,*+SP(300)      ; |516| 
||         MVKL    .S2     0x3fb99999,B0
||         MVKL    .S1     0xbf9eb851,A3

           MVKH    .S2     0x3fb99999,B0
||         LDW     .D2T1   *+SP(272),A16     ; |514| 
||         MVKH    .S1     0xbf9eb851,A3
||         MV      .L2X    A4,B30            ; |508| 

           STW     .D2T2   B0,*+SP(220)      ; |514| 
||         ABSDP   .S1     A7:A6,A19:A18     ; |518| 
||         MVKH    .S2     0x3ff39999,B29
||         MV      .L2X    A4,B28            ; |508| 
||         MV      .L1X    B20,A2            ; |514| 

;** --------------------------------------------------------------------------*

           FMPYDP  .M2     B27:B26,B27:B26,B5:B4 ; |484| 
||         FADDDP  .L2     B25:B24,B5:B4,B27:B26 ; |504| 

           LDW     .D2T2   *+SP(308),B25     ; |508| 
           LDW     .D2T2   *+SP(304),B24     ; |508| 
           FSUBDP  .L2     B9:B8,B7:B6,B9:B8 ; |177| 

           STDW    .D2T2   B5:B4,*+SP(192)   ; |484| 
||         MV      .L2X    A4,B4             ; |508| 

           STW     .D2T2   B4,*+SP(312)      ; |508| 

           STW     .D2T2   B4,*+SP(216)      ; |508| 
||         DADD    .S2X    0,A27:A26,B5:B4   ; |508| 

           FADDDP  .L2     B9:B8,B9:B8,B5:B4 ; |177| 
||         FSUBDP  .S2     B25:B24,B5:B4,B9:B8 ; |178| 

           FMPYDP  .M2     B29:B28,B23:B22,B23:B22 ; |535| 
           LDDW    .D1T2   *+A11(24),B7:B6   ; |177| 

           LDW     .D2T1   *+SP(276),A17     ; |514| 
||         MVK     .S2     47,B24            ; |535| 

           STDW    .D2T2   B5:B4,*+SP[B24]   ; |535| 

           MVK     .S2     41,B4             ; |535| 
||         LDW     .D2T2   *+SP(292),B5      ; |535| 

           STDW    .D2T2   B23:B22,*+SP[B4]  ; |535| 
           LDW     .D2T2   *+SP(288),B4      ; |535| 
           FSUBDP  .L1     A7:A6,A17:A16,A7:A6 ; |516| 
           MVK     .S2     38,B22            ; |518| 

           FSUBDP  .L1     A25:A24,A29:A28,A17:A16 ; |177| 
||         ABSDP   .S1     A17:A16,A25:A24   ; |518| 

;----------------------------------------------------------------------
; 550 | in->angular_acceleration[X] = qbar *                                   
;----------------------------------------------------------------------
           LDW     .D2T1   *+SP(348),A29     ; |550| 
           FMPYDP  .M2     B5:B4,B19:B18,B5:B4 ; |518| 
           MVKL    .S1     0x7e039104,A28
           MVKH    .S1     0x7e039104,A28
;----------------------------------------------------------------------
; 551 | (3.364222 * roll_moment + 0.27744448 * yaw_moment);                    
;----------------------------------------------------------------------
           STW     .D2T1   A28,*+SP(352)     ; |550| 

           STDW    .D2T2   B5:B4,*+SP[B22]   ; |518| 
||         FMPYDP  .M2X    B31:B30,A7:A6,B5:B4 ; |516| 

           MVK     .S2     36,B22            ; |516| 
           LDW     .D2T1   *+SP(344),A28     ; |550| 
           LDDW    .D2T1   *+SP(200),A7:A6   ; |516| 
           STDW    .D2T2   B5:B4,*+SP[B22]   ; |516| 
           LDW     .D2T2   *+SP(380),B5      ; |516| 
           LDW     .D2T2   *+SP(376),B4      ; |516| 
           LDW     .D2T2   *+SP(188),B29     ; |181| 
           FADDDP  .L1     A25:A24,A19:A18,A25:A24 ; |518| 
           FMPYDP  .M1X    A9:A8,B17:B16,A23:A22 ; |497| 
           FADDDP  .L1X    A31:A30,B27:B26,A27:A26 ; |504| 

           FMPYDP  .M2     B5:B4,B7:B6,B9:B8 ; |181| 
||         FADDDP  .L2     B9:B8,B9:B8,B5:B4 ; |178| 

           ZERO    .L2     B28               ; |535| 
           FMPYDP  .M1     A5:A4,A23:A22,A19:A18 ; |506| 

           FMPYDP  .M2     B5:B4,B7:B6,B3:B2 ; |178| 
||         STDW    .D2T2   B5:B4,*+SP(200)   ; |181| 
||         FADDDP  .L2X    A29:A28,B11:B10,B5:B4 ; |535| 

           FMPYDP  .M1     A5:A4,A23:A22,A17:A16 ; |508| 
||         FADDDP  .L1     A17:A16,A17:A16,A7:A6 ; |177| 
||         DADD    .S1     0,A7:A6,A5:A4     ; |479| 

           FMPYDP  .M1     A21:A20,A5:A4,A21:A20 ; |496| 
           STDW    .D2T2   B5:B4,*+SP(184)   ; |178| 
           LDW     .D2T2   *+SP(324),B5      ; |514| 
           LDW     .D2T2   *+SP(320),B4      ; |514| 
           FADDDP  .L1X    B21:B20,A27:A26,A5:A4 ; |504| 
           MVK     .S2     46,B20            ; |506| 
           MVKL    .S1     0x3fd1c1a6,A26
           MVKH    .S1     0x3fd1c1a6,A26
           FMPYDP  .M2X    B5:B4,A25:A24,B5:B4 ; |518| 
           LDW     .D2T1   *+SP(300),A27     ; |514| 
           STW     .D2T1   A26,*+SP(356)     ; |550| 
           LDW     .D2T1   *+SP(296),A26     ; |514| 

           DADD    .L2     0,B5:B4,B13:B12   ; |518| 
||         LDW     .D2T2   *+SP(336),B4      ; |514| 

           LDW     .D2T2   *+SP(340),B5      ; |514| 
           LDDW    .D2T2   *+SP(184),B23:B22 ; |484| 
           LDDW    .D2T1   *+SP(168),A29:A28 ; |484| 
           LDDW    .D2T1   *+SP(152),A25:A24 ; |514| 
           MVKL    .S2     0x3f689374,B25
           FMPYDP  .M2X    A21:A20,B5:B4,B5:B4 ; |535| 
           MVKH    .S2     0x3f689374,B25
           MVKL    .S2     0xbc6a7efa,B24
           FMPYDP  .M1     A27:A26,A25:A24,A25:A24 ; |516| 

           STDW    .D2T2   B5:B4,*+SP[B20]   ; |506| 
||         FMPYDP  .M2X    A7:A6,B7:B6,B5:B4 ; |177| 

           MVK     .S2     37,B6             ; |177| 
           LDDW    .D2T2   *+SP(144),B21:B20 ; |484| 
           FMPYDP  .M1X    A3:A2,B19:B18,A27:A26 ; |516| 
           STDW    .D2T2   B5:B4,*+SP[B6]    ; |177| 
           LDDW    .D2T2   *+SP(216),B5:B4   ; |484| 
           ZERO    .L2     B7:B6             ; |505| 
           FSUBDP  .L1     A27:A26,A25:A24,A25:A24 ; |516| 
           MVKH    .S2     0xbc6a7efa,B24
;----------------------------------------------------------------------
; 552 | in->angular_acceleration[Y] = qbar * 5.8823528 * pitch_moment;         
;----------------------------------------------------------------------
           LDDW    .D2T1   *+SP(240),A27:A26 ; |552| 
           FMPYDP  .M2X    B5:B4,A23:A22,B5:B4 ; |514| 
           LDW     .D2T1   *+SP(316),A23     ; |484| 
           LDW     .D2T1   *+SP(312),A22     ; |484| 
           CMPGTDP .S1     A5:A4,A17:A16,A1  ; |508| 
           STDW    .D2T2   B5:B4,*+SP(216)   ; |514| 
           LDDW    .D2T2   *+SP(192),B5:B4   ; |484| 
           CMPLTDP .S1     A5:A4,A19:A18,A0  ; |506| 
           FMPYDP  .M1     A23:A22,A29:A28,A23:A22 ; |518| 
           MVKL    .S1     0xf5c28f5d,A3
           MVKH    .S1     0xf5c28f5d,A3
           FSUBDP  .L2     B5:B4,B21:B20,B5:B4 ; |484| 
           FMPYDP  .M2X    A9:A8,B23:B22,B21:B20 ; |535| 
           LDW     .D2T2   *+SP(328),B22     ; |550| 
           LDW     .D2T2   *+SP(332),B23     ; |550| 

           DADD    .L1X    0,B5:B4,A31:A30   ; |535| 
||         LDW     .D2T2   *+SP(360),B4      ; |550| 

           LDW     .D2T2   *+SP(364),B5      ; |550| 
           STDW    .D2T2   B21:B20,*+SP(168) ; |535| 
           LDW     .D2T2   *+SP(308),B21     ; |550| 
           LDW     .D2T2   *+SP(304),B20     ; |550| 
           FMPYDP  .M2     B29:B28,B23:B22,B11:B10 ; |535| 

           CMPLTDP .S2     B5:B4,B7:B6,B0    ; |505| 
||         LDDW    .D2T2   *+SP(208),B7:B6   ; |550| 

           LDW     .D2T2   *+SP(368),B4      ; |550| 
           LDW     .D2T2   *+SP(372),B5      ; |550| 
           FSUBDP  .L2X    B21:B20,A23:A22,B23:B22 ; |518| 
           LDW     .D2T2   *+SP(288),B20     ; |550| 
           LDW     .D2T2   *+SP(292),B21     ; |550| 
           LDDW    .D2T1   *+SP(224),A23:A22 ; |550| 

           FMPYDP  .M2     B19:B18,B5:B4,B7:B6 ; |535| 
||         FADDDP  .L2     B9:B8,B7:B6,B5:B4 ; |181| 

           LDDW    .D2T2   *+SP(232),B9:B8   ; |514| 
   [ B0]   MVK     .L1     0x1,A1
           FADDDP  .L2X    B21:B20,A25:A24,B31:B30 ; |516| 
           STDW    .D2T2   B7:B6,*+SP(192)   ; |535| 
           LDDW    .D2T2   *+SP(160),B7:B6   ; |552| 
           DADD    .L1X    0,B5:B4,A29:A28   ; |535| 
           LDDW    .D2T2   *+SP(248),B5:B4   ; |552| 
           FMPYDP  .M1     A7:A6,A23:A22,A23:A22 ; |181| 
           FMPYDP  .M1     A7:A6,A27:A26,A7:A6 ; |178| 
   [!A1]   DADD    .L1     0,A17:A16,A5:A4   ; |508| 
   [!B0]   MVK     .L1     0x1,A0
           FADDDP  .L2     B3:B2,B5:B4,B21:B20 ; |178| 
           FMPYDP  .M2     B25:B24,B7:B6,B5:B4 ; |514| 
           LDDW    .D2T2   *+SP(200),B7:B6   ; |514| 
           STW     .D2T1   A3,*+SP(152)      ; |484| 
   [!A0]   DADD    .L1     0,A19:A18,A5:A4   ; |506| 
           STDW    .D2T2   B5:B4,*+SP(144)   ; |514| 
           LDW     .D2T2   *+SP(376),B4      ; |514| 
           LDW     .D2T2   *+SP(380),B5      ; |514| 
           LDDW    .D2T1   *+SP(232),A19:A18 ; |552| 
           MVKL    .S1     0x3f8f5c28,A3
           MVKH    .S1     0x3f8f5c28,A3
           STW     .D2T1   A3,*+SP(156)      ; |484| 

           DADD    .L1X    0,B5:B4,A27:A26   ; |514| 
||         DADD    .L2     0,B13:B12,B5:B4   ; |514| 

           FMPYDP  .M2     B7:B6,B9:B8,B5:B4 ; |181| 
||         FSUBDP  .L2     B23:B22,B5:B4,B7:B6 ; |518| 

           LDW     .D2T2   *+SP(256),B28     ; |514| 
           LDW     .D2T2   *+SP(260),B29     ; |514| 
           LDW     .D2T2   *+SP(300),B19     ; |514| 

           DADD    .L2     0,B5:B4,B9:B8     ; |181| 
||         MVKL    .S2     0x7e0e22d8,B4

           MVKH    .S2     0x7e0e22d8,B4
||         LDW     .D2T2   *+SP(380),B5      ; |552| 

           STW     .D2T2   B4,*+SP(160)      ; |552| 
           LDW     .D2T2   *+SP(376),B4      ; |552| 
           LDW     .D2T2   *+SP(296),B18     ; |514| 
           LDDW    .D2T2   *+SP(216),B23:B22 ; |514| 
           MVKL    .S1     0x3953dea4,A3
           MVKL    .S2     0x3f50624d,B27

           FMPYDP  .M1X    B5:B4,A19:A18,A19:A18 ; |180| 
||         MVKL    .S2     0x40178787,B4

           MVKH    .S2     0x40178787,B4
           STW     .D2T2   B4,*+SP(164)      ; |552| 
           LDW     .D2T2   *+SP(176),B4      ; |552| 

           DADD    .L1     0,A19:A18,A1:A0   ; |180| 
||         LDDW    .D2T1   *+SP(152),A19:A18 ; |514| 

           LDDW    .D2T1   *+SP(224),A17:A16 ; |514| 
||         MVKH    .S1     0x3953dea4,A3

           MVKL    .S2     0xd2f1a9fc,B26
||         STW     .D2T1   A3,*+SP(384)      ; |550| 

           MVKH    .S2     0x3f50624d,B27
||         FADDDP  .L2X    A7:A6,B21:B20,B1:B0 ; |178| 
||         LDW     .D2T1   *+SP(276),A7      ; |553| 

           STW     .D2T2   B4,*+SP(280)      ; |514| 
||         DADD    .L2X    0,A31:A30,B5:B4   ; |514| 
||         MVKH    .S2     0xd2f1a9fc,B26

           FMPYDP  .M2X    A19:A18,B5:B4,B19:B18 ; |484| 
||         FSUBDP  .L2     B27:B26,B23:B22,B5:B4 ; |514| 
||         FADDDP  .S2     B19:B18,B29:B28,B27:B26 ; |177| 
||         LDDW    .D2T2   *+SP(168),B23:B22 ; |484| 

           FMPYDP  .M2X    B17:B16,A5:A4,B25:B24 ; |535| 
||         FMPYDP  .M1     A27:A26,A17:A16,A17:A16 ; |177| 
||         LDW     .D2T1   *+SP(272),A6      ; |553| 

           FMPYDP  .M1     A9:A8,A5:A4,A27:A26 ; |537| 
||         LDW     .D2T1   *+SP(352),A8      ; |553| 

;----------------------------------------------------------------------
; 553 | in->angular_acceleration[Z] = qbar *                                   
;----------------------------------------------------------------------
           LDW     .D2T1   *+SP(356),A9      ; |553| 
           LDW     .D2T1   *+SP(264),A4      ; |553| 

           FSUBDP  .L2     B25:B24,B23:B22,B23:B22 ; |535| 
||         LDW     .D2T2   *+SP(368),B24     ; |553| 

           LDW     .D2T2   *+SP(372),B25     ; |553| 
||         MVKL    .S1     0x400ae9ed,A3

           LDW     .D2T1   *+SP(268),A5      ; |553| 
||         MVKH    .S1     0x400ae9ed,A3

           STW     .D2T1   A3,*+SP(388)      ; |550| 
           LDW     .D2T1   *+SP(352),A24     ; |535| 
           LDW     .D2T1   *+SP(356),A25     ; |535| 

           FMPYDP  .M2     B25:B24,B11:B10,B21:B20 ; |536| 
||         LDDW    .D2T2   *+SP(184),B25:B24 ; |553| 
||         MVKL    .S1     0x4003efa6,A13

           MVKL    .S1     0x3decca99,A12
||         FMPYDP  .M1X    A9:A8,B7:B6,A7:A6 ; |550| 
||         FADDDP  .L1     A7:A6,A5:A4,A9:A8 ; |514| 
||         LDW     .D2T1   *+SP(384),A4      ; |550| 

           MVKH    .S1     0x4003efa6,A13
||         LDW     .D2T1   *+SP(388),A5      ; |550| 

           MVKH    .S1     0x3decca99,A12
||         FADDDP  .L2X    A17:A16,B27:B26,B3:B2 ; |177| 
||         LDDW    .D2T1   *+SP(240),A17:A16 ; |550| 

           DADD    .L2X    0,A29:A28,B29:B28 ; |484| 
||         FMPYDP  .M1X    A13:A12,B7:B6,A29:A28 ; |553| 
||         LDDW    .D2T2   *+SP(192),B7:B6   ; |550| 

           FMPYDP  .M2     B17:B16,B25:B24,B25:B24 ; |537| 
||         LDDW    .D2T2   *+SP(200),B17:B16 ; |550| 

;----------------------------------------------------------------------
; 554 | (0.27744448 * roll_moment + 2.4920163 * yaw_moment);                   
;----------------------------------------------------------------------
           FMPYDP  .M1X    A25:A24,B31:B30,A25:A24 ; |553| 
           FMPYDP  .M1X    A5:A4,B31:B30,A5:A4 ; |550| 
           FSUBDP  .L2X    B29:B28,A23:A22,B29:B28 ; |181| 

           FSUBDP  .L2     B23:B22,B7:B6,B27:B26 ; |535| 
||         LDDW    .D2T2   *+SP(144),B7:B6   ; |536| 

           FMPYDP  .M2X    B17:B16,A17:A16,B31:B30 ; |179| 
||         LDW     .D2T2   *+SP(284),B17     ; |536| 

           LDW     .D2T2   *+SP(280),B16     ; |536| 
           FADDDP  .L2     B9:B8,B29:B28,B9:B8 ; |181| 
           ZERO    .L2     B23:B22           ; |485| 
           FSUBDP  .L2     B5:B4,B7:B6,B7:B6 ; |514| 
           FMPYDP  .M1X    A21:A20,B21:B20,A19:A18 ; |536| 
           FMPYDP  .M2X    B17:B16,A9:A8,B5:B4 ; |514| 

           FMPYDP  .M2     B11:B10,B27:B26,B17:B16 ; |535| 
||         ADDAW   .D2     SP,20,B26         ; |185| 

           FSUBDP  .L2X    B1:B0,A1:A0,B21:B20 ; |180| 
||         CMPLTDP .S2     B19:B18,B23:B22,B0 ; |485| 
||         STDW    .D2T2   B9:B8,*+B26(16)   ; |187| 

           LDDW    .D2T2   *+SP(160),B9:B8   ; |537| 

           FADDDP  .L1X    B25:B24,A27:A26,A17:A16 ; |537| 
||         FSUBDP  .S2     B3:B2,B31:B30,B25:B24 ; |179| 

           MVKL    .S1     0x35e50d53,A2

   [ B0]   ZERO    .L2     B19:B18           ; |486| 
||         FADDDP  .L1     A7:A6,A5:A4,A7:A6 ; |550| 
||         MVKH    .S1     0x35e50d53,A2

           FADDDP  .L2     B17:B16,B19:B18,B19:B18 ; |535| 
||         MVKL    .S1     0x3fd0d794,A3
||         STDW    .D2T2   B25:B24,*B26      ; |185| 

           FMPYDP  .M2     B9:B8,B11:B10,B9:B8 ; |552| 
||         FMPYDP  .M1X    B11:B10,A17:A16,A17:A16 ; |537| 
||         STDW    .D2T2   B21:B20,*+B26(8)  ; |186| 
||         MVKH    .S1     0x3fd0d794,A3

           FSUBDP  .L2     B7:B6,B5:B4,B5:B4 ; |514| 
||         ZERO    .S2     B22               ; |537| 
||         LDDW    .D2T2   *+SP(88),B17:B16  ; |536| 
||         MV      .L1     A3,A23            ; |536| 
||         MV      .S1     A2,A22            ; |536| 

           SET     .S2     B22,31,31,B6      ; |537| 
||         FMPYDP  .M1     A23:A22,A19:A18,A19:A18 ; |536| 

           FADDDP  .L1     A29:A28,A25:A24,A21:A20 ; |553| 
||         FMPYDP  .M1X    A3:A2,B19:B18,A7:A6 ; |535| 
||         FMPYDP  .M2X    B11:B10,A7:A6,B19:B18 ; |550| 

           XOR     .L1X    A17,B6,A17        ; |537| 
||         FMPYDP  .M2     B5:B4,B9:B8,B7:B6 ; |552| 
||         LDDW    .D2T2   *+SP(96),B5:B4    ; |537| 
||         MV      .S1     A3,A9             ; |536| 
||         MV      .D1     A2,A8             ; |536| 

           LDDW    .D2T2   *+SP(80),B21:B20  ; |535| 
||         FMPYDP  .M1     A9:A8,A17:A16,A5:A4 ; |537| 

           FADDDP  .L1X    B17:B16,A19:A18,A9:A8 ; |536| 
           FMPYDP  .M2X    B11:B10,A21:A20,B9:B8 ; |553| 
           STDW    .D1T2   B19:B18,*+A10(128) ; |550| 

           FADDDP  .L1X    B5:B4,A5:A4,A17:A16 ; |537| 
||         STDW    .D1T1   A9:A8,*+A10(56)   ; |536| 

           FADDDP  .L1X    B21:B20,A7:A6,A5:A4 ; |535| 
||         STDW    .D1T2   B7:B6,*+A10(136)  ; |552| 

           STDW    .D1T2   B9:B8,*+A10(144)  ; |553| 
           STDW    .D1T1   A17:A16,*+A10(64) ; |537| 

           STDW    .D1T1   A5:A4,*+A10(48)   ; |535| 
||         ADDK    .S2     400,SP            ; |555| 

           LDW     .D2T2   *++SP(8),B3       ; |555| 
           LDDW    .D2T1   *++SP,A11:A10     ; |555| 
           LDDW    .D2T1   *++SP,A13:A12     ; |555| 
           LDDW    .D2T2   *++SP,B11:B10     ; |555| 
           LDDW    .D2T2   *++SP,B13:B12     ; |555| 
           RET     .S2     B3                ; |555| 
           LDW     .D2T1   *++SP(8),A14      ; |555| 
           NOP             4
           ; BRANCH OCCURS {B3}              ; |555|