rbowler/spinhawk

PATCH: Fix all-zero MAC bug in LCS cmd reply frame handling

Fish-Git opened this issue · 2 comments

In case the above URLs become inaccessible, the explanation of the issue is as follows:

The following patch (see next comment for this GitHub issue) for Hercules 3.10 spinhawk corrects a bug causing unreliable LCS connections resulting from the guest incorrectly being told its MAC address is 00:00:00:00:00:00 due to a race condition that was identified and fixed in Hercules 4.0 Hyperion over 3 years ago.

Symptoms of this bug include a frequently non-functioning guest network connection as soon as networking is enabled in the guest. That is to say, as soon as the guest enables TCPIP networking it is, nevertheless, still unable to reach the host or anywhere else, and neither the host nor anyone else is able to reach the guest, even though the guest's networking configuration is correct.

Depending on the speed of the host this behavior can occur greater than 50% of the time. The only way to correct the problem is to try again and again until it finally works (if it ever does).

Users who run MVS or z/OS in their guest can determine whether this bug has occurred by entering the command "netstat arp all" from the TSO option 6 panel or by entering the command "d tcpip,xxxxx,netstat,arp" on the master console (where "xxxx" is the name of your tcpip configuration, usually "tcpip").

You can also determine whether the problem has occurred from the host as well, by entering the appropriate command to display the host's ARP table after unsuccessfully trying to ping the guest. On both Windows and Linux the command is "arp -a".

If a MAC address of all zeros is displayed for your guest's IP address then the bug has definitely occurred and you should apply the patch and rebuild Hercules. (See next comment for this GitHub issue for the patch)

Here is the patch: (GitHub will probably mangle it but there's nothing I can do about that)

From: "Fish" (David B. Trout) fish@softdevlabs.com
Date: Mon, 14 Jul 2014 23:26:02 -0800
Subject: [PATCH] Fix all-zero MAC bug in LCS cmd reply frame handling:

Orignal 2/27/2011 1:42:25 AM hyperion commit d9b032a

diff -r -a -x .git -x 'msvc.AMD64.' -x 'msvc.dllmod.' -x 'msvc.debug.' -x '.suo' -x '.ncb' -x '.user' -x '.htm' -x WORK -x DICTS -x FILES -Nu spinhawk-0/ctc_lcs.c spinhawk-LCS/ctc_lcs.c
--- spinhawk-0/ctc_lcs.c 2014-07-05 22:20:13.100884300 -0700
+++ spinhawk-LCS/ctc_lcs.c 2014-07-14 06:41:30.346383800 -0700
@@ -107,8 +107,8 @@
static int LCS_EnqueueEthFrame( PLCSDEV pLCSDEV, BYTE bPort,
BYTE
pData, size_t iSize );

-static void* LCS_InitReplyFrame( PLCSDEV pLCSDEV,

  •                                size_t  iSize, PLCSCMDHDR pCmdFrame );
    

    +static int LCS_EnqueueReplyFrame( PLCSDEV pLCSDEV, PLCSCMDHDR pReply,

  •                                                    size_t     iSize );
    

    static int BuildOAT( char* pszOATName, PLCSBLK pLCSBLK );
    static char* ReadOAT( char* pszOATName, FILE* fp, char* pszBuff );
    @@ -116,6 +116,30 @@
    int argc, char** argv );

    // ====================================================================
    +// Helper macros
    +// ====================================================================
    +
    +#define INIT_REPLY_FRAME( reply, pCmdFrame ) \

  •                                                        \
    
  • memset( &(reply), 0, sizeof( reply )); \

  • memcpy( &(reply), (pCmdFrame), sizeof( LCSCMDHDR )); \

  • STORE_HW( (reply).bLCSCmdHdr.hwReturnCode, 0x0000 )

+#define ENQUEUE_REPLY_FRAME( pLCSDEV, reply ) \

  •                                                                        \
    
  • while \
  • (1 \
  •    && LCS_EnqueueReplyFrame( (pLCSDEV), (PLCSCMDHDR) &(reply),         \
    
  •                                                sizeof( reply )) != 0   \
    
  •    &&  (pLCSDEV)->pLCSBLK->Port[(pLCSDEV)->bPort].fd != -1             \
    
  •    && !(pLCSDEV)->pLCSBLK->Port[(pLCSDEV)->bPort].fCloseInProgress     \
    
  • ) \
  • { \
  •    TRACE("*\* ENQUEUE_REPLY_FRAME() failed...\n");                      \
    
  •    SLEEP( 1 );                                                         \
    
  • }

+// ====================================================================
// find_group_device
// ====================================================================

@@ -1160,29 +1184,18 @@

static void LCS_Startup( PLCSDEV pLCSDEV, PLCSCMDHDR pCmdFrame )
{

  • PLCSSTDFRM pStdCmdReplyFrame;
  • PLCSSTRTFRM pStartupCmdFrame;
  • LCSSTRTFRM reply;
    PLCSPORT pLCSPORT;
  • U16 iOrigMaxFrameBufferSize;

- pLCSPORT = &pLCSDEV->pLCSBLK->Port[pLCSDEV->bPort];

  • // Get a pointer to the next available reply frame
  • pStdCmdReplyFrame = (PLCSSTDFRM)LCS_InitReplyFrame( pLCSDEV,
  •                                          sizeof( LCSSTDFRM ),
    

- pCmdFrame );

  • // NOTE: pStdCmdReplyFrame now points to a reply frame
  • // the first part of which is an LCSCMDHDR which, except for
  • // the 'hwOffset' field, is an exact copy of the LCSCMDHDR

- // that was passed to us...

  • pStdCmdReplyFrame->bLCSCmdHdr.bLanType = LCS_FRMTYP_ENET;
  • pStdCmdReplyFrame->bLCSCmdHdr.bRelAdapterNo = pLCSDEV->bPort;
  • INIT_REPLY_FRAME( reply, pCmdFrame );
  • pStartupCmdFrame = (PLCSSTRTFRM)pCmdFrame;
  • reply.bLCSCmdHdr.bLanType = LCS_FRMTYP_ENET;
  • reply.bLCSCmdHdr.bRelAdapterNo = pLCSDEV->bPort;

// Save the max buffer size parameter

  • FETCH_HW( pLCSDEV->iMaxFrameBufferSize, pStartupCmdFrame->hwBufferSize );
  • iOrigMaxFrameBufferSize = pLCSDEV->iMaxFrameBufferSize;
  • FETCH_HW( pLCSDEV->iMaxFrameBufferSize, ((PLCSSTRTFRM)pCmdFrame)->hwBufferSize );

// Make sure it doesn't exceed our compiled maximum
if (pLCSDEV->iMaxFrameBufferSize > sizeof(pLCSDEV->bFrameBuffer))
@@ -1192,7 +1205,7 @@
pLCSDEV->pDEVBLK[1]->devnum,
pLCSDEV->iMaxFrameBufferSize,
sizeof( pLCSDEV->bFrameBuffer ) );

  •    pLCSDEV->iMaxFrameBufferSize = sizeof(pLCSDEV->bFrameBuffer);
    
  •    pLCSDEV->iMaxFrameBufferSize = iOrigMaxFrameBufferSize;
    

    }

    // Make sure it's not smaller than the compiled minimum size
    @@ -1203,9 +1216,11 @@
    pLCSDEV->pDEVBLK[1]->devnum,
    pLCSDEV->iMaxFrameBufferSize,
    CTC_MIN_FRAME_BUFFER_SIZE );

  •    pLCSDEV->iMaxFrameBufferSize = sizeof(pLCSDEV->bFrameBuffer);
    
  •    pLCSDEV->iMaxFrameBufferSize = iOrigMaxFrameBufferSize;
    

    }

  • pLCSPORT = &pLCSDEV->pLCSBLK->Port[pLCSDEV->bPort];

VERIFY( TUNTAP_SetIPAddr( pLCSPORT->szNetDevName, "0.0.0.0" ) == 0 );
VERIFY( TUNTAP_SetMTU ( pLCSPORT->szNetDevName, "1500" ) == 0 );

@@ -1217,7 +1232,7 @@
}
#endif // OPTION_TUNTAP_SETMACADDR

  • STORE_HW( pStdCmdReplyFrame->bLCSCmdHdr.hwReturnCode, 0x0000 );
  • ENQUEUE_REPLY_FRAME( pLCSDEV, reply );

pLCSDEV->fStarted = 1;
}
@@ -1228,22 +1243,14 @@

static void LCS_Shutdown( PLCSDEV pLCSDEV, PLCSCMDHDR pCmdFrame )
{

  • PLCSSTDFRM pStdCmdReplyFrame;
  • LCSSTDFRM reply;
  • // Get a pointer to the next available reply frame
  • pStdCmdReplyFrame = (PLCSSTDFRM)LCS_InitReplyFrame( pLCSDEV,
  •                                          sizeof( LCSSTDFRM ),
    

- pCmdFrame );

  • // NOTE: pStdCmdReplyFrame now points to a reply frame
  • // the first part of which is an LCSCMDHDR which, except for
  • // the 'hwOffset' field, is an exact copy of the LCSCMDHDR
  • // that was passed to us...
  • INIT_REPLY_FRAME( reply, pCmdFrame );
  • pStdCmdReplyFrame->bLCSCmdHdr.bLanType = LCS_FRMTYP_ENET;
  • pStdCmdReplyFrame->bLCSCmdHdr.bRelAdapterNo = pLCSDEV->bPort;
  • reply.bLCSCmdHdr.bLanType = LCS_FRMTYP_ENET;
  • reply.bLCSCmdHdr.bRelAdapterNo = pLCSDEV->bPort;
  • STORE_HW( pStdCmdReplyFrame->bLCSCmdHdr.hwReturnCode, 0x0000 );
  • ENQUEUE_REPLY_FRAME( pLCSDEV, reply );

pLCSDEV->fStarted = 0;
}
@@ -1254,13 +1261,15 @@

static void LCS_StartLan( PLCSDEV pLCSDEV, PLCSCMDHDR pCmdFrame )
{

  • LCSSTDFRM reply;
    PLCSPORT pLCSPORT;
    #ifdef OPTION_TUNTAP_DELADD_ROUTES
    PLCSRTE pLCSRTE;
    #endif // OPTION_TUNTAP_DELADD_ROUTES
  • PLCSSTDFRM pStdCmdReplyFrame;
    int nIFFlags;
  • INIT_REPLY_FRAME( reply, pCmdFrame );

pLCSPORT = &pLCSDEV->pLCSBLK->Port[pLCSDEV->bPort];

// Serialize access to eliminate ioctl errors
@@ -1327,17 +1336,7 @@
}
#endif // OPTION_TUNTAP_DELADD_ROUTES

  • // Get a pointer to the next available reply frame
  • pStdCmdReplyFrame = (PLCSSTDFRM)LCS_InitReplyFrame( pLCSDEV,
  •                                          sizeof( LCSSTDFRM ),
    

- pCmdFrame );

  • // NOTE: pStdCmdReplyFrame now points to a reply frame
  • // the first part of which is an LCSCMDHDR which, except for
  • // the 'hwOffset' field, is an exact copy of the LCSCMDHDR

- // that was passed to us...

  • STORE_HW( pStdCmdReplyFrame->bLCSCmdHdr.hwReturnCode, 0 );
  • ENQUEUE_REPLY_FRAME( pLCSDEV, reply );
    }

// ====================================================================
@@ -1346,12 +1345,14 @@

static void LCS_StopLan( PLCSDEV pLCSDEV, PLCSCMDHDR pCmdFrame )
{

  • LCSSTDFRM reply;
    PLCSPORT pLCSPORT;
  • PLCSSTDFRM pStdCmdReplyFrame;
    #ifdef OPTION_TUNTAP_DELADD_ROUTES
    PLCSRTE pLCSRTE;
    #endif // OPTION_TUNTAP_DELADD_ROUTES
  • INIT_REPLY_FRAME( reply, pCmdFrame );

pLCSPORT = &pLCSDEV->pLCSBLK->Port[pLCSDEV->bPort];

// Serialize access to eliminate ioctl errors
@@ -1401,17 +1402,7 @@
// FIXME: Really need to iterate through the devices and close
// the TAP interface if all devices have been stopped.

  • // Get a pointer to the next available reply frame
  • pStdCmdReplyFrame = (PLCSSTDFRM)LCS_InitReplyFrame( pLCSDEV,
  •                                          sizeof( LCSSTDFRM ),
    

- pCmdFrame );

  • // NOTE: pStdCmdReplyFrame now points to a reply frame
  • // the first part of which is an LCSCMDHDR which, except for
  • // the 'hwOffset' field, is an exact copy of the LCSCMDHDR

- // that was passed to us...

  • STORE_HW( pStdCmdReplyFrame->bLCSCmdHdr.hwReturnCode, 0 );
  • ENQUEUE_REPLY_FRAME( pLCSDEV, reply );
    }

// ====================================================================
@@ -1420,20 +1411,12 @@

static void LCS_QueryIPAssists( PLCSDEV pLCSDEV, PLCSCMDHDR pCmdFrame )
{

  • LCSQIPFRM reply;
    PLCSPORT pLCSPORT;
  • PLCSQIPFRM pStdCmdReplyFrame;
  • pLCSPORT = &pLCSDEV->pLCSBLK->Port[pLCSDEV->bPort];
  • INIT_REPLY_FRAME( reply, pCmdFrame );
  • // Get a pointer to the next available reply frame
  • pStdCmdReplyFrame = (PLCSQIPFRM)LCS_InitReplyFrame( pLCSDEV,
  •                                          sizeof( LCSQIPFRM ),
    

- pCmdFrame );

  • // NOTE: pStdCmdReplyFrame now points to a reply frame
  • // the first part of which is an LCSCMDHDR which, except for
  • // the 'hwOffset' field, is an exact copy of the LCSCMDHDR
  • // that was passed to us...
  • pLCSPORT = &pLCSDEV->pLCSBLK->Port[pLCSDEV->bPort];

#if defined( WIN32 )

@@ -1495,10 +1478,12 @@

#endif // WIN32

  • STORE_HW( pStdCmdReplyFrame->hwNumIPPairs, 0x0000 );
  • STORE_HW( pStdCmdReplyFrame->hwIPAssistsSupported, pLCSPORT->sIPAssistsSupported );
  • STORE_HW( pStdCmdReplyFrame->hwIPAssistsEnabled, pLCSPORT->sIPAssistsEnabled );
  • STORE_HW( pStdCmdReplyFrame->hwIPVersion, 0x0004 );
  • STORE_HW( reply.hwNumIPPairs, 0x0000 );
  • STORE_HW( reply.hwIPAssistsSupported, pLCSPORT->sIPAssistsSupported );
  • STORE_HW( reply.hwIPAssistsEnabled, pLCSPORT->sIPAssistsEnabled );
  • STORE_HW( reply.hwIPVersion, 0x0004 );
  • ENQUEUE_REPLY_FRAME( pLCSDEV, reply );
    }

// ====================================================================
@@ -1507,26 +1492,16 @@

static void LCS_LanStats( PLCSDEV pLCSDEV, PLCSCMDHDR pCmdFrame )
{

  • LCSLSTFRM reply;
    PLCSPORT pLCSPORT;
  • PLCSLSTFRM pStdCmdReplyFrame;
    int fd;
    struct ifreq ifr;
    BYTE* pPortMAC;
    BYTE* pIFaceMAC;

- pLCSPORT = &pLCSDEV->pLCSBLK->Port[pLCSDEV->bPort];

  • // Get a pointer to the next available reply frame
  • pStdCmdReplyFrame = (PLCSLSTFRM)LCS_InitReplyFrame( pLCSDEV,
  •                                          sizeof( LCSLSTFRM ),
    

- pCmdFrame );

  • // NOTE: pStdCmdReplyFrame now points to a reply frame
  • // the first part of which is an LCSCMDHDR which, except for
  • // the 'hwOffset' field, is an exact copy of the LCSCMDHDR
  • // that was passed to us...
  • INIT_REPLY_FRAME( reply, pCmdFrame );
  • STORE_HW( pStdCmdReplyFrame->bLCSCmdHdr.hwReturnCode, 0x0000 );
  • pLCSPORT = &pLCSDEV->pLCSBLK->Port[pLCSDEV->bPort];

fd = socket( AF_INET, SOCK_STREAM, IPPROTO_IP );

@@ -1535,7 +1510,7 @@
logmsg( _("HHCLC007E Error in call to socket: %s.\n"),
strerror( HSO_errno ) );
// FIXME: we should probably be returning a non-zero hwReturnCode

  •    // STORE_HW( pStdCmdReplyFrame->bLCSCmdHdr.hwReturnCode, 0x0001 );
    
  •    // STORE_HW( reply.bLCSCmdHdr.hwReturnCode, 0x0001 );
     return;
    
    }

@@ -1553,7 +1528,7 @@
logmsg( _("HHCLC008E ioctl error on device %s: %s.\n"),
pLCSPORT->szNetDevName, strerror( errno ) );
// FIXME: we should probably be returning a non-zero hwReturnCode

  •    // STORE_HW( pStdCmdReplyFrame->bLCSCmdHdr.hwReturnCode, 0x0002 );
    
  •    // STORE_HW( reply.bLCSCmdHdr.hwReturnCode, 0x0002 );
     return;
    

    }
    pIFaceMAC = (BYTE*) ifr.ifr_hwaddr.sa_data;
    @@ -1580,14 +1555,16 @@

     memcpy( pPortMAC, pIFaceMAC, IFHWADDRLEN );
    
  •    snprintf(pLCSPORT->szMACAddress, sizeof(pLCSPORT->szMACAddress),
    
  •    snprintf(pLCSPORT->szMACAddress, sizeof(pLCSPORT->szMACAddress)-1,
         "%2.2X:%2.2X:%2.2X:%2.2X:%2.2X:%2.2X", *(pPortMAC+0), *(pPortMAC+1),
         *(pPortMAC+2), *(pPortMAC+3), *(pPortMAC+4), *(pPortMAC+5));
    

    }

  • memcpy( pStdCmdReplyFrame->MAC_Address, pIFaceMAC, IFHWADDRLEN );

  • memcpy( reply.MAC_Address, pIFaceMAC, IFHWADDRLEN );

// FIXME: Really should read /proc/net/dev to retrieve actual stats
+

  • ENQUEUE_REPLY_FRAME( pLCSDEV, reply );
    }

// ====================================================================
@@ -1596,22 +1573,14 @@

static void LCS_DefaultCmdProc( PLCSDEV pLCSDEV, PLCSCMDHDR pCmdFrame )
{

  • PLCSSTDFRM pStdCmdReplyFrame;
  • LCSSTDFRM reply;
  • // Get a pointer to the next available reply frame
  • pStdCmdReplyFrame = (PLCSSTDFRM)LCS_InitReplyFrame( pLCSDEV,
  •                                          sizeof( LCSSTDFRM ),
    

- pCmdFrame );

  • // NOTE: pStdCmdReplyFrame now points to a reply frame
  • // the first part of which is an LCSCMDHDR which, except for
  • // the 'hwOffset' field, is an exact copy of the LCSCMDHDR
  • // that was passed to us...
  • INIT_REPLY_FRAME( reply, pCmdFrame );
  • pStdCmdReplyFrame->bLCSCmdHdr.bLanType = LCS_FRMTYP_ENET;
  • pStdCmdReplyFrame->bLCSCmdHdr.bRelAdapterNo = pLCSDEV->bPort;
  • reply.bLCSCmdHdr.bLanType = LCS_FRMTYP_ENET;
  • reply.bLCSCmdHdr.bRelAdapterNo = pLCSDEV->bPort;
  • STORE_HW( pStdCmdReplyFrame->bLCSCmdHdr.hwReturnCode, 0x0000 );
  • ENQUEUE_REPLY_FRAME( pLCSDEV, reply );
    }

// ====================================================================
@@ -1678,8 +1647,7 @@
break;
logmsg( _("HHCLC042E Port %2.2X: Read error: %s\n"),
pLCSPORT->bPort, strerror( errno ) );

  •        SLEEP(1);           // (purposeful long delay)
    
  •        continue;
    
  •        break;
     }
    
     if( pLCSPORT->pLCSBLK->fDebug )
    

    @@ -1940,7 +1908,7 @@
    PLCSETHFRM pLCSEthFrame;

    // Will frame NEVER fit into buffer??

  • if( iSize > MAX_LCS_ETH_FRAME_SIZE( pLCSDEV ) )

  • if( iSize > MAX_LCS_ETH_FRAME_SIZE( pLCSDEV ) || iSize > 9000 )
    {
    errno = EMSGSIZE; // Message too long
    return -1; // (-1==failure)
    @@ -1965,7 +1933,7 @@
    pLCSDEV->iFrameOffset );

// Increment offset to NEXT available slot (after ours)

  • pLCSDEV->iFrameOffset += sizeof(LCSETHFRM) + iSize;
  • pLCSDEV->iFrameOffset += (U16)(sizeof(LCSETHFRM) + iSize);

// Plug updated offset to next frame into our frame header
STORE_HW( pLCSEthFrame->bLCSHdr.hwOffset, pLCSDEV->iFrameOffset );
@@ -1991,39 +1959,43 @@
}

// ====================================================================
-// LCS_InitReplyFrame
+// LCS_EnqueueReplyFrame
// ====================================================================
//
-// Returns a pointer to the next available frame slot of iSize bytes
-// of type LCSCMDHDR (i.e. the partially pre-initialized reply frame)
-//
-// As a part of frame setup, initializes the reply frame with basic
-// information that is provided in the original frame for which this
-// is a reply frame for. Note that only Command frames have replies.
+// Copy a pre-built LCS Command Frame reply frame of iSize bytes
+// to the next available frame slot. Returns 0 on success, -1 and
+// errno set to ENOBUFS on failure (no room (yet) in o/p buffer).
+// The LCS device lock must NOT be held when called.
//
// --------------------------------------------------------------------

-static void* LCS_InitReplyFrame( PLCSDEV pLCSDEV,

  •                              size_t  iSize, PLCSCMDHDR pCmdFrame )
    

    +static int LCS_EnqueueReplyFrame( PLCSDEV pLCSDEV, PLCSCMDHDR pReply,

  •                                                size_t     iSize )
    

    {
    PLCSCMDHDR pReplyCmdFrame;

    obtain_lock( &pLCSDEV->Lock );

  • // Ensure we dont overflow the buffer

  • if( ( pLCSDEV->iFrameOffset + // Current buffer Offset

  •      iSize +                           // Size of reply frame
    
  •      sizeof(pReply->bLCSHdr.hwOffset)) // Size of Frame terminator
    
  •    > pLCSDEV->iMaxFrameBufferSize )    // Size of Frame buffer
    
  • {

  •    release_lock( &pLCSDEV->Lock );
    
  •    errno = ENOBUFS;                    // No buffer space available
    
  •    return -1;                          // (-1==failure)
    
  • }

// Point to next available LCS Frame slot in our buffer...
pReplyCmdFrame = (PLCSCMDHDR)( pLCSDEV->bFrameBuffer +
pLCSDEV->iFrameOffset );

  • // Increment buffer offset to NEXT next-available-slot...

- pLCSDEV->iFrameOffset += iSize;

  • // Initialize our Reply LCS Command Frame...
  • memset( pReplyCmdFrame, 0, iSize );
  • // Copy the reply frame into the frame buffer slot...
  • memcpy( pReplyCmdFrame, pReply, iSize );
  • // Use the LCS Command Frame header that was passed to us
  • // as a template for the Reply LCS Command Frame we're to
  • // build...
  • memcpy( pReplyCmdFrame, pCmdFrame, sizeof( LCSCMDHDR ) );
  • // Increment buffer offset to NEXT next-available-slot...
  • pLCSDEV->iFrameOffset += (U16) iSize;

// Store offset of next frame
STORE_HW( pReplyCmdFrame->bLCSHdr.hwOffset, pLCSDEV->iFrameOffset );
@@ -2033,7 +2005,7 @@

release_lock( &pLCSDEV->Lock );

  • return pReplyCmdFrame;
  • return 0; // success
    }

// ====================================================================