google/nftables

Objects implementation refactor

turekt opened this issue · 1 comments

turekt commented

Hi,

I am creating an issue as discussed in PR #244.

TL;DR

Current object implementation seems to consist of mostly duplicated code from the expr package. This issue tries to present the problem and propose an implementation which reuses the expr implementation, removes duplicated code and provides immediate support of all stateful object types supported by nftables.

Observations

During implementation of quota objects I noticed that the elements in the object (Obj) structs are duplicated. Observe the following counter object CounterObj in counter.go against Counter in expr/counter.go:

type CounterObj struct {
	Table *Table
	Name  string // e.g. “fwded”

	Bytes   uint64
	Packets uint64
}
...
type Counter struct {
	Bytes   uint64
	Packets uint64
}

Note how expression elements in the struct (Packets and Bytes) are duplicated. Here is their corresponding unmarshaling logic:

func (c *CounterObj) unmarshal(ad *netlink.AttributeDecoder) error {
	for ad.Next() {
		switch ad.Type() {
		case unix.NFTA_COUNTER_BYTES:
			c.Bytes = ad.Uint64()
		case unix.NFTA_COUNTER_PACKETS:
			c.Packets = ad.Uint64()
		}
	}
	return ad.Err()
}
...
func (e *Counter) unmarshal(fam byte, data []byte) error {
	ad, err := netlink.NewAttributeDecoder(data)
	if err != nil {
		return err
	}
	ad.ByteOrder = binary.BigEndian
	for ad.Next() {
		switch ad.Type() {
		case unix.NFTA_COUNTER_BYTES:
			e.Bytes = ad.Uint64()
		case unix.NFTA_COUNTER_PACKETS:
			e.Packets = ad.Uint64()
		}
	}
	return ad.Err()
}

Marshaling logic is a bit more different since object marshaling creates a complete expression, but the data specific implementation is duplicated:

// returns only expression
func (e *Counter) marshal(fam byte) ([]byte, error) {
	data, err := netlink.MarshalAttributes([]netlink.Attribute{
		{Type: unix.NFTA_COUNTER_BYTES, Data: binaryutil.BigEndian.PutUint64(e.Bytes)},
		{Type: unix.NFTA_COUNTER_PACKETS, Data: binaryutil.BigEndian.PutUint64(e.Packets)},
	})
	if err != nil {
		return nil, err
	}

	return netlink.MarshalAttributes([]netlink.Attribute{
		{Type: unix.NFTA_EXPR_NAME, Data: []byte("counter\x00")},
		{Type: unix.NLA_F_NESTED | unix.NFTA_EXPR_DATA, Data: data},
	})
}
...
// returns complete object, but the expression part is more or less the same (except the NFTA_EXPR_DATA part)
func (c *CounterObj) marshal(data bool) ([]byte, error) {
	obj, err := netlink.MarshalAttributes([]netlink.Attribute{
		{Type: unix.NFTA_COUNTER_BYTES, Data: binaryutil.BigEndian.PutUint64(c.Bytes)},
		{Type: unix.NFTA_COUNTER_PACKETS, Data: binaryutil.BigEndian.PutUint64(c.Packets)},
	})
	if err != nil {
		return nil, err
	}
	const NFT_OBJECT_COUNTER = 1 // TODO: get into x/sys/unix
	attrs := []netlink.Attribute{
		{Type: unix.NFTA_OBJ_TABLE, Data: []byte(c.Table.Name + "\x00")},
		{Type: unix.NFTA_OBJ_NAME, Data: []byte(c.Name + "\x00")},
		{Type: unix.NFTA_OBJ_TYPE, Data: binaryutil.BigEndian.PutUint32(NFT_OBJECT_COUNTER)},
	}
	if data {
		attrs = append(attrs, netlink.Attribute{Type: unix.NLA_F_NESTED | unix.NFTA_OBJ_DATA, Data: obj})
	}
	return netlink.MarshalAttributes(attrs)
}

Furthermore, note the marshaling implementation difference between objects and expressions (NFTA_EXPR_DATA vs NFTA_OBJ_DATA):

// exprsFromBytes func in expr/expr.go
for ad.Next() {
	switch ad.Type() {
	case unix.NFTA_EXPR_NAME:
		name = ad.String()
		if name == "notrack" {
			e := &Notrack{}
			exprs = append(exprs, e)
		}
	case unix.NFTA_EXPR_DATA:
		var e Any
		switch name {
		case "ct":
			e = &Ct{}
		case "range":
			e = &Range{}
		case "meta":
			e = &Meta{}
		case "cmp":
			e = &Cmp{}
		case "counter":
			e = &Counter{}
		...
		case "hash":
			e = &Hash{}
		}
		if e == nil {
			// TODO: introduce an opaque expression type so that users know
			// something is here.
			continue // unsupported expression type
		}

		ad.Do(func(b []byte) error {
			if err := Unmarshal(fam, b, e); err != nil {
				return err
			}
			// Verdict expressions are a special-case of immediate expressions, so
			// if the expression is an immediate writing nothing into the verdict
			// register (invalid), re-parse it as a verdict expression.
			if imm, isImmediate := e.(*Immediate); isImmediate && imm.Register == unix.NFT_REG_VERDICT && len(imm.Data) == 0 {
				e = &Verdict{}
				if err := Unmarshal(fam, b, e); err != nil {
					return err
				}
			}
			exprs = append(exprs, e)
			return nil
		})
...
// objFromMsg under obj.go
for ad.Next() {
	switch ad.Type() {
	case unix.NFTA_OBJ_TABLE:
		table = &Table{Name: ad.String(), Family: TableFamily(msg.Data[0])}
	case unix.NFTA_OBJ_NAME:
		name = ad.String()
	case unix.NFTA_OBJ_TYPE:
		objectType = ad.Uint32()
	case unix.NFTA_OBJ_DATA:
		switch objectType {
		case NFT_OBJECT_COUNTER:
			o := CounterObj{
				Table: table,
				Name:  name,
			}

			ad.Do(func(b []byte) error {
				ad, err := netlink.NewAttributeDecoder(b)
				if err != nil {
					return err
				}
				ad.ByteOrder = binary.BigEndian
				return o.unmarshal(ad)
			})
			return &o, ad.Err()
		case NFT_OBJECT_QUOTA:
			o := QuotaObj{
				Table: table,
				Name:  name,
			}

			ad.Do(func(b []byte) error {
				ad, err := netlink.NewAttributeDecoder(b)
				if err != nil {
					return err
				}
				ad.ByteOrder = binary.BigEndian
				return o.unmarshal(ad)
			})
			return &o, ad.Err()
		}

All implementations are quite similar, if not the same. Essentially, nftables object contains expressions embedded inside of a struct. This is seen in the original nftables implementation (nft_object_attributes holds NFTA_OBJ_DATA which is a nested nftables expression):

/**
 * enum nft_object_attributes - nf_tables stateful object netlink attributes
 *
 * @NFTA_OBJ_TABLE: name of the table containing the expression (NLA_STRING)
 * @NFTA_OBJ_NAME: name of this expression type (NLA_STRING)
 * @NFTA_OBJ_TYPE: stateful object type (NLA_U32)
 * @NFTA_OBJ_DATA: stateful object data (NLA_NESTED)
 * @NFTA_OBJ_USE: number of references to this expression (NLA_U32)
 * @NFTA_OBJ_HANDLE: object handle (NLA_U64)
 * @NFTA_OBJ_USERDATA: user data (NLA_BINARY)
 */
enum nft_object_attributes {
	NFTA_OBJ_UNSPEC,
	NFTA_OBJ_TABLE,
	NFTA_OBJ_NAME,
	NFTA_OBJ_TYPE,
	NFTA_OBJ_DATA,
	NFTA_OBJ_USE,
	NFTA_OBJ_HANDLE,
	NFTA_OBJ_PAD,
	NFTA_OBJ_USERDATA,
	__NFTA_OBJ_MAX
};

Proposal

Current implementation works well, but I believe it can be made more generic by refactoring the obj.go code where the ObjectAttributes holds:

  • table
  • table family
  • expr.Any struct

The implementation for fetching table and table family is the same for each object, as seen here:

  • nftables/counter.go

    Lines 44 to 50 in 33ee8df

    func (c *CounterObj) table() *Table {
    return c.Table
    }
    func (c *CounterObj) family() TableFamily {
    return c.Table.Family
    }
  • nftables/quota.go

    Lines 74 to 80 in 33ee8df

    func (q *QuotaObj) table() *Table {
    return q.Table
    }
    func (q *QuotaObj) family() TableFamily {
    return q.Table.Family
    }

Since expr.Any struct marshaling is implemented under expr/expr.go with implementations for each struct in their corresponding files (expr/counter.go for counter, expr/quota.go for quotas, etc.), the generic object would perform the same marshaling logic via objFromMsg
as it does now for table, name and type, but serialization of NFTA_OBJ_DATA would be handed to the corresponding serializer in expr/expr.go.

With this change, implementation would benefit from immediate support of all object types and there would be no need to implement each object type separately by duplicating already existing code.

The only problem with this refactor is a major change of the current codebase where the serialization of NFTA_EXPR_DATA in all expressions supporting objects should be "decoupled" from the current marshal implementation. Observe the following code:

func (e *Counter) marshal(fam byte) ([]byte, error) {
	data, err := netlink.MarshalAttributes([]netlink.Attribute{
		{Type: unix.NFTA_COUNTER_BYTES, Data: binaryutil.BigEndian.PutUint64(e.Bytes)},
		{Type: unix.NFTA_COUNTER_PACKETS, Data: binaryutil.BigEndian.PutUint64(e.Packets)},
	})
	if err != nil {
		return nil, err
	}

	return netlink.MarshalAttributes([]netlink.Attribute{
		{Type: unix.NFTA_EXPR_NAME, Data: []byte("counter\x00")},
		{Type: unix.NLA_F_NESTED | unix.NFTA_EXPR_DATA, Data: data},
	})
}

The initial call to netlink.MarshalAttributes returns the data that we would like to reuse in CounterObj:

func (c *CounterObj) marshal(data bool) ([]byte, error) {
	obj, err := netlink.MarshalAttributes([]netlink.Attribute{	// this call is exactly the same as in `Counter`
		{Type: unix.NFTA_COUNTER_BYTES, Data: binaryutil.BigEndian.PutUint64(c.Bytes)},
		{Type: unix.NFTA_COUNTER_PACKETS, Data: binaryutil.BigEndian.PutUint64(c.Packets)},
	})
	if err != nil {
		return nil, err
	}
	...
}

In order to perform the previously mentioned refactor idea, all expressions should have the ability to create just the data bytes and return them without the name part.

I'll try to draft what I said previously in Go code.

Some basic changes:

type Obj interface {
	table()  *Table
	family() TableFamily
	// no marshal and unmarshal since these are handed over to the modified implementation in expr package
	data()   *expr.Any  // object data is defined here
	name()	 string     // object name is defined here
	type()   uint32	    // object type is defined here
}

type ObjAttr struct {
	Table   *Table
	Name    string
	ObjType uint32	   // this can also be an enum ...
	Obj     *expr.Any
}

func (o *ObjAttr) table() *Table {
	return o.Table
}

func (o *ObjAttr) family() TableFamily {
	return o.Table.Family
}

func (o *ObjAttr) data() *expr.Any {
	return o.Obj
}

func (o *ObjAttr) name() string {
	// if we introduce `objectType -> typeName` mapping, we can do that through that mapping as well
	return o.Name
}

func (o *ObjAttr) type() uint32 {
	return o.ObjType
}

The unmarshaling part:

func objFromMsg(msg netlink.Message) (Obj, error) {
	...
	for ad.Next() {
		switch ad.Type() {
		case unix.NFTA_OBJ_TABLE:
			table = &Table{Name: ad.String(), Family: TableFamily(msg.Data[0])}
		case unix.NFTA_OBJ_NAME:
			name = ad.String()
		case unix.NFTA_OBJ_TYPE:
			objectType = ad.Uint32()
		case unix.NFTA_OBJ_DATA:
			o := ObjAttr{
				Table:   table,
				Name:    name,
				ObjType: objectType,
			}

			ad.Do(func(b []byte) error {
				// my guess is that ParseExprMsgFunc would be the calling function used from the nftables package ...
				// we could technically add the `EXPR_DATA_NAME` message here in order for the serializer to
				// know which expression this is, otherwise the `exprsFromBytes` would need to add an
				// additional way of recognizing which expression is being parsed via `objectType` attribute ...
				// in any case we do not have a "name" at this point in code, but we could map it via `objectType -> typeName`
				objs, err := parseexprfunc.ParseExprMsgFunc(o.family(), b)
				if err != nil {
					return err
				}
				o.Obj = objs[1]
			})
			return &o, ad.Err()
		}
	}
	...
}

The marshaling part:

func (cc *Conn) AddObj(o Obj) Obj {
	cc.mu.Lock()
	defer cc.mu.Unlock()
	data, err := expr.Marshal(o.family(), o.data(), true)  // this is expr package specific marshaling, dataOnly is true
	if err != nil {
		cc.setErr(err)
		return nil
	}

	const NFT_OBJECT_COUNTER = 1 // TODO: get into x/sys/unix
	attrs := []netlink.Attribute{
		{Type: unix.NFTA_OBJ_TABLE, Data: []byte(o.table().Name + "\x00")},
		{Type: unix.NFTA_OBJ_NAME, Data: []byte(o.name() + "\x00")},
		{Type: unix.NFTA_OBJ_TYPE, Data: binaryutil.BigEndian.PutUint32(o.type())},
	}
	if data {
		attrs = append(attrs, netlink.Attribute{Type: unix.NLA_F_NESTED | unix.NFTA_OBJ_DATA, Data: data})
	}

	cc.messages = append(cc.messages, netlink.Message{
		Header: netlink.Header{
			Type:  netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_NEWOBJ),
			Flags: netlink.Request | netlink.Acknowledge | netlink.Create,
		},
		Data: append(extraHeader(uint8(o.family()), 0), attrs...),
	})
	return o
}

The marshaling part in expr package (seen as o.data().marshal(o.family(), true) in above code) would need to return only the NFTA_EXPR_DATA bytes and we could achieve this with an additional dataOnly parameter introduced to marshal function signature. In case this parameter is set, we would return only the NFTA_EXPR_DATA bytes, without NFTA_EXPR_NAME attribute, i.e.:

func (e *Counter) marshal(fam byte, dataOnly bool) ([]byte, error) {
	data, err := netlink.MarshalAttributes([]netlink.Attribute{
		{Type: unix.NFTA_COUNTER_BYTES, Data: binaryutil.BigEndian.PutUint64(e.Bytes)},
		{Type: unix.NFTA_COUNTER_PACKETS, Data: binaryutil.BigEndian.PutUint64(e.Packets)},
	})
	if err != nil {
		return nil, err
	}

	if dataOnly {
		return data
	}

	return netlink.MarshalAttributes([]netlink.Attribute{
		{Type: unix.NFTA_EXPR_NAME, Data: []byte("counter\x00")},
		{Type: unix.NLA_F_NESTED | unix.NFTA_EXPR_DATA, Data: data},
	})
}

Note that the above code was written by heart so mistakes are very much possible. I hope that I didn't miss anything.

Let me know what you think of this approach. If agreed, I can give it a try but I'm not sure when exactly this will be delivered.

Hey, thanks for flagging the duplication and sketching out how it could be addressed.

From what I can tell, this looks reasonable. The user-visible API won’t need to be changed, so as long as the tests keep passing, this should be a reasonable change to make.