dicarlolab/archconvnets

Replicating the HvM performance of the Zeiler DNN

Closed this issue · 1 comments

@ardila I'm having some trouble replicating good performance on the HvM images using the archconvnets implementation of the Zeiler Fergus DNN. Have you tried this? Maybe I've done something wrong? Here is a bit of code to grab these features from the database and compute some performance numbers (you could swap in another method for computing performance if you'd like to stick to the HvM code base and not mess with kanalyze).

Notice the try/except loop I threw in to get around some funky behavior in feature loading. Would be curious if you also experience this.

def get_zf_hvm_features_by_layer(layer, data, image_inds=None):
    """Fetch stored Zeiler-Fergus DNN features for a single layer.

    layer      -- layer name (e.g. 'conv3', 'conv4', 'conv5', 'pool5',
                  'fc6', 'fc7')
    data       -- dataset object exposing get_features(preproc, model_id,
                  feature_layers)
    image_inds -- optional sequence of row indices; when given, only those
                  rows of the feature matrix are returned

    Returns the raw feature array (all rows, or the selected subset).
    """
    import bson
    # Fixed database id of the stored Zeiler-Fergus model.
    model_id = bson.ObjectId('534c1ec6c626a8063eae8acd')
    preproc = {'normalize': False,
               'dtype': 'float32',
               'crop': [0, 0, 256, 256],
               'mode': 'RGB',
               'resize_to': [256, 256]}

    raw_features = data.get_features(preproc, model_id, [layer])

    if image_inds is None:
        return raw_features
    return raw_features[image_inds]


def test_get_zf_hvm_features_by_layer(layer_key='fc6', variation='V6'):
    import os
    from kanalyze import protocol
    from dldata.stimulus_sets import hvm as neural_datasets

    number_of_features = 80
    protocol_class = protocol.ProtocolLinearSVMNoBoatsLeaveOneOutRandomSplits
    protocol_class.number_of_repeats = 1

    data = neural_datasets.HvMWithDiscfade()
    data_base_filenames = [os.path.splitext(os.path.split(m_value['filename'])[1])[0] for m_value in data.meta]

    p = protocol_class(data, (variation,))

    # get the images specified by the protocol:
    image_inds = []
    for valid_image in p.meta:
        base_filename = os.path.splitext(os.path.split(valid_image['filename'])[1])[0]
        assert data_base_filenames.count(base_filename) == 1
        image_inds.append(data_base_filenames.index(base_filename))

    print 'layer_key', layer_key
    retry_done = False
    for retry_num in range(20):
        if retry_done:
            continue
        try:
            all_features = get_zf_hvm_features_by_layer(layer_key, data, image_inds=image_inds)
            #all_features = all_features[image_inds]
            retry_done = True
        except IndexError:
            print 'Caught IndexError, trying again...'
    assert retry_done, 'Failed to load features.'

    print all_features.shape
    rind = np.random.permutation(all_features.shape[1])
    features = all_features[:, rind[:number_of_features]]

    features = np.array(features, dtype=np.float64)
    r = p.run_on_features(features)
    print r

    all_features = data.machine_features('Zeiler2013_10Crops')[image_inds, :]
    print all_features.shape
    rind = np.random.permutation(all_features.shape[1])
    features = all_features[:, rind[:number_of_features]]

    features = np.array(features, dtype=np.float64)
    r = p.run_on_features(features)
    print r

Was this issue solved by upgrading the numpy version?