VertaAI/modeldb

requests.exceptions.HTTPError: 501 Server Error: Method ai.verta.modeldb.DatasetVersionService/getUrlForDatasetBlobVersioned is unimplemented.

irtiza11 opened this issue · 2 comments

I am trying to run the dataset versioning scripts and got the following error.

connection successfully established
got existing Dataset: Census Income Local
created new Dataset Version: 1 for Census Income Local
Traceback (most recent call last):
File "/home/irtiza/catkin_ws/devel/lib/ubica_retinanet_detector/modeldb_dataset_versioning", line 15, in
exec(compile(fh.read(), python_script, 'exec'), context)
File "/home/irtiza/catkin_ws/src/ubica_learning/ubica_retinanet_detector/scripts/modeldb_dataset_versioning", line 122, in
main()
File "/home/irtiza/catkin_ws/src/ubica_learning/ubica_retinanet_detector/scripts/modeldb_dataset_versioning", line 72, in main
dataset_version = dataset.create_version(Path("census-train.csv", enable_mdb_versioning=True))
File "/home/irtiza/.local/lib/python3.8/site-packages/verta/dataset/entities/_dataset.py", line 345, in create_version
return DatasetVersion._create(
File "/home/irtiza/.local/lib/python3.8/site-packages/verta/dataset/entities/_dataset_version.py", line 128, in _create
obj._upload_artifact(component.path, f) # pylint: disable=no-member
File "/home/irtiza/.local/lib/python3.8/site-packages/verta/dataset/entities/_dataset_version.py", line 519, in _upload_artifact
url_for_artifact = self._get_url_for_artifact(dataset_component_path, "PUT", part_num=1)
File "/home/irtiza/.local/lib/python3.8/site-packages/verta/dataset/entities/_dataset_version.py", line 485, in _get_url_for_artifact
_utils.raise_for_http_error(response)
File "/home/irtiza/.local/lib/python3.8/site-packages/verta/_internal_utils/_utils.py", line 681, in raise_for_http_error
six.raise_from(requests.HTTPError(message, response=response), None)
File "", line 3, in raise_from
requests.exceptions.HTTPError: 501 Server Error: Method ai.verta.modeldb.DatasetVersionService/getUrlForDatasetBlobVersioned is unimplemented for url: http://localhost:3000/api/v1/modeldb/dataset-version/dataset/e8ba9c41-a916-4df6-abd3-e1e0dd30cbd1/datasetVersion/0696d023-cecd-42b1-a13d-94c5868c679a/getUrlForDatasetBlobVersioned at 2021-08-04 12:11:34.988000 UTC

# from __future__ import print_function

import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

from datetime import datetime
import itertools
import os
import time

import numpy as np
import pandas as pd

import sklearn
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
from verta.dataset import Path, S3

try:
    import verta
except ImportError:
    print("Verta import failed")

try:
    import wget
except ImportError:
    print("wget import error, install with 'python3.8 -m pip install wget'")

from verta import Client
from verta.utils import ModelAPI
import os

def download_starter_dataset(bucket_name, DATASET_PATH, train_data_filename, test_data_filename):
    """Download the census train/test CSVs from S3 if not already present locally.

    Parameters
    ----------
    bucket_name : str
        Public S3 bucket holding ``census-train.csv`` and ``census-test.csv``.
    DATASET_PATH : str
        Directory prefix kept for backward compatibility; the filename
        arguments below are expected to already include it.
    train_data_filename : str
        Local path where the training CSV should live.
    test_data_filename : str
        Local path where the test CSV should live.
    """
    base_url = "http://s3.amazonaws.com/" + bucket_name + "/"

    # BUG FIX: the original called wget.download(url) with no output path,
    # which saves into the current working directory — so the existence
    # check against DATASET_PATH could stay false forever and the file
    # would be re-downloaded on every run. Download to the checked path.
    if not os.path.isfile(train_data_filename):
        wget.download(base_url + "census-train.csv", out=train_data_filename)

    if not os.path.isfile(test_data_filename):
        wget.download(base_url + "census-test.csv", out=test_data_filename)

def main(args=None):
    """Version the census dataset with ModelDB and train a logistic regression.

    Connects to a local ModelDB instance, registers a dataset version for the
    training CSV, logs hyperparameters/metrics/model to an experiment run.

    Parameters
    ----------
    args : optional
        Unused; kept for CLI-style compatibility.
    """
    # ModelDB backend (open-source deployment on localhost).
    HOST = "http://localhost:3000"
    client = Client(HOST)

    dataset = client.set_dataset(name="Census Income Local")

    DATASET_PATH = "./"
    train_data_filename = DATASET_PATH + "census-train.csv"
    test_data_filename = DATASET_PATH + "census-test.csv"

    download_starter_dataset("verta-starter", DATASET_PATH, train_data_filename, test_data_filename)

    # NOTE(review): per the traceback above, passing
    # enable_mdb_versioning=True makes the client call
    # getUrlForDatasetBlobVersioned, which the open-source server answers
    # with 501 (managed versioning is SaaS-only). A plain Path() works.
    dataset_version = dataset.create_version(Path("census-train.csv"))

    df_train = pd.read_csv(train_data_filename)
    X_train = df_train.iloc[:, :-1]  # all columns except the last are features
    Y_train = df_train.iloc[:, -1]   # last column is the label

    # create object to track experiment run
    run = client.set_experiment_run()

    # log training data
    run.log_dataset_version("train", dataset_version)

    # ---------------------- other tracking below ------------------------

    # create validation split
    (X_val_train, X_val_test,
     Y_val_train, Y_val_test) = model_selection.train_test_split(X_train, Y_train,
                                                                 test_size=0.2,
                                                                 shuffle=True)
    # log hyperparameters
    hyperparams = {"C": 10}
    run.log_hyperparameters(hyperparams)
    print(hyperparams, end=' ')

    # create and train model
    model = linear_model.LogisticRegression(**hyperparams)
    # BUG FIX: the original fit on the FULL X_train and then scored on
    # X_val_test — a subset of its own training data — so the logged
    # validation accuracy was inflated by leakage. Fit on the split only.
    model.fit(X_val_train, Y_val_train)

    # calculate and log validation accuracy on held-out rows
    val_acc = model.score(X_val_test, Y_val_test)
    run.log_metric("val_acc", val_acc)
    print("Validation accuracy: {:.4f}".format(val_acc))

    # Log the model as artifact
    run.log_model(model)

    # fetch the dataset version info
    run.get_dataset_version("train")


if __name__ == '__main__':
    main()

Hello @irtiza11,

This functionality (managed dataset versioning) is not currently available in our open-source platform. If you are interested in this feature, you can contact us for information about our SaaS offering!

https://www.verta.ai/contact/