Swap out the normal upload in Milvus (
|
def write_embeddings_to_milvus(upsert_list, vector_db_metadata):
    """Insert a list of vectors into a Milvus collection in batches.

    Args:
        upsert_list: sequence of rows/entities accepted by pymilvus
            ``Collection.insert`` (must match the collection schema).
        vector_db_metadata: object exposing ``environment`` (Milvus URI)
            and ``index_name`` (collection name) attributes.

    Returns:
        Total number of vectors inserted, or ``None`` when the collection
        is missing or any batch insert fails.
    """
    connections.connect(
        "default",
        uri=vector_db_metadata.environment,
        token=os.getenv('VECTOR_DB_KEY'),
    )

    # Collection() raises when the collection does not exist, so the old
    # truthiness check (`if not collection:`) could never fire. Catch the
    # constructor failure so the "missing index" path actually runs.
    try:
        collection = Collection(vector_db_metadata.index_name)
    except Exception:
        logging.error(f"Index {vector_db_metadata.index_name} does not exist in environment {vector_db_metadata.environment}")
        return None

    logging.info(f"Starting Milvus insert for {len(upsert_list)} vectors")
    # NOTE(review): reuses the Pinecone batch-size setting; a dedicated
    # Milvus setting would be clearer — confirm config before renaming.
    batch_size = config.PINECONE_BATCH_SIZE
    vectors_uploaded = 0

    for i in range(0, len(upsert_list), batch_size):
        try:
            insert_response = collection.insert(upsert_list[i:i + batch_size])
            vectors_uploaded += insert_response.insert_count
        except Exception as e:
            # The original passed `e` as a stray positional argument with no
            # %s placeholder, so logging silently dropped the error detail.
            logging.error('Error writing embeddings to milvus: %s', e)
            return None

    logging.info(f"Successfully uploaded {vectors_uploaded} vectors to milvus")
    return vectors_uploaded
) with a bulk-insert batch upload for faster ingestion.
This sample code should provide guidance - https://github.com/milvus-io/pymilvus/blob/master/examples/example_bulkinsert_json.py