dgarnitz/vectorflow

Add Batch Write into Milvus

dgarnitz opened this issue · 4 comments

Replace the current row-by-row batched upload to Milvus (shown below)

def write_embeddings_to_milvus(upsert_list, vector_db_metadata):
    """Insert a list of embedding rows into a Milvus collection in batches.

    Args:
        upsert_list: Sequence of rows to insert (format expected by
            ``pymilvus.Collection.insert``).
        vector_db_metadata: Object exposing ``environment`` (Milvus URI) and
            ``index_name`` (collection name).

    Returns:
        The total number of vectors successfully inserted, or ``None`` if the
        collection could not be resolved or an insert batch failed.
    """
    connections.connect("default",
        uri=vector_db_metadata.environment,
        token=os.getenv('VECTOR_DB_KEY')
    )

    collection = Collection(vector_db_metadata.index_name)
    # Explicit None check: `if not collection` would invoke the object's
    # truthiness, and an *empty* (but existing) collection could evaluate
    # falsy and wrongly abort the very first insert. NOTE(review): the
    # Collection constructor typically raises when the collection is missing,
    # so this guard may never fire — confirm against pymilvus behavior.
    if collection is None:
        logging.error(f"Index {vector_db_metadata.index_name} does not exist in environment {vector_db_metadata.environment}")
        return None

    logging.info(f"Starting Milvus insert for {len(upsert_list)} vectors")
    # TODO(review): reuses the Pinecone batch-size setting for Milvus; a
    # dedicated MILVUS_BATCH_SIZE config value would be clearer.
    batch_size = config.PINECONE_BATCH_SIZE
    vectors_uploaded = 0

    for i in range(0, len(upsert_list), batch_size):
        try:
            insert_response = collection.insert(upsert_list[i:i+batch_size])
            vectors_uploaded += insert_response.insert_count
        except Exception as e:
            # Bug fix: the original passed `e` as a %-format arg to a message
            # with no placeholder, so the exception detail was never logged.
            logging.error('Error writing embeddings to milvus: %s', e)
            return None

    logging.info(f"Successfully uploaded {vectors_uploaded} vectors to milvus")
    return vectors_uploaded
) with Milvus's bulk-insert API for faster uploads.

This sample code should provide guidance - https://github.com/milvus-io/pymilvus/blob/master/examples/example_bulkinsert_json.py