PGVector does not work with HuggingFaceEmbeddings
vcahlik opened this issue · 19 comments
PGVector works fine for me when coupled with OpenAIEmbeddings. However, when I try to use HuggingFaceEmbeddings, I get the following error: StatementError: (builtins.ValueError) expected 1536 dimensions, not 768
Example code:
from langchain.vectorstores.pgvector import PGVector
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
import os
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
texts = TextLoader('data/made-up-story.txt').load()
documents = CharacterTextSplitter(chunk_size=500, chunk_overlap=20).split_documents(texts)
CONNECTION_STRING = PGVector.connection_string_from_db_params(
    driver=os.environ.get("PGVECTOR_DRIVER", "psycopg2"),
    host=os.environ.get("PGVECTOR_HOST", "localhost"),
    port=int(os.environ.get("PGVECTOR_PORT", "5432")),
    database=os.environ.get("PGVECTOR_DATABASE", "postgres"),
    user=os.environ.get("PGVECTOR_USER", "postgres"),
    password=os.environ.get("PGVECTOR_PASSWORD", "postgres"),
)
db = PGVector.from_documents(
    embedding=embeddings,
    documents=documents,
    collection_name="test",
    connection_string=CONNECTION_STRING,
)
Output:
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1702 in │
│ _execute_context │
│ │
│ 1699 │ │ │ if conn is None: │
│ 1700 │ │ │ │ conn = self._revalidate_connection() │
│ 1701 │ │ │ │
│ ❱ 1702 │ │ │ context = constructor( │
│ 1703 │ │ │ │ dialect, self, conn, execution_options, *args, **kw │
│ 1704 │ │ │ ) │
│ 1705 │ │ except (exc.PendingRollbackError, exc.ResourceClosedError): │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1078 in │
│ _init_compiled │
│ │
│ 1075 │ │ │ │ │ │ for key in compiled_params │
│ 1076 │ │ │ │ │ } │
│ 1077 │ │ │ │ else: │
│ ❱ 1078 │ │ │ │ │ param = { │
│ 1079 │ │ │ │ │ │ key: processors[key](compiled_params[key]) │
│ 1080 │ │ │ │ │ │ if key in processors │
│ 1081 │ │ │ │ │ │ else compiled_params[key] │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1079 in │
│ <dictcomp> │
│ │
│ 1076 │ │ │ │ │ } │
│ 1077 │ │ │ │ else: │
│ 1078 │ │ │ │ │ param = { │
│ ❱ 1079 │ │ │ │ │ │ key: processors[key](compiled_params[key]) │
│ 1080 │ │ │ │ │ │ if key in processors │
│ 1081 │ │ │ │ │ │ else compiled_params[key] │
│ 1082 │ │ │ │ │ │ for key in compiled_params │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/pgvector/sqlalchemy/__init__.py:21 in │
│ process │
│ │
│ 18 │ │
│ 19 │ def bind_processor(self, dialect): │
│ 20 │ │ def process(value): │
│ ❱ 21 │ │ │ return to_db(value, self.dim) │
│ 22 │ │ return process │
│ 23 │ │
│ 24 │ def result_processor(self, dialect, coltype): │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/pgvector/utils/__init__.py:35 in to_db │
│ │
│ 32 │ │ value = value.tolist() │
│ 33 │ │
│ 34 │ if dim is not None and len(value) != dim: │
│ ❱ 35 │ │ raise ValueError('expected %d dimensions, not %d' % (dim, len(value))) │
│ 36 │ │
│ 37 │ return '[' + ','.join([str(float(v)) for v in value]) + ']' │
│ 38 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
ValueError: expected 1536 dimensions, not 768
The above exception was the direct cause of the following exception:
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /tmp/ipykernel_81963/141995419.py:21 in <cell line: 21> │
│ │
│ [Errno 2] No such file or directory: '/tmp/ipykernel_81963/141995419.py' │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/langchain/vectorstores/pgvector.py:420 in │
│ from_documents │
│ │
│ 417 │ │ │
│ 418 │ │ kwargs["connection_string"] = connection_string │
│ 419 │ │ │
│ ❱ 420 │ │ return cls.from_texts( │
│ 421 │ │ │ texts=texts, │
│ 422 │ │ │ pre_delete_collection=pre_delete_collection, │
│ 423 │ │ │ embedding=embedding, │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/langchain/vectorstores/pgvector.py:376 in │
│ from_texts │
│ │
│ 373 │ │ │ pre_delete_collection=pre_delete_collection, │
│ 374 │ │ ) │
│ 375 │ │ │
│ ❱ 376 │ │ store.add_texts(texts=texts, metadatas=metadatas, ids=ids, **kwargs) │
│ 377 │ │ return store │
│ 378 │ │
│ 379 │ @classmethod │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/langchain/vectorstores/pgvector.py:228 in │
│ add_texts │
│ │
│ 225 │ │ │ │ ) │
│ 226 │ │ │ │ collection.embeddings.append(embedding_store) │
│ 227 │ │ │ │ session.add(embedding_store) │
│ ❱ 228 │ │ │ session.commit() │
│ 229 │ │ │
│ 230 │ │ return ids │
│ 231 │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/orm/session.py:1428 in commit │
│ │
│ 1425 │ │ │ if not self._autobegin(): │
│ 1426 │ │ │ │ raise sa_exc.InvalidRequestError("No transaction is begun.") │
│ 1427 │ │ │
│ ❱ 1428 │ │ self._transaction.commit(_to_root=self.future) │
│ 1429 │ │
│ 1430 │ def prepare(self): │
│ 1431 │ │ """Prepare the current transaction in progress for two phase commit. │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/orm/session.py:829 in commit │
│ │
│ 826 │ def commit(self, _to_root=False): │
│ 827 │ │ self._assert_active(prepared_ok=True) │
│ 828 │ │ if self._state is not PREPARED: │
│ ❱ 829 │ │ │ self._prepare_impl() │
│ 830 │ │ │
│ 831 │ │ if self._parent is None or self.nested: │
│ 832 │ │ │ for conn, trans, should_commit, autoclose in set( │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/orm/session.py:808 in │
│ _prepare_impl │
│ │
│ 805 │ │ │ for _flush_guard in range(100): │
│ 806 │ │ │ │ if self.session._is_clean(): │
│ 807 │ │ │ │ │ break │
│ ❱ 808 │ │ │ │ self.session.flush() │
│ 809 │ │ │ else: │
│ 810 │ │ │ │ raise exc.FlushError( │
│ 811 │ │ │ │ │ "Over 100 subsequent flushes have occurred within " │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3345 in flush │
│ │
│ 3342 │ │ │ return │
│ 3343 │ │ try: │
│ 3344 │ │ │ self._flushing = True │
│ ❱ 3345 │ │ │ self._flush(objects) │
│ 3346 │ │ finally: │
│ 3347 │ │ │ self._flushing = False │
│ 3348 │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3485 in _flush │
│ │
│ 3482 │ │ │
│ 3483 │ │ except: │
│ 3484 │ │ │ with util.safe_reraise(): │
│ ❱ 3485 │ │ │ │ transaction.rollback(_capture_exception=True) │
│ 3486 │ │
│ 3487 │ def bulk_save_objects( │
│ 3488 │ │ self, │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py:70 in │
│ __exit__ │
│ │
│ 67 │ │ │ exc_type, exc_value, exc_tb = self._exc_info │
│ 68 │ │ │ self._exc_info = None # remove potential circular references │
│ 69 │ │ │ if not self.warn_only: │
│ ❱ 70 │ │ │ │ compat.raise_( │
│ 71 │ │ │ │ │ exc_value, │
│ 72 │ │ │ │ │ with_traceback=exc_tb, │
│ 73 │ │ │ │ ) │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/util/compat.py:207 in raise_ │
│ │
│ 204 │ │ │ exception.__cause__ = replace_context │
│ 205 │ │ │
│ 206 │ │ try: │
│ ❱ 207 │ │ │ raise exception │
│ 208 │ │ finally: │
│ 209 │ │ │ # credit to │
│ 210 │ │ │ # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/ │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/orm/session.py:3445 in _flush │
│ │
│ 3442 │ │ try: │
│ 3443 │ │ │ self._warn_on_events = True │
│ 3444 │ │ │ try: │
│ ❱ 3445 │ │ │ │ flush_context.execute() │
│ 3446 │ │ │ finally: │
│ 3447 │ │ │ │ self._warn_on_events = False │
│ 3448 │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:456 in execute │
│ │
│ 453 │ │ │ │ │ n.execute_aggregate(self, set_) │
│ 454 │ │ else: │
│ 455 │ │ │ for rec in topological.sort(self.dependencies, postsort_actions): │
│ ❱ 456 │ │ │ │ rec.execute(self) │
│ 457 │ │
│ 458 │ def finalize_flush_changes(self): │
│ 459 │ │ """Mark processed objects as clean / deleted after a successful │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/orm/unitofwork.py:630 in execute │
│ │
│ 627 │ │
│ 628 │ @util.preload_module("sqlalchemy.orm.persistence") │
│ 629 │ def execute(self, uow): │
│ ❱ 630 │ │ util.preloaded.orm_persistence.save_obj( │
│ 631 │ │ │ self.mapper, │
│ 632 │ │ │ uow.states_for_mapper_hierarchy(self.mapper, False, False), │
│ 633 │ │ │ uow, │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:244 in │
│ save_obj │
│ │
│ 241 │ │ │ update, │
│ 242 │ │ ) │
│ 243 │ │ │
│ ❱ 244 │ │ _emit_insert_statements( │
│ 245 │ │ │ base_mapper, │
│ 246 │ │ │ uowtransaction, │
│ 247 │ │ │ mapper, │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/orm/persistence.py:1155 in │
│ _emit_insert_statements │
│ │
│ 1152 │ │ │ if do_executemany: │
│ 1153 │ │ │ │ multiparams = [rec[2] for rec in records] │
│ 1154 │ │ │ │ │
│ ❱ 1155 │ │ │ │ c = connection._execute_20( │
│ 1156 │ │ │ │ │ statement, multiparams, execution_options=execution_options │
│ 1157 │ │ │ │ ) │
│ 1158 │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1614 in │
│ _execute_20 │
│ │
│ 1611 │ │ │ │ exc.ObjectNotExecutableError(statement), replace_context=err │
│ 1612 │ │ │ ) │
│ 1613 │ │ else: │
│ ❱ 1614 │ │ │ return meth(self, args_10style, kwargs_10style, execution_options) │
│ 1615 │ │
│ 1616 │ def exec_driver_sql( │
│ 1617 │ │ self, statement, parameters=None, execution_options=None │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/sql/elements.py:325 in │
│ _execute_on_connection │
│ │
│ 322 │ │ self, connection, multiparams, params, execution_options, _force=False │
│ 323 │ ): │
│ 324 │ │ if _force or self.supports_execution: │
│ ❱ 325 │ │ │ return connection._execute_clauseelement( │
│ 326 │ │ │ │ self, multiparams, params, execution_options │
│ 327 │ │ │ ) │
│ 328 │ │ else: │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1481 in │
│ _execute_clauseelement │
│ │
│ 1478 │ │ │ schema_translate_map=schema_translate_map, │
│ 1479 │ │ │ linting=self.dialect.compiler_linting | compiler.WARN_LINTING, │
│ 1480 │ │ ) │
│ ❱ 1481 │ │ ret = self._execute_context( │
│ 1482 │ │ │ dialect, │
│ 1483 │ │ │ dialect.execution_ctx_cls._init_compiled, │
│ 1484 │ │ │ compiled_sql, │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1708 in │
│ _execute_context │
│ │
│ 1705 │ │ except (exc.PendingRollbackError, exc.ResourceClosedError): │
│ 1706 │ │ │ raise │
│ 1707 │ │ except BaseException as e: │
│ ❱ 1708 │ │ │ self._handle_dbapi_exception( │
│ 1709 │ │ │ │ e, util.text_type(statement), parameters, None, None │
│ 1710 │ │ │ ) │
│ 1711 │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/engine/base.py:2026 in │
│ _handle_dbapi_exception │
│ │
│ 2023 │ │ │ if newraise: │
│ 2024 │ │ │ │ util.raise_(newraise, with_traceback=exc_info[2], from_=e) │
│ 2025 │ │ │ elif should_wrap: │
│ ❱ 2026 │ │ │ │ util.raise_( │
│ 2027 │ │ │ │ │ sqlalchemy_exception, with_traceback=exc_info[2], from_=e │
│ 2028 │ │ │ │ ) │
│ 2029 │ │ │ else: │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/util/compat.py:207 in raise_ │
│ │
│ 204 │ │ │ exception.__cause__ = replace_context │
│ 205 │ │ │
│ 206 │ │ try: │
│ ❱ 207 │ │ │ raise exception │
│ 208 │ │ finally: │
│ 209 │ │ │ # credit to │
│ 210 │ │ │ # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/ │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/engine/base.py:1702 in │
│ _execute_context │
│ │
│ 1699 │ │ │ if conn is None: │
│ 1700 │ │ │ │ conn = self._revalidate_connection() │
│ 1701 │ │ │ │
│ ❱ 1702 │ │ │ context = constructor( │
│ 1703 │ │ │ │ dialect, self, conn, execution_options, *args, **kw │
│ 1704 │ │ │ ) │
│ 1705 │ │ except (exc.PendingRollbackError, exc.ResourceClosedError): │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1078 in │
│ _init_compiled │
│ │
│ 1075 │ │ │ │ │ │ for key in compiled_params │
│ 1076 │ │ │ │ │ } │
│ 1077 │ │ │ │ else: │
│ ❱ 1078 │ │ │ │ │ param = { │
│ 1079 │ │ │ │ │ │ key: processors[key](compiled_params[key]) │
│ 1080 │ │ │ │ │ │ if key in processors │
│ 1081 │ │ │ │ │ │ else compiled_params[key] │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/sqlalchemy/engine/default.py:1079 in │
│ <dictcomp> │
│ │
│ 1076 │ │ │ │ │ } │
│ 1077 │ │ │ │ else: │
│ 1078 │ │ │ │ │ param = { │
│ ❱ 1079 │ │ │ │ │ │ key: processors[key](compiled_params[key]) │
│ 1080 │ │ │ │ │ │ if key in processors │
│ 1081 │ │ │ │ │ │ else compiled_params[key] │
│ 1082 │ │ │ │ │ │ for key in compiled_params │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/pgvector/sqlalchemy/__init__.py:21 in │
│ process │
│ │
│ 18 │ │
│ 19 │ def bind_processor(self, dialect): │
│ 20 │ │ def process(value): │
│ ❱ 21 │ │ │ return to_db(value, self.dim) │
│ 22 │ │ return process │
│ 23 │ │
│ 24 │ def result_processor(self, dialect, coltype): │
│ │
│ /home/vojta/.miniconda3/lib/python3.9/site-packages/pgvector/utils/__init__.py:35 in to_db │
│ │
│ 32 │ │ value = value.tolist() │
│ 33 │ │
│ 34 │ if dim is not None and len(value) != dim: │
│ ❱ 35 │ │ raise ValueError('expected %d dimensions, not %d' % (dim, len(value))) │
│ 36 │ │
│ 37 │ return '[' + ','.join([str(float(v)) for v in value]) + ']' │
│ 38 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
StatementError: (builtins.ValueError) expected 1536 dimensions, not 768
[SQL: INSERT INTO langchain_pg_embedding (uuid, collection_id, embedding, document, cmetadata, custom_id) VALUES
(%(uuid)s, %(collection_id)s, %(embedding)s, %(document)s, %(cmetadata)s, %(custom_id)s)]
[parameters: [{'embedding': [0.10074684768915176, 0.00936161819845438, 0.01689612865447998, 0.00424081739038229,
0.01892073266208172, 0.02156691998243332, -0.00793 ... (174655 characters truncated) ...
UUID('65a530b7-bcd4-47a2-a2df-e22fb3c353d2'), 'custom_id': '8daf3193-cf95-11ed-aea8-482ae319f16c', 'cmetadata':
{'source': 'data/made-up-story.txt'}}]]
This issue happens because the vector size is hardcoded as the constant ADA_TOKEN_COUNT = 1536 in pgvector.py, so the table column is created as a vector of that size, which is not compatible with the Instructor vector size (768).
It would be ideal if we could pass the vector size as a parameter, or have it obtained automatically from some property of the embedding class.
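For illustration, a minimal sketch of such auto-detection (infer_embedding_dim is a hypothetical helper, not an existing LangChain API; Embeddings classes don't expose a dimension property, but embedding one probe string reveals it):

def infer_embedding_dim(embeddings) -> int:
    # Hypothetical helper: embed a short probe string and measure the result.
    return len(embeddings.embed_query("dimension probe"))

# e.g. infer_embedding_dim(HuggingFaceEmbeddings(...)) returns 768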
Same here.
I got it working after some modifications:
- I updated ADA_TOKEN_COUNT = 768 locally (in site-packages): https://github.com/hwchase17/langchain/blob/199cb855eaf9cc7a2c3e671e96c59a8ea4d80dc8/langchain/vectorstores/pgvector.py#L22
- I updated the column type in my database:
  - Enter PostgreSQL: psql postgres
  - List databases: postgres=# \l
  - Switch to the database: postgres=# \c postgres
  - alter table langchain_pg_embedding alter column embedding type vector(768);
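To verify the change took effect, a quick check inside psql (assuming LangChain's default table name):

postgres=# \d langchain_pg_embedding
-- the embedding column should now be listed with type vector(768)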
Thank you for pointing that out, @bukosabino. We definitely need a flexible solution here.
Could an environment variable be a solution for this?
I could create a PR for this if that would help.
Pros:
- fast solution
- easy to implement
Cons:
- no auto-detection of the vector length
- pgvector stores of different lengths cannot coexist in one program
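For illustration, a minimal sketch of the env-based idea (PGVECTOR_VECTOR_SIZE is the variable name used later in this thread; the fallback mirrors the currently hardcoded constant):

import os

ADA_TOKEN_COUNT = int(os.environ.get("PGVECTOR_VECTOR_SIZE", "1536"))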
@woodworker that would be better than the current implementation, but passing it as an argument somewhere would be better still.
I tried to find a parameter-based solution, but the EmbeddingStore in pgvector.py is pretty hardwired into all the SQLAlchemy code:
https://github.com/hwchase17/langchain/blob/master/langchain/vectorstores/pgvector.py#L70-L87
I'll create a PR for the env-based solution, and maybe someone (maybe even me, at a later time) will find a better parameterized solution.
waiting for #4203 to make it through
It seems @hwchase17 committed a fix in #7355 some time ago to make it None.
My idea would be to create the BaseModel classes inside a factory function, call it in the constructor, and assign the result to self. For example:

def make_models(dims: Optional[int] = None):
    class EmbeddingStore(BaseModel):
        ...
        embedding: Vector = sqlalchemy.Column(Vector(dims))

    return {
        'EmbeddingStore': EmbeddingStore
    }

class PGVector(VectorStore):
    def __init__(self, dims: Optional[int] = None):
        models = make_models(dims)
        self.EmbeddingStore = models['EmbeddingStore']
Or something like this.
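For context, a more concrete (hypothetical) version of that factory using plain SQLAlchemy; the table and column names are illustrative, not LangChain's actual schema:

from typing import Optional

import sqlalchemy
from pgvector.sqlalchemy import Vector
from sqlalchemy.orm import declarative_base

def make_embedding_store(dims: Optional[int] = None):
    Base = declarative_base()

    class EmbeddingStore(Base):
        __tablename__ = "embedding_store"
        uuid = sqlalchemy.Column(sqlalchemy.String, primary_key=True)
        # dims=None leaves the pgvector column untyped, so any length is accepted
        embedding = sqlalchemy.Column(Vector(dims))

    return EmbeddingStore

Each call returns a fresh class, so stores with different vector sizes could coexist in one program.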
I ran into the same issue. When will this be fixed? @bukosabino's approach works, but we need a permanent fix.
Is this issue fixed? It looks like the code has changed and pgvector.py no longer has the ADA_TOKEN_COUNT variable, so it's now difficult to explicitly set it to another dimension, and the current default only works for OpenAI embeddings. What if we want to use sentence-transformer models? Some of those models have only 768 dimensions. Can we have a fix that lets us explicitly set the embedding dimension based on the chosen model, or have it set dynamically?
This problem still exists. Is there any chance of a permanent fix?
I was able to use Hugging Face embeddings with the latest LangChain. I did initially encounter this issue: creating a new collection did not create a new table, and my prior collection used an embedding model with a different vector dimension, so I got a similar error. I simply created a new database so that the new langchain_pg_embedding table would be created with an embedding column of the correct dimensions.
Same issue here, with latest langchain version 0.0.333. The index was created in ChromaDB with the model "google/flan-t5-xxl". When reusing the index from the persisted ChromaDB, I have tried these embeddings:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
None of them works. All throw chromadb.errors.InvalidDimensionException: Embedding dimension 768 does not match collection dimensionality 1536.
Changing to
embeddings = OpenAIEmbeddings()
or
embeddings = HuggingFaceEmbeddings(model_name="sangmini/msmarco-cotmae-MiniLM-L12_en-ko-ja")
works as expected.
I believe those two models use 1536 dimensions.
Links:
The hugging face one: https://huggingface.co/sangmini/msmarco-cotmae-MiniLM-L12_en-ko-ja
The open ai one: https://openai.com/blog/new-and-improved-embedding-model
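As an aside, a quick way to check any sentence-transformers model's output size before wiring it into a vector store:

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
print(model.get_sentence_embedding_dimension())  # 768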
Not sure how langchain's integration with chromadb works, but for pgvector the issue arose from all collections utilizing the same table + column to store embeddings, which means the vectors can only be one size.
I have been setting the env var PGVECTOR_VECTOR_SIZE, which seems to work OK; look up the vector_size for the model you are using first. Make sure the table is created with a vector column of the right size, otherwise you need to delete it and recreate it.
os.environ["PGVECTOR_VECTOR_SIZE"] = str(vector_size)
vectorstore = PGVector(connection_string=CONNECTION_STRING,
embedding_function=embeddings,
collection_name=vector_name,
#pre_delete_collection=True # for testing purposes
)
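If the table already exists with the wrong size, the ALTER TABLE shown earlier in this thread works; for a fresh start, dropping the tables also does (assuming LangChain's default table names; at the time of this thread, LangChain recreated them on the next PGVector(...) call):

DROP TABLE IF EXISTS langchain_pg_embedding;
DROP TABLE IF EXISTS langchain_pg_collection;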
@MarkEdmondson1234 how are you creating the table?
While trying to adapt this example https://docs.llamaindex.ai/en/latest/examples/vector_stores/postgres.html# to use a local embedding model, I was able to solve this issue by:
- deleting the database with the stored index (since it likely held data with incorrect dimensions from previous settings) and recreating it
- passing the embedding dimension explicitly:
vector_size = 768

vector_store = PGVectorStore.from_params(
    database=db_name,
    host="localhost",
    password="mock",
    port=5432,
    user="mock",
    table_name="test",
    embed_dim=vector_size,  # ensure this matches your model's output dimensions
)
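A hedged continuation following the linked docs (documents stands for your already loaded llama_index documents; import paths vary by llama_index version, this assumes the pre-0.10 layout used in that example):

from llama_index import StorageContext, VectorStoreIndex

storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)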
Where do I need to define the PGVECTOR_VECTOR_SIZE variable after setting it up in the .env file?
os.environ["PGVECTOR_VECTOR_SIZE"] = str(vector_size)
Works for me. Go through the code below; I've implemented pgvector using the SupabaseVectorStore.

# Imports assume llama-index >= 0.10 (the Settings API); DB_CONNECTION must
# hold your Postgres connection string.
from llama_index.core import Settings, SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.gemini import Gemini
from llama_index.vector_stores.supabase import SupabaseVectorStore

Settings.llm = Gemini()
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
Settings.embed_model = embed_model
Settings.chunk_size = 512
Settings.chunk_overlap = 20

documents = SimpleDirectoryReader("PDFs").load_data()
splitter = SentenceSplitter()

vector_store = SupabaseVectorStore(
    postgres_connection_string=DB_CONNECTION,
    dimension=768,  # bge-base-en-v1.5 outputs 768-dimensional vectors
    collection_name="sample_demo",
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    embed_model=embed_model,
    transformations=[splitter],
)
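As a quick end-to-end check that retrieval runs against the 768-dimension store, a standard query (the prompt text is illustrative):

query_engine = index.as_query_engine()
response = query_engine.query("Summarize the PDFs.")
print(response)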