order_by is not working in distributed deployment (multi-node cluster)
Closed this issue · 6 comments
Current Behavior
In v1.8.0, Qdrant introduced order_by in the scroll API. I tested it on a single node and on the official cloud demo cluster, and it worked well.
However, in a distributed deployment with multiple nodes (e.g. 5 nodes), the points returned by the scroll API are not strictly sorted by the order_by field. When I reduce the number of nodes in the cluster, the results become closer to sorted, though.
Steps to Reproduce
- deploy a multi-node cluster
- create a test collection
- add a payload index to one field (e.g. test_int_index)
- add 10 points
- call the scroll API with order_by="index"; the results are not sorted as expected.
Expected Behavior
The scroll results should be sorted by the order_by field.
Possible Solution
Context (Environment)
Detailed Description
Possible Implementation
columns: index, timestamp, text
Expected:
0 2024-04-12T08:40:06.189301 Italy: tomatoes, olive oil, pasta.
1 2024-04-12T08:40:09.482208 Japan: seafood-centric with artistic presentation.
2 2024-04-12T08:40:09.744967 Mexico: beans, chili peppers, and meat.
3 2024-04-12T08:40:10.020073 India: diverse vegetarian and meat dishes.
4 2024-04-12T08:40:10.285342 France: Gourmet dining, focuses on cheese, wine
5 2024-04-12T08:40:10.883714 China: diverse cuisines, emphasizes balance, uses rice
6 2024-04-12T08:40:13.166867 Thailand: five flavors, spicy, sweet, salty, sour.
7 2024-04-12T08:40:15.436143 USA: Melting pot, diverse, large portions, fast food.
8 2024-04-12T08:40:15.697163 Brazil: barbecue heavy
9 2024-04-12T08:40:15.963504 Greece: Olive oil, feta, yogurt, seafood
10 2024-04-12T08:40:16.738832 Ethiopia: bread with spicy stews and vegetables
Current output:
0 2024-04-12T08:40:06.189301 Italy: tomatoes, olive oil, pasta.
4 2024-04-12T08:40:10.285342 France: Gourmet dining, focuses on cheese, wine
7 2024-04-12T08:40:15.436143 USA: Melting pot, diverse, large portions, fast food.
8 2024-04-12T08:40:15.697163 Brazil: barbecue heavy
10 2024-04-12T08:40:16.738832 Ethiopia: bread with spicy stews and vegetables
2 2024-04-12T08:40:09.744967 Mexico: beans, chili peppers, and meat.
1 2024-04-12T08:40:09.482208 Japan: seafood-centric with artistic presentation.
5 2024-04-12T08:40:10.883714 China: diverse cuisines, emphasizes balance, uses rice
3 2024-04-12T08:40:10.020073 India: diverse vegetarian and meat dishes.
6 2024-04-12T08:40:13.166867 Thailand: five flavors, spicy, sweet, salty, sour.
9 2024-04-12T08:40:15.963504 Greece: Olive oil, feta, yogurt, seafood
hey @open-chinese, I have tried your steps, but unfortunately I can't reproduce the problem. Could you please share more detailed instructions?
my cluster results
{
"result": {
"status": "enabled",
"peer_id": 6314324602164287,
"peers": {
"6314324602164287": {
"uri": "http://localhost:6335/"
},
"1698852423267208": {
"uri": "http://127.0.0.1:6345/"
},
"5887298532774956": {
"uri": "http://127.0.0.1:6355/"
}
},
"raft_info": {
"term": 1,
"commit": 41,
"pending_operations": 0,
"leader": 6314324602164287,
"role": "Leader",
"is_voter": true
},
"consensus_thread_status": {
"consensus_thread_status": "working",
"last_update": "2024-04-13T10:11:53.199335849Z"
},
"message_send_failures": {}
},
"status": "ok",
"time": 0.000153659
}
collection info:
{
"result": {
"status": "green",
"optimizer_status": "ok",
"vectors_count": 1000,
"indexed_vectors_count": 0,
"points_count": 1000,
"segments_count": 6,
"config": {
"params": {
"vectors": {
"size": 128,
"distance": "Cosine"
},
"shard_number": 3,
"replication_factor": 2,
"write_consistency_factor": 1,
"on_disk_payload": false
},
"hnsw_config": {
"m": 16,
"ef_construct": 100,
"full_scan_threshold": 10000,
"max_indexing_threads": 1,
"on_disk": false
},
"optimizer_config": {
"deleted_threshold": 0.2,
"vacuum_min_vector_number": 1000,
"default_segment_number": 2,
"max_segment_size": null,
"memmap_threshold": null,
"indexing_threshold": 20000,
"flush_interval_sec": 5,
"max_optimization_threads": null
},
"wal_config": {
"wal_capacity_mb": 1,
"wal_segments_ahead": 0
},
"quantization_config": null
},
"payload_schema": {
"c": {
"data_type": "integer",
"points": 1000
}
}
},
"status": "ok",
"time": 0.006432669
}
request
POST collections/benchmark/points/scroll
{
"order_by": "c",
"limit": 10
}
result
{
"result": {
"points": [
{
"id": 377,
"payload": {
"c": 3
},
"vector": null
},
{
"id": 397,
"payload": {
"c": 21
},
"vector": null
},
{
"id": 98,
"payload": {
"c": 21
},
"vector": null
},
{
"id": 780,
"payload": {
"c": 69
},
"vector": null
},
{
"id": 745,
"payload": {
"c": 122
},
"vector": null
},
{
"id": 550,
"payload": {
"c": 152
},
"vector": null
},
{
"id": 239,
"payload": {
"c": 171
},
"vector": null
},
{
"id": 978,
"payload": {
"c": 181
},
"vector": null
},
{
"id": 33,
"payload": {
"c": 214
},
"vector": null
},
{
"id": 220,
"payload": {
"c": 226
},
"vector": null
}
],
"next_page_offset": null
},
"status": "ok",
"time": 0.004771597
}
Thanks for your reply, @generall. I've tested a collection (test-db) with 14 points; it's almost working, but not quite. I tested on 1.8.3 and then rolled back to 1.8.0, and also tried both a 4-node cluster and a 3-node cluster. Here are my configurations:
1. cluster info
{
"result": {
"status": "enabled",
"peer_id": 6474706752094638,
"peers": {
"217194742794991": {
"uri": "http://qdrant-1.qdrant-headless:6335/"
},
"6474706752094638": {
"uri": "http://qdrant-0.qdrant-headless:6335/"
},
"2099310441933307": {
"uri": "http://qdrant-2.qdrant-headless:6335/"
}
},
"raft_info": {
"term": 1,
"commit": 30,
"pending_operations": 0,
"leader": 6474706752094638,
"role": "Leader",
"is_voter": true
},
"consensus_thread_status": {
"consensus_thread_status": "working",
"last_update": "2024-04-15T03:40:32.806726249Z"
},
"message_send_failures": {}
},
"status": "ok",
"time": 0.000018902
}
2 collection info (ignore that I forgot to reduce the shard num to 3)
{
"result": {
"status": "green",
"optimizer_status": "ok",
"vectors_count": 14,
"indexed_vectors_count": 0,
"points_count": 14,
"segments_count": 32,
"config": {
"params": {
"vectors": {
"size": 1536,
"distance": "Cosine",
"on_disk": true
},
"shard_number": 4,
"replication_factor": 2,
"write_consistency_factor": 1,
"on_disk_payload": true
},
"hnsw_config": {
"m": 16,
"ef_construct": 100,
"full_scan_threshold": 10000,
"max_indexing_threads": 0,
"on_disk": true
},
"optimizer_config": {
"deleted_threshold": 0.2,
"vacuum_min_vector_number": 1000,
"default_segment_number": 0,
"max_segment_size": null,
"memmap_threshold": null,
"indexing_threshold": 20000,
"flush_interval_sec": 5,
"max_optimization_threads": null
},
"wal_config": {
"wal_capacity_mb": 32,
"wal_segments_ahead": 0
},
"quantization_config": {
"scalar": {
"type": "int8",
"always_ram": true
}
}
},
"payload_schema": {
"title": {
"data_type": "keyword",
"points": 14
},
"timestamp": {
"data_type": "datetime",
"points": 14
},
"index": {
"data_type": "integer",
"points": 14
}
}
},
"status": "ok",
"time": 0.001039349
}
3 scroll results
Here are the scroll results. I tried ordering by both index and timestamp; please note the position of the point with index == 8.
curl -H "Content-Type: application/json" -d '{"limit": 15, "with_payload": true, "order_by": "index"}' -X POST http://localhost:6333/collections/test-db/points/scroll
{
"result": {
"points": [
{
"id": "f26d41eb-4e03-7be9-1309-3a6df1f93c08",
"payload": {
"index": 0,
"text": "Italy: tomatoes, olive oil, pasta.",
"timestamp": "2024-04-12T08:40:06.189301",
"title": "food"
},
"vector": null
},
{
"id": "ec9cdbe9-e63c-f653-f49c-c8fa9ee65700",
"payload": {
"index": 1,
"text": "Japan: seafood-centric with artistic presentation.",
"timestamp": "2024-04-12T08:40:09.482208",
"title": "food"
},
"vector": null
},
{
"id": "3124168e-8b2c-87b5-3c02-515f4051088d",
"payload": {
"index": 2,
"text": "Mexico: beans, chili peppers, and meat.",
"timestamp": "2024-04-12T08:40:09.744967",
"title": "food"
},
"vector": null
},
{
"id": "d2fcfe8e-8166-2992-35a8-b60fafc0df10",
"payload": {
"index": 3,
"text": "India: diverse vegetarian and meat dishes.",
"timestamp": "2024-04-12T08:40:10.020073",
"title": "food"
},
"vector": null
},
{
"id": "a257e96a-1ed2-3ed3-da6a-7b21fedf2e86",
"payload": {
"index": 4,
"text": "France: Gourmet dining, focuses on cheese, wine",
"timestamp": "2024-04-12T08:40:10.285342",
"title": "food"
},
"vector": null
},
{
"id": "fbc02338-353c-2013-a7f3-8b8901f7647f",
"payload": {
"index": 5,
"text": "China: diverse cuisines, emphasizes balance, uses rice",
"timestamp": "2024-04-12T08:40:10.883714",
"title": "food"
},
"vector": null
},
{
"id": "9e63a9ef-fd59-cfb4-a021-fa7c88eacd3e",
"payload": {
"index": 6,
"text": "Thailand: five flavors, spicy, sweet, salty, sour.",
"timestamp": "2024-04-12T08:40:13.166867",
"title": "food"
},
"vector": null
},
{
"id": "1a53129f-a66c-fbab-aa11-db6a55a2172e",
"payload": {
"index": 7,
"text": "USA: Melting pot, diverse, large portions, fast food.",
"timestamp": "2024-04-12T08:40:15.436143",
"title": "food"
},
"vector": null
},
{
"id": "931075dc-e0b8-137d-9a66-618cf9354152",
"payload": {
"index": 9,
"text": "Greece: Olive oil, feta, yogurt, seafood",
"timestamp": "2024-04-12T08:40:15.963504",
"title": "food"
},
"vector": null
},
{
"id": "66d71fb5-eb49-591b-a063-7c1f31bf5f7d",
"payload": {
"index": 10,
"text": "Ethiopia: bread with spicy stews and vegetables",
"timestamp": "2024-04-12T08:40:16.738832",
"title": "food"
},
"vector": null
},
{
"id": "da32249e-5097-bd5a-63c9-95b90a3ab645",
"payload": {
"index": 11,
"text": "Japan: Polite, punctual, public transport reliance",
"timestamp": "2024-04-12T08:40:20.015587",
"title": "travel"
},
"vector": null
},
{
"id": "b4054b96-244f-4eb3-e8f4-1d2eb589f86c",
"payload": {
"index": 8,
"text": "Brazil: barbecue heavy",
"timestamp": "2024-04-12T08:40:15.697163",
"title": "food"
},
"vector": null
},
{
"id": "a7c8c26c-2dd5-b102-0e52-35142027f2aa",
"payload": {
"index": 12,
"text": "Italy: Relaxed, historical tours, slow-paced",
"timestamp": "2024-04-12T08:40:20.290047",
"title": "travel"
},
"vector": null
},
{
"id": "5289cdb5-66a4-171c-b09e-ab80493fafad",
"payload": {
"index": 13,
"text": "USA: Road trips, diverse landscapes.",
"timestamp": "2024-04-12T08:40:20.562687",
"title": "travel"
},
"vector": null
}
],
"next_page_offset": null
},
"status": "ok",
"time": 0.001263863
}
4 scroll results with order by timestamp
The result is the same: the point with index 8 is still out of order.
curl -H "Content-Type: application/json" -d '{"limit": 15, "with_payload": true, "order_by": "timestamp"}' -X POST http://localhost:6333/collections/test-db/points/scroll
{
"result": {
"points": [
{
"id": "f26d41eb-4e03-7be9-1309-3a6df1f93c08",
"payload": {
"index": 0,
"text": "Italy: tomatoes, olive oil, pasta.",
"timestamp": "2024-04-12T08:40:06.189301",
"title": "food"
},
"vector": null
},
{
"id": "ec9cdbe9-e63c-f653-f49c-c8fa9ee65700",
"payload": {
"index": 1,
"text": "Japan: seafood-centric with artistic presentation.",
"timestamp": "2024-04-12T08:40:09.482208",
"title": "food"
},
"vector": null
},
{
"id": "3124168e-8b2c-87b5-3c02-515f4051088d",
"payload": {
"index": 2,
"text": "Mexico: beans, chili peppers, and meat.",
"timestamp": "2024-04-12T08:40:09.744967",
"title": "food"
},
"vector": null
},
{
"id": "d2fcfe8e-8166-2992-35a8-b60fafc0df10",
"payload": {
"index": 3,
"text": "India: diverse vegetarian and meat dishes.",
"timestamp": "2024-04-12T08:40:10.020073",
"title": "food"
},
"vector": null
},
{
"id": "a257e96a-1ed2-3ed3-da6a-7b21fedf2e86",
"payload": {
"index": 4,
"text": "France: Gourmet dining, focuses on cheese, wine",
"timestamp": "2024-04-12T08:40:10.285342",
"title": "food"
},
"vector": null
},
{
"id": "fbc02338-353c-2013-a7f3-8b8901f7647f",
"payload": {
"index": 5,
"text": "China: diverse cuisines, emphasizes balance, uses rice",
"timestamp": "2024-04-12T08:40:10.883714",
"title": "food"
},
"vector": null
},
{
"id": "9e63a9ef-fd59-cfb4-a021-fa7c88eacd3e",
"payload": {
"index": 6,
"text": "Thailand: five flavors, spicy, sweet, salty, sour.",
"timestamp": "2024-04-12T08:40:13.166867",
"title": "food"
},
"vector": null
},
{
"id": "1a53129f-a66c-fbab-aa11-db6a55a2172e",
"payload": {
"index": 7,
"text": "USA: Melting pot, diverse, large portions, fast food.",
"timestamp": "2024-04-12T08:40:15.436143",
"title": "food"
},
"vector": null
},
{
"id": "931075dc-e0b8-137d-9a66-618cf9354152",
"payload": {
"index": 9,
"text": "Greece: Olive oil, feta, yogurt, seafood",
"timestamp": "2024-04-12T08:40:15.963504",
"title": "food"
},
"vector": null
},
{
"id": "66d71fb5-eb49-591b-a063-7c1f31bf5f7d",
"payload": {
"index": 10,
"text": "Ethiopia: bread with spicy stews and vegetables",
"timestamp": "2024-04-12T08:40:16.738832",
"title": "food"
},
"vector": null
},
{
"id": "da32249e-5097-bd5a-63c9-95b90a3ab645",
"payload": {
"index": 11,
"text": "Japan: Polite, punctual, public transport reliance",
"timestamp": "2024-04-12T08:40:20.015587",
"title": "travel"
},
"vector": null
},
{
"id": "b4054b96-244f-4eb3-e8f4-1d2eb589f86c",
"payload": {
"index": 8,
"text": "Brazil: barbecue heavy",
"timestamp": "2024-04-12T08:40:15.697163",
"title": "food"
},
"vector": null
},
{
"id": "a7c8c26c-2dd5-b102-0e52-35142027f2aa",
"payload": {
"index": 12,
"text": "Italy: Relaxed, historical tours, slow-paced",
"timestamp": "2024-04-12T08:40:20.290047",
"title": "travel"
},
"vector": null
},
{
"id": "5289cdb5-66a4-171c-b09e-ab80493fafad",
"payload": {
"index": 13,
"text": "USA: Road trips, diverse landscapes.",
"timestamp": "2024-04-12T08:40:20.562687",
"title": "travel"
},
"vector": null
}
],
"next_page_offset": null
},
"status": "ok",
"time": 0.002083308
}
I was able to reproduce the faulty behavior using the points&scenario from the description.
qdrant version: 1.8.4
num of nodes: 3 , 5
For a 1-node cluster (tested with both 1 shard per node and 3 shards per node): cannot reproduce.
I'll continue investigating...
Steps to reproduce using Cloud UI:
- create a cluster with 3 nodes
- go to dashboard console and execute the requests below:
Requests
`PUT collections/try_me
{ "vectors": { "size": 3, "distance": "Cosine" }, "params": { "replication_factor": 2 }, "shard_number": 3 }
PUT /collections/try_me/index
{
"field_name": "index",
"field_schema": "integer"
}
// Create 1 point
PUT /collections/try_me/points
{
"points": [
{
"id": 0,
"payload": {"index": 0, "timestapm": "2024-04-12T08:40:06.189301", "text": "Italy: tomatoes, olive oil, pasta.", "title": "food"},
"vector": [0.9, 0.1, 0.2]
}
]
}
// Batch create points
PUT /collections/try_me/points
{
"batch": {
"ids": [
11,
12,
13,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10
],
"payloads": [
{
"timestapm": "2024-04-12T08:40:20.015587",
"text": "Japan: Polite, punctual, public transport reliance",
"index": 11,
"title": "travel"
},
{
"timestapm": "2024-04-12T08:40:20.290047",
"text": "Italy: Relaxed, historical tours, slow-paced",
"index": 12,
"title": "travel"
},
{
"timestapm": "2024-04-12T08:40:20.562687",
"text": "USA: Road trips, diverse landscapes.",
"index": 13,
"title": "travel"
},
{
"timestapm": "2024-04-12T08:40:09.482208",
"text": "Japan: seafood-centric with artistic presentation.",
"index": 1,
"title": "food"
},
{
"timestapm": "2024-04-12T08:40:09.744967",
"text": "Mexico: beans, chili peppers, and meat.",
"index": 2,
"title": "food"
},
{
"timestapm": "2024-04-12T08:40:10.020073",
"text": "India: diverse vegetarian and meat dishes.",
"index": 3,
"title": "food"
},
{
"timestapm": "2024-04-12T08:40:10.285342",
"text": "France: Gourmet dining, focuses on cheese, wine",
"index": 4,
"title": "food"
},
{
"timestapm": "2024-04-12T08:40:10.883714",
"text": "China: diverse cuisines, emphasizes balance, uses rice",
"index": 5,
"title": "food"
},
{
"timestapm": "2024-04-12T08:40:13.166867",
"text": "Thailand: five flavors, spicy, sweet, salty, sour.",
"index": 6,
"title": "food"
},
{
"timestapm": "2024-04-12T08:40:15.436143",
"text": "USA: Melting pot, diverse, large portions, fast food.",
"index": 7,
"title": "food"
},
{
"timestapm": "2024-04-12T08:40:15.697163",
"text": "Brazil: barbecue heavy",
"index": 8,
"title": "food"
},
{
"timestapm": "2024-04-12T08:40:15.963504",
"text": "Greece: Olive oil, feta, yogurt, seafood",
"index": 9,
"title": "food"
},
{
"timestapm": "2024-04-12T08:40:16.738832",
"text": "Ethiopia: bread with spicy stews and vegetables",
"index": 10,
"title": "food"
}
],
"vectors": [
[
0.9,
0.1,
0.3
],
[
0.9,
0.3,
0.1
],
[
0.9,
0.5,
0.1
],
[
0.9,
0.2,
0.1
],
[
0.8,
0.2,
0.1
],
[
0.8,
0.1,
0.1
],
[
0.3,
0.1,
0.1
],
[
0.3,
0.1,
0.2
],
[
0.4,
0.1,
0.1
],
[
0.7,
0.2,
0.1
],
[
0.7,
0.1,
0.1
],
[
0.7,
0.3,
0.1
],
[
0.9,
0.2,
0.1
]
]
}
}
// List points in a collection, using order_by
POST collections/try_me/points/scroll
{
"limit": 10,
"order_by": "index"
}`
I scaled out the cluster from 1 node (where ordering works correctly) to 2 nodes, and the issue reproduced right away on 2 nodes.
Thank you for bringing this to our attention @open-chinese 🙌 , this has now been fixed with #4054