-
Notifications
You must be signed in to change notification settings - Fork 1
/
milvus.yaml
513 lines (454 loc) · 21.2 KB
/
milvus.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Related configuration of etcd, used to store Milvus metadata & service discovery.
etcd:
  endpoints:
    - localhost:2379
  rootPath: by-dev  # The root path where data is stored in etcd
  metaSubPath: meta  # metaRootPath = rootPath + '/' + metaSubPath
  kvSubPath: kv  # kvRootPath = rootPath + '/' + kvSubPath
  log:
    # path is one of:
    #  - "default" as os.Stderr,
    #  - "stderr" as os.Stderr,
    #  - "stdout" as os.Stdout,
    #  - file path to append server logs to.
    # please adjust in embedded Milvus: /tmp/milvus/logs/etcd.log
    path: stdout
    level: info  # Only supports debug, info, warn, error, panic, or fatal. Default 'info'.
  use:
    # please adjust in embedded Milvus: true
    embed: false  # Whether to enable embedded Etcd (an in-process EtcdServer).
  data:
    # Embedded Etcd only.
    # please adjust in embedded Milvus: /tmp/milvus/etcdData/
    dir: default.etcd
  ssl:
    enabled: false  # Whether to support ETCD secure connection mode
    tlsCert: /path/to/etcd-client.pem  # path to your cert file
    tlsKey: /path/to/etcd-client-key.pem  # path to your key file
    tlsCACert: /path/to/ca.pem  # path to your CACert file
    # TLS min version
    # Optional values: 1.0, 1.1, 1.2, 1.3.
    # We recommend using version 1.2 and above
    tlsMinVersion: 1.3
# Selects the backend used to store Milvus metadata.
# Default value: etcd
# Valid values: [etcd, mysql]
metastore:
  type: etcd
# Related configuration of mysql, used to store Milvus metadata.
# NOTE(review): username/password below are sample credentials; override them
# for anything other than a local/dev deployment.
mysql:
  username: root
  password: 123456
  address: localhost
  port: 3306
  dbName: milvus_meta
  driverName: mysql
  maxOpenConns: 20
  maxIdleConns: 5
# Local storage path.
# please adjust in embedded Milvus: /tmp/milvus/data/
localStorage:
  path: /var/lib/milvus/data/
# Related configuration of MinIO/S3/GCS or any other service that supports the S3 API,
# which is responsible for data persistence for Milvus.
# We refer to the storage service as MinIO/S3 in the following description for simplicity.
# NOTE(review): accessKeyID/secretAccessKey below are committed in plain text;
# override them for anything other than a local/dev deployment.
minio:
  address: localhost  # Address of MinIO/S3
  port: 9000  # Port of MinIO/S3
  accessKeyID: idios
  secretAccessKey: AtadGhefDow2
  useSSL: false  # Access to MinIO/S3 with SSL
  bucketName: "a-bucket"  # Bucket name in MinIO/S3
  rootPath: files  # The root path where the message is stored in MinIO/S3
  # Whether to use IAM role to access S3/GCS instead of access/secret keys
  # For more information, refer to
  # aws: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use.html
  # gcp: https://cloud.google.com/storage/docs/access-control/iam
  useIAM: false
  # Cloud Provider of S3. Supports: "aws", "gcp".
  # You can use "aws" for other cloud providers that support the S3 API with signature v4, e.g.: minio
  # You can use "gcp" for other cloud providers that support the S3 API with signature v2
  # When `useIAM` is enabled, only "aws" & "gcp" are supported for now
  cloudProvider: "aws"
  # Custom endpoint for fetching IAM role credentials, used when useIAM is true & cloudProvider is "aws".
  # Leave it empty if you want to use the AWS default endpoint
  iamEndpoint: ""
# Milvus supports three MQs: rocksmq (based on RocksDB), Pulsar and Kafka. Keep only the one you use in this config.
# If multiple MQs are configured in this file, they are picked in the following priority order:
# 1. standalone(local) mode: rocksmq(default) > Pulsar > Kafka
# 2. cluster mode: Pulsar(default) > Kafka (rocksmq is unsupported)
# Related configuration of pulsar, used to manage Milvus logs of recent mutation operations, output streaming log, and provide log publish-subscribe services.
pulsar:
  address: localhost  # Address of pulsar
  port: 6650  # Port of pulsar
  webport: 80  # Web port of pulsar; if you connect directly without a proxy, use 8080
  maxMessageSize: 5242880  # 5 * 1024 * 1024 Bytes, Maximum size of each message in pulsar.
  tenant: public
  namespace: default
# If you want to enable kafka, you need to comment out the pulsar configs.
kafka:
  producer:
    client.id: dc
  consumer:
    client.id: dc1
  # brokerList: localhost1:9092,localhost2:9092,localhost3:9092
  # saslUsername: username
  # saslPassword: password
  # saslMechanisms: PLAIN
  # securityProtocol: SASL_SSL
# Related configuration of rocksmq (see the MQ priority note in this file).
rocksmq:
  # please adjust in embedded Milvus: /tmp/milvus/rdb_data
  path: /var/lib/milvus/rdb_data  # The path where the message is stored in rocksmq
  rocksmqPageSize: 2147483648  # 2 GB, 2 * 1024 * 1024 * 1024 bytes, The size of each page of messages in rocksmq
  retentionTimeInMinutes: 7200  # 5 days, 5 * 24 * 60 minutes, The retention time of the message in rocksmq.
  retentionSizeInMB: 8192  # 8 GB, 8 * 1024 MB, The retention size of the message in rocksmq.
  compactionInterval: 86400  # 1 day, trigger rocksdb compaction every day to remove deleted data
  lrucacheratio: 0.06  # rocksdb cache memory ratio
# Related configuration of rootCoord, used to handle data definition language (DDL) and data control language (DCL) requests
rootCoord:
  address: localhost
  port: 53100
  enableActiveStandby: false  # Enable active-standby
  dmlChannelNum: 256  # The number of dml channels created at system startup
  maxPartitionNum: 4096  # Maximum number of partitions in a collection
  minSegmentSizeToEnableIndex: 1024  # It's a threshold. When the segment size is less than this value, the segment will not be indexed
  # (in seconds) Duration after which an import task will expire (be killed). Default 900 seconds (15 minutes).
  # Note: If default value is to be changed, change also the default in: internal/util/paramtable/component_param.go
  importTaskExpiration: 900
  # (in seconds) Milvus will keep the record of import tasks for at least `importTaskRetention` seconds. Default 86400
  # seconds (24 hours).
  # Note: If default value is to be changed, change also the default in: internal/util/paramtable/component_param.go
  importTaskRetention: 86400
# Related configuration of proxy, used to validate client requests and reduce the returned results.
proxy:
  port: 19530
  internalPort: 19529
  http:
    enabled: true  # Whether to enable the http server
    debug_mode: false  # Whether to enable http server debug mode
  timeTickInterval: 200  # ms, the interval at which proxy synchronizes the time tick
  msgStream:
    timeTick:
      bufSize: 512
  maxNameLength: 255  # Maximum length of name for a collection or alias
  maxFieldNum: 256  # Maximum number of fields in a collection
  maxDimension: 32768  # Maximum dimension of a vector
  maxShardNum: 256  # Maximum number of shards in a collection
  maxTaskNum: 1024  # max task number of proxy task queue
  # please adjust in embedded Milvus: false
  ginLogging: true  # Whether to produce gin logs.
  grpc:
    serverMaxRecvSize: 67108864  # 64 MB, 64 * 1024 * 1024
    serverMaxSendSize: 67108864  # 64 MB, 64 * 1024 * 1024
    clientMaxRecvSize: 104857600  # 100 MB, 100 * 1024 * 1024
    clientMaxSendSize: 104857600  # 100 MB, 100 * 1024 * 1024
# Related configuration of queryCoord, used to manage topology and load balancing for the query nodes, and handoff from growing segments to sealed segments.
queryCoord:
  address: localhost
  port: 19531
  autoHandoff: true  # Enable auto handoff
  autoBalance: true  # Enable auto balance
  overloadedMemoryThresholdPercentage: 90  # The memory-usage percentage above which a node is considered overloaded
  balanceIntervalSeconds: 60
  memoryUsageMaxDifferencePercentage: 30
  checkInterval: 1000
  channelTaskTimeout: 60000  # 1 minute
  segmentTaskTimeout: 120000  # 2 minutes
  distPullInterval: 500
  loadTimeoutSeconds: 600
  checkHandoffInterval: 5000
  taskMergeCap: 16
  taskExecutionCap: 256
  enableActiveStandby: false  # Enable active-standby
  refreshTargetsIntervalSeconds: 300
# Related configuration of queryNode, used to run hybrid search between vector and scalar data.
queryNode:
  # GB, default 32 GB. `cacheSize` is the memory used for caching data for faster query.
  # The `cacheSize` must be less than system memory size.
  cacheSize: 32
  port: 21123
  loadMemoryUsageFactor: 3  # The multiply factor of calculating the memory usage while loading segments
  enableDisk: true  # enable querynode load disk index, and search on disk index
  maxDiskUsagePercentage: 95
  stats:
    publishInterval: 1000  # Interval for querynode to report node information (milliseconds)
  dataSync:
    flowGraph:
      maxQueueLength: 1024  # Maximum length of task queue in flowgraph
      maxParallelism: 1024  # Maximum number of tasks executed in parallel in the flowgraph
  # Segcore will divide a segment into multiple chunks to enable small index
  segcore:
    chunkRows: 1024  # The number of vectors in a chunk.
    # Note: we have disabled segment small index since @2022.05.12. So below related configurations won't work.
    # We won't create small index for growing segments and search on these segments will directly use bruteforce scan.
    smallIndex:
      nlist: 128  # small index nlist, recommend to set sqrt(chunkRows), must be smaller than chunkRows/8
      nprobe: 16  # nprobe to search small index, based on your accuracy requirement, must be smaller than nlist
  cache:
    enabled: true
    memoryLimit: 2147483648  # 2 GB, 2 * 1024 * 1024 * 1024
  scheduler:
    receiveChanSize: 10240
    unsolvedQueueSize: 10240
    # maxReadConcurrentRatio is the concurrency ratio of read task (search task and query task).
    # Max read concurrency would be the value of `runtime.NumCPU * maxReadConcurrentRatio`.
    # It defaults to 2.0, which means max read concurrency would be the value of runtime.NumCPU * 2.
    # Max read concurrency must be greater than or equal to 1, and less than or equal to runtime.NumCPU * 100.
    maxReadConcurrentRatio: 2.0  # (0, 100]
    cpuRatio: 10.0  # ratio used to estimate read task cpu usage.
  grouping:
    enabled: true
    maxNQ: 1000
    topKMergeRatio: 10.0
# Related configuration of indexCoord.
indexCoord:
  address: localhost
  port: 31000
  enableActiveStandby: false  # Enable active-standby
  minSegmentNumRowsToEnableIndex: 1024  # It's a threshold. When the segment num rows is less than this value, the segment will not be indexed
  bindIndexNodeMode:
    enable: false
    address: "localhost:22930"
    withCred: false
    nodeID: 0
  gc:
    interval: 600  # gc interval in seconds
# Related configuration of indexNode.
indexNode:
  port: 21121
  enableDisk: true  # enable index node build disk vector index
  maxDiskUsagePercentage: 95
  scheduler:
    buildParallel: 1
# Related configuration of dataCoord.
dataCoord:
  address: localhost
  port: 13333
  enableCompaction: true  # Enable data segment compaction
  enableGarbageCollection: true
  enableActiveStandby: false  # Enable active-standby
  channel:
    maxWatchDuration: 60  # Timeout on watching channels (in seconds). Default 60 seconds.
  segment:
    maxSize: 512  # Maximum size of a segment in MB
    diskSegmentMaxSize: 2048  # Maximum size of a segment in MB for collection which has Disk index
    # Minimum proportion for a segment which can be sealed.
    # Sealing early can prevent producing large growing segments in case these segments might slow down our search/query.
    # Segments that sealed early will be compacted into a larger segment (within maxSize) eventually.
    sealProportion: 0.23
    assignmentExpiration: 2000  # The time of the assignment expiration in ms
    maxLife: 86400  # The max lifetime of segment in seconds, 24*60*60
    # If a segment didn't accept dml records in `maxIdleTime` and the size of segment is greater than
    # `minSizeFromIdleToSealed`, Milvus will automatically seal it.
    maxIdleTime: 600  # The max idle time of segment in seconds, 10*60.
    minSizeFromIdleToSealed: 16  # The min size in MB a segment must exceed to be sealed when idle.
    # The segment is considered as "small segment" when its # of rows is smaller than
    # (smallProportion * segment max # of rows).
    smallProportion: 0.5
    # A compaction will happen on small segments if the segment after compaction will have
    # over (compactableProportion * segment max # of rows) rows.
    compactableProportion: 0.85
  compaction:
    enableAutoCompaction: true
  gc:
    interval: 3600  # gc interval in seconds
    missingTolerance: 86400  # file meta missing tolerance duration in seconds, 24*60*60
    dropTolerance: 86400  # file belongs to dropped entity tolerance duration in seconds, 24*60*60
# Related configuration of dataNode.
dataNode:
  port: 21124
  dataSync:
    flowGraph:
      maxQueueLength: 1024  # Maximum length of task queue in flowgraph
      maxParallelism: 1024  # Maximum number of tasks executed in parallel in the flowgraph
  segment:
    # Max buffer size to flush for a single segment.
    insertBufSize: 16777216  # Bytes, 16 MB
    # Max buffer size to flush del for a single channel
    deleteBufBytes: 67108864  # Bytes, 64 MB
    # The period to sync segments if buffer is not empty.
    syncPeriod: 600  # Seconds, 10 min
# Configures the system log output.
log:
  level: warn  # Only supports debug, info, warn, error, panic, or fatal. Default 'info'.
  file:
    # please adjust in embedded Milvus: /tmp/milvus/logs
    rootPath: ""  # default to stdout, stderr
    maxSize: 300  # MB
    maxAge: 10  # Maximum time for log retention in days.
    maxBackups: 20
  format: text  # text/json
# gRPC settings: log level, server/client message size limits, and client options.
grpc:
  log:
    level: WARNING
  serverMaxRecvSize: 536870912  # 512 MB, 512 * 1024 * 1024
  serverMaxSendSize: 536870912  # 512 MB, 512 * 1024 * 1024
  clientMaxRecvSize: 104857600  # 100 MB, 100 * 1024 * 1024
  clientMaxSendSize: 104857600  # 100 MB, 100 * 1024 * 1024
  client:
    dialTimeout: 5000
    keepAliveTime: 10000
    keepAliveTimeout: 20000
    maxMaxAttempts: 5
    initialBackOff: 1.0
    maxBackoff: 60.0
    backoffMultiplier: 2.0
# Certificate/key paths used when TLS is enabled on the proxy.
tls:
  serverPemPath: configs/cert/server.pem
  serverKeyPath: configs/cert/server.key
  caPemPath: configs/cert/ca.pem
# Settings shared across components.
common:
  # Channel name generation rule: ${namePrefix}-${ChannelIdx}
  chanNamePrefix:
    cluster: "by-dev"
    rootCoordTimeTick: "rootcoord-timetick"
    rootCoordStatistics: "rootcoord-statistics"
    rootCoordDml: "rootcoord-dml"
    rootCoordDelta: "rootcoord-delta"
    search: "search"
    searchResult: "searchResult"
    queryTimeTick: "queryTimeTick"
    queryNodeStats: "query-node-stats"
    # Cmd for loadIndex, flush, etc...
    cmd: "cmd"
    dataCoordStatistic: "datacoord-statistics-channel"
    dataCoordTimeTick: "datacoord-timetick-channel"
    dataCoordSegmentInfo: "segment-info-channel"
  # Sub name generation rule: ${subNamePrefix}-${NodeID}
  subNamePrefix:
    rootCoordSubNamePrefix: "rootCoord"
    proxySubNamePrefix: "proxy"
    queryNodeSubNamePrefix: "queryNode"
    dataNodeSubNamePrefix: "dataNode"
    dataCoordSubNamePrefix: "dataCoord"
  defaultPartitionName: "_default"  # default partition name for a collection
  defaultIndexName: "_default_idx"  # default index name
  retentionDuration: 86400  # time travel reserved time, insert/delete will not be cleaned in this period. 1 day in seconds
  entityExpiration: -1  # Entity expiration in seconds. CAUTION: make sure entityExpiration >= retentionDuration; -1 means never expire
  gracefulTime: 5000  # milliseconds. The interval (in ms) subtracted from the request arrival time in the case of Bounded Consistency.
  # Default value: auto
  # Valid values: [auto, avx512, avx2, avx, sse4_2]
  # This configuration is only used by querynode and indexnode, it selects CPU instruction set for Searching and Index-building.
  simdType: auto
  indexSliceSize: 16  # MB
  DiskIndex:
    MaxDegree: 56
    SearchListSize: 100
    PQCodeBudgetGBRatio: 0.125
    BuildNumThreadsRatio: 1.0
    SearchCacheBudgetGBRatio: 0.10
    LoadNumThreadRatio: 8.0
    BeamWidthRatio: 4.0
  # This parameter specifies the number of threads as a multiple of the number of cores
  threadCoreCoefficient: 10
  # please adjust in embedded Milvus: local
  storageType: minio
  security:
    authorizationEnabled: true
    # tls mode values [0, 1, 2]
    # 0 is close, 1 is one-way authentication, 2 is two-way authentication.
    tlsMode: 0
  session:
    ttl: 60  # ttl value when session granting a lease to register service
    retryTimes: 30  # retry times when session sending etcd requests
# QuotaConfig, configurations of Milvus quota and limits.
# By default, we enable:
#   1. TT protection;
#   2. Memory protection.
#   3. Disk quota protection.
# NOTE(review): in this file limitWriting.ttProtection.enabled is set to false,
# so TT protection is NOT active with these values — confirm that is intended.
# You can enable:
#   1. DML throughput limitation;
#   2. DDL, DQL qps/rps limitation;
#   3. DQL Queue length/latency protection;
#   4. DQL result rate protection;
# If necessary, you can also manually force to deny RW requests.
quotaAndLimits:
  enabled: true  # `true` to enable quota and limits, `false` to disable.
  # quotaCenterCollectInterval is the time interval that quotaCenter
  # collects metrics from Proxies, Query cluster and Data cluster.
  quotaCenterCollectInterval: 3  # seconds, (0 ~ 65536)
  ddl:  # ddl limit rates, default no limit.
    enabled: false
    collectionRate: -1  # qps, default no limit, rate for CreateCollection, DropCollection, LoadCollection, ReleaseCollection
    partitionRate: -1  # qps, default no limit, rate for CreatePartition, DropPartition, LoadPartition, ReleasePartition
  indexRate:
    enabled: false
    max: -1  # qps, default no limit, rate for CreateIndex, DropIndex
  flushRate:
    enabled: false
    max: -1  # qps, default no limit, rate for flush
  compactionRate:
    enabled: false
    max: -1  # qps, default no limit, rate for manualCompaction
  # dml limit rates, default no limit.
  # The maximum rate will not be greater than `max`.
  dml:
    enabled: false
    insertRate:
      max: -1  # MB/s, default no limit
    deleteRate:
      max: -1  # MB/s, default no limit
    bulkLoadRate:  # not support yet. TODO: limit bulkLoad rate
      max: -1  # MB/s, default no limit
  # dql limit rates, default no limit.
  # The maximum rate will not be greater than `max`.
  dql:
    enabled: false
    searchRate:
      max: -1  # vps (vectors per second), default no limit
    queryRate:
      max: -1  # qps, default no limit
  # limitWriting decides whether dml requests are allowed.
  limitWriting:
    # forceDeny `false` means dml requests are allowed (except for some
    # specific conditions, such as memory of nodes to water marker), `true` means always reject all dml requests.
    forceDeny: false
    ttProtection:
      enabled: false
      # maxTimeTickDelay indicates the backpressure for DML Operations.
      # DML rates would be reduced according to the ratio of time tick delay to maxTimeTickDelay,
      # if time tick delay is greater than maxTimeTickDelay, all DML requests would be rejected.
      maxTimeTickDelay: 300  # in seconds
    memProtection:
      enabled: true
      # When memory usage > memoryHighWaterLevel, all dml requests would be rejected;
      # When memoryLowWaterLevel < memory usage < memoryHighWaterLevel, reduce the dml rate;
      # When memory usage < memoryLowWaterLevel, no action.
      # memoryLowWaterLevel should be less than memoryHighWaterLevel.
      dataNodeMemoryLowWaterLevel: 0.85  # (0, 1], memoryLowWaterLevel in DataNodes
      dataNodeMemoryHighWaterLevel: 0.95  # (0, 1], memoryHighWaterLevel in DataNodes
      queryNodeMemoryLowWaterLevel: 0.85  # (0, 1], memoryLowWaterLevel in QueryNodes
      queryNodeMemoryHighWaterLevel: 0.95  # (0, 1], memoryHighWaterLevel in QueryNodes
    diskProtection:
      # When the total file size of object storage is greater than `diskQuota`, all dml requests would be rejected;
      enabled: true
      diskQuota: -1  # MB, (0, +inf), default no limit
  # limitReading decides whether dql requests are allowed.
  limitReading:
    # forceDeny `false` means dql requests are allowed (except for some
    # specific conditions, such as collection has been dropped), `true` means always reject all dql requests.
    forceDeny: false
    queueProtection:
      enabled: false
      # nqInQueueThreshold indicates that the system is under backpressure for the Search/Query path.
      # If NQ in any QueryNode's queue is greater than nqInQueueThreshold, search&query rates would gradually cool off
      # until the NQ in queue no longer exceeds nqInQueueThreshold. We think of the NQ of query request as 1.
      nqInQueueThreshold: -1  # int, default no limit
      # queueLatencyThreshold indicates that the system is under backpressure for the Search/Query path.
      # If dql latency of queuing is greater than queueLatencyThreshold, search&query rates would gradually cool off
      # until the latency of queuing no longer exceeds queueLatencyThreshold.
      # The latency here refers to the averaged latency over a period of time.
      queueLatencyThreshold: -1  # milliseconds, default no limit
    resultProtection:
      enabled: false
      # maxReadResultRate indicates that the system is under backpressure for the Search/Query path.
      # If dql result rate is greater than maxReadResultRate, search&query rates would gradually cool off
      # until the read result rate no longer exceeds maxReadResultRate.
      maxReadResultRate: -1  # MB/s, default no limit
    # coolOffSpeed is the speed of search&query rates cool off.
    coolOffSpeed: 0.9  # (0, 1]