Skip to content

Commit

Permalink
Improvements to the categorize-text challenge (#594) (#608)
Browse files Browse the repository at this point in the history
* Bulk import index data
* Target only those indices that have the required message field
* Remove code duplication in the challenge definition
* Reduce the default number of iterations
* Parameterize the number of warmup iterations and measured iterations
* Set the default number of warmup iterations to 3 and measured iterations to 10

Also pull in...
* Disable lossy _source parameters on serverless (#603)

---------

Co-authored-by: Dave Pifke <[email protected]>
  • Loading branch information
edsavage and dpifke-elastic authored May 26, 2024
1 parent 2101bc7 commit b6a3fdf
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 100 deletions.
2 changes: 2 additions & 0 deletions cohere_vector/index-vectors-only-mapping.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
},
"mappings": {
"dynamic": false,
{%- if build_flavor != "serverless" -%}
"_source": {
"enabled": false
},
{%- endif -%}
"properties": {
"emb": {
"type": "dense_vector",
Expand Down
5 changes: 5 additions & 0 deletions elastic/logs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,11 @@ Note: `include_target_throughput` parameter is ignored in this challenge.
* `ds_autosharding_max_threads` (default: 32) The maximum number of write threads in the auto *scaling* function.
* `dsl_poll_interval` (default: "5m") A time value indicating the interval at which data stream lifecycle runs. This is relevant in the context of auto sharding, as data stream lifecycle periodically triggers the rollover operations that will recalculate and implement the (auto)sharding scheme.

### Categorize Text (categorize-text)

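Runs the `categorize_text` aggregation with `shard_size` values of 10, 100 and 1000, both with and without a wrapping
sampler aggregation. The challenge targets a specific set of indices, those that have the required `message` field, by
way of an index alias. The number of warmup and measured iterations can be set with the `warmup_iterations`
(default: 3) and `iterations` (default: 10) parameters.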

## Changing the Datasets

The generated dataset is influenced by 2 key configurations:
Expand Down
167 changes: 67 additions & 100 deletions elastic/logs/challenges/categorize-text.json
Original file line number Diff line number Diff line change
@@ -1,66 +1,58 @@
{% import "rally.helpers" as rally %}
{% set shard_size_list = [10, 100, 1000] %}
{
"name": "categorize-text",
"description": "Checks the performance of the categorize text aggregation",
"schedule": [
{% include "tasks/index-setup.json" %},
{
"name": "bulk-index",
"operation": {
"name": "categorize_text_shard_size_10",
"operation-type": "search",
"index": "logs-*",
"body": {
"query": {
"exists": {
"field": "message"
}
},
"aggs": {
"categories.message": {
"categorize_text": {
"field": "message",
"shard_size": 10
}
}
}
}
"operation-type": "raw-bulk",
"param-source": "processed-source",
"time-format": "milliseconds",
"profile": "fixed_interval",
"bulk-size": {{ p_bulk_size }},
"detailed-results": true
},
"clients": 8,
"warmup-iterations": 10,
"iterations": 100,
"target-throughput": 100
"clients": {{ p_bulk_indexing_clients }}{% if p_throttle_indexing %},
"ignore-response-error-level": "{{error_level | default('non-fatal')}}",
"schedule": "timestamp-throttler",
"max-delay-secs": 1
{% endif %}
},
{
"operation": {
"name": "categorize_text_shard_size_100",
"operation-type": "search",
"index": "logs-*",
"name": "create-categorize-text-index-alias",
"include-in-reporting": false,
"operation-type": "raw-request",
"method": "POST",
"path": "/_aliases",
"body": {
"query": {
"exists": {
"field": "message"
}
},
"aggs": {
"categories.message": {
"categorize_text": {
"field": "message",
"shard_size": 100
"actions": [
{
"add": {
"indices": [
"logs-redis.log-default",
"logs-apache.error-default",
"logs-k8-application.log-default",
"logs-system.syslog-default",
"logs-redis.slowlog-default",
"logs-kafka.log-default"
],
"alias": "categorize-text-index-alias"
}
}
}
]
}
},
"clients": 8,
"warmup-iterations": 10,
"iterations": 100,
"target-throughput": 100
}
},
{% for shard_size in shard_size_list %}
{
"operation": {
"name": "categorize_text_shard_size_1000",
"name": "categorize_text_shard_size_{{ shard_size }}",
"operation-type": "search",
"index": "logs-*",
"index": "categorize-text-index-alias",
"body": {
"query": {
"exists": {
Expand All @@ -69,52 +61,24 @@
},
"aggs": {
"categories.message": {
"categorize_text": {
"categorize_text": {
"field": "message",
"shard_size": 1000
"shard_size": {{ shard_size }}
}
}
}
}
},
"clients": 8,
"warmup-iterations": 10,
"iterations": 100,
"warmup-iterations": {{ warmup_iterations | default(3) }},
"iterations": {{ iterations | default(10) }},
"target-throughput": 100
},
{
"operation": {
"name": "categorize_text_sampler_shard_size_10",
"operation-type": "search",
"index": "logs-*",
"body": {
"query": {
"exists": {
"field": "message"
}
},
"aggs": {
"sample": {
"sampler": {
"shard_size": 10
},
"aggs": {
"categories.message": {
"categorize_text": {
"field": "message"
}
}
}
}
}
}
}
},
{
"operation": {
"name": "categorize_text_sampler_shard_size_100",
"name": "categorize_text_sampler_shard_size_{{ shard_size }}",
"operation-type": "search",
"index": "logs-*",
"index": "categorize-text-index-alias",
"body": {
"query": {
"exists": {
Expand All @@ -124,7 +88,7 @@
"aggs": {
"sample": {
"sampler": {
"shard_size": 100
"shard_size": {{ shard_size }}
},
"aggs": {
"categories.message": {
Expand All @@ -136,35 +100,38 @@
}
}
}
}
},
"clients": 8,
"warmup-iterations": {{ warmup_iterations | default(3) }},
"iterations": {{ iterations | default(10) }},
"target-throughput": 100
},
{% endfor %}
{
"operation": {
"name": "categorize_text_sampler_shard_size_1000",
"operation-type": "search",
"index": "logs-*",
"name": "remove-categorize-text-index-alias",
"include-in-reporting": false,
"operation-type": "raw-request",
"method": "POST",
"path": "/_aliases",
"body": {
"query": {
"exists": {
"field": "message"
}
},
"aggs": {
"sample": {
"sampler": {
"shard_size": 1000
},
"aggs": {
"categories.message": {
"categorize_text": {
"field": "message"
}
}
"actions": [
{
"remove": {
"indices": [
"logs-redis.log-default",
"logs-apache.error-default",
"logs-k8-application.log-default",
"logs-system.syslog-default",
"logs-redis.slowlog-default",
"logs-kafka.log-default"
],
"alias": "categorize-text-index-alias"
}
}
}
]
}
}
}
]
}
}
2 changes: 2 additions & 0 deletions so_vector/index.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
{%- endif -%}{# non-serverless-index-settings-marker-end #}
},
"mappings": {
{%- if build_flavor != "serverless" -%}
"_source": {
"excludes": ["titleVector"]
},
{%- endif -%}
"properties": {
"userId": {
"type": "keyword"
Expand Down

0 comments on commit b6a3fdf

Please sign in to comment.