Elasticsearch & OpenSearch Reference
Elasticsearch & OpenSearch Reference
Query DSL, index management, aggregations, mappings, and the production patterns that actually matter — covering both Elasticsearch 8.x and OpenSearch 2.x.
Index operations and cluster health
# Cluster health
GET _cluster/health
GET _cluster/health?wait_for_status=yellow&timeout=30s
# Node info
GET _cat/nodes?v
GET _nodes/stats/jvm,os,process
# Index operations
GET _cat/indices?v&s=index
GET _cat/indices?v&h=index,docs.count,store.size,health
GET _cat/indices?v&index=my-logs-* # wildcard
# Create index with settings
PUT my-index
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 1,
"refresh_interval": "5s", # default 1s — increase for heavy indexing
"index.max_result_window": 10000 # default — never raise this, use search_after
}
}
# Delete index
DELETE my-index
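DELETE my-logs-2025-* # wildcard — rejected by default on Elasticsearch 8.x (action.destructive_requires_name: true); list index names explicitly or relax that cluster setting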
# Index aliases
POST _aliases
{
"actions": [
{ "add": { "index": "my-index-v2", "alias": "my-index" } },
{ "remove": { "index": "my-index-v1", "alias": "my-index" } }
]
}
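# Reindex into a new index — for zero downtime, have clients use an alias and swap it to the new index with POST _aliases once the reindex finishes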
POST _reindex
{
"source": { "index": "old-index" },
"dest": { "index": "new-index" }
}
# Shard allocation
GET _cat/shards?v&h=index,shard,prirep,state,node
GET _cluster/allocation/explain # why is a shard unassigned?
Mappings — field types
# Mappings define how fields are indexed
PUT my-index
{
"mappings": {
"properties": {
"id": { "type": "keyword" }, # exact match only, not analyzed
"title": { "type": "text", # full-text searched, analyzed
"analyzer": "english", # stemming, stop words
"fields": {
"raw": { "type": "keyword" } # also available as keyword
}},
"price": { "type": "float" },
"quantity": { "type": "integer" },
"active": { "type": "boolean" },
"created_at": { "type": "date",
"format": "strict_date_optional_time||epoch_millis" },
"tags": { "type": "keyword" }, # array of keywords — same type
"description": { "type": "text", "index": false }, # stored but not searchable
"location": { "type": "geo_point" }, # lat/lon for geo queries
"metadata": {
"type": "object",
"properties": {
"source": { "type": "keyword" },
"score": { "type": "float" }
}
}
}
}
}
# text vs keyword — the key decision
# keyword: filter, sort, aggregations, exact match
# text: full-text search (analyzed — stemmed, tokenized)
# Use both via multi-fields when you need both behaviors
# Dynamic mapping gotcha:
# First document sets the type for a field
# Sending { "price": "abc" } when price is float → mapper_parsing_exception (a numeric string such as "10.99" is silently coerced unless the field sets "coerce": false)
# Use strict dynamic mapping to prevent surprises:
PUT my-index/_mapping
{
"dynamic": "strict" # reject unknown fields
// "dynamic": false // ignore unknown fields
// "dynamic": true // (default) auto-map unknown fields
}
# View current mapping
GET my-index/_mapping
GET my-index/_mapping/field/title
Query DSL — search queries
# Basic search
GET my-index/_search
{
"query": {
"match": { "title": "elasticsearch tutorial" } # full-text, analyzed
}
}
# Exact match (keyword field)
GET my-index/_search
{
"query": {
"term": { "status": "published" } # keyword field
}
}
# Multiple terms
{
"query": {
"terms": { "status": ["published", "featured"] }
}
}
# Range query
{
"query": {
"range": {
"created_at": {
"gte": "2026-01-01",
"lte": "now",
"format": "strict_date_optional_time"
}
}
}
}
# Boolean query — the workhorse
{
"query": {
"bool": {
"must": [ # AND — affects score
{ "match": { "title": "elasticsearch" } },
{ "match": { "body": "tutorial" } }
],
"filter": [ # AND — does NOT affect score (faster!)
{ "term": { "status": "published" } },
{ "range": { "created_at": { "gte": "now-30d" } } }
],
"must_not": [ # NOT
{ "term": { "draft": true } }
],
"should": [ # OR — boosts score if matches
{ "term": { "featured": true } }
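],
"minimum_should_match": 1 # makes at least 1 should clause mandatory; omit it to keep should as an optional score boost (default is 0 when must/filter is present)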
}
}
}
# Multi-match — search across multiple fields
{
"query": {
"multi_match": {
"query": "elasticsearch guide",
"fields": ["title^3", "body", "tags"], # ^3 = boost title 3x
"type": "best_fields" // cross_fields, most_fields, phrase
}
}
}
# Prefix and wildcard (avoid on large datasets!)
{ "query": { "prefix": { "username": "ale" } } } # starts with
{ "query": { "wildcard": { "email": "*@gmail.com" } } } # expensive!
# Exists — field is present and not null
{ "query": { "exists": { "field": "email" } } }
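# Pagination (prefer search_after over from/size for deep pagination)
{
"size": 20,
"from": 0, # from + size above index.max_result_window (10000) is rejected; keep from at 0 when using search_after
"sort": [{ "created_at": "desc" }],
"search_after": ["2026-03-01T00:00:00"] # cursor-based (use last hit's sort values); add a unique tiebreaker field to the sort so hits with equal created_at are not skipped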
}
Aggregations
# Aggregations run on the full result set (not paginated)
# "size": 0 skips returning hits — faster for aggregation-only queries
GET orders/_search
{
"size": 0,
"query": {
"range": { "created_at": { "gte": "now-30d" } } # filter first!
},
"aggs": {
"by_status": {
"terms": { # bucket by unique values
"field": "status",
"size": 10, # top 10 by count
"order": { "_count": "desc" }
}
},
"revenue": {
"sum": { "field": "amount" } # metric aggregation
},
"avg_order": {
"avg": { "field": "amount" }
},
"orders_over_time": {
"date_histogram": { # time-series buckets
"field": "created_at",
"calendar_interval": "day",
"time_zone": "Europe/London"
},
"aggs": { # nested aggregation
"daily_revenue": { "sum": { "field": "amount" } }
}
},
"price_ranges": {
"range": {
"field": "amount",
"ranges": [
{ "to": 25 },
{ "from": 25, "to": 100 },
{ "from": 100 }
]
}
},
"percentiles": {
"percentiles": {
"field": "amount",
"percents": [50, 75, 95, 99]
}
}
}
}
# Cardinality — approximate distinct count
{
"aggs": {
"unique_users": {
"cardinality": {
"field": "user_id",
"precision_threshold": 1000 # higher = more accurate, more memory
}
}
}
}
Index lifecycle management (ILM)
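# ILM — automatically manage index lifecycle for time-series data (logs, metrics)
# Elasticsearch only: OpenSearch 2.x has no _ilm API — use Index State Management instead (PUT _plugins/_ism/policies/<policy-id>)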
PUT _ilm/policy/my-logs-policy
{
"policy": {
"phases": {
"hot": {
"min_age": "0ms",
"actions": {
"rollover": {
"max_primary_shard_size": "50gb",
"max_age": "1d"
},
"set_priority": { "priority": 100 }
}
},
"warm": {
"min_age": "3d",
"actions": {
"forcemerge": { "max_num_segments": 1 },
"shrink": { "number_of_shards": 1 },
"allocate": { "number_of_replicas": 1 }
}
},
"cold": {
"min_age": "30d",
"actions": {
"allocate": { "number_of_replicas": 0 }
}
},
"delete": {
"min_age": "90d",
"actions": { "delete": {} }
}
}
}
}
# Index template — applied to new indices matching a pattern
PUT _index_template/my-logs-template
{
"index_patterns": ["my-logs-*"],
"template": {
"settings": {
"index.lifecycle.name": "my-logs-policy",
"index.lifecycle.rollover_alias": "my-logs"
}
},
"priority": 200
}
# Bootstrap first index and alias
PUT my-logs-000001
{
"aliases": {
"my-logs": { "is_write_index": true }
}
}
# Now write to alias "my-logs" — ILM handles rollover automatically
Performance and diagnostics
# Slow log — queries taking > threshold
PUT my-index/_settings
{
"index.search.slowlog.threshold.query.warn": "10s",
"index.search.slowlog.threshold.query.info": "5s",
"index.search.slowlog.threshold.query.debug": "2s",
"index.search.slowlog.level": "info" # OpenSearch / Elasticsearch 7.x only — this setting was removed in Elasticsearch 8.0, so drop this line there
}
# Hot threads — what is Elasticsearch actually doing?
GET _nodes/hot_threads
# Index stats — which index is slow?
GET _stats/search,indexing,store
GET my-index/_stats
# Fielddata / query cache
GET _nodes/stats/indices/fielddata?fields=* # fielddata memory by field
GET _nodes/stats/indices/query_cache
# Profile API — diagnose slow queries
GET my-index/_search
{
"profile": true,
"query": { "match": { "title": "elasticsearch" } }
}
# Returns timing breakdown per shard/query/collector
# Common performance patterns
# 1. Put yes/no conditions (term, range, exists) in bool "filter" rather than "must" — filter context skips scoring and can be cached; the order of clauses does not matter
# 2. Use keyword for sort/agg fields (text field sort requires fielddata = memory hog)
# 3. search_after for deep pagination (from/size is O(n) memory per shard)
# 4. _source filtering — only fetch fields you need
GET my-index/_search
{
"_source": ["title", "created_at"], # don't fetch large body field
"query": { "match": { "title": "search" } }
}
# 5. Keep doc_values enabled (the default) on keyword/numeric fields — sorting and aggregations depend on them
# 6. Refresh interval: "30s" for heavy indexing (default 1s causes many small segments)
Useful one-liners
# Check if document exists
HEAD my-index/_doc/doc-id # 200 = exists, 404 = not found
# Get a document
GET my-index/_doc/doc-id
# Bulk indexing (newline-delimited — MUST end with \n)
POST _bulk
{ "index": { "_index": "my-index", "_id": "1" } }
{ "title": "Document one", "created_at": "2026-03-14" }
{ "index": { "_index": "my-index", "_id": "2" } }
{ "title": "Document two", "created_at": "2026-03-14" }
# Update by query
POST my-index/_update_by_query
{
"query": { "term": { "status": "draft" } },
"script": {
"source": "ctx._source.status = 'archived'",
"lang": "painless"
}
}
# Delete by query
POST my-index/_delete_by_query
{
"query": { "range": { "created_at": { "lt": "now-365d" } } }
}
# Reindex with script transform
POST _reindex
{
"source": { "index": "old-index", "size": 1000 },
"dest": { "index": "new-index" },
"script": {
"source": "ctx._source.full_name = ctx._source.first + ' ' + ctx._source.last",
"lang": "painless"
}
}
# Cat APIs for quick ops
GET _cat/health?v
GET _cat/indices?v&s=docs.count:desc # sorted by doc count
GET _cat/aliases?v
GET _cat/tasks?v # running tasks
GET _cat/thread_pool?v&h=name,active,queue,rejected
🔍 Free tool: PyPI Package Health Checker — check elasticsearch-py, opensearch-py, and related Python packages for known CVEs and active maintenance.
Founded
2023 in London, UK
Contact
hello@releaserun.com