Script to re-index elasticsearch.
We started off with Elasticsearch's default of 5 shards per index and then realized that this was far more than our data warranted. After a conversation with the ES support team (which, by the way, is awesome! I definitely recommend them), we decided to reduce our shard count to 1.
I've used elasticdump to carry out the task. It is a fairly quick and versatile tool.
The script assumes you have modified elasticsearch.yml to set the desired number of shards per index, and have restarted the cluster for the setting to take effect. The script is just a general reindexing script: it backs up the data, deletes the indices that were backed up, and then restores them from the backup.
Check the script for more info.
I've used elasticdump to carry out the task. It is a fairly quick and versatile tool.
The script assumes you have modified elasticsearch.yml to set the desired number of shards per index, and have restarted the cluster for the setting to take effect. The script is just a general reindexing script: it backs up the data, deletes the indices that were backed up, and then restores them from the backup.
Check the script for more info.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Help menu for the script.
#
# Prints the command-line synopsis and the meaning of every flag to stdout,
# then calls info to print the pre-flight warning banner.
# Arguments: none
# Outputs:   help text followed by the output of info
usage() {
  # ${0##*/} strips the directory part without forking basename and is safe
  # for script paths that contain spaces.
  echo "Usage: ${0##*/} [-h] [-b] [-d] [-r] [-i] [-s] [http://es-ESname:9200]"
  echo ""
  echo "where: "
  echo " -h Show this help text "
  echo " -b Backup the elasticsearch indices to .json files "
  echo " -d Delete the indices backed up"
  echo " -r Restore the indices back on the same ES using the .json files created."
  echo " -i Reindex, short for -bdr"
  echo " -s Elasticsearch server name with port"
  echo ""
  info
}
# What should be done before the script is executed.
#
# Prints a warning banner to stdout that lists the operator's prerequisites:
# nothing may be writing to the cluster, index.number_of_shards must already
# be changed in elasticsearch.yml, and a snapshot should exist as a fallback.
# Arguments: none
# Outputs:   11 lines of text on stdout (message lines are tab-indented)
info() {
  local -r banner="*-*-*-*-*-*-*-*-*-*-*-*-*-* W A R N I N G *-*-*-*-*-*-*-*-*-*-*-*-*-*"

  printf '%s\n\n' "$banner"
  # printf gives a predictable tab; echo -e escape handling varies by shell.
  printf '\t%s\n\n' "PLEASE ENSURE NOTHING IS WRITING TO THIS Elasticsearch Cluster"
  printf '\t%s\n' "This script will dump all the indices from the elasticsearch ES into individual json files"
  printf '\t%s\n\n' "Change 'index.number_of_shards: 1' under elasticsearch.yml on the es-ES and restart before running this script"
  printf '\t%s\n\n' "You should ALSO have a SNAPSHOT of ES as backup, if not done. ctrl+c and do that first"
  printf '%s\n\n' "$banner"
}
# Stage switches (1 = run the stage) and the target server; all are filled in
# from the command line by parse_args.
BACKUP=0
DELETE=0
RESTORE=0
ES=""

# Parse the command-line flags into BACKUP, DELETE, RESTORE and ES.
# Globals:   BACKUP, DELETE, RESTORE, ES (written); OPTIND, OPTARG (getopts)
# Arguments: the script's own arguments ("$@")
# Exits:     0 after printing help for -h;
#            1 for an unknown flag or when -s is given without a value.
parse_args() {
  local opt
  OPTIND=1 # start from the first argument on every call
  # The leading ':' selects silent error reporting: a missing argument is
  # delivered as ':' and an unknown flag as '?', with the flag in OPTARG.
  while getopts ":hbdris:" opt; do
    case "$opt" in
      b)
        BACKUP=1
        ;;
      d)
        DELETE=1
        ;;
      r)
        RESTORE=1
        ;;
      i)
        # Reindex is shorthand for backup + delete + restore.
        BACKUP=1
        DELETE=1
        RESTORE=1
        ;;
      s)
        ES=$OPTARG
        ;;
      h)
        usage
        exit 0
        ;;
      :)
        # A flag without its required value is an error, not a help request.
        echo "Option -$OPTARG requires an argument" >&2
        usage
        exit 1
        ;;
      \?)
        echo "Unknown option -$OPTARG" >&2
        usage
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# Check if the ES name is provided
# Without a server there is nothing to operate on; this is a usage error, so
# report it on stderr and exit non-zero.
if [[ -z "$ES" ]]; then
  echo "No Elasticsearch server given, use -s" >&2
  usage
  exit 1
fi

# print usage and info about the program
usage
echo ""

# Summarise the selected stages and the target server on one line.
printf '%s' "You have decided to "
if [[ "$BACKUP" == 1 ]]; then
  printf '%s' " BACKUP "
fi
if [[ "$DELETE" == 1 ]]; then
  printf '%s' " DELETE "
fi
if [[ "$RESTORE" == 1 ]]; then
  printf '%s' " RESTORE "
fi
printf ' %s\r\r\n' "$ES"

echo "Document & Shard count on the ES "
# Get ES document count
# The report file ./info is started afresh on every run: first the server
# name, then the JSON returned by the _count endpoint.
printf '%s\n' "$ES" > info
# The URL is quoted because its '?' would otherwise be a glob character.
curl -s -XGET "$ES/_count?pretty=true" >> info
cat info

# Last chance to abort; read returns once the operator presses Enter.
read -r -p "You can break the script now, or press any key to continue..."
# Backing up the elasticsearch indices to json files.
#
# Fetches the index names from the _cat/indices endpoint into ./indices,
# pauses so the operator can trim that list, then dumps every listed index
# to ./<index>.json with elasticdump.
# Globals:   ES (read) - base URL of the Elasticsearch server
# Arguments: none
# Outputs:   progress on stdout, errors on stderr
# Exits:     the script with status 1 if the index list cannot be fetched or
#            any dump fails, so that a later delete stage can never remove an
#            index that was not backed up.
backup() {
  local index
  local failed=0

  echo "*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* B A C K U P *-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*"
  echo "Capturing indices..."
  # -f turns an HTTP error into a non-zero exit instead of an error body being
  # written to ./indices; the URL is quoted because '?' is a glob character.
  if ! curl -s -f -XGET "$ES/_cat/indices?h=i" > indices; then
    echo "ERROR: could not fetch the index list from $ES" >&2
    exit 1
  fi

  echo "Using elasticdump to backup indices"
  read -r -p "You can modify the indices to reduce the subset of data being backed up and deleted, the script will not proceed till you hit any key"

  # The list is read on fd 3 so elasticdump cannot consume it through stdin.
  # read trims surrounding whitespace; the '|| -n' part keeps a last line that
  # has no trailing newline.
  while read -r index <&3 || [[ -n "$index" ]]; do
    [[ -n "$index" ]] || continue # skip blank lines
    echo "Backing up " "$index"
    if ! elasticdump --input="$ES/$index" --output="$index.json"; then
      echo "ERROR: backup of $index failed" >&2
      failed=1
    fi
    echo "-----------------"
  done 3< indices

  if ((failed)); then
    echo "ERROR: at least one index was not backed up, aborting" >&2
    exit 1
  fi
  echo "BACKUP COMPLETE!!"
}
# Delete all the indices backed up.
#
# Sends an HTTP DELETE for every index name listed in ./indices (the file the
# backup stage writes) after one final confirmation prompt.
# Globals:   ES (read) - base URL of the Elasticsearch server
# Arguments: none
# Outputs:   progress and curl's response on stdout, errors on stderr
# Returns:   0 when every request could be sent; 1 when ./indices is missing
#            or curl failed for at least one index.
delete() {
  local index
  local failed=0

  echo "*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* D E L E T E *-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*"
  echo "Deleting all indices from the ES"
  if [[ ! -r indices ]]; then
    echo "ERROR: no readable 'indices' file here, run the backup stage (-b) first" >&2
    return 1
  fi
  read -r -p "ARE YOU SURE ??? ctrl+c to escape. This will delete all data on ES "

  # Names are read line by line and always quoted, so a name such as 'logs-*'
  # is sent to the server as written instead of being expanded against the
  # files in the current directory.
  while read -r index <&3 || [[ -n "$index" ]]; do
    # A blank entry must be skipped: "$ES/" on its own addresses the whole
    # cluster rather than one index.
    [[ -n "$index" ]] || continue
    echo "DELETING $index"
    if ! curl -XDELETE "$ES/$index"; then
      echo "ERROR: delete request for $index failed" >&2
      failed=1
    fi
  done 3< indices

  if ((failed)); then
    echo "ERROR: some indices could not be deleted" >&2
    return 1
  fi
  echo "DATA NUKED!! BHWAHAHAHA!! "
}
# Recreate all the indices backed up as part of the backup process
#
# Feeds every *.json file in the current directory to elasticdump, using the
# server root as the output target.
# Globals:   ES (read) - base URL of the Elasticsearch server
# Arguments: none
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 when every file was loaded; 1 when there is no .json file or
#            elasticdump failed for at least one file.
restore() {
  local dump
  local found=0
  local failed=0

  echo "*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* R E S T O R E *-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*"
  echo "REINDEXING..."
  for dump in *.json; do
    # An unmatched glob stays as the literal '*.json'; skip it rather than
    # handing elasticdump a file that does not exist.
    [[ -e "$dump" ]] || continue
    found=1
    echo "Processing $dump ..."
    # Quoted so a file name containing spaces stays a single argument.
    if ! elasticdump --bulk=true --input="$dump" --output="$ES"; then
      echo "ERROR: restore of $dump failed" >&2
      failed=1
    fi
  done

  if ((!found)); then
    echo "ERROR: no .json backup files found in $PWD" >&2
    return 1
  fi
  if ((failed)); then
    return 1
  fi
  return 0
}
# Run the selected stages in their fixed order: backup, delete, restore.
stage_status=0
if [[ "$BACKUP" == 1 ]]; then
  # A failed backup must stop the run before anything can be deleted.
  backup || exit 1
fi
if [[ "$DELETE" == 1 ]]; then
  # Keep going after a delete problem so that indices which were removed
  # still get restored; the failure is reported through the exit status.
  delete || stage_status=1
fi
if [[ "$RESTORE" == 1 ]]; then
  restore || stage_status=1
fi

# Append the post-run document count to ./info and show the whole report so
# the operator can compare it with the count taken before the run.
echo "---- POST REINDEX ---- " >> info
# The URL is quoted because its '?' would otherwise be a glob character.
curl -s -XGET "$ES/_count?pretty=true" >> info
cat info

if [[ "$stage_status" != 0 ]]; then
  echo "Finished with errors, check the output above" >&2
  exit 1
fi
echo "All Done!"
Comments
Post a Comment