Extend ec2 boot disk

07/07/2015
Posted in AWS Blog
07/07/2015 Wannes Van Causbroeck

As most AWS users know by now, Amazon doesn’t make it particularly easy to extend boot volumes: the CentOS images don’t use LVM and EBS volumes cannot be extended in place.
The aws recommended way of extending a disk is:

  • stop the instance
  • unmap the volume
  • create a snapshot of the volume
  • create a bigger volume of the snapshot
  • mount the new volume on another stopped instance (in the same AZ! Otherwise we can’t map the volume)
  • start that instance
  • delete and recreate the partition table to fit to the size of the disk + resize the filesystem
  • (the two steps above are needed when using parted. Fdisk can handle modifying the root volume in situ, but I couldn’t get it to play nice through scripting)

  • stop the instance
  • move the volume to the original instance
  • start that instance

Phew! To avoid doing this manually and getting bored to death, I wrote a small script to handle this for me. It’s not the cleanest or most flexible script in the world (and could be improved no end), but it gets the job done.


#!/bin/bash
#
# ATTENTION: this only works when root login is allowed on your intermediate host. Preferably use centos 6
#
# This script uses an intermediate host to resize the disk + you need an ssh tunnel to that machine through a nat host. 
# This way we can also reach machines in a private subnet
# ex: ssh -i <pemfile> -L 2222:<temp_instance_private_ip>:22 <user>@<nat_host>
#
# Fill in the following variables
# and DON'T forget to make sure grub boots the correct disk by changing root=/dev/xvda1 in /boot/grub/menu.lst on the intermediate host. Booting the wrong disk is caused
# by the fact that by default it mentions the UUID of the volume, and aws uses the same image for every instance of a specific ami.
#
# The script is called with two parameters:
# - the instance_id you want to resize
# - the new disk size in GB

# Instance id of the temporary (intermediate) host; the new volume is attached
# to it and repartitioned there.
temp_instance=
# Device name under which the volume is attached to the temporary host, as
# shown in the web gui (passed to `aws ec2 attach-volume --device`).
temp_device_aws=/dev/sdf # doesn't really matter, but this one works. I've had issues with higher letters
# Name of that same device as seen by the OS on the temporary host.
temp_device_host=/dev/xvdf # has to correspond to $temp_device_aws
# Identity file (ssh -i) used to log into the temporary host.
pemfile=
# Username used to log into the temporary host.
temp_username=root # sorry, won't work otherwise. You could adapt all commands to use sudo, but some commands don't work with it
# aws cli profile (--profile) used for every aws call.
profile=
# Local port of the ssh tunnel to the temporary host.
tunnel_port=2222 # use the port you opened up for the ssh tunnel

# Positional arguments: the instance to resize and the new disk size in GB.
source_instance=$1
new_size=$2

# Both arguments are mandatory and the size has to be a positive whole number;
# abort before touching any instance otherwise.
if [[ -z $source_instance || ! $new_size =~ ^[1-9][0-9]*$ ]]; then
  echo "Usage: $0 <instance_id> <new_size_in_GB>" >&2
  exit 1
fi

# for ease of use, let's put all our actions into functions

# Print the value of the "Name" tag of an instance.
# Globals:   profile (read) - aws cli profile
# Arguments: $1 - instance id
# Outputs:   the tag value as text on stdout
function get_instance_name {
  local instance_id=$1
  aws --profile "$profile" ec2 describe-instances \
    --query 'Reservations[*].Instances[*].Tags[?Key==`Name`].Value' \
    --output text \
    --instance-ids "$instance_id"
}

# Print the private IP address of an instance.
# Globals:   profile (read) - aws cli profile
# Arguments: $1 - instance id
# Outputs:   the private IP address as text on stdout
function get_ip {
  local instance_id=$1
  aws --profile "$profile" ec2 describe-instances \
    --query 'Reservations[0].Instances[0].PrivateIpAddress' \
    --output text \
    --instance-ids "$instance_id"
}

# Print the volume id of the first block device mapping of an instance.
# NOTE: this takes BlockDeviceMappings[0] and therefore assumes the boot volume
# is listed first - verify for instances with several volumes.
# Globals:   profile (read) - aws cli profile
# Arguments: $1 - instance id
# Outputs:   the volume id as text on stdout
function get_bootvol_id {
  local instance_id=$1
  aws --profile "$profile" ec2 describe-instances \
    --query 'Reservations[0].Instances[0].BlockDeviceMappings[0].Ebs.VolumeId' \
    --output text \
    --instance-ids "$instance_id"
}

# Print the availability zone an instance is placed in.
# Globals:   profile (read) - aws cli profile
# Arguments: $1 - instance id
# Outputs:   the availability zone as text on stdout
function get_az {
  local instance_id=$1
  aws --profile "$profile" ec2 describe-instances \
    --query 'Reservations[0].Instances[0].Placement.AvailabilityZone' \
    --output text \
    --instance-ids "$instance_id"
}

# Block until an instance has reached the "stopped" state, using the aws cli
# waiter `instance-stopped`.
# Globals:   profile (read) - aws cli profile
# Arguments: $1 - instance id
# Returns:   exit status of the aws waiter
function check_stopped {
  local instance_id=$1
  aws --profile "$profile" ec2 wait instance-stopped --instance-ids "$instance_id"
}
# Test whether an instance is stopped right now. Unlike check_stopped, which
# blocks until the instance has stopped, this only inspects the current state
# and returns immediately.
# Globals:   profile (read) - aws cli profile
# Arguments: $1 - instance id
# Returns:   0 when the reported state contains "stopped", non-zero otherwise
function is_stopped {
  local instance_id=$1
  aws --profile "$profile" ec2 describe-instances \
    --query 'Reservations[*].Instances[*].State.Name' \
    --instance-ids "$instance_id" \
    --output text \
    | grep -q 'stopped'
}

# Block until a snapshot is completed, using the aws cli waiter
# `snapshot-completed`.
# Globals:   profile (read) - aws cli profile
# Arguments: $1 - snapshot id
# Returns:   exit status of the aws waiter
function wait_snapshot {
  local snapshot_id=$1
  # --snapshot-ids is the documented option name; the abbreviated
  # --snapshot-id only works through option-prefix matching.
  aws --profile "$profile" ec2 wait snapshot-completed --snapshot-ids "$snapshot_id"
}

# Block until a volume is in the "available" state (i.e. not attached), using
# the aws cli waiter `volume-available`.
# Globals:   profile (read) - aws cli profile
# Arguments: $1 - volume id
# Returns:   exit status of the aws waiter
function wait_volume {
  local volume_id=$1
  # --volume-ids is the documented option name; the abbreviated --volume-id
  # only works through option-prefix matching.
  aws --profile "$profile" ec2 wait volume-available --volume-ids "$volume_id"
}

# Request detaching a volume from whatever instance it is attached to.
# The aws output is discarded; the call does not wait for the detach to finish.
# Globals:   profile (read) - aws cli profile
# Arguments: $1 - volume id
# Returns:   exit status of the aws call
function detach_volume {
  local volume_id=$1
  aws --profile "$profile" ec2 detach-volume --volume-id "$volume_id" > /dev/null
}

# Start a snapshot of a volume and print the id of the new snapshot.
# The snapshot is described as "<volume id>-snap".
# Globals:   profile (read) - aws cli profile
# Arguments: $1 - volume id
# Outputs:   the snapshot id on stdout
function create_snapshot {
  local volume_id=$1
  # Let the cli extract the id instead of scraping its JSON with sed/grep:
  # that only worked when the profile's default output format was json.
  aws --profile "$profile" ec2 create-snapshot \
    --volume-id "$volume_id" \
    --description "${volume_id}-snap" \
    --query 'SnapshotId' \
    --output text
}

# Create a gp2 volume of the given size from a snapshot and print its id.
# Globals:   profile (read) - aws cli profile
# Arguments: $1 - snapshot id
#            $2 - availability zone to create the volume in
#            $3 - size of the new volume (passed to --size)
# Outputs:   the volume id on stdout
function create_volume_from_snap {
  local snapshot_id=$1
  local az=$2
  local size=$3
  # Let the cli extract the id instead of scraping its JSON with sed/grep:
  # that only worked when the profile's default output format was json.
  aws --profile "$profile" ec2 create-volume \
    --availability-zone "$az" \
    --snapshot-id "$snapshot_id" \
    --size "$size" \
    --volume-type gp2 \
    --query 'VolumeId' \
    --output text
}

# Request attaching a volume to an instance under the given device name.
# The aws output is discarded; the call does not wait for the attach to finish.
# Globals:   profile (read) - aws cli profile
# Arguments: $1 - volume id
#            $2 - instance id
#            $3 - device name to expose the volume as (e.g. /dev/sdf)
# Returns:   exit status of the aws call
function attach_volume {
  local volume_id=$1
  local instance_id=$2
  local device=$3
  aws --profile "$profile" ec2 attach-volume \
    --volume-id "$volume_id" \
    --instance-id "$instance_id" \
    --device "$device" > /dev/null
}

# Grow partition 1 of the attached disk to the full disk size and resize its
# filesystem, by running parted / e2fsck / resize2fs on the temporary host
# over ssh.
# Globals:   pemfile, temp_username, tunnel_port, temp_device_host (all read)
# Arguments: $1 - ip of the temporary host; only used by the commented-out
#                 ssh variant that does not go through the tunnel
# Returns:   1 when the start of partition 1 cannot be determined (nothing is
#            modified in that case), otherwise the status of the final
#            fsck/resize step
function do_resize {
  local temp_instance_ip=$1
  local partition="${temp_device_host}1"
  local partition_start

  # NAT or direct version
  #local -a ssh_cmd=(ssh -o StrictHostKeyChecking=no -i "$pemfile" "$temp_username@$temp_instance_ip")
  local -a ssh_cmd=(ssh -o StrictHostKeyChecking=no -i "$pemfile" "$temp_username@localhost" -p "$tunnel_port")

  "${ssh_cmd[@]}" yum -y install parted

  # Second column of the row for partition number 1 in `parted print`.
  partition_start=$("${ssh_cmd[@]}" parted "$temp_device_host" print \
    | awk '$1 == "1" { print $2; exit }')
  # Never delete the partition when we do not know where to recreate it.
  if [[ -z $partition_start ]]; then
    echo "Could not determine the start of partition 1 on $temp_device_host" >&2
    return 1
  fi

  "${ssh_cmd[@]}" parted -s "$temp_device_host" rm 1
  "${ssh_cmd[@]}" parted -s "$temp_device_host" mkpart primary "$partition_start" 100%
  "${ssh_cmd[@]}" parted -s "$temp_device_host" set 1 boot on

  # e2fsck refuses to run with both -p and -y, so only -y is used. Exit status
  # 1 means "errors were corrected" and must not prevent the resize.
  "${ssh_cmd[@]}" "e2fsck -f -y $partition; [ \$? -le 1 ] && resize2fs $partition"
}

# Request stopping an instance. The aws output is discarded; the call does not
# wait for the instance to reach the stopped state.
# Globals:   profile (read) - aws cli profile
# Arguments: $1 - instance id
# Returns:   exit status of the aws call
function stop_instance {
  local instance_id=$1
  aws --profile "$profile" ec2 stop-instances --instance-ids "$instance_id" > /dev/null
}
# Request starting an instance. The aws output is discarded; the call does not
# wait for the instance to be running.
# Globals:   profile (read) - aws cli profile
# Arguments: $1 - instance id
# Returns:   exit status of the aws call
function start_instance {
  local instance_id=$1
  aws --profile "$profile" ec2 start-instances --instance-ids "$instance_id" > /dev/null
}

# Poll the local end of the ssh tunnel once per second until the ssh daemon
# behind it answers with a banner containing "OpenSSH".
# Globals:   tunnel_port (read) - local port of the ssh tunnel
# Arguments: $1 - address of the host; only used by the commented-out variant
#                 that connects to the host directly instead of the tunnel
# Outputs:   one "." on stdout per failed attempt, as a progress indicator
function wait_for_ssh {
  local host=$1
  # NAT instance
  #while ! nc -z "$host" 22; do
  while ! nc -w 1 localhost "$tunnel_port" | grep -q OpenSSH; do
    # printf instead of the former `echo - n "."`, which printed "- n ."
    # followed by a newline on every attempt.
    printf '.'
    sleep 1
  done
}

# ---------------------------------------------------------------------------
# Main flow: snapshot the boot volume of the source instance, create a bigger
# volume from it, grow partition + filesystem on the temporary host and hand
# the new volume back to the source instance.
# ---------------------------------------------------------------------------
source_instance_name=$(get_instance_name "$source_instance")
echo "Starting resize"
echo "Resizing disk of host $source_instance_name"

echo "Getting info & testing connections"
temp_instance_name=$(get_instance_name "$temp_instance")
source_az=$(get_az "$source_instance")
temp_source_az=$(get_az "$temp_instance")
source_ip=$(get_ip "$source_instance")
temp_ip=$(get_ip "$temp_instance")
source_vol_id=$(get_bootvol_id "$source_instance")

# Pre-flight checks: both instances exist, the tunnel is up, both instances
# live in the same AZ (a volume can only be attached within its own AZ).
aws ec2 describe-instances --profile "$profile" --instance-ids "$source_instance" &> /dev/null || { echo "Source instance not found"; exit 1; }
aws ec2 describe-instances --profile "$profile" --instance-ids "$temp_instance" &> /dev/null || { echo "Temporary instance not found"; exit 1; }
nc -z localhost "$tunnel_port" || { echo "Tunnel on port $tunnel_port is closed"; exit 1; }
[[ "$source_az" == "$temp_source_az" ]] || { echo "$temp_instance_name is not in the same AZ as $source_instance_name"; exit 1; }
# The aws cli prints "None" in text mode when the queried value does not exist.
[[ -n $source_vol_id && $source_vol_id != None ]] || { echo "Could not determine the boot volume of $source_instance_name" >&2; exit 1; }
# The grub configuration can only be inspected while the temporary host is up;
# when it is already stopped the check is skipped.
if ! is_stopped "$temp_instance"; then
  ssh -o StrictHostKeyChecking=no -i "$pemfile" "$temp_username@localhost" -p "$tunnel_port" grep -q 'root=/dev/xvda1' /boot/grub/menu.lst || { echo "Grub is incorrectly configured on $temp_instance_name"; exit 1; }
fi

echo "Stopping instances"
is_stopped "$source_instance" || stop_instance "$source_instance"
is_stopped "$temp_instance" || stop_instance "$temp_instance"
check_stopped "$source_instance"
check_stopped "$temp_instance"

echo "Removing disk from $source_instance_name"
detach_volume "$source_vol_id"

echo "Creating snapshot"
snap_id=$(create_snapshot "$source_vol_id")
# Without a snapshot id every following aws call would get shifted arguments.
[[ -n $snap_id ]] || { echo "Creating a snapshot of $source_vol_id failed" >&2; exit 1; }
wait_snapshot "$snap_id"

echo "Creating volume from snapshot )"
new_vol_id=$(create_volume_from_snap "$snap_id" "$source_az" "$new_size")
[[ -n $new_vol_id ]] || { echo "Creating a volume from snapshot $snap_id failed" >&2; exit 1; }
wait_volume "$new_vol_id"

echo "Attaching volume to intermediate host $temp_instance_name"
attach_volume "$new_vol_id" "$temp_instance" "$temp_device_aws"

echo "Booting intermediate host $temp_instance_name"
start_instance "$temp_instance"
wait_for_ssh "$temp_ip"
echo "Resizing disk"
do_resize "$temp_ip" || echo "Warning: the resize step reported an error; you may have to resize the filesystem manually on $source_instance_name" >&2

echo "Attaching resized volume to $source_instance_name"
stop_instance "$temp_instance"
check_stopped "$temp_instance"
detach_volume "$new_vol_id"
# Wait until the volume is really detached instead of sleeping a fixed time.
wait_volume "$new_vol_id"
# NOTE: /dev/sda1 is assumed to be the root device name of the source instance.
attach_volume "$new_vol_id" "$source_instance" /dev/sda1
sleep 5

echo "Starting host $source_instance_name"
start_instance "$source_instance"
echo "Done"
  • SHARE

Leave a Reply

Your email address will not be published. Required fields are marked *

LET'S WORK
TOGETHER

Need a hand? Or a high five?
Feel free to visit our offices and come say hi
… or just drop us a message

We are ready when you are

Cloudar – Operations

Veldkant 7
2550 Kontich (Antwerp)
Belgium

info @ cloudar.be

+32 3 450 67 18

Cloudar – HQ

Veldkant 33A
2550 Kontich (Antwerp)
Belgium

VAT BE0564 763 890

    This contact form is protected by reCAPTCHA and the Google Privacy Policy and Terms of Service apply.

    contact
    • SHARE