summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-06-17 19:55:55 -0700
committerSage Weil <sage@inktank.com>2013-06-17 19:55:55 -0700
commit5e9a0a140a6474c2f66c61b3d4c21e7a8da2e7c4 (patch)
tree583e58e18c97c77e8a12b90987e5a1344547ccb7
parent47ce702ce6230f2404bf0b1cb051d78489537469 (diff)
parentdf8a3e5591948dfd94de2e06640cfe54d2de4322 (diff)
downloadceph-5e9a0a140a6474c2f66c61b3d4c21e7a8da2e7c4.tar.gz
Merge branch 'next'
-rw-r--r--ceph.spec.in5
-rwxr-xr-xsrc/ceph-disk106
-rwxr-xr-xsrc/ceph-disk-udev22
-rw-r--r--src/client/Client.cc15
-rw-r--r--src/init-ceph.in7
-rw-r--r--src/mon/MonCommands.h4
-rw-r--r--src/upstart/ceph-osd-all-starter.conf4
-rw-r--r--udev/60-ceph-partuuid-workaround.rules3
8 files changed, 157 insertions, 9 deletions
diff --git a/ceph.spec.in b/ceph.spec.in
index 3c9ac20902a..9ba972e5671 100644
--- a/ceph.spec.in
+++ b/ceph.spec.in
@@ -325,7 +325,7 @@ rm -rf $RPM_BUILD_ROOT
%post
/sbin/ldconfig
-#/sbin/chkconfig --add ceph
+/sbin/chkconfig --add ceph
%preun
%if %{defined suse_version}
@@ -333,7 +333,7 @@ rm -rf $RPM_BUILD_ROOT
%endif
if [ $1 = 0 ] ; then
/sbin/service ceph stop >/dev/null 2>&1
-# /sbin/chkconfig --del ceph
+ /sbin/chkconfig --del ceph
fi
%postun
@@ -398,6 +398,7 @@ fi
%{_libdir}/rados-classes/libcls_refcount.so*
%{_libdir}/ceph
/lib/udev/rules.d/50-rbd.rules
+/lib/udev/rules.d/60-ceph-partuuid-workaround.rules
/lib/udev/rules.d/95-ceph-osd.rules
%dir %{_sysconfdir}/ceph/
%config %{_sysconfdir}/bash_completion.d/ceph
diff --git a/src/ceph-disk b/src/ceph-disk
index 127d809902d..bd7e6206ae8 100755
--- a/src/ceph-disk
+++ b/src/ceph-disk
@@ -14,6 +14,48 @@ import tempfile
import uuid
import lockfile
+"""
+Prepare:
+ - create GPT partition
+ - mark the partition with the ceph type uuid
+ - create a file system
+ - mark the fs as ready for ceph consumption
+ - entire data disk is used (one big partition)
+ - a new partition is added to the journal disk (so it can be easily shared)
+
+ - triggered by administrator or ceph-deploy, e.g. 'ceph-disk prepare <data disk> [journal disk]'
+
+Activate:
+ - mount the volume in a temp location
+ - allocate an osd id (if needed)
+ - remount in the correct location /var/lib/ceph/osd/$cluster-$id
+ - start ceph-osd
+
+ - triggered by udev when it sees the OSD gpt partition type
+ - triggered by admin 'ceph-disk activate <path>'
+ - triggered on ceph service startup with 'ceph-disk activate-all'
+
+We rely on /dev/disk/by-partuuid to find partitions by their UUID;
+this is what the journal symlink inside the osd data volume normally
+points to.
+
+activate-all relies on /dev/disk/by-parttypeuuid/$typeuuid.$uuid to
+find all partitions. We install special udev rules to create these
+links.
+
+udev triggers 'ceph-disk activate <dev>' or 'ceph-disk
+activate-journal <dev>' based on the partition type.
+
+On old distros (e.g., RHEL6), the blkid installed does not recognize
+GPT partition metadata and the /dev/disk/by-partuuid etc. links aren't
+present. We have a horrible hack in the form of ceph-disk-udev that
+parses sgdisk output to create the symlinks above and triggers the
+'ceph-disk activate' etc. commands that udev normally would run if it
+knew the GPT partition type.
+
+"""
+
+
CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
JOURNAL_UUID = '45b0969e-9b03-4f30-b4c6-b4b80ceff106'
@@ -57,6 +99,11 @@ INIT_SYSTEMS = [
'auto',
]
+# Nuke the TERM variable to avoid confusing any subprocesses we call.
+# For example, libreadline will print weird control sequences for some
+# TERM values.
+if 'TERM' in os.environ:
+ del os.environ['TERM']
LOG_NAME = __name__
if LOG_NAME == '__main__':
@@ -1689,6 +1736,46 @@ def main_activate_journal(args):
###########################
+def main_activate_all(args):
+ dir = '/dev/disk/by-parttypeuuid'
+ LOG.debug('Scanning %s', dir)
+ if not os.path.exists(dir):
+ return
+ err = False
+ for name in os.listdir(dir):
+ if name.find('.') < 0:
+ continue
+ (tag, uuid) = name.split('.')
+ if tag == OSD_UUID:
+ path = os.path.join(dir, name)
+ LOG.info('Activating %s', path)
+ activate_lock.acquire()
+ try:
+ (cluster, osd_id) = mount_activate(
+ dev=path,
+ activate_key_template=args.activate_key_template,
+ init=args.mark_init,
+ )
+ start_daemon(
+ cluster=cluster,
+ osd_id=osd_id,
+ )
+
+ except Exception as e:
+ print >> sys.stderr, '{prog}: {msg}'.format(
+ prog=args.prog,
+ msg=e,
+ )
+ err = True
+
+ finally:
+ activate_lock.release()
+ if err:
+ raise Error('One or more partitions failed to activate')
+
+
+###########################
+
def is_swap(dev):
dev = os.path.realpath(dev)
with file('/proc/swaps', 'rb') as proc_swaps:
@@ -2090,6 +2177,25 @@ def parse_args():
func=main_activate_journal,
)
+ activate_all_parser = subparsers.add_parser('activate-all', help='Activate all tagged OSD partitions')
+ activate_all_parser.add_argument(
+ '--activate-key',
+ metavar='PATH',
+ help='bootstrap-osd keyring path template (%(default)s)',
+ dest='activate_key_template',
+ )
+ activate_all_parser.add_argument(
+ '--mark-init',
+ metavar='INITSYSTEM',
+ help='init system to manage this dir',
+ default='auto',
+ choices=INIT_SYSTEMS,
+ )
+ activate_all_parser.set_defaults(
+ activate_key_template='/var/lib/ceph/bootstrap-osd/{cluster}.keyring',
+ func=main_activate_all,
+ )
+
list_parser = subparsers.add_parser('list', help='List disks, partitions, and Ceph OSDs')
list_parser.set_defaults(
func=main_list,
diff --git a/src/ceph-disk-udev b/src/ceph-disk-udev
index 885f638a1e0..bdf524e6aea 100755
--- a/src/ceph-disk-udev
+++ b/src/ceph-disk-udev
@@ -1,8 +1,9 @@
#! /bin/sh
-# Wrapper for the ceph udev rules. Since older versions of udev do not support gpt label fields, this shell
-# script is invoked from the udev rule to read the needed gpt label fields and call the appropriate ceph
-# OSD functions.
+# Wrapper for the ceph udev rules. Since older versions of udev+blkid
+# do not support gpt label fields, this shell script is invoked from
+# the udev rule to read the needed gpt label fields and call the
+# appropriate ceph OSD functions.
PARTNO=$1
NAME=$2
@@ -10,6 +11,19 @@ PARENT_NAME=$3
# Get GPT partition type guid
ID_PART_ENTRY_TYPE=$(/usr/sbin/sgdisk --info=${PARTNO} /dev/${PARENT_NAME} | grep "Partition GUID code" | awk '{print $4}' | tr '[:upper:]' '[:lower:]')
+
+if [ -z "$ID_PART_ENTRY_TYPE" ]; then
+ exit
+fi
+
+ID_PART_ENTRY_UUID=$(/usr/sbin/sgdisk --info=${PARTNO} /dev/${PARENT_NAME} | grep "Partition unique GUID" | awk '{print $4}' | tr '[:upper:]' '[:lower:]')
+
+# set up the symlinks
+mkdir -p /dev/disk/by-partuuid
+ln -sf ../../${NAME} /dev/disk/by-partuuid/$ID_PART_ENTRY_UUID
+mkdir -p /dev/disk/by-parttypeuuid
+ln -sf ../../${NAME} /dev/disk/by-parttypeuuid/${ID_PART_ENTRY_TYPE}.${ID_PART_ENTRY_UUID}
+
case $ID_PART_ENTRY_TYPE in
45b0969e-9b03-4f30-b4c6-b4b80ceff106)
@@ -21,7 +35,6 @@ case $ID_PART_ENTRY_TYPE in
45b0969e-9b03-4f30-b4c6-5ec00ceff106)
# DMCRYPT_JOURNAL_UUID
# Map journal if using dm-crypt
- ID_PART_ENTRY_UUID=$(/usr/sbin/sgdisk --info=${PARTNO} /dev/${PARENT_NAME} | grep "Partition unique GUID" | awk '{print $4}' | tr '[:upper:]' '[:lower:]')
/sbin/cryptsetup --key-file /etc/ceph/dmcrypt-keys/${ID_PART_ENTRY_UUID} --key-size 256 create ${ID_PART_ENTRY_UUID} /dev/${NAME}
;;
@@ -35,7 +48,6 @@ case $ID_PART_ENTRY_TYPE in
# DMCRYPT_OSD_UUID
# Map data device and activate ceph-tagged partitions
# for dm-crypted data devices
- ID_PART_ENTRY_UUID=$(/usr/sbin/sgdisk --info=${PARTNO} /dev/${PARENT_NAME} | grep "Partition unique GUID" | awk '{print $4}' | tr '[:upper:]' '[:lower:]')
/sbin/cryptsetup --key-file /etc/ceph/dmcrypt-keys/${ID_PART_ENTRY_UUID} --key-size 256 create ${ID_PART_ENTRY_UUID} /dev/${NAME}
bash -c 'while [ ! -e /dev/mapper/${ID_PART_ENTRY_UUID} ];do sleep 1; done'
/usr/sbin/ceph-disk-activate /dev/mapper/${ID_PART_ENTRY_UUID}
diff --git a/src/client/Client.cc b/src/client/Client.cc
index dd27261be23..eeb46eef2fc 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -7892,9 +7892,22 @@ void Client::ms_handle_remote_reset(Connection *con)
}
}
if (mds >= 0) {
- if (s->state == MetaSession::STATE_CLOSING) {
+ switch (s->state) {
+ case MetaSession::STATE_CLOSING:
ldout(cct, 1) << "reset from mds we were closing; we'll call that closed" << dendl;
_closed_mds_session(s);
+ break;
+
+ case MetaSession::STATE_OPENING:
+ {
+ ldout(cct, 1) << "reset from mds we were opening; retrying" << dendl;
+ list<Cond*> waiters;
+ waiters.swap(s->waiting_for_open);
+ _closed_mds_session(s);
+ MetaSession *news = _get_or_open_mds_session(mds);
+ news->waiting_for_open.swap(waiters);
+ }
+ break;
}
}
}
diff --git a/src/init-ceph.in b/src/init-ceph.in
index aea2dc37680..0ed906be987 100644
--- a/src/init-ceph.in
+++ b/src/init-ceph.in
@@ -430,4 +430,11 @@ for name in $what; do
esac
done
+# activate latent osds?
+if [ "$command" = "start" ]; then
+ if [ "$*" = "" ] || echo $* | grep -q ^osd\$ ; then
+ ceph-disk activate-all
+ fi
+fi
+
exit $EXIT_STATUS
diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h
index 2c805362255..5e6f9d49198 100644
--- a/src/mon/MonCommands.h
+++ b/src/mon/MonCommands.h
@@ -272,7 +272,9 @@ COMMAND("mon remove " \
* OSD commands
*/
COMMAND("osd stat", "print summary of OSD map")
-COMMAND("osd dump", "print summary of OSD map")
+COMMAND("osd dump " \
+ "name=epoch,type=CephInt,range=0,req=false",
+ "print summary of OSD map")
COMMAND("osd tree " \
"name=epoch,type=CephInt,range=0,req=false", \
"print OSD tree")
diff --git a/src/upstart/ceph-osd-all-starter.conf b/src/upstart/ceph-osd-all-starter.conf
index 0311716cdb4..eeb64bca567 100644
--- a/src/upstart/ceph-osd-all-starter.conf
+++ b/src/upstart/ceph-osd-all-starter.conf
@@ -6,6 +6,10 @@ task
script
set -e
+
+ # first activate any partitions
+ ceph-disk activate-all
+
# TODO what's the valid charset for cluster names and osd ids?
find /var/lib/ceph/osd/ -mindepth 1 -maxdepth 1 -regextype posix-egrep -regex '.*/[A-Za-z0-9]+-[A-Za-z0-9._-]+' -printf '%P\n' \
| while read f; do
diff --git a/udev/60-ceph-partuuid-workaround.rules b/udev/60-ceph-partuuid-workaround.rules
index a1aa060d452..c41a2720440 100644
--- a/udev/60-ceph-partuuid-workaround.rules
+++ b/udev/60-ceph-partuuid-workaround.rules
@@ -31,4 +31,7 @@ KERNEL!="sr*", IMPORT{program}="/sbin/blkid -o udev -p $tempnode"
ENV{ID_PART_ENTRY_SCHEME}=="gpt", ENV{ID_PART_ENTRY_UUID}=="?*", SYMLINK+="disk/by-partuuid/$env{ID_PART_ENTRY_UUID}"
ENV{ID_PART_ENTRY_SCHEME}=="gpt", ENV{ID_PART_ENTRY_NAME}=="?*", SYMLINK+="disk/by-partlabel/$env{ID_PART_ENTRY_NAME}"
+# NEW: by-parttypeuuid links (type.id)
+ENV{ID_PART_ENTRY_SCHEME}=="gpt", ENV{ID_PART_ENTRY_TYPE}=="?*", ENV{ID_PART_ENTRY_UUID}=="?*", SYMLINK+="disk/by-parttypeuuid/$env{ID_PART_ENTRY_TYPE}.$env{ID_PART_ENTRY_UUID}"
+
LABEL="persistent_storage_end_two"