Starting ZFS-FUSE up properly

by Rudd-O published 2009/01/22 19:25:00 GMT+0, last modified 2013-06-26T03:24:21+00:00
ZFS is the best filesystem ever invented. And FUSE brings it to Linux. There are, however, some caveats you need to keep in mind if you want to start it properly.

Here is a collection of tips you must pay attention to:

  1. Put the ZFS PID file in the root filesystem.
  2. Unset the LANG environment variable.  Failure to do so will cause ZFS-FUSE to hang if your /usr is on ZFS.
  3. Immunize ZFS-FUSE against the OOM killer.  If you don't, then it's very likely that your kernel will kill ZFS-FUSE as soon as things get tight -- and this is something you definitely do not want.
    The listing below contains code to do just that.
  4. Remove limits.  If you don't remove the limits, ZFS-FUSE will either hang and spin, or consume an inordinate amount of memory (close to two gigabytes).
    ulimit -v unlimited
    ulimit -c 512000
    ulimit -l unlimited
    ulimit -s unlimited

For your convenience, I am attaching the script /sbin/zfsctl.  I invoke it with the start argument in /etc/rc.sysinit, after the other filesystems have been mounted, and with the stop argument in the S01killall initscript, before killall is executed.  It works in Fedora and Ubuntu:

#! /bin/sh

# Lock file path and daemon PID file; the PID file is kept on the root
# filesystem.
LOCKFILE=/var/lock/zfs/zfs_lock
PIDFILE=/.zfs-fuse.pid

# Look up commands in /sbin and /bin only.
PATH=/sbin:/bin
export PATH

# ZFS-FUSE hangs when LANG is set and /usr lives on ZFS.
unset LANG

# Cap core dumps, then lift the virtual-memory, locked-memory and stack
# limits; with limits in place ZFS-FUSE hangs or uses excessive memory.
ulimit -c 512000
for unlimited_resource in v l s
do
	ulimit -"$unlimited_resource" unlimited
done

# Hook invoked with a description before each step starts.
# Output is switched off: the function ignores its arguments and
# succeeds.  Swap in the commented-out echo to see the messages.
log_action_begin_msg() {
	: # echo $*
}

# Hook invoked when a step finishes; callers pass a status (0 or 1)
# optionally followed by a detail message.
# Output is switched off: the function ignores its arguments and
# succeeds.  Swap in the commented-out echo to see the messages.
log_action_end_msg() {
	: # echo $*
}

# Start the zfs-fuse daemon and mount every ZFS filesystem.
#
# Steps, in order:
#   1. Succeed silently (exit 0) when /sbin/zfs-fuse is not installed.
#   2. Refuse to start (exit 3) when $PIDFILE names a live process;
#      remove $PIDFILE when it is stale or does not hold a process ID.
#   3. Launch zfs-fuse and wait up to 15 seconds for a process ID to
#      appear in $PIDFILE.
#   4. Exempt the daemon from the kernel OOM killer.
#   5. Run "zfs mount -a".
#
# Globals:   PIDFILE (read); PID, ES_TO_REPORT (written)
# Arguments: none
# Exits:     0 when zfs-fuse is not installed, 3 on any failure.
# Returns:   0 when every step succeeded.
do_start() {
	test -x /sbin/zfs-fuse || exit 0
	PID=$(cat "$PIDFILE" 2> /dev/null)
	if [ "$PID" != "" ]
	then
		# Probe only plain positive integers: kill interprets 0 and
		# negative numbers as process groups, so a corrupted PID file
		# would otherwise be reported as a running daemon.
		if
			case "$PID" in
				*[!0-9]*|0*) false ;;
				*) kill -0 "$PID" 2> /dev/null ;;
			esac
		then
			echo "ZFS-FUSE is already running"
			exit 3
		else
			# pid file is stale, we clean up shit
			log_action_begin_msg "Cleaning up stale ZFS-FUSE PID files"
			rm -f "$PIDFILE"
			# /var/run/sendsigs.omit.d/zfs-fuse
			log_action_end_msg 0
		fi
	fi

	log_action_begin_msg "Starting ZFS-FUSE process"
	zfs-fuse -p "$PIDFILE"
	ES_TO_REPORT=$?
	if [ 0 != "$ES_TO_REPORT" ]
	then
		log_action_end_msg 1 "code $ES_TO_REPORT"
		exit 3
	fi

	# Poll once per second, 15 attempts, until the PID file holds a
	# usable process ID.
	for a in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
	do
		PID=$(cat "$PIDFILE" 2> /dev/null)
		case "$PID" in
			''|*[!0-9]*|0*) ;;
			*) break ;;
		esac
		sleep 1
	done

	case "$PID" in
		''|*[!0-9]*|0*)
			log_action_end_msg 1 "ZFS-FUSE did not start or create $PIDFILE"
			exit 3
			;;
		*)
			log_action_end_msg 0
			;;
	esac

	log_action_begin_msg "Immunizing ZFS-FUSE against OOM kills and sendsigs signals"
	# mkdir -p /var/run/sendsigs.omit.d
	# cp "$PIDFILE" /var/run/sendsigs.omit.d/zfs-fuse
	# oom_adj is deprecated in favour of oom_score_adj; -1000 there has
	# the same "never kill" meaning as -17 in oom_adj.  Kernels without
	# oom_score_adj still get the old interface.
	if [ -e "/proc/$PID/oom_score_adj" ]
	then
		echo -1000 > "/proc/$PID/oom_score_adj"
	else
		echo -17 > "/proc/$PID/oom_adj"
	fi
	ES_TO_REPORT=$?
	if [ 0 = "$ES_TO_REPORT" ]
	then
		log_action_end_msg 0
	else
		log_action_end_msg 1 "code $ES_TO_REPORT"
		exit 3
	fi

	log_action_begin_msg "Mounting ZFS filesystems"

	sleep 1
	# NOTE(review): why the random seed is deleted before mounting is
	# not evident from this script -- confirm before changing.
	rm -f /var/lib/random-seed
	zfs mount -a
	ES_TO_REPORT=$?
	if [ 0 = "$ES_TO_REPORT" ]
	then
		log_action_end_msg 0
	else
		log_action_end_msg 1 "code $ES_TO_REPORT"
		#echo "Dropping into a shell for debugging.  Post_mountall pending."
		#bash
		#post_mountall
		exit 3
	fi

	# DISABLED: the /nonexistent test keeps this branch from ever running.
	if [ -x /nonexistent ] && [ -x /usr/bin/renice ] ; then
		log_action_begin_msg "Increasing ZFS-FUSE priority"
		/usr/bin/renice -15 -g "$PID" > /dev/null
		ES_TO_REPORT=$?
		if [ 0 = "$ES_TO_REPORT" ]
		then
			log_action_end_msg 0
		else
			log_action_end_msg 1 "code $ES_TO_REPORT"
			exit 3
		fi
		true
	fi

}

# Unmount every ZFS filesystem and shut the zfs-fuse daemon down.
#
# Steps, in order:
#   1. Succeed silently (exit 0) when /sbin/zfs-fuse is not installed or
#      $PIDFILE is missing or empty.
#   2. Remove $PIDFILE and exit 0 when it is stale or does not hold a
#      process ID.
#   3. sync, run "zfs unmount -a", send SIGTERM to the daemon and wait up
#      to 15 seconds for it to go away, then sync again.
#
# Globals:   PIDFILE (read); PID, ES_TO_REPORT (written)
# Arguments: none
# Exits:     0 when there is nothing to stop, 3 when unmounting fails or
#            the daemon survives the 15 second grace period.
# Returns:   0 when the daemon was stopped.
do_stop () {
	test -x /sbin/zfs-fuse || exit 0
	PID=$(cat "$PIDFILE" 2> /dev/null)
	if [ "$PID" = "" ] ; then
		# no pid file, we exit
		exit 0
	elif
		# Accept only a plain positive integer.  kill interprets 0 and
		# negative numbers as process groups, so passing a corrupted PID
		# file through to "kill -TERM" below could signal unrelated
		# processes.
		case "$PID" in
			*[!0-9]*|0*) false ;;
			*) kill -0 "$PID" 2> /dev/null ;;
		esac
	then
		# pid file and killable, we continue
		true
	else
		# pid file is stale, we clean up shit
		log_action_begin_msg "Cleaning up stale ZFS-FUSE PID files"
		rm -f "$PIDFILE"
		# /var/run/sendsigs.omit.d/zfs-fuse
		log_action_end_msg 0
		exit 0
	fi

	log_action_begin_msg "Syncing disks"
	sync
	log_action_end_msg 0

	log_action_begin_msg "Unmounting ZFS filesystems"
	zfs unmount -a
	ES_TO_REPORT=$?
	if [ 0 = "$ES_TO_REPORT" ]
	then
		log_action_end_msg 0
	else
		log_action_end_msg 1 "code $ES_TO_REPORT"
		exit 3
	fi

	log_action_begin_msg "Terminating ZFS-FUSE process gracefully"
	kill -TERM "$PID"

	# Poll once per second, 15 attempts, until the process is gone.
	for a in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
	do
		kill -0 "$PID" 2> /dev/null || break
		sleep 1
	done

	if kill -0 "$PID" 2> /dev/null
	then
		log_action_end_msg 1 "ZFS-FUSE refused to die after 15 seconds"
		exit 3
	else
		rm -f "$PIDFILE"
		# /var/run/sendsigs.omit.d/zfs-fuse
		log_action_end_msg 0
	fi

	log_action_begin_msg "Syncing disks again"
	sync
	log_action_end_msg 0
}

# Command dispatch.
#   start   run do_start
#   stop    run do_stop
#   status  report on the process named in $PIDFILE; exit 0 when it is
#           running, 3 when it is not running or the PID file is stale
# Every other argument prints an error or the usage line and exits 3.
case "$1" in
  start)
	do_start
	;;
  stop)
	do_stop
	;;
  status)
	PID=$(cat "$PIDFILE" 2> /dev/null)
	if [ "$PID" = "" ] ; then
		echo "ZFS-FUSE is not running"
		exit 3
	elif
		# Probe only plain positive integers: kill interprets 0 and
		# negative numbers as process groups, so a corrupted PID file
		# would otherwise be reported as a running daemon.  kill's own
		# error text is discarded; the message below reports the result.
		case "$PID" in
			*[!0-9]*|0*) false ;;
			*) kill -0 "$PID" 2> /dev/null ;;
		esac
	then
		echo "ZFS-FUSE is running, pid $PID"
		zpool status
		exit 0
	else
		echo "ZFS-FUSE died, PID files stale"
		exit 3
	fi
	;;
  restart|reload|force-reload)
	echo "Error: argument '$1' not supported" >&2
	exit 3
	;;
  *)
	echo "Usage: $0 start|stop|status" >&2
	exit 3
	;;
esac

# Finish with status 0 once start or stop has returned.
: