#!/bin/sh
#
# Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
#
# init.ohasd - Control script for the Oracle HA services daemon
# This script is invoked by the init system
#
# Note:
#   For security reasons, all cli tools shipped with Clusterware should be
#   executed as HAS_USER in init.ohasd and ohasd rc script for SIHA. (See bug
#   9216334 for more details)

### BEGIN INIT INFO
# Provides: oracle_has
# Required-Start: $network $syslog $remote_fs
# Should-Start: open_iscsi raw afd
# Required-Stop: $network $syslog $remote_fs
# Should-Stop: open_iscsi
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Description: Start and Stop Oracle High Availability Service
### END INIT INFO

######### Shell functions #########

# Print the local host name in lower case, without its domain part.
# Globals: HOSTN, EXPRN, ECHO, CUT, TR (read); H1, len1, len2 (written)
# Outputs: the normalized host name on stdout
tolower_host()
{
  H1=`$HOSTN`

  # If the hostname is an IP address, let hostname remain as IP address.
  # len1 is the full length; len2 is the length of the leading part that
  # looks like a dotted quad. They are equal only for a bare IP address.
  # The portable "string : regex" form of expr is used for both (the
  # "match" keyword is not part of POSIX expr), and every operand is
  # quoted so an empty hostname does not produce expr/test syntax errors.
  len1=`$EXPRN "$H1" : '.*'`
  len2=`$EXPRN "$H1" : '[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*'`

  # Strip off domain name in case /bin/hostname returns FQDN hostname.
  if [ "$len1" != "$len2" ]; then
    H1=`$ECHO "$H1" | $CUT -d'.' -f1`
  fi

  $ECHO "$H1" | $TR '[:upper:]' '[:lower:]'
}

# Invoke crsctl as root in case of clusterware, and HAS_USER in case of SIHA.
# Arguments: passed through to $CRSCTL
# Note: Arguments are deliberately joined with $* (the su -c form re-parses
#       them anyway), so an argument containing spaces might be problematic
#       (my_crsctl 'hello world').
my_crsctl()
{
  if [ "$HAS_USER" = "root" ]; then
    $CRSCTL $*
  else
    $SU $HAS_USER -c "$CRSCTL $*"
  fi
}

# Invoke clsecho as root in case of clusterware, and HAS_USER in case of SIHA.
# Arguments: passed through to $CLSECHO
# Note: Argument with space might be problematic (my_clsecho 'hello world')
my_clsecho()
{
  if [ "$HAS_USER" = "root" ]; then
    $CLSECHO $*
  else
    $SU $HAS_USER -c "$CLSECHO $*"
  fi
}

# Invoke the $LOGSYS command (set elsewhere in this file) as root in case of
# clusterware, and HAS_USER in case of SIHA.
# Arguments: the message words, passed through to $LOGSYS
# Note: Argument with space might be problematic (my_logsys 'hello world')
my_logsys()
{
  if [ "$HAS_USER" = "root" ]; then
    $LOGSYS $*
  else
    $SU $HAS_USER -c "$LOGSYS $*"
  fi
}

# Block until one line can be read from $NAMEDPIPE.
# Globals: NAMEDPIPE, NAMEDPIPE2, SYSTEMD, ECHO, GREP, SLEEP (read);
#          line, rc, count (written)
# The statement order below is significant; do not reorder.
read_pipe()
{
  read line < "$NAMEDPIPE"
  rc=$?

  # For Linux systemd, more work is necessary. Ohasd will send us its PID so
  # we can move it to the appropriate cgroup for service tracking.
  if [ "$SYSTEMD" = "true" ]; then
    if [ "$rc" = "0" ]; then
      if [ -e /sys/fs/cgroup/systemd/system.slice/oracle-ohasd.service/tasks ]; then
        my_logsys "Writing $line to oracle-ohasd tasks"
        # echo needs to be done as root
        $ECHO $line > /sys/fs/cgroup/systemd/system.slice/oracle-ohasd.service/tasks

        # Not strictly necessary, but make sure the process actually moves:
        # poll once a second until no ohasd.bin is listed under user.slice.
        while true
        do
          count=`cd /sys/fs/cgroup/systemd/user.slice/ && \
                 systemd-cgls | $GREP ohasd.bin | $GREP -vc grep`
          if [ "$count" = "0" ]; then
            break
          fi
          $SLEEP 1
        done
      else
        my_logsys "Cannot write PID to oracle-ohasd tasks, file does not exist."
      fi

      # Create a second pipe to let ohasd know it's safe to proceed
      mkfifo -m 644 $NAMEDPIPE2 2>&1

      # Go back to reading so we can detect ohasd crashes
      read_pipe
    fi
  fi
}
###################################

######### Instantiated Variables #########
# The %...% tokens are placeholders kept verbatim from the template.
ORA_CRS_HOME=%ORA_CRS_HOME%
export ORA_CRS_HOME
HAS_USER=%HAS_USER%
SCRBASE=%SCRBASE%
##########################################

#limits(fixed)
CRS_LIMIT_CORE=%CRS_LIMIT_CORE%
CRS_LIMIT_MEMLOCK=%CRS_LIMIT_MEMLOCK%

### Restart related variables
# Max. # of restarts before checking the restart time window
RESTART_LIMIT=10

# Time window in which RESTART_LIMIT # of restarts is allowed without
# aborting auto-restart (in sec).
RESTART_TIME_WINDOW=60

# Internal variables.
CURR_TIME=0
TIME_ELAPSE=0
PREV_RESTART_TIME=0
RESTART_COUNTER=0

# How long to wait (in seconds) before rechecking a dependency,
# and printing out messages about it.
DEP_CHECK_WAIT=60

NAMEDPIPE=/var/tmp/.oracle/npohasd
NAMEDPIPE2=/var/tmp/.oracle/npohasd2

### CLI tools
BASENAME=/bin/basename
HOSTN=/bin/hostname
SU=/bin/su
CHOWN=/bin/chown
ECHO=/bin/echo
SLEEP=/bin/sleep
EXPRN=/usr/bin/expr
CUT=/usr/bin/cut
CAT=/bin/cat
GREP=/bin/grep

# Location of tr differs between platforms.
# Retrieve name of the platform (done once; reused for every platform switch)
PLATFORM=`/bin/uname`

TR=/bin/tr
case "$PLATFORM" in
  SunOS)
    #solaris location (for both SPARC and amd)
    TR=/usr/xpg4/bin/tr
    ;;
  Linux)
    #on linux tr is at /usr/bin/tr
    TR=/usr/bin/tr
    ;;
esac

CLSECHO="$ORA_CRS_HOME/bin/clsecho"
LOGSYS="$CLSECHO -s $0:"
PERL="$ORA_CRS_HOME/perl/bin/perl -I${ORA_CRS_HOME}/perl/lib"
CRSCTL=$ORA_CRS_HOME/bin/crsctl

### Main ###

MY_HOST=`tolower_host`
SYSTEMD="false"

# Default path for SCR control files.
AUTORUNFILE=$SCRBASE/$MY_HOST/$HAS_USER/ohasdrun
PIDFILE=$SCRBASE/$MY_HOST/$HAS_USER/ohasdinitpid

LOGMSG="/bin/logger -puser.err"
LOGERR="/bin/logger -puser.alert"
ENV_FILE="${ORA_CRS_HOME}/crs/install/s_crsconfig_${MY_HOST}_env.txt"

case "$PLATFORM" in
  Linux)
    LOGGER="/usr/bin/logger"
    if [ ! -f "$LOGGER" ]; then
      LOGGER="/bin/logger"
    fi
    LOGMSG="$LOGGER -puser.err"
    LOGERR="$LOGGER -puser.alert"
    LSB_RELEASE="/usr/bin/lsb_release"
    ;;
  HP-UX)
    NAMEDPIPE=/tmp/.oracle/npohasd
    ;;
  AIX)
    NAMEDPIPE=/tmp/.oracle/npohasd
    ;;
  SunOS)
    ;;
  OSF1)
    ;;
  *)
    /bin/echo "ERROR: Unknown Operating System"
    # "exit -1" is not a valid POSIX exit status (some shells reject it);
    # 255 is the value shells that accept -1 actually report.
    exit 255
    ;;
esac

# log start script execution
$LOGMSG "Starting execution of Oracle Clusterware init.ohasd"

#
# for OEL6/RHEL6 rsyslogd is not up at the point
# this script is run so write to /var/log location
# in that case
#
if [ "$PLATFORM" = "Linux" ]; then
  # if lsb_release not there conservatively assume we need to log to /var/log
  RELEASE=6
  if [ -e "$LSB_RELEASE" ]; then
    # Keep the whole major version (text before the first dot) so that a
    # two-digit release such as 10.x is not truncated to "1".
    RELEASE=`$LSB_RELEASE -sr | $CUT -d. -f1`
    case "$RELEASE" in
      ''|*[!0-9]*)
        # Empty or non-numeric answer: fall back to the conservative default.
        RELEASE=6
        ;;
    esac
  fi

  if [ "$RELEASE" -ge 6 ]; then
    mydate=`date "+%b %d %T"`
    echo "$mydate Starting execution of Oracle Clusterware init.ohasd" > /var/log/oracleohasd
  fi

  # Detect systemd: /sbin/init is owned by a package named systemd-*
  if hash /bin/rpm 2>/dev/null; then
    rpmstr=`/bin/rpm -qf /sbin/init | $GREP ^systemd- -i`
    if [ -n "$rpmstr" ]; then
      SYSTEMD="true"
    fi
  fi
fi

# enable HA by default on most unix platforms
case "$PLATFORM" in
  Linux|HP-UX|SunOS|AIX)
    GIPCD_PASSTHROUGH=false
    export GIPCD_PASSTHROUGH
    ;;
  OSF1)
    ;;
esac

case $1 in
'stop')
  # Only acted upon for Linux systemd, and only if a PID file was recorded
  # by a previous "run".
  if [ "$PLATFORM" = "Linux" ] && [ "$SYSTEMD" = "true" ] && [ -r "$PIDFILE" ]; then
    PID=`$CAT "$PIDFILE"`
    PID_MATCHES=0
    if [ -n "$PID" ]; then
      # Number of ps lines that mention the recorded PID.
      # NOTE(review): this is a substring match on the whole ps line, so it
      # can also count unrelated processes - confirm this is intended.
      PID_MATCHES=`/bin/ps -ef | $GREP -v grep | $GREP -c $PID`
    fi
    if [ "$PID_MATCHES" = "1" ]; then
      my_logsys "init.ohasd calling ohasd stop"
      /etc/init.d/ohasd stop
    else
      my_logsys "init.ohasd stop called while not running, taking no action"
    fi
  fi
  ;;

'run')
  # Record PID so we can detect kills
  $ECHO $$ > "$PIDFILE"

  # Handle reboot of system or restarted init script
  STARTUP_LOOP="true"
  while [ "$STARTUP_LOOP" = "true" ]
  do
    if [ -r "$AUTORUNFILE" ]; then
      case `$CAT "$AUTORUNFILE"` in
        restart*)
          OUTPUT=`my_crsctl check has | $GREP 4638`
          case "$OUTPUT" in
            CRS*)
              # Sync with ohasd and restart after crash
              read_pipe
              STARTUP_LOOP="false"
              ;;
            *)
              # Stale restart at boot time, wait for restart
              $SLEEP 10
              ;;
          esac
          ;;
        *)
          STARTUP_LOOP="false"
          ;;
      esac
    else
      STARTUP_LOOP="false"
    fi
  done

  # Wait until clusterware home is mounted.
  while [ ! -r "$CLSECHO" ]
  do
    $LOGMSG "Waiting for filesystem containing $CLSECHO."
    $SLEEP $DEP_CHECK_WAIT
  done

  # we set the hard and soft limit here as root
  # these limits will be reset in ohasd wrapper.
  # Nevertheless, we need to set them here in the
  # case of SIHA since the wrapper will not get
  # called as root

  # Source the script that has limit values. The readability check matters:
  # in a non-interactive POSIX-mode shell, "." on a missing file terminates
  # the whole script, which would silently stop this init script.
  if [ -r "$ENV_FILE" ]; then
    . "$ENV_FILE" > /dev/null 2>&1
  else
    $LOGERR "init.ohasd: cannot read $ENV_FILE"
  fi

  case "$PLATFORM" in
    Linux)
      # MEMLOCK limit is for Bug 9136459
      ulimit -l $CRS_LIMIT_MEMLOCK
      ulimit -c $CRS_LIMIT_CORE
      ulimit -n $CRS_LIMIT_OPENFILE
      ulimit -u $CRS_LIMIT_NPROC
      ;;
    *)
      ulimit -c $CRS_LIMIT_CORE
      ulimit -n $CRS_LIMIT_OPENFILE
      if [ "$PLATFORM" = "AIX" ]; then
        ulimit -u $CRS_LIMIT_NPROC
      fi
      ;;
  esac

  # Try to create named pipe.
  if [ "$HAS_USER" = "root" ]; then
    OUTPUT=`mkfifo -m 644 $NAMEDPIPE 2>&1`
  else
    OUTPUT=`$SU $HAS_USER -c "mkfifo -m 644 $NAMEDPIPE" 2>&1`
  fi

  if [ ! -p "$NAMEDPIPE" ]; then
    # Print an alert entry regarding failure to create named pipe.
    # Note: Cannot use my_clsecho since OUTPUT has newline and white spaces
    if [ "$HAS_USER" = "root" ]; then
      $CLSECHO -l -p crs -f crs -m 10131 $NAMEDPIPE "$OUTPUT"
    else
      # NOTE(review): an unbalanced single quote inside OUTPUT would break
      # the quoting of this su -c command line.
      $SU $HAS_USER -c "$CLSECHO -l -p crs -f crs -m 10131 $NAMEDPIPE '$OUTPUT'"
    fi

    # Stop ohasd restart.
    $ECHO "stop" > "$AUTORUNFILE"
  fi

  # Main loop: act on the keyword stored in the autorun file.
  while true
  do
    # Should ohasd be run or restarted?
    if [ -r "$AUTORUNFILE" ]; then
      case `$CAT "$AUTORUNFILE"` in
        reboot*)
          # Sync with ohasd and restart after crash
          $ECHO "restart" > "$AUTORUNFILE"
          read_pipe
          ;;

        restart*)
          ### Bug 6956719. Prevent ohasd from crashing and restarting too
          ### rapidly in a short period of time.
          RESTART_COUNTER=`$EXPRN $RESTART_COUNTER + 1`
          if [ "$RESTART_COUNTER" -gt "$RESTART_LIMIT" ]; then
            # Obtain current epoch time.
            # The "$?" test below must directly follow this if/else so that
            # it sees the status of the command substitution.
            if [ "$HAS_USER" = "root" ]; then
              CURR_TIME=`$PERL -e 'print time'`
            else
              CURR_TIME=`$SU $HAS_USER -c "$PERL -e 'print time'"`
            fi
            if [ "$?" -eq "0" ]; then
              TIME_ELAPSE=`$EXPRN $CURR_TIME - $PREV_RESTART_TIME`
              my_logsys "Ohasd restarts $RESTART_COUNTER times in $TIME_ELAPSE seconds."

              # Stop restarting if ohasd restarts more than RESTART_LIMIT times
              # within RESTART_TIME_WINDOW.
              if [ "$TIME_ELAPSE" -lt "$RESTART_TIME_WINDOW" ]; then
                my_logsys "Ohasd restarts too rapidly. Stop auto-restarting."
                my_clsecho -l -p crs -f crs -m 10132 $RESTART_LIMIT $RESTART_TIME_WINDOW

                # Stop ohasd restart.
                $ECHO "stop" > "$AUTORUNFILE"

                # Reset RESTART_COUNTER and PREV_RESTART_TIME to default values.
                RESTART_COUNTER=0
                PREV_RESTART_TIME=0
                continue
              else
                # Reset the counter if ohasd does not restart more than
                # RESTART_LIMIT times in the last RESTART_TIME_WINDOW secs.
                RESTART_COUNTER=1
                my_logsys "Resetting RESTART_COUNTER to 1."
              fi
              PREV_RESTART_TIME=$CURR_TIME
            else
              # Let ohasd restart if failed to obtain current time.
              my_logsys "Unable to obtain current time. $PERL -e 'print time' failed."
            fi
          fi
          ############################################

          my_logsys "ohasd is restarting $RESTART_COUNTER/$RESTART_LIMIT."

          # Restart ohasd and sync with it
          if [ "$HAS_USER" = "root" ]; then
            $ORA_CRS_HOME/bin/ohasd restart &
          else
            $SU $HAS_USER -c "$ORA_CRS_HOME/bin/ohasd restart &"
          fi
          read_pipe
          ;;

        stop*)
          $SLEEP 10
          ;;

        *)
          $LOGERR 'autorun file for ohasd has unknown value'
          $SLEEP 10
          ;;
      esac
    else
      # bug 15869775 - reload hostname in case that's why we can't find file
      MY_HOST="`tolower_host`"
      AUTORUNFILE=$SCRBASE/$MY_HOST/$HAS_USER/ohasdrun
      PIDFILE=$SCRBASE/$MY_HOST/$HAS_USER/ohasdinitpid
      $LOGERR 'autorun file for ohasd is missing'
      $SLEEP 10
    fi
  done
  ;;
esac