Skip to content

Commit f252f4e

Browse files
committed
Update time logging format; multiple changes so that logs under a disk-full condition report failure with the correct message, including for remote copy; remove retry attempts when backup is disabled to reduce logging
1 parent 2ae2ebb commit f252f4e

File tree

1 file changed

+33
-5
lines changed

1 file changed

+33
-5
lines changed

splunkconf-backup/bin/splunkconf-backup.sh

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,12 @@ exec > /tmp/splunkconf-backup-debug.log 2>&1
152152
# 20251215 add timeout for curl command to speed up backup for on prem with firewalls
153153
# 20251216 more versioncheck removal
154154
# 20251216 more curl timeout
155+
# 20251217 propagate script autodisable status so remote copy status is always aligned
156+
# 20251218 improve remote copy and disabled modes to prevent useless retries which add unnecessary logging
157+
# 20251219 propagate error from local to remote to avoid logging disabled when it is a failure due to a local problem (such as a disk space issue)
158+
# 20260105 update time logging format
155159

156-
VERSION="20251216b"
160+
VERSION="20260105a"
157161

158162
###### BEGIN default parameters
159163
# dont change here, use the configuration file to override them
@@ -375,8 +379,9 @@ SCRIPTNAME="splunkconf-backup"
375379
function echo_log_ext {
376380
LANG=C
377381
#NOW=(date "+%Y/%m/%d %H:%M:%S")
378-
NOW=(date)
379-
echo `$NOW`" ${SCRIPTNAME} $1 " >> $LOGFILE
382+
#NOW=(date)
383+
NOW=$(/bin/date -u +"%d-%m-%Y %H:%M:%S.%3N %z")
384+
echo "$NOW ${SCRIPTNAME} $1 " >> $LOGFILE
380385
}
381386

382387

@@ -619,6 +624,8 @@ function do_remote_copy() {
619624
debug_log "do_remote_copy : FIC=$FIC LFIC=$FIC OBJECT=$OBJECT RENOTETECHNO=$REMOTETECHNO RFIC=$RFIC AWSCOPYMODE=$AWSCOPYMODE ATTEMPT=$ATTEMPT MAXTRY=$REMOTECOPYRETRY REMOTETECHNO=$REMOTETECHNO "
620625
if [ -e "$FIC" ]; then
621626
FILESIZE=$(/usr/bin/stat -c%s "$FIC")
627+
elif [ "${FIC}" == "disabled" ]; then
628+
FILESIZE=0
622629
else
623630
debug_log "FIC=$FIC doesn't exist !"
624631
FILESIZE=0
@@ -628,10 +635,19 @@ function do_remote_copy() {
628635
# local disable case
629636
echo_log "action=backup type=${TYPE} object=${OBJECT} result=disabled"
630637
#debug_log "not doing remote $OBJECT as no local version present MODE=$MODE"
638+
debug_log "exiting remote copy loop because result=disabled"
639+
break
640+
elif [ "${LFIC}" == "localdiskspacecheck" ]; then
641+
fail_log "action=backup type=${TYPE} object=${OBJECT} result=failure reason=localdiskspacecheck"
642+
#debug_log "not doing remote $OBJECT as no local version present MODE=$MODE"
643+
debug_log "exiting remote copy loop because result=localdiskspacecheck"
644+
break
631645
elif [ $DOREMOTEBACKUP -eq 0 ]; then
632646
# local ran but remote is disabled
633647
DURATION=0
634648
echo_log "action=backup type=${TYPE} object=${OBJECT} result=disabled src=${LFIC} dest=${RFIC} durationms=${DURATION} size=${FILESIZE} ATTEMPT=$ATTEMPT MAXTRY=$REMOTECOPYRETRY"
649+
debug_log "exiting remote copy loop because result=disabled"
650+
break
635651
elif [ "${LFIC}" != "disabled" ] && [ "${OBJECT}" == "kvdump" ] && [ "${kvdump_done}" == "0" ]; then
636652
# we have initiated kvdump but it took so long that we never got a completion message, so we can't copy as it could be incomplete
637653
# we want to log here so it appears in dashboards and alerts
@@ -1308,7 +1324,7 @@ fi
13081324
#SERVERNAME=`${SPLUNK_HOME}/bin/splunk show servername | awk '{print $3}'`
13091325
#splunk show servername
13101326

1311-
debug_log "src detection : splunkinstanceType=$splunkinstanceType,${#splunkinstanceType}, SERVERNAME=$SERVERNAME, ${#SERVERNAME}, HOST=$HOST"
1327+
debug_log "src detection : splunkinstanceType=$splunkinstanceType, length=${#splunkinstanceType}, SERVERNAME=$SERVERNAME, ${#SERVERNAME}, HOST=$HOST"
13121328
if [ ${#splunkinstanceType} -ge 2 ]; then
13131329
INSTANCE=$splunkinstanceType
13141330
debug_log "using splunkinstanceType tag for instance, instance=${INSTANCE} src=splunkinstanceType"
@@ -1418,6 +1434,8 @@ if [ "$MODE" == "0" ] || [ "$MODE" == "etc" ]; then
14181434
splunkconf_checkspace;
14191435
if [ $ERROR -ne 0 ]; then
14201436
fail_log "action=backup type=$TYPE object=${OBJECT} result=failure dest=$FIC reason=${ERROR_MESS} ${MESS1}"
1437+
# propagate so the remote copy knows whether the backup was disabled or failed
1438+
LFICETC=${ERROR_MESS}
14211439
elif [ ${BACKUPTYPE} -eq 2 ]; then
14221440
debug_log "running tar for etc full backup";
14231441
if [ "${TARMODE}" = "abs" ]; then
@@ -1480,6 +1498,8 @@ if [ "$MODE" == "0" ] || [ "$MODE" == "scripts" ]; then
14801498
echo_log "action=backup type=$TYPE object=${OBJECT} result=disabled dest=$FIC reason=disabled ${MESS1}"
14811499
elif [ $ERROR -ne 0 ]; then
14821500
fail_log "action=backup type=$TYPE object=${OBJECT} result=failure dest=$FIC reason=${ERROR_MESS} ${MESS1}"
1501+
# propagate so the remote copy knows whether the backup was disabled or failed
1502+
LFICSCRIPT=${ERROR_MESS}
14831503
else
14841504
#debug_log "doing backup scripts via tar";
14851505
FILELIST=${SCRIPTDIR}
@@ -1511,6 +1531,8 @@ if [ "$MODE" == "0" ] || [ "$MODE" == "scripts" ]; then
15111531
LFICSCRIPT=$FIC;
15121532
else
15131533
echo_log "action=backup type=$TYPE object=$OBJECT result=autodisabledempty"
1534+
# we set file to disabled so we don't later attempt a remote copy
1535+
LFICSCRIPT="disabled";
15141536
fi
15151537
fi
15161538
# debug
@@ -1524,7 +1546,8 @@ kvstore_done=0
15241546
LFICKVSTORE="disabled"
15251547
kvdump_done=0
15261548
LFICKVDUMP="disabled"
1527-
OBJECT="kvstore"
1549+
#OBJECT="kvstore"
1550+
OBJECT="kvdump"
15281551
if [ "$MODE" == "0" ] || [ "$MODE" == "kvdump" ] || [ "$MODE" == "kvstore" ] || [ "$MODE" == "kvauto" ]; then
15291552
debug_log "object=kvstore action=start"
15301553
FIC="disabled"
@@ -1559,6 +1582,9 @@ if [ "$MODE" == "0" ] || [ "$MODE" == "kvdump" ] || [ "$MODE" == "kvstore" ] ||
15591582
echo_log "action=backup type=$TYPE object=${OBJECT} result=disabled dest=$FIC reason=disabled ${MESS1}"
15601583
elif [ $ERROR -ne 0 ]; then
15611584
fail_log "action=backup type=$TYPE object=${OBJECT} result=failure dest=$FIC reason=${ERROR_MESS} ${MESS1}"
1585+
# propagate so the remote copy knows whether the backup was disabled or failed
1586+
LFICKVSTORE=${ERROR_MESS}
1587+
LFICKVDUMP=${ERROR_MESS}
15621588
# bc not present on some os changing if (( $(echo "$ver >= $minimalversion" |bc -l) )); then
15631589
#if [[ $ver \> $minimalversion ]] && [[ "$MODE" == "0" || "$MODE" == "kvdump" || "$MODE" == "kvauto" ]]; then
15641590
# test
@@ -1801,6 +1827,8 @@ if [ "$MODE" == "0" ] || [ "$MODE" == "state" ]; then
18011827
echo_log "action=backup type=$TYPE object=${OBJECT} result=disabled dest=$FIC reason=disabled ${MESS1}"
18021828
elif [ $ERROR -ne 0 ]; then
18031829
fail_log "action=backup type=$TYPE object=${OBJECT} result=failure dest=$FIC reason=${ERROR_MESS} ${MESS1}"
1830+
# propagate so the remote copy knows whether the backup was disabled or failed
1831+
LFICSTATE=${ERROR_MESS}
18041832
else
18051833
#echo_log "doing backup state (modinputs and scheduler state) via tar";
18061834
#result=$(tar -zcf ${FIC} ${MODINPUTPATH} ${SCHEDULERSTATEPATH} ${STATELIST} 2>&1 | tr -d "\n") && echo_log "${MESS1} action=backup type=local object=state result=success dest=$FIC local state backup succesfull (result=$result)" || warn_log "${MESS1} action=backup type=local object=state result=failure dest=$FIC local state backup returned error , please investigate (modinputpath=${MODINPUTPATH} schedulerpath=${SCHEDULERSTATEPATH} statelist=${STATELIST} result=$result )"

0 commit comments

Comments
 (0)