LINUX.ORG.RU

История изменений

Исправление vodz, (текущая версия) :

Запускать по процессу на каждый файл — как-то не очень, особенно когда их в сумме много тысяч.

Таки переписал, stat теперь вызывает пачками для всех файлов/каталогов в каталоге, но не более 250 за раз. В statefile сохраняет номер бекапа, добавил ключи вызова, стало удобнее запускать. Других программ не вызывает (кроме stat и tar) и временных файлов не создаёт. С ключом -v даёт подробную статистику. Это полезно хотя бы потому, что каждая тысяча файлов в ассоциативный массив влезает всё медленнее и медленнее :(

#!/bin/bash

# Global defaults for the incremental backup script.

# Paths left out of the backup by default.
declare -a EXCLUDE=(/proc /tmp /var/tmp /dev /sys /run)
# OUT collects the files that must go into the archive.
declare -a OUT
# DIRS is the work list of directories still to be scanned.
declare -a DIRS=(/)

# Default state file name, archive name suffix, and the timestamp part of
# the archive name.
STATEFILE='statefile.lst'
ARCHIVE_SUFFIX='.tar.gz'
ARCHIVE=$(date +%Y%m%d_%H%M%S)

# usage
#
# Write the help text to stderr and terminate the script with status 2.
# The text interpolates the current values of the globals ARCHIVE,
# ARCHIVE_SUFFIX, STATEFILE, EXCLUDE and DIRS.
usage() {
  local excl
  local IFS=' '   # "${DIRS[*]}" below must be joined with single spaces

  {
    printf '%s\n' \
      "Usage: $0 [-s STATEFILE] [-e exclude] [-v] [DIRECTORIES...]" \
      "" \
      "    make a sequency archive with state file, compared changes:" \
      "        modify time, size, mode, uid and gid" \
      "" \
      "    ARCHIVE is NUMBER_DATE_TIME$ARCHIVE_SUFFIX," \
      "        NUMBER is of max BACKUP_NUMBER+1 from statefile" \
      "        DATE_TIME format see from current: $ARCHIVE" \
      "    STATEFILE - snapshot state of previos backups, default '$STATEFILE'" \
      "        format: filename BACKUP_NUMBER=MODE:UID.GID_SIZE|TIMESTAMP|type" \
      "                type: [dryoehfbc] - directory, regular, symlink, socket," \
      "                      semaphore, shared, fifo, block, character"
    # The exclude list is printed on one line, each entry preceded by a space.
    printf '%s' " excludes default: ARCHIVE STATEFILE DIRECTORIES"
    for excl in "${EXCLUDE[@]}"; do
      printf ' %s' "$excl"
    done
    printf '\n'
    printf '%s\n' \
      "    default DIRECTORIES is ${DIRS[*]}" \
      "    -v - verbose"
  } >&2
  exit 2
}

# Command-line parsing.
#   -e PATH  append PATH to the EXCLUDE list
#   -s FILE  use FILE as the state file
#   -v       verbose output
# Any other option, or an option missing its argument, prints the usage
# text (usage exits with status 2).
verbose=0
while getopts ":e:s:v" o; do
	case "$o" in
	e) EXCLUDE+=("$OPTARG") ;;
	s) STATEFILE="$OPTARG" ;;
	v) verbose=1 ;;
	*) usage ;;
	esac
done
shift $((OPTIND-1))

# Remaining arguments replace the default directory list.  Each one must be
# an existing directory.
if [[ $# -ne 0 ]]; then
	# Clear the defaults in one step.  An unquoted "unset DIRS[$i]" is
	# subject to pathname expansion (a file named DIRS0 in the current
	# directory would turn it into "unset DIRS0" and leave the default in).
	DIRS=()
	for d in "$@"; do
		if [[ ! -d "$d" ]]; then
			echo "$0: '$d' is not directory" >&2
			usage
		fi
		DIRS+=("$d")
	done
fi

# Previous state: A_OLD maps a file name (as stored in the state file) to
# its "BACKUP_NUMBER=MODE:UID.GID_SIZE|TIMESTAMP|type" record.
declare -A A_OLD

# max_b becomes the number of the backup being made now (highest stored
# BACKUP_NUMBER + 1, or 0 when there is no state file yet); a_old_n counts
# the loaded records.
max_b=0
a_old_n=0
if [[ -f "$STATEFILE" ]]; then
	while IFS= read -r line; do
		# The record is everything after the last space; the name is the rest.
		f=${line% *}
		attr=${line#"$f "}
		num=${attr%=*}
		# Right-hand sides are quoted so they compare literally instead of
		# being used as glob patterns.  The backup number must be plain
		# digits because it is used in arithmetic below, and the name must be
		# non-empty because it becomes an associative-array key.
		if [[ -z $f || $f == "$line" || $attr == "$line" || -n ${attr#?*=?*:?*.?*_?*|?*|?} || ! $num =~ ^[0-9]+$ ]]; then
			echo "$0: '$STATEFILE' have strange format, exiting" >&2
			exit 1
		fi
		A_OLD[$f]=$attr
		# 10# forces base 10 so a number with leading zeros is not read as octal.
		(( max_b < 10#$num )) && max_b=$((10#$num))
		# Progress indicator every 1000 records (verbose mode only).
		(( ++a_old_n % 1000 == 0 && verbose )) && echo -n "$0: $STATEFILE $a_old_n lines loaded"$'\r'
	done < "$STATEFILE"
	((++max_b))
fi
[[ $verbose -ne 0 ]] && echo "$0: $STATEFILE $a_old_n lines loaded"
# From here on the state file is rewritten from scratch through fd 9
# (this redirection truncates it).
exec 9> "$STATEFILE"

# Replace EXCLUDE by the device/inode pairs of the excluded paths, the state
# file and the start directories.  Paths that stat cannot read produce no
# line and therefore drop out of the list instead of lingering as raw path
# strings.
mapfile -t EXCLUDE < <(stat -c '%d/%i' -- "${EXCLUDE[@]}" "$STATEFILE" "${DIRS[@]}")

# Statistics maintained by make_state_line.
all=0
new_files=0
changed_files=0
# make_state_line NAME ATTRS
#
# Record one scanned file.
#   $1 - file name
#   $2 - "MODE:UID.GID_SIZE|TIMESTAMP|TYPE" where TYPE is the file type text
#        printed by stat's %F
# TYPE is reduced to one letter: its first character, or the second one when
# it starts with "s".  NAME is stored %q-quoted.
#
# Globals: reads A_OLD and max_b; updates all, new_files, changed_files,
# a_old_n, A_OLD and OUT; writes the state line to file descriptor 9.
# An unchanged file keeps its old record; a new or changed file gets a record
# with the current backup number and is appended to OUT.
make_state_line() {
	local attr_all=$2 attr type f

	attr=${attr_all%|*}
	type=${attr_all#"$attr|"}
	case $type in
	s*) type=${type:1:1} ;;
	*) type=${type:0:1} ;;
	esac
	printf -v f '%q' "$1"
	attr_all="$attr|$type"
	((all++))
	if [[ -n ${A_OLD[$f]} ]]; then
		((a_old_n--))
		# Quoted right-hand side: the stored state is compared literally, not
		# used as a glob pattern.
		if [[ $attr_all == "${A_OLD[$f]#*=}" ]]; then
			printf '%s %s\n' "$f" "${A_OLD[$f]}" >&9
			# Single quotes keep the shell from touching the subscript; unset
			# expands $f itself.
			unset 'A_OLD[$f]'
			return 0
		fi
		unset 'A_OLD[$f]'
		((changed_files++))
	else
		((new_files++))
	fi
	printf '%s %s\n' "$f" "$max_b=$attr_all" >&9
	OUT+=("$1")
}

# scan_a DIR
#
# Examine DIR itself and every entry directly inside it (hidden entries
# included, "." and ".." skipped).  Entries are passed to stat in batches of
# at most 250 names.  For every entry whose device/inode pair is not listed
# in EXCLUDE:
#   - DIR itself and every non-directory go to make_state_line;
#   - every other directory is appended to DIRS for a later scan_a call.
#
# Globals: reads EXCLUDE, appends to DIRS, calls make_state_line.
scan_a() {
	local p a1=$1
	local -a FILES=("$1")

	# Stat the current FILES batch with a single stat call and dispatch
	# each result.  stat prints the attributes and the name of every file
	# as two NUL-terminated fields, so each record carries its own name:
	# a file that vanished before stat ran simply produces no record and
	# cannot shift the attributes of the following files.
	tst_exclude_fill_state() {
		local attr_all attr name a

		while IFS= read -r -d '' attr_all && IFS= read -r -d '' name; do
			# attr is the "device/inode" part in front of the '='.
			attr=${attr_all%=*}
			for a in "${EXCLUDE[@]}"; do
				# Quoted: compare literally, never as a glob pattern.
				if [[ $attr == "$a" ]]; then
					attr=
					break
				fi
			done
			[[ -n $attr ]] || continue
			# "$a1" is quoted so that a directory whose name contains glob
			# characters is still recognised as "the directory itself".
			if [[ $attr_all != *'|directory' || $name == "$a1" ]]; then
				make_state_line "$name" "${attr_all#"$attr="}"
			else
				DIRS+=("$name")
			fi
		done < <(stat --printf '%d/%i=%a:%u.%g_%s|%Y|%F\0%n\0' -- "${FILES[@]}")
	}

	for p in "$1/"* "$1/."*; do
		# -e is false for an unmatched glob (wanted) but also for a dangling
		# symlink, so symlinks are accepted explicitly.
		[[ -e $p || -L $p ]] || continue
		[[ ${p:0-2:2} == /. || ${p:0-3:3} == /.. ]] && continue
		FILES+=("$p")
		if [[ ${#FILES[@]} -eq 250 ]]; then
			tst_exclude_fill_state
			FILES=()
			# DIR itself was part of the first batch only.
			a1=
		fi
	done
	if [[ ${#FILES[@]} -ne 0 ]]; then
		tst_exclude_fill_state
	fi
}

# Scan until the work list is empty.  scan_a appends newly found
# sub-directories to DIRS, so the outer loop repeats until a pass adds
# nothing.
while [[ ${#DIRS[@]} -ne 0 ]]; do
	for i in "${!DIRS[@]}"; do
		[[ $verbose -ne 0 ]] && printf '%s ' "${DIRS[i]}"
		scan_a "${DIRS[i]}"
		# Quoted so the subscript is never pathname-expanded; an unquoted
		# DIRS[$i] could match a file in the current directory and leave the
		# entry in place, making this loop endless.
		unset "DIRS[$i]"
		[[ $verbose -ne 0 ]] && echo "(S:$all N:$new_files C:$changed_files)"
	done
done

[[ $verbose -ne 0 ]] && echo "Scaned=$all New_files=$new_files Changed_files=$changed_files Deleted_files=$a_old_n"

# Whatever is still in A_OLD was not seen during the scan; its old record is
# carried over into the new state file.
for f in "${!A_OLD[@]}"; do
	printf '%s %s\n' "$f" "${A_OLD[$f]}" >&9
	[[ $verbose -ne 0 ]] && printf '%s\n' "-$f"
done

# Archive the new and changed files; the names are handed to tar
# NUL-separated on stdin.
if [[ ${#OUT[@]} -ne 0 ]]; then
	printf '%s\0' "${OUT[@]}" |
		tar -cvzpf "${max_b}_${ARCHIVE}${ARCHIVE_SUFFIX}" --no-recursion --null -T -
fi

Исходная версия vodz, :

Запускать по процессу на каждый файл — как-то не очень, особенно когда их в сумме много тысяч.

Таки переписал, stat теперь вызывает пачками, все файлы/каталоги в каталоге, но не более 250 за раз. В statefile сохраняет номер бекапа, добавил ключи вызова, стало удобнее запускать. Других программ не вызывает (кроме stat и tar) и временных файлов не создаёт. С ключом -v даёт подробную статистику. Это полезно хотя бы потому, что каждая тысяча файлов в ассоциативный массив влезает всё медленнее и медленнее :(

#!/bin/bash

# Global defaults for the incremental backup script.

# Paths left out of the backup by default.
declare -a EXCLUDE=(/proc /tmp /var/tmp /dev /sys /run)
# OUT collects the files that must go into the archive.
declare -a OUT
# DIRS is the work list of directories still to be scanned.
declare -a DIRS=(/)

# Default state file name, archive name suffix, and the timestamp part of
# the archive name.
STATEFILE='statefile.lst'
ARCHIVE_SUFFIX='.tar.gz'
ARCHIVE=$(date +%Y%m%d_%H%M%S)

# usage
#
# Write the help text to stderr and terminate the script with status 2.
# The text interpolates the current values of the globals ARCHIVE,
# ARCHIVE_SUFFIX, STATEFILE, EXCLUDE and DIRS.
usage() {
  local excl
  local IFS=' '   # "${DIRS[*]}" below must be joined with single spaces

  {
    printf '%s\n' \
      "Usage: $0 [-s STATEFILE] [-e exclude] [-v] [DIRECTORIES...]" \
      "" \
      "    make a sequency archive with state file, compared changes:" \
      "        modify time, size, mode, uid and gid" \
      "" \
      "    ARCHIVE is NUMBER_DATE_TIME$ARCHIVE_SUFFIX," \
      "        NUMBER is of max BACKUP_NUMBER+1 from statefile" \
      "        DATE_TIME format see from current: $ARCHIVE" \
      "    STATEFILE - snapshot state of previos backups, default '$STATEFILE'" \
      "        format: filename BACKUP_NUMBER=MODE:UID.GID_SIZE|TIMESTAMP|type" \
      "                type: [dryoehfbc] - directory, regular, symlink, socket," \
      "                      semaphore, shared, fifo, block, character"
    # The exclude list is printed on one line, each entry preceded by a space.
    printf '%s' " excludes default: ARCHIVE STATEFILE DIRECTORIES"
    for excl in "${EXCLUDE[@]}"; do
      printf ' %s' "$excl"
    done
    printf '\n'
    printf '%s\n' \
      "    default DIRECTORIES is ${DIRS[*]}" \
      "    -v - verbose"
  } >&2
  exit 2
}

# Command-line parsing.
#   -e PATH  append PATH to the EXCLUDE list
#   -s FILE  use FILE as the state file
#   -v       verbose output
# Any other option, or an option missing its argument, prints the usage
# text (usage exits with status 2).
verbose=0
while getopts ":e:s:v" o; do
	case "$o" in
	e) EXCLUDE+=("$OPTARG") ;;
	s) STATEFILE="$OPTARG" ;;
	v) verbose=1 ;;
	*) usage ;;
	esac
done
shift $((OPTIND-1))

# Remaining arguments replace the default directory list.  Each one must be
# an existing directory.
if [[ $# -ne 0 ]]; then
	# Clear the defaults in one step.  An unquoted "unset DIRS[$i]" is
	# subject to pathname expansion (a file named DIRS0 in the current
	# directory would turn it into "unset DIRS0" and leave the default in).
	DIRS=()
	for d in "$@"; do
		if [[ ! -d "$d" ]]; then
			echo "$0: '$d' is not directory" >&2
			usage
		fi
		DIRS+=("$d")
	done
fi

# Previous state: A_OLD maps a file name (as stored in the state file) to
# its "BACKUP_NUMBER=MODE:UID.GID_SIZE|TIMESTAMP|type" record.
declare -A A_OLD

# max_b becomes the number of the backup being made now (highest stored
# BACKUP_NUMBER + 1, or 0 when there is no state file yet); a_old_n counts
# the loaded records.
max_b=0
a_old_n=0
if [[ -f "$STATEFILE" ]]; then
	while IFS= read -r line; do
		# The record is everything after the last space; the name is the rest.
		f=${line% *}
		attr=${line#"$f "}
		num=${attr%=*}
		# Right-hand sides are quoted so they compare literally instead of
		# being used as glob patterns.  The backup number must be plain
		# digits because it is used in arithmetic below, and the name must be
		# non-empty because it becomes an associative-array key.
		if [[ -z $f || $f == "$line" || $attr == "$line" || -n ${attr#?*=?*:?*.?*_?*|?*|?} || ! $num =~ ^[0-9]+$ ]]; then
			echo "$0: '$STATEFILE' have strange format, exiting" >&2
			exit 1
		fi
		A_OLD[$f]=$attr
		# 10# forces base 10 so a number with leading zeros is not read as octal.
		(( max_b < 10#$num )) && max_b=$((10#$num))
		# Progress indicator every 1000 records (verbose mode only).
		(( ++a_old_n % 1000 == 0 && verbose )) && echo -n "$0: $STATEFILE $a_old_n lines loaded"$'\r'
	done < "$STATEFILE"
	((++max_b))
fi
[[ $verbose -ne 0 ]] && echo "$0: $STATEFILE $a_old_n lines loaded"
# From here on the state file is rewritten from scratch through fd 9
# (this redirection truncates it).
exec 9> "$STATEFILE"

# Replace EXCLUDE by the device/inode pairs of the excluded paths, the state
# file and the start directories.  Paths that stat cannot read produce no
# line and therefore drop out of the list instead of lingering as raw path
# strings.
mapfile -t EXCLUDE < <(stat -c '%d/%i' -- "${EXCLUDE[@]}" "$STATEFILE" "${DIRS[@]}")

# Statistics maintained by make_state_line.
all=0
new_files=0
changed_files=0
# make_state_line NAME ATTRS
#
# Record one scanned file.
#   $1 - file name
#   $2 - "MODE:UID.GID_SIZE|TIMESTAMP|TYPE" where TYPE is the file type text
#        printed by stat's %F
# TYPE is reduced to one letter: its first character, or the second one when
# it starts with "s".  NAME is stored %q-quoted.
#
# Globals: reads A_OLD and max_b; updates all, new_files, changed_files,
# a_old_n, A_OLD and OUT; writes the state line to file descriptor 9.
# An unchanged file keeps its old record; a new or changed file gets a record
# with the current backup number and is appended to OUT.
make_state_line() {
	local attr_all=$2 attr type f

	attr=${attr_all%|*}
	type=${attr_all#"$attr|"}
	case $type in
	s*) type=${type:1:1} ;;
	*) type=${type:0:1} ;;
	esac
	printf -v f '%q' "$1"
	attr_all="$attr|$type"
	((all++))
	if [[ -n ${A_OLD[$f]} ]]; then
		((a_old_n--))
		# Quoted right-hand side: the stored state is compared literally, not
		# used as a glob pattern.
		if [[ $attr_all == "${A_OLD[$f]#*=}" ]]; then
			printf '%s %s\n' "$f" "${A_OLD[$f]}" >&9
			# Single quotes keep the shell from touching the subscript; unset
			# expands $f itself.
			unset 'A_OLD[$f]'
			return 0
		fi
		unset 'A_OLD[$f]'
		((changed_files++))
	else
		((new_files++))
	fi
	printf '%s %s\n' "$f" "$max_b=$attr_all" >&9
	OUT+=("$1")
}

# scan_a DIR
#
# Examine DIR itself and every entry directly inside it (hidden entries
# included, "." and ".." skipped).  Entries are passed to stat in batches of
# at most 250 names.  For every entry whose device/inode pair is not listed
# in EXCLUDE:
#   - DIR itself and every non-directory go to make_state_line;
#   - every other directory is appended to DIRS for a later scan_a call.
#
# Globals: reads EXCLUDE, appends to DIRS, calls make_state_line.
scan_a() {
	local p a1=$1
	local -a FILES=("$1")

	# Stat the current FILES batch with a single stat call and dispatch
	# each result.  stat prints the attributes and the name of every file
	# as two NUL-terminated fields, so each record carries its own name:
	# a file that vanished before stat ran simply produces no record and
	# cannot shift the attributes of the following files.
	tst_exclude_fill_state() {
		local attr_all attr name a

		while IFS= read -r -d '' attr_all && IFS= read -r -d '' name; do
			# attr is the "device/inode" part in front of the '='.
			attr=${attr_all%=*}
			for a in "${EXCLUDE[@]}"; do
				# Quoted: compare literally, never as a glob pattern.
				if [[ $attr == "$a" ]]; then
					attr=
					break
				fi
			done
			[[ -n $attr ]] || continue
			# "$a1" is quoted so that a directory whose name contains glob
			# characters is still recognised as "the directory itself".
			if [[ $attr_all != *'|directory' || $name == "$a1" ]]; then
				make_state_line "$name" "${attr_all#"$attr="}"
			else
				DIRS+=("$name")
			fi
		done < <(stat --printf '%d/%i=%a:%u.%g_%s|%Y|%F\0%n\0' -- "${FILES[@]}")
	}

	for p in "$1/"* "$1/."*; do
		# -e is false for an unmatched glob (wanted) but also for a dangling
		# symlink, so symlinks are accepted explicitly.
		[[ -e $p || -L $p ]] || continue
		[[ ${p:0-2:2} == /. || ${p:0-3:3} == /.. ]] && continue
		FILES+=("$p")
		if [[ ${#FILES[@]} -eq 250 ]]; then
			tst_exclude_fill_state
			FILES=()
			# DIR itself was part of the first batch only.
			a1=
		fi
	done
	if [[ ${#FILES[@]} -ne 0 ]]; then
		tst_exclude_fill_state
	fi
}

# Scan until the work list is empty.  scan_a appends newly found
# sub-directories to DIRS, so the outer loop repeats until a pass adds
# nothing.
while [[ ${#DIRS[@]} -ne 0 ]]; do
	for i in "${!DIRS[@]}"; do
		[[ $verbose -ne 0 ]] && printf '%s ' "${DIRS[i]}"
		scan_a "${DIRS[i]}"
		# Quoted so the subscript is never pathname-expanded; an unquoted
		# DIRS[$i] could match a file in the current directory and leave the
		# entry in place, making this loop endless.
		unset "DIRS[$i]"
		[[ $verbose -ne 0 ]] && echo "(S:$all N:$new_files C:$changed_files)"
	done
done

[[ $verbose -ne 0 ]] && echo "Scaned=$all New_files=$new_files Changed_files=$changed_files Deleted_files=$a_old_n"

# Whatever is still in A_OLD was not seen during the scan; its old record is
# carried over into the new state file.
for f in "${!A_OLD[@]}"; do
	printf '%s %s\n' "$f" "${A_OLD[$f]}" >&9
	[[ $verbose -ne 0 ]] && printf '%s\n' "-$f"
done

# Archive the new and changed files; the names are handed to tar
# NUL-separated on stdin.
if [[ ${#OUT[@]} -ne 0 ]]; then
	printf '%s\0' "${OUT[@]}" |
		tar -cvzpf "${max_b}_${ARCHIVE}${ARCHIVE_SUFFIX}" --no-recursion --null -T -
fi