I have a bash script that cuts out the section of a logfile between two timestamps, but because the files are so large, it takes quite a while to run.
If I were to rewrite the script in Perl, could I achieve a significant speed increase, or would I have to move to something like C to accomplish this?
#!/bin/bash
# Print the part of one or more logfiles that lies between two epoch
# timestamps. Each log line is expected to begin with a 15-character
# timestamp that GNU `date -d` can parse.
#
# Arguments:
#   $1 - logfile name or glob pattern, passed as ONE argument (quote a glob
#        so the calling shell does not expand it)
#   $2 - start of the window, seconds since the epoch
#   $3 - end of the window, seconds since the epoch
if [ $# -ne 3 ]; then
  # Diagnostics belong on stderr so they never mix with extracted log lines.
  echo "USAGE $0 <logfile(s)> <from date (epoch)> <to date (epoch)>" >&2
  exit 1
fi
LOGFILES=$1
FROM=$2
TO=$3

# FROM and TO are used in numeric comparisons later on; reject anything that
# is not a plain integer up front instead of failing with obscure test errors.
for epoch_arg in "$FROM" "$TO"; do
  if ! [[ $epoch_arg =~ ^-?[0-9]+$ ]]; then
    echo "$0: not an epoch timestamp: $epoch_arg" >&2
    exit 1
  fi
done
unset epoch_arg

# Private scratch file. Only this instance's file is ever removed: wiping
# /tmp/getlogs?????? would delete the scratch files of concurrent runs.
TEMP=$(mktemp /tmp/getlogsXXXXXX) || {
  echo "$0: cannot create temporary file" >&2
  exit 1
}
trap 'rm -f -- "$TEMP"' EXIT

## LOGS NEED TO BE LISTED CHRONOLOGICALLY
# -t sorts by modification time (newest first) and -r reverses that, giving
# oldest first. -1 prints bare names, so nothing depends on which column of
# `ls -l` output holds the file name.
# $LOGFILES is deliberately unquoted: it may be a glob that must expand here.
# The result is a whitespace-separated list, so names containing whitespace
# are not supported.
# shellcheck disable=SC2086,SC2012
LOGFILES=$(ls -1tr -- $LOGFILES)
# Print the epoch time of line $2 of file $1, parsed from the first 15
# characters of that line with GNU `date -d`.
# Returns non-zero if the line does not exist or the stamp cannot be parsed.
_findEntry_epoch() {
  local stamp
  stamp=$(tail -n "+$2" -- "$1" | head -n 1 | cut -c1-15)
  # An empty stamp would make `date -d ""` silently answer "midnight today".
  [[ -n $stamp ]] || return 1
  date -d "$stamp" +%s
}

#######################################
# Locate a timestamp in a chronologically ordered logfile by interpolation
# search over line numbers.
# Globals:
#   RETURN (written) - the first line in [ln1, ln2] whose timestamp is >= dt;
#                      ln2 if every line in the range is older than dt;
#                      0 if the search could not be carried out
# Arguments:
#   $1 - dt:  timestamp to look for, seconds since the epoch
#   $2 - fil: file to search
#   $3 - ln1: first line number of the search range (1-based)
#   $4 - ln2: last line number of the search range
# Returns:
#   0 on success, 1 on invalid arguments or an unreadable/unparsable line
#######################################
findEntry() {
  local dt=$1 fil=$2 ln1=$3 ln2=$4
  local lo hi lo_ts hi_ts mid mid_ts arg

  RETURN=0

  # The values below are used in arithmetic; insist on plain digits first.
  for arg in "$dt" "$ln1" "$ln2"; do
    if ! [[ $arg =~ ^[0-9]+$ ]]; then
      printf 'findEntry: not a non-negative integer: %s\n' "$arg" >&2
      return 1
    fi
  done
  # Force base 10 so a leading zero is not read as octal.
  dt=$(( 10#$dt )) ln1=$(( 10#$ln1 )) ln2=$(( 10#$ln2 ))
  if (( ln1 < 1 || ln2 < ln1 )); then
    printf 'findEntry: invalid line range %s-%s\n' "$ln1" "$ln2" >&2
    return 1
  fi

  lo_ts=$(_findEntry_epoch "$fil" "$ln1") || {
    printf 'findEntry: cannot read timestamp at %s:%s\n' "$fil" "$ln1" >&2
    return 1
  }
  # Target is at or before the start of the range.
  if (( dt <= lo_ts )); then
    RETURN=$ln1
    return 0
  fi

  hi_ts=$(_findEntry_epoch "$fil" "$ln2") || {
    printf 'findEntry: cannot read timestamp at %s:%s\n' "$fil" "$ln2" >&2
    return 1
  }
  # Target is after the end of the range: clamp to the last line.
  if (( dt > hi_ts )); then
    RETURN=$ln2
    return 0
  fi

  # Invariant from here on: ts(lo) < dt <= ts(hi). It guarantees that
  # hi_ts - lo_ts is positive, so the interpolation never divides by zero.
  lo=$ln1
  hi=$ln2
  while (( hi - lo > 1 )); do
    # Guess the line by assuming timestamps grow linearly with line number.
    mid=$(( lo + (hi - lo) * (dt - lo_ts) / (hi_ts - lo_ts) ))
    # Keep the probe strictly inside (lo, hi) so every pass shrinks the range.
    (( mid > lo )) || mid=$(( lo + 1 ))
    (( mid < hi )) || mid=$(( hi - 1 ))

    mid_ts=$(_findEntry_epoch "$fil" "$mid") || {
      printf 'findEntry: cannot read timestamp at %s:%s\n' "$fil" "$mid" >&2
      return 1
    }
    if (( mid_ts < dt )); then
      lo=$mid
      lo_ts=$mid_ts
    else
      hi=$mid
      hi_ts=$mid_ts
    fi
  done

  RETURN=$hi
  return 0
}
# Check timestamps on logfiles: a file last modified before FROM cannot hold
# entries from the requested window, so it is dropped before any content is
# read.
LOGS=""
# $LOGFILES is a whitespace-separated list, so it is split on purpose.
for LOG in $LOGFILES; do
  # `date -r FILE` reports the modification time to the second, independent
  # of how `ls -l` happens to format its date columns.
  timestamp=$(date -r "$LOG" +%s) || continue
  if [ "$timestamp" -ge "$FROM" ]; then
    LOGS="$LOGS $LOG"
  fi
done

# Check first and last dates in LOGS to refine further
STARTLINENUMBER=1
for LOG in $LOGS; do
  # Work on an uncompressed copy in $TEMP. If the copy fails, skip the file
  # rather than processing whatever the previous iteration left in $TEMP.
  if [ "${LOG%.gz}" != "$LOG" ]; then
    gunzip -c -- "$LOG" > "$TEMP" || continue
  else
    cp -- "$LOG" "$TEMP" || continue
  fi
  # An empty file has no first or last entry to compare against.
  [ -s "$TEMP" ] || continue

  t=$(head -n1 -- "$TEMP" | cut -c1-15)
  FIRST=$(date -d "$t" +%s)
  t=$(tail -n1 -- "$TEMP" | cut -c1-15)
  LAST=$(date -d "$t" +%s)
  if [ -z "$FIRST" ] || [ -z "$LAST" ]; then
    printf '%s: cannot parse first/last timestamp, skipping\n' "$LOG" >&2
    continue
  fi

  if [ "$TO" -lt "$FIRST" ] || [ "$FROM" -gt "$LAST" ]; then
    # This file is entirely out of range
    : > "$TEMP"
  elif [ "$FROM" -le "$FIRST" ] && [ "$TO" -ge "$LAST" ]; then
    # Entire file is within range
    cat -- "$TEMP"
  elif [ "$FROM" -le "$FIRST" ]; then
    # Last part of file is out of range: print up to and including the line
    # findEntry located for TO.
    ENDLINENUMBER=$(wc -l < "$TEMP")
    findEntry "$TO" "$TEMP" "$STARTLINENUMBER" "$ENDLINENUMBER"
    head -n "$RETURN" -- "$TEMP"
  elif [ "$TO" -ge "$LAST" ]; then
    # First part of file is out of range: print from the line findEntry
    # located for FROM to the end.
    ENDLINENUMBER=$(wc -l < "$TEMP")
    findEntry "$FROM" "$TEMP" "$STARTLINENUMBER" "$ENDLINENUMBER"
    tail -n "+$RETURN" -- "$TEMP"
  else
    # range is entirely within this logfile
    ENDLINENUMBER=$(wc -l < "$TEMP")
    findEntry "$FROM" "$TEMP" "$STARTLINENUMBER" "$ENDLINENUMBER"
    n1=$RETURN
    findEntry "$TO" "$TEMP" "$STARTLINENUMBER" "$ENDLINENUMBER"
    n2=$RETURN
    # `tail -n +0` and `tail -n +1` both start at line 1; normalise so the
    # line count below is right.
    [ "$n1" -ge 1 ] || n1=1
    # Lines n1..n2 inclusive are n2-n1+1 lines. Including line n2 matches the
    # "last part out of range" branch, where `head -n $RETURN` keeps it too.
    if [ "$n2" -ge "$n1" ]; then
      tail -n "+$n1" -- "$TEMP" | head -n "$(( n2 - n1 + 1 ))"
    fi
  fi
done

# Remove only this run's scratch file; a /tmp/getlogs?????? glob would also
# delete the scratch files of other running instances.
if [ -n "$TEMP" ]; then
  rm -f -- "$TEMP"
fi