teamclairvoyant/airflow-maintenance-dags

Add compress feature to the log-cleanup

arturbrandysonwelo opened this issue · 1 comments

Hello.
We do not want to delete log files right away; we want to compress them first and delete them only after some days.
For this reason, I modified this DAG and added some lines to it (I also tested it and it works, so maybe we can include this in a new version of this log_cleaner):

# Age threshold, in days, used by the compress step (it ends up as the
# "-mtime +N" argument of the find statement). Looked up under the key below;
# 7 is the fallback passed to Variable.get.
# NOTE(review): Variable is presumably airflow.models.Variable - confirm.
DEFAULT_MAX_LOG_AGE_IN_DAYS_TO_COMPRESS = Variable.get(
    "airflow_log_cleanup__max_log_age_in_days_to_compress", 7
)

# Switch for the compress step; rendered into the bash script as the string
# "true" or "false".
ENABLE_COMPRESS = True

# The right-hand sides below are filled in by the enclosing Python string
# concatenation, so the script sees plain literal values.
# Age threshold in days used by the compress find statement.
MAX_LOG_AGE_IN_DAYS_TO_COMPRESS=""" + str(DEFAULT_MAX_LOG_AGE_IN_DAYS_TO_COMPRESS) + """

# Rendered as 'true' or 'false' from the Python flag ENABLE_DELETE.
ENABLE_DELETE=""" + str("true" if ENABLE_DELETE else "false") + """

# Rendered as 'true' or 'false'; compress() only runs the compress statement
# when this is 'true'.
ENABLE_COMPRESS=""" + str("true" if ENABLE_COMPRESS else "false") + """

# Log the effective setting.
echo "ENABLE_COMPRESS:      '${ENABLE_COMPRESS}'"

# Compress the files/directories selected by a find statement.
#
# Globals:
#   ENABLE_COMPRESS            (read)    only the value 'true' runs the
#                                        compress statement; anything else is
#                                        a dry run that just lists the matches
#   FILES_MARKED_FOR_COMPRESS  (written) output of the find statement
#   COMPRESS_STMT_EXIT_CODE    (written) exit status of the compress statement
#   REMOVE_LOCK_FILE_EXIT_CODE (written) exit status of the lock-file removal
# Arguments:
#   $1 - shell command that lists the paths to compress (run through eval)
#   $2 - shell command that compresses those paths (run through eval)
# Outputs:
#   progress messages on stdout
# Returns:
#   0 when nothing had to be done or the compress statement succeeded.
#   If the compress statement fails, the lock file is removed and the whole
#   script exits with the compress statement's status (or with rm's status
#   when the lock file could not be removed).
compress() {
    # Lock-file path, substituted by the enclosing Python template. Keeping it
    # in one local avoids repeating the substitution inside quoted strings.
    local lock_file=""" + str(LOG_CLEANUP_PROCESS_LOCK_FILE) + """

    echo "Executing Find Statement: $1"
    # Quoted so the statement is parsed once by eval, not word-split and
    # glob-expanded first.
    FILES_MARKED_FOR_COMPRESS=$(eval "$1")
    echo "Process will be compressing the following File(s)/Directory(s):"
    echo "${FILES_MARKED_FOR_COMPRESS}"
    # grep -c -v '^$' counts the non-empty lines, i.e. the number of matches.
    echo "Process will be compressing $(printf '%s\n' "${FILES_MARKED_FOR_COMPRESS}" | grep -c -v '^$') File(s)/Directory(s)"
    echo ""
    if [[ "${ENABLE_COMPRESS}" == "true" ]]; then
        if [[ -n "${FILES_MARKED_FOR_COMPRESS}" ]]; then
            echo "Executing Compress Statement: $2"
            eval "$2"
            COMPRESS_STMT_EXIT_CODE=$?
            if [[ "${COMPRESS_STMT_EXIT_CODE}" != "0" ]]; then
                echo "Compress process failed with exit code \
                    '${COMPRESS_STMT_EXIT_CODE}'"

                echo "Removing lock file..."
                rm -f -- "${lock_file}"
                # Capture rm's status; without this the check below compared
                # an unset variable and always reported a lock-file error,
                # then exited with status 0 and hid the compress failure.
                REMOVE_LOCK_FILE_EXIT_CODE=$?
                if [[ "${REMOVE_LOCK_FILE_EXIT_CODE}" != "0" ]]; then
                    echo "Error removing the lock file. \
                    Check file permissions.\
                    To re-run the DAG, ensure that the lock file has been \
                    deleted (${lock_file})."
                    exit "${REMOVE_LOCK_FILE_EXIT_CODE}"
                fi
                exit "${COMPRESS_STMT_EXIT_CODE}"
            fi
        else
            echo "WARN: No File(s)/Directory(s) to compress"
        fi
    else
        echo "WARN: You're opted to skip compress the File(s)/Directory(s)!!!"
    fi
}

# Driver for the compress step: build the find and compress statements as
# strings and hand them to compress(), which runs them through eval.
echo "Running Compress Process..."

    # Select the entries directly matched by ${BASE_LOG_FOLDER}/scheduler/*
    # (-maxdepth 0 tests only those arguments) that are directories and whose
    # mtime is older than MAX_LOG_AGE_IN_DAYS_TO_COMPRESS days.
    FIND_STATEMENT="find ${BASE_LOG_FOLDER}/scheduler/* -maxdepth 0 -type d -mtime \
     +${MAX_LOG_AGE_IN_DAYS_TO_COMPRESS}"
    # For every match, create <dir>.tar.gz next to it; --remove-files asks tar
    # to delete the originals after archiving.
    # NOTE(review): --remove-files is a GNU tar option - confirm the target tar.
    COMPRESS_STMT="${FIND_STATEMENT} -exec tar -zcvf {}.tar.gz {} --remove-files \;"

    compress "${FIND_STATEMENT}" "${COMPRESS_STMT}"
    # Status returned by compress(); a failing compress statement makes
    # compress() exit the script itself, so this line is not reached then.
    CLEANUP_EXIT_CODE=$?

I think this is a good option to have. I fixed the code formatting below:

# Age threshold, in days, used by the compress step (it ends up as the
# "-mtime +N" argument of the find statement). Looked up under the key below;
# 7 is the fallback passed to Variable.get.
# NOTE(review): Variable is presumably airflow.models.Variable - confirm.
DEFAULT_MAX_LOG_AGE_IN_DAYS_TO_COMPRESS = Variable.get(
"airflow_log_cleanup__max_log_age_in_days_to_compress", 7
)

# Switch for the compress step; rendered into the bash script as the string
# "true" or "false".
ENABLE_COMPRESS = True

# The right-hand sides below are filled in by the enclosing Python string
# concatenation, so the script sees plain literal values:
#   MAX_LOG_AGE_IN_DAYS_TO_COMPRESS - age threshold in days for the compress
#                                     find statement
#   ENABLE_DELETE / ENABLE_COMPRESS - rendered as 'true' or 'false';
#                                     compress() only runs the compress
#                                     statement when ENABLE_COMPRESS is 'true'
# The final echo logs the effective ENABLE_COMPRESS setting.
MAX_LOG_AGE_IN_DAYS_TO_COMPRESS=""" + str(DEFAULT_MAX_LOG_AGE_IN_DAYS_TO_COMPRESS) + """

ENABLE_DELETE=""" + str("true" if ENABLE_DELETE else "false") + """

ENABLE_COMPRESS=""" + str("true" if ENABLE_COMPRESS else "false") + """

"""
echo "ENABLE_COMPRESS: '${ENABLE_COMPRESS}'"
 
# Compress the files/directories selected by a find statement.
#
# Globals:
#   ENABLE_COMPRESS            (read)    only the value 'true' runs the
#                                        compress statement; anything else is
#                                        a dry run that just lists the matches
#   FILES_MARKED_FOR_COMPRESS  (written) output of the find statement
#   COMPRESS_STMT_EXIT_CODE    (written) exit status of the compress statement
#   REMOVE_LOCK_FILE_EXIT_CODE (written) exit status of the lock-file removal
# Arguments:
#   $1 - shell command that lists the paths to compress (run through eval)
#   $2 - shell command that compresses those paths (run through eval)
# Outputs:
#   progress messages on stdout
# Returns:
#   0 when nothing had to be done or the compress statement succeeded.
#   If the compress statement fails, the lock file is removed and the whole
#   script exits with the compress statement's status (or with rm's status
#   when the lock file could not be removed).
compress() {
    # Lock-file path, substituted by the enclosing Python template. Keeping it
    # in one local avoids repeating the substitution inside quoted strings.
    local lock_file=""" + str(LOG_CLEANUP_PROCESS_LOCK_FILE) + """

    echo "Executing Find Statement: $1"
    # Capture the find output with a command substitution. A bare
    # VAR=eval prefix would only set an environment variable for the command
    # and leave this variable empty, so nothing would ever be compressed.
    FILES_MARKED_FOR_COMPRESS=$(eval "$1")
    echo "Process will be compressing the following File(s)/Directory(s):"
    echo "${FILES_MARKED_FOR_COMPRESS}"
    # grep -c -v '^$' counts the non-empty lines, i.e. the number of matches.
    echo "Process will be compressing $(printf '%s\n' "${FILES_MARKED_FOR_COMPRESS}" | grep -c -v '^$') File(s)/Directory(s)"
    echo ""

    if [[ "${ENABLE_COMPRESS}" == "true" ]]; then
        if [[ -n "${FILES_MARKED_FOR_COMPRESS}" ]]; then
            echo "Executing Compress Statement: $2"
            eval "$2"
            COMPRESS_STMT_EXIT_CODE=$?

            if [[ "${COMPRESS_STMT_EXIT_CODE}" != "0" ]]; then
                echo "Compress process failed with exit code
                '${COMPRESS_STMT_EXIT_CODE}'"
                echo "Removing lock file..."
                rm -f -- "${lock_file}"
                # Capture rm's status; without this the check below compared
                # an unset variable and always reported a lock-file error,
                # then exited with status 0 and hid the compress failure.
                REMOVE_LOCK_FILE_EXIT_CODE=$?

                if [[ "${REMOVE_LOCK_FILE_EXIT_CODE}" != "0" ]]; then
                    echo "Error removing the lock file. \
                    Check file permissions.\
                    To re-run the DAG, ensure that the lock file has been \
                    deleted (${lock_file})."
                    exit "${REMOVE_LOCK_FILE_EXIT_CODE}"
                fi
                exit "${COMPRESS_STMT_EXIT_CODE}"
            fi
        else
            echo "WARN: No File(s)/Directory(s) to compress"
        fi
    else
        echo "WARN: You're opted to skip compress the File(s)/Directory(s)!!!"
    fi
}
 
# Driver for the compress step: build the find and compress statements as
# strings and hand them to compress(), which runs them through eval.
echo "Running Compress Process..."

# Select the entries directly matched by ${BASE_LOG_FOLDER}/scheduler/*
# (-maxdepth 0 tests only those arguments) that are directories and whose
# mtime is older than MAX_LOG_AGE_IN_DAYS_TO_COMPRESS days.
FIND_STATEMENT="find ${BASE_LOG_FOLDER}/scheduler/* -maxdepth 0 -type d -mtime \
 +${MAX_LOG_AGE_IN_DAYS_TO_COMPRESS}"
# For every match, create <dir>.tar.gz next to it; --remove-files asks tar to
# delete the originals after archiving.
# NOTE(review): --remove-files is a GNU tar option - confirm the target tar.
COMPRESS_STMT="${FIND_STATEMENT} -exec tar -zcvf {}.tar.gz {} --remove-files \;"

compress "${FIND_STATEMENT}" "${COMPRESS_STMT}"
# Status returned by compress(); a failing compress statement makes compress()
# exit the script itself, so this line is not reached then.
CLEANUP_EXIT_CODE=$?
"""