#!/bin/bash
#
# Apache access-log monitor.
#
# Each run reads the bytes appended to LOGFILE since the previous run, scores
# every client address found in them with an awk program, and drops traffic
# from addresses whose score reaches THRESHOLD (via ipset when available,
# otherwise via per-address iptables/ip6tables rules). Decisions are appended
# to BLOCKLOG.
#
# NOTE(review): the script edits firewall rules, so it presumably has to run
# as root (e.g. from cron) - confirm against the deployment.

# Abort on use of an unset variable.
set -u

# Access log to scan.
LOGFILE="/var/log/apache2/access.log"
# Holds the byte offset of LOGFILE that has already been processed.
STATEFILE="/var/tmp/apache_log_monitor.offset"
# Scratch file receiving the not-yet-processed part of LOGFILE.
WORKFILE="/var/tmp/apache_log_monitor.newlines"
# Audit trail: ERROR / BLOCKED / ALREADY_BLOCKED / WARN entries are appended here.
BLOCKLOG="/var/log/apache2/blocked_ips.log"

# An address is blocked when its total score for this run is >= THRESHOLD.
THRESHOLD=5
# An address with at least this many requests in this run gets +8 added to its
# total. NOTE(review): with the values below, +8 alone exceeds THRESHOLD, so
# any address making 5 or more requests in one run is blocked.
BURST_THRESHOLD=5

# true = print score/details for every IP found in this run
DISPLAY_SCORES=true

# true = print each blocked IP to stdout
PRINT_BLOCKED_IP=true

# Addresses (first field of a log line) matching this regex are never scored.
WHITELIST_REGEX='^(127[.]0[.]0[.]1|::1)$'

# The regexes below are handed to awk with -v and are intended to match
# case-insensitively. Literal dots are written as [.] where they are escaped
# at all, so that no backslashes appear in the values.

# User agents exempt from the generic "bot" penalty (BAD_BOT_UA_REGEX).
GOOD_BOT_UA_REGEX='(Googlebot|AdsBot-Google|GoogleOther|Bingbot|bingbot|Slurp|DuckDuckBot|Applebot|facebookexternalhit|Facebot|LinkedInBot|OAI-SearchBot|Microsoft Exchange/)'
# User agents of scripted clients and scanners: +5 per request.
BAD_TOOL_UA_REGEX='(curl/|wget|python-requests|python-urllib|Go-http-client|libwww-perl|scrapy|httpclient|okhttp|aiohttp|nikto|sqlmap|masscan|nmap|zgrab|libredtail-http)'
# Generic crawler keywords: +3 per request unless the UA also matches GOOD_BOT_UA_REGEX.
BAD_BOT_UA_REGEX='(bot|crawler|spider|scraper|harvester|scanner|checker|fetcher|parser|grabber|collector|monitor)'
# Request methods that add +6 per request (matched against the upper-cased method).
SUSPICIOUS_METHOD_REGEX='^(PROPFIND|CONNECT|TRACE|TRACK|DEBUG)$'

# Request paths that add +5 per request (more for POST, see the awk program).
# NOTE(review): most dots in this pattern are unescaped and therefore match any
# character, and "/tests?" also matches paths such as "/testimonials" - confirm
# the resulting false positives are acceptable.
SUSPICIOUS_PATH_REGEX='(.env([./_-]|$)|/env([./_-]|$)|.git(/|$)|wp-config|wp-admin|wp-json|/xmlrpc.php|/boaform|/cgi-bin/|/HNAP1|/api/sonicos/|/.DS_Store|/docker-compose.yml|/docker-compose.yaml|/config(.|/|$)|/credentials(.|/|$)|/secrets?(.|/|$)|stripe(.|/|_|$)|/debug.log|/phpinfo.php|/vendor/|/storage/|/backup/|/old/|/tests?|/graphql|/swagger|/actuator|/manager/html|/server-status|/application.properties|/application.yml|/application.yaml|/parameters.yml|/.vscode/|/.aws/|/id_rsa|/phpMyAdmin|/pma|/developmentserver/metadatauploader|/[+]CSCOE[+]/logon.html|/remote/login|/admin/login.asp|/login.(asp|jsp|html|htm)|/cgi-bin/login.cgi|/ReportServer|/containers/json|/vendor/phpunit|/phpunit|pearcmd|/index.php.s=/index/think/app/invokefunction|/autodiscover/|/ews/|/ecp/|/owa/|/webclient/|/SDK/webLanguage)'

# ---------------------------------------------------------------------------
# Collect the part of LOGFILE that was appended since the previous run.
#
# Reads:  LOGFILE, STATEFILE, BLOCKLOG, WORKFILE (used as temp-file prefix)
# Writes: STATEFILE (offset processed so far), WORKFILE (re-pointed at a
#         private temp file holding the new bytes), CURRENT_SIZE, LAST_OFFSET
# Exits:  1 if the log is missing, cannot be stat-ed, or no temp file can be
#         created; 0 if there is nothing new to process.
# ---------------------------------------------------------------------------
mkdir -p "$(dirname "$STATEFILE")"
touch "$BLOCKLOG"

if [ ! -f "$LOGFILE" ]; then
    echo "$(date '+%F %T') ERROR: log file not found: $LOGFILE" >> "$BLOCKLOG"
    exit 1
fi

CURRENT_SIZE=$(stat -c%s "$LOGFILE" 2>/dev/null)
if [[ ! "${CURRENT_SIZE:-}" =~ ^[0-9]+$ ]]; then
    echo "$(date '+%F %T') ERROR: could not stat $LOGFILE" >> "$BLOCKLOG"
    exit 1
fi

LAST_OFFSET=0
if [ -f "$STATEFILE" ]; then
    LAST_OFFSET=$(cat "$STATEFILE" 2>/dev/null)
    if [[ "$LAST_OFFSET" =~ ^[0-9]{1,18}$ ]]; then
        # Force base 10: a value with leading zeros would otherwise be parsed
        # as octal (or rejected) by the arithmetic expansions below.
        LAST_OFFSET=$((10#$LAST_OFFSET))
    else
        LAST_OFFSET=0
    fi
fi

# Handle rotation/truncation: the file is now shorter than what was already
# processed, so start again from the beginning.
if [ "$CURRENT_SIZE" -lt "$LAST_OFFSET" ]; then
    LAST_OFFSET=0
fi

# Use a private, unpredictable scratch file instead of a fixed name in a
# world-writable directory, and remove it on every exit path.
WORKFILE=$(mktemp "${WORKFILE}.XXXXXX") || {
    echo "$(date '+%F %T') ERROR: could not create temporary file" >> "$BLOCKLOG"
    exit 1
}
trap 'rm -f -- "$WORKFILE"' EXIT

# Copy exactly the bytes between the saved offset and the size measured above.
# Reading to end-of-file instead would pick up lines appended after the stat
# call, and those lines would be processed a second time on the next run.
tail -c +"$((LAST_OFFSET + 1))" "$LOGFILE" 2>/dev/null \
    | head -c "$((CURRENT_SIZE - LAST_OFFSET))" > "$WORKFILE"

# Advance the saved offset by what was really read, so that a failed or short
# read (e.g. the log was rotated in between) does not skip unprocessed data.
READ_BYTES=$(stat -c%s "$WORKFILE" 2>/dev/null)
[[ "${READ_BYTES:-}" =~ ^[0-9]+$ ]] || READ_BYTES=0
echo "$((LAST_OFFSET + READ_BYTES))" > "$STATEFILE"

if [ ! -s "$WORKFILE" ]; then
    rm -f "$WORKFILE"
    exit 0
fi

# ---------------------------------------------------------------------------
# Decide whether blocking goes through ipset sets or per-address rules.
#
# USE_IPSET / USE_IPSET6 are set to 1 only when the corresponding set exists
# (or was created) AND the DROP rule referencing it is installed. If either
# step fails the flag stays 0, so later code falls back to inserting one
# iptables/ip6tables rule per address instead of adding entries to a set that
# no firewall rule consults.
# ---------------------------------------------------------------------------
USE_IPSET=0
USE_IPSET6=0

if command -v ipset >/dev/null 2>&1; then
    # IPv4: set "apacheblock", entries expire after 86400 s (24 h).
    if { ipset list apacheblock >/dev/null 2>&1 ||
         ipset create apacheblock hash:ip timeout 86400; } &&
       { iptables -C INPUT -m set --match-set apacheblock src -j DROP >/dev/null 2>&1 ||
         iptables -I INPUT 1 -m set --match-set apacheblock src -j DROP; }; then
        USE_IPSET=1
    else
        echo "WARN: ipset set 'apacheblock' unavailable, falling back to per-address iptables rules" >&2
    fi

    # IPv6: set "apacheblock6", only attempted when ip6tables is installed.
    if command -v ip6tables >/dev/null 2>&1; then
        if { ipset list apacheblock6 >/dev/null 2>&1 ||
             ipset create apacheblock6 hash:ip family inet6 timeout 86400; } &&
           { ip6tables -C INPUT -m set --match-set apacheblock6 src -j DROP >/dev/null 2>&1 ||
             ip6tables -I INPUT 1 -m set --match-set apacheblock6 src -j DROP; }; then
            USE_IPSET6=1
        else
            echo "WARN: ipset set 'apacheblock6' unavailable, falling back to per-address ip6tables rules" >&2
        fi
    fi
fi

#######################################
# Drop all further traffic from one client address.
#
# The argument comes from a log file, so it is validated as an IP literal
# before it reaches any firewall tool; host names and other junk are refused.
#
# Globals:   USE_IPSET, USE_IPSET6 (read)
# Arguments: $1 - IPv4 or IPv6 address; anything containing ":" is treated
#                 as IPv6
# Outputs:   on success, the mechanism used, on stdout:
#            ipset | ipset6 | iptables | ip6tables
# Returns:   0 if the address is now blocked,
#            1 if the address is invalid, no tool is available, or the
#              firewall command failed (nothing is printed in that case)
#######################################
block_ip() {
    local ip="${1:-}"

    if [[ "$ip" == *":"* ]]; then
        [[ "$ip" =~ ^[0-9A-Fa-f:.]+$ ]] || return 1

        if [ "$USE_IPSET6" -eq 1 ]; then
            # -exist: an entry that is already present is not an error, so a
            # non-zero status here always means the add really failed.
            ipset -exist add apacheblock6 "$ip" timeout 86400 2>/dev/null || return 1
            echo "ipset6"
            return 0
        fi

        if command -v ip6tables >/dev/null 2>&1; then
            ip6tables -C INPUT -s "$ip" -j DROP >/dev/null 2>&1 ||
                ip6tables -I INPUT 1 -s "$ip" -j DROP ||
                return 1
            echo "ip6tables"
            return 0
        fi

        return 1
    fi

    [[ "$ip" =~ ^[0-9]{1,3}([.][0-9]{1,3}){3}$ ]] || return 1

    if [ "$USE_IPSET" -eq 1 ]; then
        ipset -exist add apacheblock "$ip" timeout 86400 2>/dev/null || return 1
        echo "ipset"
        return 0
    fi

    # Insert the rule only if an identical one is not already present.
    iptables -C INPUT -s "$ip" -j DROP >/dev/null 2>&1 ||
        iptables -I INPUT 1 -s "$ip" -j DROP ||
        return 1
    echo "iptables"
    return 0
}

#######################################
# Score every client address found in a chunk of access-log lines.
#
# Works with any POSIX awk: case-insensitive matching uses gawk IGNORECASE
# when it is honoured and falls back to lower-casing both pattern and subject
# otherwise. In the fallback the configured regexes are passed through
# tolower()/toupper(), so they must not contain case-sensitive escapes.
#
# Globals:   THRESHOLD, BURST_THRESHOLD, WHITELIST_REGEX, GOOD_BOT_UA_REGEX,
#            BAD_TOOL_UA_REGEX, BAD_BOT_UA_REGEX, SUSPICIOUS_PATH_REGEX,
#            SUSPICIOUS_METHOD_REGEX, DISPLAY_SCORES (all read)
# Arguments: $1 - file containing the log lines to analyse
# Outputs:   tab-separated records on stdout, one or two per address:
#              SCORE <ip> <total> <requests> <unique paths> <KEEP|BLOCK> <last line>
#              BLOCK <ip> <total> <requests> <unique paths> <last line>
#            SCORE records are only printed when DISPLAY_SCORES is "true".
#######################################
_score_access_log() {
    local chunk="$1"

    awk \
      -v threshold="$THRESHOLD" \
      -v burst_threshold="$BURST_THRESHOLD" \
      -v whitelist="$WHITELIST_REGEX" \
      -v goodbot="$GOOD_BOT_UA_REGEX" \
      -v badtool="$BAD_TOOL_UA_REGEX" \
      -v badbot="$BAD_BOT_UA_REGEX" \
      -v suspicious="$SUSPICIOUS_PATH_REGEX" \
      -v suspicious_methods="$SUSPICIOUS_METHOD_REGEX" \
      -v display_scores="$DISPLAY_SCORES" '
BEGIN {
    # gawk: makes every regex match below case-insensitive.
    IGNORECASE = 1

    # Any other awk ignores IGNORECASE. Detect that and normalise the
    # configured patterns instead; subjects are always lower-cased
    # (upper-cased for the method) before matching.
    if ("A" !~ /a/) {
        whitelist = tolower(whitelist)
        goodbot = tolower(goodbot)
        badtool = tolower(badtool)
        badbot = tolower(badbot)
        suspicious = tolower(suspicious)
        suspicious_methods = toupper(suspicious_methods)
    }
}
{
    # Blank lines have no client field. An empty field would be swallowed by
    # the tab-delimited reader downstream and shift the score into the
    # address column.
    if (NF == 0) next

    ip = $1
    if (tolower(ip) ~ whitelist) next

    line = $0
    score = 0
    method = ""
    path = ""
    status = ""
    ua = ""
    req = ""

    # Quoted fields of the combined log format: q[2] is the request line,
    # the last quoted field is the user agent.
    n = split(line, q, "\"")
    if (n >= 2) req = q[2]
    if (n >= 6) ua = q[n-1]

    nreq = split(req, r, " ")
    if (nreq >= 1) method = r[1]
    if (nreq >= 2) path = r[2]

    # Status code: three digits right after a closing quote.
    if (match(line, /" [0-9][0-9][0-9] /)) {
        status = substr(line, RSTART + 2, 3)
    }

    lpath = tolower(path)
    lua = tolower(ua)
    lmethod = toupper(method)

    reqs[ip]++
    # Count distinct paths per address as they are first seen.
    if (path != "" && !((ip, path) in uniq)) {
        uniq[ip, path] = 1
        upaths[ip]++
    }

    if (lpath ~ suspicious) score += 5
    if (ua == "" || ua == "-") score += 2
    if (lua ~ badtool) score += 5
    if (lua ~ badbot && lua !~ goodbot) score += 3

    if (lmethod ~ suspicious_methods) score += 6
    if (lmethod == "CONNECT") score += 8
    if (lmethod == "PROPFIND") score += 7

    # Binary data in the request line, logged by Apache as a literal \x03
    # escape; this also covers the TLS handshake prefix \x16\x03\x01.
    if (tolower(req) ~ /\\x03/) score += 8

    # Explicit Exchange / Outlook / Autodiscover probing
    if (lpath ~ /\/(autodiscover|ews|ecp|owa)\//) score += 8

    # Explicit zgrab scanner detection
    if (lua ~ /zgrab/) score += 8

    # Explicit Powershell / ZDI autodiscover exploit pattern
    if (lpath ~ /powershell/) score += 8

    if (lmethod == "HEAD" || lmethod == "OPTIONS") score += 1
    if (lmethod == "POST" && lpath ~ suspicious) score += 4

    if (status ~ /^(400|401|403|404|405|444)$/) score += 1
    if (status ~ /^(301|302)$/ && lpath ~ suspicious) score += 1

    scores[ip] += score
    lastline[ip] = line
}
END {
    show = (tolower(display_scores) == "true")

    for (ip in reqs) {
        unique_count = upaths[ip] + 0
        total = scores[ip]

        if (reqs[ip] >= burst_threshold + 0) total += 8
        if (unique_count >= 25) total += 6

        decision = (total >= threshold + 0) ? "BLOCK" : "KEEP"

        if (show) {
            print "SCORE\t" ip "\t" total "\t" reqs[ip] "\t" unique_count "\t" decision "\t" lastline[ip]
        }

        if (decision == "BLOCK") {
            print "BLOCK\t" ip "\t" total "\t" reqs[ip] "\t" unique_count "\t" lastline[ip]
        }
    }
}
' "$chunk"
}

#######################################
# Act on the records produced by _score_access_log.
#
# SCORE records are echoed to stdout. For BLOCK records the address is
# blocked through block_ip unless it is already blocked, and the outcome is
# appended to BLOCKLOG.
#
# Globals:   USE_IPSET, USE_IPSET6, PRINT_BLOCKED_IP, BLOCKLOG (read)
# Arguments: none; records are read from stdin
# Outputs:   "score ..." lines and, if PRINT_BLOCKED_IP is true,
#            "BLOCKED_IP <ip>" lines on stdout
#######################################
_apply_block_decisions() {
    local kind ip score reqs unique_count sample
    local already method

    # For SCORE records the sixth variable receives "<decision><TAB><line>".
    while IFS=$'\t' read -r kind ip score reqs unique_count sample; do
        case "${kind:-}" in
            SCORE)
                printf 'score ip=%s score=%s reqs=%s unique_paths=%s decision=%s\n' \
                    "$ip" "$score" "$reqs" "$unique_count" "$sample"
                continue
                ;;
            BLOCK)
                ;;
            *)
                continue
                ;;
        esac

        # Skip addresses that the firewall already drops.
        already=0
        if [[ "$ip" == *":"* ]]; then
            if [ "$USE_IPSET6" -eq 1 ]; then
                ipset test apacheblock6 "$ip" >/dev/null 2>&1 && already=1
            else
                ip6tables -C INPUT -s "$ip" -j DROP >/dev/null 2>&1 && already=1
            fi
        else
            if [ "$USE_IPSET" -eq 1 ]; then
                ipset test apacheblock "$ip" >/dev/null 2>&1 && already=1
            else
                iptables -C INPUT -s "$ip" -j DROP >/dev/null 2>&1 && already=1
            fi
        fi

        if [ "$already" -eq 1 ]; then
            printf '%s ALREADY_BLOCKED %s score=%s reqs=%s unique_paths=%s sample=%s\n' \
                "$(date '+%F %T')" "$ip" "$score" "$reqs" "$unique_count" "$sample" >> "$BLOCKLOG"
            continue
        fi

        # block_ip prints the mechanism it used; empty output means failure.
        method=$(block_ip "$ip")
        if [ -n "${method:-}" ]; then
            if [ "$PRINT_BLOCKED_IP" = true ]; then
                printf 'BLOCKED_IP %s\n' "$ip"
            fi
            printf '%s BLOCKED %s via %s score=%s reqs=%s unique_paths=%s sample=%s\n' \
                "$(date '+%F %T')" "$ip" "$method" "$score" "$reqs" "$unique_count" "$sample" >> "$BLOCKLOG"
        else
            printf '%s WARN could not block %s score=%s reqs=%s unique_paths=%s sample=%s\n' \
                "$(date '+%F %T')" "$ip" "$score" "$reqs" "$unique_count" "$sample" >> "$BLOCKLOG"
        fi
    done
}

# Run the analysis. The guard makes sure awk is always given a real file: with
# an empty file operand it would silently read standard input instead.
if [[ -n "${WORKFILE:-}" && -f "$WORKFILE" ]]; then
    _score_access_log "$WORKFILE" | _apply_block_decisions
fi

# Discard the scratch copy of the processed log lines and report success.
rm -f "${WORKFILE}"
exit 0
