#!/usr/bin/perl

# Copyright (c) 2008-2009 Ulrich Kautz 
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# A copy of the GPL can be found at http://www.gnu.org/licenses/gpl.txt
#
# AUTHOR:  uk@fortrabbit.de
# DATE:    2008-09-10
# NAME:    find-mail-communication.pl
# VERSION: 0.2
# URL:     http://blog.foaa.de/


use strict;
use warnings;

use Data::Dumper;
use Getopt::Long;

our $VERSION = '0.2';

# Help text printed (via die) when no search criterion is given.
# $0 is interpolated so the example shows the name the script was invoked as.
my $USAGE = <<USAGE;
cat /path/to/postfix.log | $0 -t to\@domain.tld -f from\@domain.tld -m 10000
    --ip
        all mails from client ip

    --to | -t
        all mails to recipient

    --not-to | -nt
        all mails NOT to recipient

    --from | -f
        all mails from sender

    --not-from | -nf
        all mails NOT from sender

    --max_cache_lines | -m
        amount of lines to cache.. internal setting.. increase if you
        get a couple of empty-FROMs in a "--to"-search

USAGE


# Parse the command line into %opt. Every search option defaults to the
# empty string (meaning "not given"); the line cache defaults to 50_000.
# GetOptions returns false when it meets an unknown option or a malformed
# value; in that case print the usage and stop instead of silently running
# with a partially parsed command line.
my %opt;
GetOptions(
    "ip=s" => \( $opt{ ip } = "" ),
    "f|from=s" => \( $opt{ from } = "" ),
    "nf|not-from=s" => \( $opt{ not_from } = "" ),
    "t|to=s" => \( $opt{ to } = "" ),
    "nt|not-to=s" => \( $opt{ not_to } = "" ),
    "m|max_cache_lines=i" => \( $opt{ max_cache_lines } = 50_000 ),
) or die $USAGE;


# due to performance .. dont do this in the while loop
my $use_from     = $opt{ from } ne "";
my $use_not_from = $opt{ not_from } ne "";
my $use_to       = $opt{ to } ne "";
my $use_not_to   = $opt{ not_to } ne "";
my $use_ip       = $opt{ ip } ne "";

# At least one positive criterion is required; the NOT-options only narrow
# down an existing search.
die $USAGE unless $use_from || $use_to || $use_ip;

# to lowercase (addresses read from the log are lowercased before comparing)
$opt{ $_ } = lc( $opt{ $_ } ) for keys %opt;

my @last_lines = ();    # cache of not-yet-claimed from/to/client log lines
my %watch_id = ();      # queue id => { from => {..}, to => {..}, ip => {..} }
my %times = ();         # queue id => timestamp of the first matching line
my %count = ();         # queue id => order in which the id was first matched
my $count = 0;          # running counter feeding %count
my %ips = ();           # queue id => first client ip recorded for the id

# Main pass over the log, read line by line from STDIN.
#
# Each line is handled in two stages:
#   1. If it matches one of the requested criteria (--ip / --from / --to) its
#      queue id becomes "watched" and the matching value is recorded.
#   2. Otherwise, if it is a from= / to= / client= line, it is either attached
#      to an already watched queue id or remembered in @last_lines so that a
#      queue id which becomes watched later can still pick it up (see
#      search_old_lines).
INPUT:
while ( my $line = <STDIN> ) {
    chomp $line;

    # Everything up to and including the first h:m:s group; undef when the
    # line contains no such group.
    my ( $time ) = $line =~ /^(.*? \d\d?:\d\d?:\d\d?)/;

    # find by IP
    if ( $use_ip
        && ( $line =~ / ([0-9A-F]+): \s+ client = [^\[]* \[ (\Q$opt{ ip }\E [^\]]*) \] /xms
          || $line =~ / ([0-9A-F]+): .+?  \[ (\Q$opt{ ip }\E [^\]]*) \]: /xms ) ) {
        my $id = $1;
        my $ip = $2;
        $watch_id{ $id } ||= {};
        $watch_id{ $id }->{ ip } ||= {};
        $watch_id{ $id }->{ ip }->{ $ip }++;
        $times{ $id } ||= $time;
        $ips{ $id } ||= $ip;
        # "exists" rather than "||=": the first id gets position 0, which is
        # false and would otherwise be re-assigned on its next match.
        $count{ $id } = $count++ unless exists $count{ $id };
        next INPUT;
    }

    # find a FROM we are looking for
    elsif ( $use_from && $line =~ /: ([0-9A-F]+): from=<(\Q$opt{ from }\E[^>]*)>/ ) {
        my $id = $1;
        my $from = lc( $2 );
        $watch_id{ $id } ||= {};
        $watch_id{ $id }->{ from } ||= {};
        $watch_id{ $id }->{ from }->{ $from }++;
        $times{ $id } ||= $time;
        $count{ $id } = $count++ unless exists $count{ $id };
        # pick up the client line cached before this id became watched
        search_old_lines( \@last_lines, $id, client => "" );
        next INPUT;
    }

    # find a TO we are looking for
    elsif ( $use_to && $line =~ /: ([0-9A-F]+): to=<([^>]+)>/ ) {
        my $id = $1;
        my $to = lc( $2 );
        if ( $to =~ /^\Q$opt{ to }/ ) {
            $watch_id{ $id } ||= {};
            $watch_id{ $id }->{ to } ||= {};
            $watch_id{ $id }->{ to }->{ $to }++;
            $times{ $id } ||= $time;
            $count{ $id } = $count++ unless exists $count{ $id };
            # pick up cached from/to/client lines of this id; the current
            # recipient is passed along so it is not counted twice
            search_old_lines( \@last_lines, $id, to => $to, client => "" );
            next INPUT;
        }
        # a non-matching recipient falls through to the generic handling below
    }

    # looking for a watched ID for FROM
    if ( $line =~ /postfix\/qmgr\[\d+\]: ([0-9A-F]+): from=<([^>]+)>/ ) {
        my $id = $1;
        my $from = lc( $2 );
        if ( defined $watch_id{ $id } ) {
            $watch_id{ $id }->{ from } ||= {};
            $watch_id{ $id }->{ from }->{ $from }++;

            next INPUT;
        }
        else {
            push @last_lines, $line;
        }
    }

    # looking for a watched ID for TO
    elsif ( $line =~ /postfix\/.+?\[\d+\]: ([0-9A-F]+): to=<([^>]+)>/ ) {
        my $id = $1;
        my $to = lc( $2 );
        if ( defined $watch_id{ $id } ) {
            $watch_id{ $id }->{ to } ||= {};
            $watch_id{ $id }->{ to }->{ $to }++;

            next INPUT;
        }
        else {
            push @last_lines, $line;
        }
    }

    # looking for a watched ID for CLIENT (connecting host / ip)
    elsif ( $line =~ /postfix\/.+?\[\d+\]: ([0-9A-F]+): client=([^ \[]+?)\[([^\]]+?)\]/ ) {
        my $id = $1;
        my $ip = $3;
        if ( defined $watch_id{ $id } ) {
            $watch_id{ $id }->{ ip } ||= {};
            $watch_id{ $id }->{ ip }->{ $ip }++;
            # keep the reported ip in sync, as search_old_lines does
            $ips{ $id } ||= $ip;
            next INPUT;
        }
        else {
            push @last_lines, $line;
        }
    }

    # Keep at most max_cache_lines lines in mind. New lines are pushed onto
    # the end, so the oldest line is at the front and is the one to drop.
    shift @last_lines if @last_lines > $opt{ max_cache_lines };
}


# Post-filter the watched queue ids. This only runs when an exclusion
# (--not-from / --not-to) was given or when both --from and --to are used;
# in every other case all watched ids are reported unchanged.
#
# An id is kept only if
#   * every requested positive criterion (--from, --to, --ip) matched at
#     least one recorded value (prefix match), and
#   * none of its recorded addresses contains a --not-from / --not-to
#     pattern (substring match).
if ( $use_not_from || $use_not_to || ( $use_from && $use_to ) ) {
    my @ids = keys %watch_id;
    foreach my $id( @ids ) {
        my $entry = $watch_id{ $id };

        # criteria that were not requested count as satisfied
        my $from_seen = $use_from ? 0 : 1;
        my $to_seen   = $use_to ? 0 : 1;
        my $ip_seen   = $use_ip ? 0 : 1;
        my $excluded  = 0;

        # The positive and the negative test are independent: an address
        # that satisfies --from must still be checked against --not-from.
        foreach my $from( sort keys %{ $entry->{ from } || {} } ) {
            $from_seen = 1 if $use_from && $from =~ /^\Q$opt{ from }/;
            $excluded++    if $use_not_from && $from =~ /\Q$opt{ not_from }/;
        }

        foreach my $to( sort keys %{ $entry->{ to } || {} } ) {
            $to_seen = 1 if $use_to && $to =~ /^\Q$opt{ to }/;
            $excluded++  if $use_not_to && $to =~ /\Q$opt{ not_to }/;
        }

        foreach my $ip( sort keys %{ $entry->{ ip } || {} } ) {
            $ip_seen = 1 if $use_ip && $ip =~ /^\Q$opt{ ip }/;
        }

        my $keep = $from_seen && $to_seen && $ip_seen && ! $excluded;
        delete $watch_id{ $id } unless $keep;
    }
}


# Print one record per remaining queue id, in the order in which the ids
# were first matched, followed by the total number of mails.
foreach my $id( sort { $count{ $a } <=> $count{ $b } } keys %watch_id ) {
    my $entry = $watch_id{ $id };

    # The timestamp is undef when the matching line had no parsable time.
    my $time = defined $times{ $id } ? $times{ $id } : '';

    # %ips is not filled on every path; fall back to the ips recorded for
    # the id itself, and to an empty string if there are none, so that no
    # "uninitialized value" warning is emitted.
    my $ip = $ips{ $id };
    ( $ip ) = sort keys %{ $entry->{ ip } || {} } unless defined $ip;
    $ip = '' unless defined $ip;

    print "Id:\n  * $id\n";
    print "Time:\n  * $time\n";
    print "Ip:\n  * $ip\n";

    print "From:\n";
    foreach my $from( sort keys %{ $entry->{ from } || {} } ) {
        print "  * $from\n";
    }

    print "To:\n";
    foreach my $to( sort keys %{ $entry->{ to } || {} } ) {
        print "  * $to\n";
    }
    print "\n\n";
}

printf( "--------------------\nTotal:\n  * %d Mails\n\n", scalar keys %watch_id );






# search_old_lines( \@lines, $id, %checks )
#
# Scans the cached log lines for entries belonging to queue id $id and adds
# what it finds to the file-level %watch_id (and %ips) records.
#
#   \@lines  reference to the list of cached log lines
#   $id      queue id whose lines are wanted
#   %checks  selects what to collect:
#              to / from => address  collect to= and from= lines; an address
#                                    equal to the value given for its own
#                                    type is skipped
#              client    => value    collect client= lines; "" accepts any
#                                    client, otherwise the host name or the
#                                    ip has to equal the value
sub search_old_lines {
    my ( $old_lines_ref, $id, %checks ) = @_;

    my $want_mail   = grep { $_ eq 'to' || $_ eq 'from' } keys %checks;
    my $want_client = exists $checks{ client };

    CACHED:
    for my $cached ( @{ $old_lines_ref } ) {

        # to= / from= line of this queue id
        if ( $want_mail && $cached =~ /postfix\/.+?\[\d+\]: $id: (to|from)=<([^>]+)>/ ) {
            my $kind    = $1;
            my $address = lc $2;

            # the caller has already recorded this exact address
            next CACHED if defined $checks{ $kind } && $address eq $checks{ $kind };

            $watch_id{ $id }->{ $kind }->{ $address }++;
            next CACHED;
        }

        # client= line of this queue id
        next CACHED unless $want_client;
        next CACHED unless $cached =~ /postfix\/.+?\[\d+\]: $id: client=([^ \[]+?)\[([^\]]+?)\]/;

        my $host = lc $1;
        my $addr = $2;

        my $accepted = $checks{ client } eq ''
            || $host eq $checks{ client }
            || $addr eq $checks{ client };
        next CACHED unless $accepted;

        $watch_id{ $id }->{ ip }->{ $addr }++;
        $ips{ $id } ||= $addr;
    }

    return;
}


