Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.
Comment: New version implements configurable DNS timeout threshold, cleaned up code a bit
No Format
  loadplugin    Mail::SpamAssassin::Plugin::iXhash /path/to/iXhash.pm
# This makes DNS queries time out after 10 seconds (2x default)
  ixhash_timeout    10

# This list uses iX Magazine's spam as datasource.
  body 		         IXHASH eval:ixhashtest('ix.dnsbl.manitu.net')
  describe 	   IXHASH This mail has been classified as spam @ iX Magazine, Germany
  tflags        IXHASH net
  score         IXHASH 1.5

# This list comes in @ spamtraps run by LogIn & Solutions AG, Germany
# Manually verified stuff
  body          LOGINHASH1 eval:ixhashtest('nospam.login-solutions.de')
  describe      LOGINHASH1 This mail has been classified as spam @ LogIn&Solutions AG, Germany
  tflags        LOGINHASH1 net
  score         LOGINHASH1 1.5

# This list contains hashes from Mails classified as spam at a larger company based in Germany
# Lots of stuff, but automatically categorized and contributed
  body          LOGINHASH2 eval:ixhashtest('nospam.login-solutions.ag')
  describe      LOGINHASH2 This mail has been classified as spam @ unknown company, Germany
  tflags        LOGINHASH2 net
  score         LOGINHASH2 1.5

No Format
=head1 NAME

Mail::SpamAssassin::Plugin::iXhash - compute fuzzy checksums from mail bodies and compare to known spam ones via DNS

=head1 SYNOPSIS
  loadplugin    Mail::SpamAssassin::Plugin::iXhash /path/to/iXhash.pm
  ixhash_timeout	10
  body 		          IXHASH eval:ixhashtest('ix.dnsbl.manitu.net')
  describe   	   IXHASH This mail has been classified as spam @ iX Magazine, Germany
  tflags        IXHASH net
  score         IXHASH 1.5

=head1 DESCRIPTION

iXhash.pm is a plugin for SpamAssassin 3.0.0 and up. It takes the body of a mail, strips parts from it and then computes an MD5 hash value from the rest.
These values will then be looked up via DNS. Call it a 'poor man's DCC', if you want.

This plugin is based on parts of the procmail-based project 'NiX Spam', developed by Bert Ungerer (un@ix.de).
For more information see http://www.heise.de/ix/nixspam/. The procmail code producing the hashes only can be found here:
ftp://ftp.ix.de/pub/ix/ix_listings/2004/05/checksums

Parts of the code were submitted via heise forum by 'kungfuhasi'
See http://www.heise.de/ix/foren/go.shtml?read=1&msg_id=7246759&forum_id=48292.

Martin Blapp (mb@imp.ch) found a problem occurring on Perl 5.8.7 - and a way to bypass it. Thanks a lot!

The hashes from spam received by Heise/iX magazine are available at ix.dnsbl.manitu.net,
kindly provided by Manuel Schmitt.

Further improvements (DNS timeouts) were contributed by Dallas Engelken (dallase@uribl.com).

The hashes from spam received by LogIn & Solutions AG and some of its customers are available at nospam.login-solutions.de.
A second list based on input from another source can be accessed at nospam.login-solutions.ag.

It's not too difficult to create your own blacklist provided you have enough input (read: spam). Well, even I managed to do
so. If you do likewise please drop that info somewhere so other people can use that one too

=cut


package iXhash;
use strict;
use Mail::SpamAssassin;
use Mail::SpamAssassin::Plugin;
use Digest::MD5 qw(md5 md5_hex md5_base64);
use Net::DNS;
use Net::DNS::Resolver;
# Locale - this was on Bert's wishlist
use POSIX qw(locale_h);
setlocale(LC_CTYPE, "de_DE.ISO8859-1");
# LC_CTYPE now "Deutsch, Deutschland, codeset ISO 8859-1"
# Maybe not appropriate for spam that is neither German nor English

our @ISA = qw(Mail::SpamAssassin::Plugin);

# Thin wrapper: hands all arguments on to Mail::SpamAssassin::dbg().
sub dbg {
	return Mail::SpamAssassin::dbg(@_);
}


sub new {
	my ($class, $mailsa, $serverEngelken (dallase@uribl.com) - see comments for details. 

=cut

package Mail::SpamAssassin::Plugin::iXhash;
use strict;
use Mail::SpamAssassin;
use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::Util;
use Digest::MD5 qw(md5 md5_hex md5_base64);
use Net::DNS;
use Net::DNS::Resolver;
# Locale - this was on Bert's wishlist
use POSIX qw(locale_h);
setlocale(LC_CTYPE, "de_DE.ISO8859-1");
# LC_CTYPE now "Deutsch, Deutschland, codeset ISO 8859-1"
# Maybe not appropriate for spam that is neither German nor English

use vars qw(@ISA);
@ISA = qw(Mail::SpamAssassin::Plugin);
# Thin wrapper: hands all arguments on to Mail::SpamAssassin::dbg().
sub dbg {
        return Mail::SpamAssassin::dbg(@_);
}
# Constructor.
#
# Arguments:
#   $class  - class name, or an existing object whose class is reused
#   $mailsa - object handed on to the parent constructor; its {conf}
#             entry is passed to set_config()
#   (a third positional argument is accepted and ignored)
#
# Builds the object via the parent class, registers the plugin's
# configuration settings and the eval rule "ixhashtest", and returns
# the new plugin object.
sub new {
        my ($class, $mailsa) = @_;

        # Allow being called on an instance as well as on the class name.
        $class = ref($class) if ref($class);

        my $plugin = $class->SUPER::new($mailsa);
        bless $plugin, $class;

        $plugin->set_config($mailsa->{conf});
        $plugin->register_eval_rule("ixhashtest");

        return $plugin;
}


# Registers this plugin's configuration settings with the config parser.
#
# Arguments:
#   $self - the plugin instance (not used here)
#   $conf - configuration object; $conf->{parser} must provide
#           register_commands()
#
# Defines one numeric setting, 'ixhash_timeout', with a default of 5.
# Returns whatever register_commands() returns.
sub set_config {
	my ($self, $conf) = @_;
	my @cmds;

	# implements ixhash_timeout config option - by dallase@uribl.com
	push @cmds, {
		setting => 'ixhash_timeout',
		default => 5,
		type    => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
	};

	return $conf->{parser}->register_commands(\@cmds);
}


# Looks up one checksum in one DNS zone.
#
# Arguments:
#   $resolver  - Net::DNS::Resolver object used for the query
#   $digest    - hex MD5 checksum to look up
#   $dnsserver - DNS zone the checksum is queried under
#
# Returns 1 if the answer contains at least one A record carrying an
# address, 0 otherwise (including when the query yields no answer).
sub _ixhash_lookup {
	my ($resolver, $digest, $dnsserver) = @_;
	my $found = 0;

	dbg ("IXHASH: Now checking $digest.$dnsserver");
	my $answer = $resolver->search($digest.'.'.$dnsserver, "A", "IN");
	return $found unless $answer;

	foreach my $rr ($answer->answer) {
		next unless $rr->type eq "A";
		dbg ("IXHASH: Received reply from $dnsserver:". $rr->address);
		$found = 1 if $rr->address;
	}
	return $found;
}

# Eval rule: computes up to three MD5 checksums over reduced forms of the
# mail body and looks each of them up in the DNS zone $dnsserver.
#
# Arguments:
#   $self         - the plugin instance
#   $permsgstatus - per-message status object; {msg}->get_body() supplies
#                   the body and {main}->{conf}->{'ixhash_timeout'} the
#                   timeout in seconds (5 is used when unset or 0)
#   $muell        - unused placeholder argument
#   $dnsserver    - DNS zone to query, as given in the rule definition
#
# Returns 1 if any computed checksum is listed, 0 otherwise. A timeout or
# any other error inside the lookup section is logged via dbg() and the
# hits found up to that point are returned.
sub ixhashtest {
	my ($self, $permsgstatus, $muell, $dnsserver) = @_;
	dbg("IXHASH: IxHash querying Server $dnsserver");

	my $digest    = "";
	my $body      = "";
	my $body_copy = "";
	my $hits      = 0;

	# Flatten the body lines into one string.
	my @body = $permsgstatus->{msg}->get_body();
	foreach (@body) {
		$body .= join "", @$_;
	}

	my $resolver = Net::DNS::Resolver->new;
	# $resolver->debug(1);

	# alarm the dns query - dallase@uribl.com
	# --------------------------------------------------------------------------
	# here we implement proper alarms, ala Pyzor, Razor2 plugins...
	# keep the alarm as $oldalarm, so we dont lose the timeout-child alarm
	# see http://issues.apache.org/SpamAssassin/show_bug.cgi?id=3828#c123
	my $oldalarm = 0;
	my $timeout = $permsgstatus->{main}->{conf}->{'ixhash_timeout'} || 5;

	eval {
		Mail::SpamAssassin::Util::trap_sigalrm_fully(sub { die "ixhash timeout reached"; });
		$oldalarm = alarm($timeout);

		#-----------------------------------------------------------------------
		# Creation of hash # 1 if following conditions are met:
		# - mail contains at least 16 spaces or tabs
		# - mail consists of at least 2 lines
		# NOTE(review): Perl may interpolate `$.` inside the second pattern
		# instead of treating `$` as an anchor - confirm before relying on
		# the "at least 2 lines" condition. Pattern kept unchanged so the
		# computed checksums stay the same.
		if (($body =~ /([\s\t].+?){16,}/ ) && ($body =~ /.*$.*/)){
			# Start from the full body; otherwise an empty string is hashed.
			$body_copy = $body;
			# All space class chars just one time
			# Do this in two steps to avoid Perl segfaults
			# if there are more than 2.600 identical chars to be replaced
			# Step One
			$body_copy =~ s/([[:space:]]{100})(?:\1+)/$1/g;
			# Step Two
			$body_copy =~ s/([[:space:]])(?:\1+)/$1/g;
			# remove graph class chars and some specials
			$body_copy =~ s/[[:graph:]]+//go;
			# Create actual digest
			$digest = md5_hex($body_copy);
			dbg ("IXHASH: Computed hash-value $digest via method 1");
			# Now check via DNS query
			$hits = 1 if _ixhash_lookup($resolver, $digest, $dnsserver);
		}
		# End of computing hash #1
		#-----------------------------------------------------------------------

		#-----------------------------------------------------------------------
		# Creation of hash # 2 if mail contains at least 3 of the following characters:
		# '[<>()|@*'!?,]' or the combination of ':/'
		# (To match something like "Already seen?  http://host.domain.tld/")
		if ($body =~ /((([<>\(\)\|@\*'!?,])|(:\/)).*?){3,}/m ) {
			$body_copy = $body;
			# remove redundant stuff
			$body_copy =~ s/[[:cntrl:][:alnum:]%&#;=]+//g;
			# replace '_' with '.'
			$body_copy =~ tr/_/./;
			# replace duplicate chars. This too suffers from a bug in perl
			# so we do it in two steps
			# Step One
			$body_copy =~ s/([[:print:]]{100})(?:\1+)/$1/g;
			# Step Two
			$body_copy =~ s/([[:print:]])(?:\1+)/$1/g;
			# Computing hash...
			$digest = md5_hex($body_copy);
			dbg ("IXHASH: Computed hash-value $digest via method 2");
			# Now check via DNS query
			$hits = 1 if _ixhash_lookup($resolver, $digest, $dnsserver);
		}
		# End of computing hash #2
		#-----------------------------------------------------------------------

		#-----------------------------------------------------------------------
		# Compute hash # 3 if
		# - there are at least 8 non-empty characters in the body
		# - neither hash #1 nor hash #2 have been computed
		#   ($digest is still shorter than a 32-char hex MD5)
		if (($body =~ /[\S]{8,}/) and (length($digest) < 32)) {
			$body_copy = $body;
			$body_copy =~ s/[[:cntrl:][:space:]=]+//g;
			# replace duplicate chars. This too suffers from a bug in perl
			# so we do it in two steps
			# Step One
			$body_copy =~ s/([[:print:]]{100})(?:\1+)/$1/g;
			# Step Two
			$body_copy =~ s/([[:graph:]])(?:\1+)/$1/g;
			# Computing actual hash
			$digest = md5_hex($body_copy);
			dbg ("IXHASH: Computed hash-value $digest via method 3");
			# Check via DNS
			$hits = 1 if _ixhash_lookup($resolver, $digest, $dnsserver);
		}
		# End of computing hash #3
		#-----------------------------------------------------------------------

		# Normal completion: put the previous alarm back.
		if (defined $oldalarm) {
			alarm $oldalarm;  $oldalarm = undef;
		}
	};
	# Save the error at once; $@ is a global that later code may clobber.
	my $err = $@;

	# If the eval died (timeout or otherwise) the restore above was
	# skipped, so put the previous alarm back here.
	if (defined $oldalarm) {
		alarm $oldalarm;  $oldalarm = undef;
	}

	# Error handling - part of Dallas' code
	if ($err =~ m/timeout/) {
		dbg("IXHASH: $timeout second timeout exceeded while checking $digest.$dnsserver");
	}
	elsif ($err) {
		dbg("IXHASH: lookup aborted: $err");
	}

	return $hits;
}
1;