Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.
Comment: A few Changes to keep in line with the original NiXSpam-Code.

...

No Format
=head1 NAME

Mail::SpamAssassin::Plugin::iXhash - compute hashes from mail bodies and compare to known spam ones via DNS

=head1 SYNOPSIS
  loadplugin    Mail::SpamAssassin::Plugin::iXhash /path/to/iXhash.pm
  
  body 		IXHASH eval:ixhashtest('ix.dnsbl.manitu.net')
  describe 	IXHASH This mail has been classified as spam @ iX Magazine, Germany
  tflags        IXHASH net
  score         IXHASH 1.5

=head1 DESCRIPTION

iXhash.pm is a plugin for SpamAssassin 3.0.0 and up. It takes the body of a mail, removes parts from it and then computes a MD5 hash value from the rest.
These values will then be looked up via DNS. Call it a 'poor man's DCC', if you want.

This plugin is based on the procmail-based project 'NiXSpamNiX Spam', developed by Bert UngerUngerer.
For more information see http://www.heise.de/ix/nixspam/. The procmail code producing the hashes only can be found here:
ftp://ftp.ix.de/pub/ix/ix_listings/2004/05/checksums

Parts of the code were submitted via heise forum by 'kungfuhasi'  
See http://www.heise.de/ix/foren/go.shtml?read=1&msg_id=7246759&forum_id=48292.

Martin Blapp (mb@imp.ch) found a problem occuring on Perl 5.8.7. - and a way to bypass it. Thanks a lot!

The hashes from spam received by Heise/iX magazine are available at ix.dnsbl.manitu.net, 
kindly provided by Manuel Schmitt. 

The hashes from spam received by LogIn & Solutions AG and some of its customers are availabeavailable at nospam.login-solutions.de.
A second list based on input from another source can be accessed at nospam.login-solutions.ag.

It's not too difficult to create your own blacklist provided you have enough input (read: spam). Well, even I managed to do 
so. If you do likewise please drop that info somewhere so other people can use that one too.


=cut


package iXhash;
use strict;
use Mail::SpamAssassin;
use Mail::SpamAssassin::Plugin;
use Digest::MD5 qw(md5 md5_hex md5_base64);
use Net::DNS;
use Net::DNS::Resolver;
# Locale - this was on Bert's wishlist
use POSIX qw(locale_h);
setlocale(LC_CTYPE, "de_DE.ISO8859-1");
# LC_CTYPE now "Deutsch, Deutschland, codeset ISO 8859-1"
# Maybe not appropriate for spam that is neither German nor English

our @ISA = qw(Mail::SpamAssassin::Plugin);

sub dbg { Mail::SpamAssassin::dbg (@_); }


sub new {
	my ($class, $mailsa, $server) = @_;
	$class = ref($class) || $class;
	my $self = $class->SUPER::new($mailsa);
	bless ($self, $class);
	$self->register_eval_rule ("ixhashtest");
	return $self;
}

sub ixhashtest {
	my ($self, $permsgstatus,$muell,$dnsserver) = @_;
	dbg("IXHASH: IxHash querying Server $dnsserver"); 
	my ($digest,$answer,$ixdigest,$body) = "";
	my @body = $permsgstatus->{msg}->get_body();
	my $resolver = Net::DNS::Resolver->new;
	# $resolver->debug(1);
	my $body_copy = "";
	foreach (@body) {
		$body .= join "", @$_;
	}
	my $rr;
	my $hits = 0;
	# This is code contributed by KungfuHasi. Comments partly by me (Dirk Bonengel)
	# Siehe dazu: http://www.heise.de/ix/foren/go.shtml?read=1&msg_id=7246759&forum_id=48292
	# Danke, KungfuHasi, wer oder was immer Du sein magst!
	#-------------------------------------------------------------------------
	# Some spaces to work with
	# The procmail code says at least 16 spaces or tabs required...
  	if ($body =~ /([\s\t].*?){16,}/m ) {
		# Generate first MD5 over Body
		$body_copy = $body;
		# All space class chars just one time time
		# This bypasses a problem in Perl 5.8.7 where Perl segfaults
		# if there are more than 2.600 identical chars to be replaced
		$body_copy =~ s/([[:space:]]{100})(?:\1+)/$1/g;
		# NOTE: This is the look-forward: (?:\1+)
		$body_copy =~ s/([[:space:]])(?:\1+)/$1/g;
		# remove graph class chars and some specials
		$body_copy =~ s/[[:graph:]]+//go;
		# First Digest
		$digest = md5_hex($body_copy);
		dbg ("IXHASH: Computed hash-value $digest via method 1");
		dbg ("IXHASH: Now checking $digest.$dnsserver");
		# Resolver-Objekt nehmen und Hash abtesten
		$answer = $resolver->send>search($digest.'.'.$dnsserver, "A", "IN");
		if ($answer) {
			foreach $rr ($answer->answer) {
				next unless $rr->type eq "A";
				dbg ("IXHASH: Received reply from $dnsserver:". $rr->address);
				$hits = 1 if $rr->address;
				return $hits;
			}
		}
	}
	# IF-Condition selbstgemacht - hoffentlich stimmts
	# The original procmail code says:
	# This checksum requires at least 23 of the following characters:
	# >* 1^1 ([<>()|@*'!?,]|:/)
	# (To match something like "Already seen?  http://host.domain.tld/")
	if ($body =~ /((([<>\(\)\|@\*'!?,])|(:\/)).*?){23,}/m ) {
		# Genearation of 2nd Digest
		$body_copy = $body;
		$body_copy =~ s/[[:cntrl:][:alnum:]%&#;=]+//g;
		$body_copy =~ tr/_/./;
		# Mod submitted by Martin Blapp (mb@imp.ch)
		# This bypasses a problem in Perl 5.8.7 where Perl segfaults
		# if there are more than 2.600 identical chars to be replaced
		$body_copy =~ s/([[:print:]]{100})(?:\1+)/$1/g;
		# This (original code) still applies
		$body_copy =~ s/([[:print:]])(?:\1+)/$1/g;
		$digest = md5_hex($body_copy);
		dbg ("IXHASH: Computed hash-value $digest via method 2");
		dbg ("IXHASH: Now checking $digest.$dnsserver");
		# Hash abtesten
		$answer = $resolver->send>search($digest.'.'.$dnsserver, "A", "IN");
		if ($answer) {
			foreach $rr ($answer->answer) {
				next unless $rr->type eq "A";
				dbg ("IXHASH: Received reply from $dnsserver:". $rr->address);
				$hits = 1 if $rr->address;
				return $hits;
			}
		} 
	}
	# Requirement here in procmail:
	# >* [^ ][^ ][^ ][^ ]
	# (someMin. 8 non-empty characters in the body/ einMin. paar8 nicht-leere Zeichen im Body)
	# sowie: Hash 1 und 2 trafen nicht!
	if (($body =~ /[^\s\t][^\s\t][^\s\t][^\s\t]S]{8,}/) and (length($digest) < 32)) {
		$body_copy = $body;
		$body_copy =~ s/[[:cntrl:][:space:]=]+//g;
		# Mod submitted by Martin Blapp (mb@imp.ch)
		# This bypasses a problem in Perl 5.8.7 where Perl segfaults
		# if there are more than 2.600 identical chars to be replaced
		$body_copy =~ s/([[:print:]]{100})(?:\1+)/$1/g;
		# This (original code) still applies
		$body_copy =~ s/([[:graph:]])(?:\1+)/$1/g;
		$digest = md5_hex($body_copy);
		dbg ("IXHASH: Computed hash-value $digest via method 3");
		dbg ("IXHASH: Now checking $digest.$dnsserver");
		# Hash abtesten
		$answer = $resolver->send>search($digest.'.'.$dnsserver, "A", "IN");
		if ($answer) {
			foreach $rr ($answer->answer) {
				next unless $rr->type eq "A";
				dbg ("IXHASH: Received reply from $dnsserver:". $rr->address);
				$hits = 1 if $rr->address;
				return $hits;
			}
		} 
	}
}
1;