=head1 NAME
Mail::SpamAssassin::Plugin::iXhash - compute hashesfuzzy checksums from mail bodies and compare to known spam ones via DNS
=head1 SYNOPSIS
loadplugin Mail::SpamAssassin::Plugin::iXhash /path/to/iXhash.pm
ixhash_timeout 10
body IXHASH eval:ixhashtest('ix.dnsbl.manitu.net')
describe IXHASH This mail has been classified as spam @ iX Magazine, Germany
tflags IXHASH net
score IXHASH 1.5
=head1 DESCRIPTION
iXhash.pm is a plugin for SpamAssassin 3.0.0 and up. It takes the body of a mail, removesstrips parts from it and then computes a MD5 hash value from the rest.
These values will then be looked up via DNS. Call it a 'poor man's DCC', if you want.
This plugin is based on
This plugin is based on parts of the procmail-based project 'NiX Spam', developed by Bert Ungerer.(un@ix.de)
For more information see http://www.heise.de/ix/nixspam/. The procmail code producing the hashes only can be found here:
ftp://ftp.ix.de/pub/ix/ix_listings/2004/05/checksums
Parts of the code were submitted via heise forum by 'kungfuhasi'
See http://www.heise.de/ix/foren/go.shtml?read=1&msg_id=7246759&forum_id=48292.
Martin Blapp (mb@imp.ch) found and solved a problem occuring on Perl 5.8.7. - and a way to bypass it. Thanks a lot!
TheFurther hashesimprovements from spam received(DNS timeouts) by Heise/iXDallas magazine are available at ix.dnsbl.manitu.net,
kindly provided by Manuel Schmitt.
The hashes from spam received by LogIn & Solutions AG and some of its customers are available at nospam.login-solutions.de.
A second list based on input from another source can be accessed at nospam.login-solutions.ag.
It's not too difficult to create your own blacklist provided you have enough input (read: spam). Well, even I managed to do
so. If you do likewise please drop that info somewhere so other people can use that one too
=cut
package iXhash;
use strict;
use Mail::SpamAssassin;
use Mail::SpamAssassin::Plugin;
use Digest::MD5 qw(md5 md5_hex md5_base64);
use Net::DNS;
use Net::DNS::Resolver;
# Locale - this was on Bert's wishlist
use POSIX qw(locale_h);
setlocale(LC_CTYPE, "de_DE.ISO8859-1");
# LC_CTYPE now "Deutsch, Deutschland, codeset ISO 8859-1"
# Maybe not appropriate for spam that is neither German nor English
our @ISA = qw(Mail::SpamAssassin::Plugin);
sub dbg { Mail::SpamAssassin::dbg (@_); }
sub new {
my ($class, $mailsa, $serverEngelken (dallase@uribl.com) - see comments for details.
=cut
package Mail::SpamAssassin::Plugin::iXhash;
use strict;
use Mail::SpamAssassin;
use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::Util;
use Digest::MD5 qw(md5 md5_hex md5_base64);
use Net::DNS;
use Net::DNS::Resolver;
# Locale - this was on Bert's wishlist
use POSIX qw(locale_h);
setlocale(LC_CTYPE, "de_DE.ISO8859-1");
# LC_CTYPE now "Deutsch, Deutschland, codeset ISO 8859-1"
# Maybe not appropriate for spam that is neither German nor English
use vars qw(@ISA);
@ISA = qw(Mail::SpamAssassin::Plugin);
sub dbg { Mail::SpamAssassin::dbg (@_); }
sub new {
my ($class, $mailsa, $server) = @_;
$class = ref($class) || $class;
my $self = $class->SUPER::new($mailsa);
bless ($self, $class);
$self->set_config($mailsa->{conf});
$self->register_eval_rule ("ixhashtest");
return $self;
}
sub set_config {
my ($self, $conf) = @_;
$classmy @cmds = ref($class) || $class;
my# $self = $class->SUPER::new($mailsa);
bless ($self, $class);
$self->register_eval_rule ("ixhashtest");
return $self;
}
implements ixhash_timeout config option - by dallase@uribl.com
push(@cmds, {
setting => 'ixhash_timeout',
default => 5,
type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
});
$conf->{parser}->register_commands(\@cmds);
}
sub ixhashtest {
my ($self, $permsgstatus,$muell,$dnsserver) = @_;
dbg("IXHASH: IxHash querying Server $dnsserver");
my ( my ($digest,$answer,$ixdigest,$body) = "";
my @body = $permsgstatus->{msg}->get_body();
my $resolver = Net::DNS::Resolver->new;
# $resolver->debug(1);
my $body_copy = "";
foreach (@body) {
$body .= join "", @$_;
}
$body .= join "", @$_;
}
my $rr;
my $hits = 0;
# This is code contributed by KungfuHasi. Comments partly# byalarm methe (Dirk Bonengel)
# Siehe dazu: http://www.heise.de/ix/foren/go.shtml?read=1&msg_id=7246759&forum_id=48292
# Danke, KungfuHasi, wer oder was immer Du sein magst!
#dns query - dallase@uribl.com
# --------------------------------------------------------------------------
# Some spaces to work with
# The procmail# codehere sayswe atimplement leastproper 16alarms, spacesala orPyzor, tabsRazor2 requiredplugins...
if ($body =~ /([\s\t].*?){16,}/m ) {
# Generate first MD5 over Body
$body_copy = $body;
# All space class chars just one time
# This bypasses a problem in Perl 5.8.7 where Perl segfaults
# if there are more than 2.600 identical chars to be replaced
$body_copy =~ s/([[:space:]]{100})(?:\1+)/$1/g;
# NOTE: This is the look-forward: (?:\1+)
$body_copy =~ s/([[:space:]])(?:\1+)/$1/g;
# remove graph class chars and some specials
$body_copy =~ s/[[:graph:]]+//go;
# First Digest
$digest = md5_hex($body_copy);
dbg ("IXHASH: Computed hash-value $digest via method 1");
dbg ("IXHASH: Now checking $digest.$dnsserver");
# Resolver-Objekt nehmen und Hash abtesten
$answer = $resolver->search($digest.'.'.$dnsserver, "A", "IN");
if ($answer) {
foreach $rr ($answer->answer) {
next unless $rr->type eq "A";
dbg ("IXHASH: Received reply from $dnsserver:". $rr->address);
$hits = 1 if $rr->address;
return $hits;
}
}
}
# IF-Condition selbstgemacht - hoffentlich stimmts
# The original procmail code says:
# This checksum requires at least 3 of the following characters:
# >* 1^1 ([<>()|@*'!?,]|:/)
# (To match something like "Already seen? http://host.domain.tld/")
if ($body =~ /((([<>\(\)\|@\*'!?,])|(:\/)).*?){3,}/m ) {
# Genearation of 2nd Digest
$body_copy = $body;
# keep the alarm as $oldalarm, so we dont loose the timeout-child alarm
# see http://issues.apache.org/SpamAssassin/show_bug.cgi?id=3828#c123
my $oldalarm = 0;
my $timeout = $permsgstatus->{main}->{conf}->{'ixhash_timeout'} || 5;
eval {
Mail::SpamAssassin::Util::trap_sigalrm_fully(sub { die "ixhash timeout reached"; });
$oldalarm = alarm($timeout);
#-----------------------------------------------------------------------
# Creation of hash # 1 if following conditions are met:
# - mail contains at least 16 spaces or tabs
# - mail consists of at least 2 lines
if (($body =~ /([\s\t].+?){16,}/ ) && ($body =~ /.*$.*/)){
# All space class chars just one time
# Do this in two steps to avoid Perl segfaults
# if there are more than 2.600 identical chars to be replaced
# Step One
$body_copy =~ s/([[:space:]]{100})(?:\1+)/$1/g;
# Step Two
$body_copy =~ s/([[:space:]])(?:\1+)/$1/g;
# remove graph class chars and some specials
$body_copy =~ s/[[:graph:]]+//go;
# Create actual digest
$digest = md5_hex($body_copy);
dbg ("IXHASH: Computed hash-value $digest via method 1");
dbg ("IXHASH: Now checking $digest.$dnsserver");
# Now check via DNS query
$answer = $resolver->search($digest.'.'.$dnsserver, "A", "IN");
if ($answer) {
foreach $rr ($answer->answer) {
next unless $rr->type eq "A";
dbg ("IXHASH: Received reply from $dnsserver:". $rr->address);
$hits = 1 if $rr->address;
}
}
}
# End of computing hash #1
#---------------------------------------------------------------------
#---------------------------------------------------------------------
# Creation of hash # 1 if mail contains at least 3 of the following characters:
# '[<>()|@*'!?,]' or the combination of ':/'
# (To match something like "Already seen? http:/host.domain.tld/")
if ($body =~ /((([<>\(\)\|@\*'!?,])|(:\/)).*?){3,}/m ) {
$body_copy = $body;
# remove redundant stuff
$body_copy =~ s/[[:cntrl:][:alnum:]%&#;=]+//g;
# replace '_' with '.'
$body_copy =~ tr/_/./;
# replace duplicate chars. This too suffers from a bug in perl
# so we do it in two steps
# Step One
$body_copy =~ s/([[:print:]]{100})(?:\1+)/$1/g;
# Step Two
$body_copy =~ s/([[:print:]])(?:\1+)/$1/g;
# Computing hash...
$digest = md5_hex($body_copy);
dbg ("IXHASH: Computed hash-value $digest via method 2");
dbg ("IXHASH: Now checking $digest.$dnsserver");
# Now check via DNS query
$answer = $resolver->search($digest.'.'.$dnsserver, "A", "IN");
if ($answer) {
foreach $rr ($answer->answer) {
next unless $rr->type eq "A";
dbg ("IXHASH: Received reply from $dnsserver:". $rr->address);
$hits = 1 if $rr->address;
}
}
}
# End of computing hash #2
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
# Compute hash # 3 if
# - there are at least 8 non-empty characters in the body
# - neither hash #1 nor hash #2 have been computed
if (($body =~ /[\S]{8,}/) and (length($digest) < 32)) {
$body_copy = $body;
$body_copy =~ s/[[:cntrl:][:space:]=]+//g;
# replace duplicate chars. This too suffers from a bug in perl
# so we do it in two steps
# Step One
$body_copy =~ s/([[:cntrlprint:][:alnum:]%&#;=]+/]]{100})(?:\1+)/$1/g;
$body_copy =~ tr/_/./;
# Mod submitted by Martin Blapp (mb@imp.ch)
# This bypasses a problem in Perl 5.8.7 where# PerlStep segfaults
# if there are more than 2.600 identical chars to be replaced
Two
$body_copy =~ s/([[:printgraph:]]{100})(?:\1+)/$1/g;
# This (original code) still applies
$body_copy =~ s/([[:print:]])(?:\1+)/$1/g;
$digest = md5_hex($body_copy);
# Computing actual hash
$digest = md5_hex($body_copy);
dbg ("IXHASH: Computed hash-value $digest via method 3");
dbg ("IXHASH: ComputedNow hash-valuechecking $digest.$dnsserver");
# Check via method 2");
dbg ("IXHASH: Now checking $digest.$dnsserver");
# Hash abtesten
DNS
$answer = $resolver->search($digest.'.'.$dnsserver, "A", "IN");
if ($answer) {
foreach $rr ($answer->answer) {
next unless $rr->type eq "A";
dbg ("IXHASH: Received reply from $dnsserver:". $rr->address);
$hits = 1 if $rr->address;
return $hits;
}
}
}
# Requirement here in procmail:
# >* [^ ][^ ][^ ][^ ]
# (Min. 8 non-empty characters in the body/ Min. 8 nicht-leere Zeichen im Body)
# sowie: Hash 1 und 2 trafen nicht!
if (($body =~ /[\S]{8,}/) and (length($digest) < 32)) {
$body_copy = $body;
$body_copy =~ s/[[:cntrl:][:space:]=]+//g;
# Mod submitted by Martin Blapp (mb@imp.ch)
# This bypasses a problem in Perl 5.8.7 where Perl segfaults
# if there are more than 2.600 identical chars to be replaced
$body_copy =~ s/([[:print:]]{100})(?:\1+)/$1/g;
# This (original code) still applies
$body_copy =~ s/([[:graph:]])(?:\1+)/$1/g;
$digest = md5_hex($body_copy);
dbg ("IXHASH: Computed hash-value $digest via method 3");
dbg ("IXHASH: Now checking $digest.$dnsserver");
# Hash abtesten
$answer = $resolver->search($digest.'.'.$dnsserver, "A", "IN");
if ($answer) {
foreach $rr ($answer->answer) {
next unless $rr->type eq "A";
dbg ("IXHASH: Received reply from $dnsserver:". $rr->address);
$hits = 1 if $rr->address;
return $hits;
}
}
} $hits = 1 if $rr->address;
}
}
}
if (defined $oldalarm) {
alarm $oldalarm; $oldalarm = undef;
}
}; # End of sub ixhashtest
# Error handling - parto of Dallas' code
if ($@ =~ m/timeout/) {
dbg("IXHASH: $timeout second timeout exceeded while checking $digest.$dnsserver");
}
return $hits;
}
1;
|