=head1 NAME
Mail::SpamAssassin::Plugin::iXhash - compute hashes from mail bodies and compare to known spam ones via DNS
=head1 SYNOPSIS
loadplugin Mail::SpamAssassin::Plugin::iXhash /path/to/iXhash.pm
body IXHASH eval:ixhashtest('ix.dnsbl.manitu.net')
describe IXHASH This mail has been classified as spam @ iX Magazine, Germany
tflags IXHASH net
score IXHASH 1.5
=head1 DESCRIPTION
iXhash.pm is a plugin for SpamAssassin 3.0.0 and up. It takes the body of a mail, removes
- if certain requirements are met - irrelevant parts from it and then computes a MD5 hash value from the rest.
These values will then be looked up via DNS. Call it a 'poor man's DCC', if you want.
This plugin is based on the procmail-based project 'NiXSpam', developed by Bert Ungerer (un-at-ix-dot-de), editor with the German IT magazine iX.
For more information see http://www.heise.de/ix/nixspam/. The procmail code producing the hashes only can be found here:
ftp://ftp.ix.de/pub/ix/ix_listings/2004/05/checksums
Parts of the code were submitted via heise forum by 'kungfuhasi'
See http://www.heise.de/ix/foren/go.shtml?read=1&msg_id=7246759&forum_id=48292.
The hashes from spam received by Heise/iX magazine are available at ix.dnsbl.manitu.net,
kindly provided by Manuel Schmitt.
It's not too difficult to create your own blacklist provided you have enough input (read: spam). If you do so please drop that info somewhere so other people can use that one too.
I know I'm a lousy coder but at least I'm willing to learn: If you have improvements to make let me know: dirk-dot-bonengel-at-login-solutions.de
=cut
package iXhash;
use strict;
use Mail::SpamAssassin;
use Mail::SpamAssassin::Plugin;
use Digest::MD5 qw(md5 md5_hex md5_base64);
use Net::DNS;
use Net::DNS::Resolver;
# Locale - this was on Bert's wishlist
use POSIX qw(locale_h);
setlocale(LC_CTYPE, "de_DE.ISO8859-1");
# LC_CTYPE now "Deutsch, Deutschland, codeset ISO 8859-1"
# Maybe not appropriate for spam that is neither German nor English
our @ISA = qw(Mail::SpamAssassin::Plugin);
sub dbg { Mail::SpamAssassin::dbg (@_); }
sub new {
my ($class, $mailsa, $server) = @_;
$class = ref($class) || $class;
my $self = $class->SUPER::new($mailsa);
bless ($self, $class);
$self->register_eval_rule ("ixhashtest");
return $self;
}
sub ixhashtest {
my ($self, $permsgstatus,$muell,$dnsserver) = @_;
dbg("IXHASH: IxHash querying Server $dnsserver");
my ($digest, $digest2) = "";
my ($answer,$ixdigest,$body) = "";
my @body = $permsgstatus->{msg}->get_body();
my $resolver = Net::DNS::Resolver->new;
my $body_copy = "";
foreach (@body) {
$body .= join "", @$_;
}
my $rr;
my $hits = 0;
# This is code contributed by KungfuHasi. Comments party by me (Dirk Bonengel)
# Siehe dazu: http://www.heise.de/ix/foren/go.shtml?read=1&msg_id=7246759&forum_id=48292
# Danke, KungfuHasi, wer oder was immer Du sein magst!
#-------------------------------------------------------------------------
# Some spaces to work with
# The procmail code says at least 16 spaces or tabs required...
if ($body =~ /[\s\t].+[\s\t].*$?.*$?.*[\s\t]/ ) {
# Generate first MD5 over Body
$body_copy = $body;
# All space class chars just one time
# NOTE: This is the look-forward: (?:\1+)
$body_copy =~ s/([[:space:]])(?:\1+)/$1/g;
# remove graph class chars and some specials
$body_copy =~ s/[[:graph:]]+//go;
# First Digest
$digest = md5_hex($body_copy);
dbg ("IXHASH: Computed hash-value $digest");
dbg ("IXHASH: Now checking $digest.$dnsserver");
# Resolver-Objekt nehmen und Hash abtesten
$answer = $resolver->send($digest.'.'.$dnsserver);
if ($answer) {
foreach $rr ($answer->answer) {
next unless $rr->type eq "A";
dbg ("IXHASH: Received reply from $dnsserver:". $rr->address."\n");
$hits = 1 if $rr->address;
return $hits;
}
}
}
# IF-Condition selbstgemacht - hoffentlich stimmts
# The original procmail code says:
# This checksum requires at least 2 of the following characters:
# >* 1^1 ([<>()|@*'!?,]|:/)
# (To match something like "Already seen? http://host.domain.tld/")
if ($body =~ /(([<>\(\)\|@\*'!?,]|:\/).*?([<>\(\)\|@\*'!?,]|:\/))/ms ) {
# Genearation of 2nd Digest
$body_copy = $body;
$body_copy =~ s/[[:cntrl:][:alnum:]%&#;=]+//g;
$body_copy =~ tr/_/./;
$body_copy =~ s/([[:print:]])(?:\1+)/$1/g;
$digest = md5_hex($body_copy);
dbg ("IXHASH: Computed hash-value $digest\n");
dbg ("IXHASH: Now checking $digest.$dnsserver");
# Hash abtesten
$answer = $resolver->send($digest.'.'.$dnsserver);
if ($answer) {
foreach $rr ($answer->answer) {
next unless $rr->type eq "A";
dbg ("IXHASH: Received reply from $dnsserver:". $rr->address."\n");
$hits = 1 if $rr->address;
return $hits;
}
}
}
# Requirement here in procmail:
# >* [^ ][^ ][^ ][^ ]
# (some non-empty characters in the body/ ein paar nicht-leere Zeichen im Body)
if ($body =~ /[^\s\t][^\s\t][^\s\t][^\s\t]/) {
$body_copy = $body;
$body_copy =~ s/[[:cntrl:][:space:]=]+//g;
$body_copy =~ s/([[:graph:]])(?:\1+)/$1/g;
$digest = md5_hex($body_copy);
dbg ("IXHASH: Computed hash-value $digest\n");
dbg ("IXHASH: Now checking $digest.$dnsserver");
# Hash abtesten
$answer = $resolver->send($digest.'.'.$dnsserver);
if ($answer) {
foreach $rr ($answer->answer) {
next unless $rr->type eq "A";
dbg ("IXHASH: Received reply from $dnsserver:". $rr->address."\n");
$hits = 1 if $rr->address;
return $hits;
}
}
}
}
1;
|