Changeset 13 for trunk


Ignore:
Timestamp:
01/13/10 17:40:03 (14 years ago)
Author:
Kris Deugau
Message:

/trunk/dnsbl

Update extract-data from live copy again

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/dnsbl/extract-data

    r12 r13  
    8787my %urilist;
    8888
    89 for (my $i=0; $i<$msgcount; $i++) {
     89# put together an array of netblocks we won't/can't list for various reasons
     90my @dontlistme = (
     91    # Hotmail/Windows Live Mail
     92        NetAddr::IP->new("65.52.0.0/14"),
     93
     94    # AOL - note only some IPs show mail-ish rDNS
     95    #IP-Network                    205.188.0.0/16
     96    #IP-Network                    64.12.0.0/16
     97        NetAddr::IP->new("205.188.105.140/29"),
     98        NetAddr::IP->new("205.188.169.196/29"),
     99        NetAddr::IP->new("205.188.249.128/29"),
     100        NetAddr::IP->new("205.188.249.64/29"),
     101
     102    # Google/GMail
     103        NetAddr::IP->new("209.85.128.0/17"),
     104        NetAddr::IP->new("72.14.192.0/18"),
     105
     106    # Yahoo!/Inktomi
     107        NetAddr::IP->new("98.136.0.0/14"),
     108        NetAddr::IP->new("66.196.64.0/18"),
     109        NetAddr::IP->new("67.195.0.0/16"),
     110        NetAddr::IP->new("69.147.64.0/18"),
     111        NetAddr::IP->new("206.190.32.0/18"),
     112        NetAddr::IP->new("68.142.192.0/18"),
     113        NetAddr::IP->new("216.252.96.0/19"),
     114        NetAddr::IP->new("124.83.128.0/17"),
     115        NetAddr::IP->new("217.146.184.0/21"),
     116        NetAddr::IP->new("124.108.96.0/20"),
     117        NetAddr::IP->new("76.13.0.0/16"),
     118        NetAddr::IP->new("68.180.128.0/17"),
     119        NetAddr::IP->new("209.191.64.0/18"),
     120        NetAddr::IP->new("212.82.104.0/21"),
     121        NetAddr::IP->new("66.163.160.0/19"),
     122
     123    # Bell Canada - note only some IPs show mail-ish rDNS
     124    #IP-Network                    209.226.0.0/16
     125    #IP-Network                    207.236.0.0/16
     126        NetAddr::IP->new("209.226.175.0/24"),
     127        NetAddr::IP->new("207.236.237.0/26"),
     128
     129    # Craigslist
     130    #IP-Network                    208.82.236.0/22
     131        NetAddr::IP->new("208.82.236.0/22"),
     132
     133    # Apple.com/mac.com - note only some IPs show mail-ish rDNS
     134    #IP-Network                    17.0.0.0/8
     135    # asmtpout0(11-30).mac.com
     136    # 17.148.16.        011 -> 86       030 -> 105
     137        NetAddr::IP->new("17.148.16.64/26"),
     138
     139    # Vodafone - note only some IPs show mail-ish rDNS
     140    #route:        212.183.128.0/19
     141        NetAddr::IP->new("212.183.156.224/29"),
     142
     143    # Eastlink (formerly Persona [Sudbury etc]) - only one IP observed with mail-ish rDNS
     144    #IP-Network                    24.222.0.0/16
     145        NetAddr::IP->new("24.222.0.30"),
     146
     147    # Cogeco - only a few IPs observed with mail-ish rDNS
     148    #IP-Network                    216.221.64.0/19
     149        NetAddr::IP->new("216.221.81.192"),
     150        NetAddr::IP->new("216.221.81.96/30"),
     151
     152    # UAlberta - only one IP observed with mail-ish rDNS
     153    #IP-Network                    129.128.0.0/16
     154        NetAddr::IP->new("129.128.5.19"),
     155    ); # done def for @dontlistme
     156
     157MSG: for (my $i=0; $i<$msgcount; $i++) {
    90158  my $msg = $imap->message_string($msgs[$i]);
    91159
     
    135203  my @untrusted = @{$stmsg->{metadata}->{relays_untrusted}};
    136204
    137   my $sa_intip = new NetAddr::IP $untrusted[0]->{ip};
    138 
    139 
    140   my %headerlist = %{$imap->parse_headers($msgs[$i], "Received")};
    141   my $recvnum = 0;
    142   my $recv = $headerlist{'Received'}[$recvnum];
    143   next if !$recv;
    144 
    145   my $relayip;
    146 
    147   #Received: from mail.company.com [ip.add.re.ss]
    148   #        by localhost with POP3 (fetchmail-6.2.5)
    149   #        for kdeugau@localhost (single-drop); Fri, 15 May 2009 11:45:10 -0400 (EDT)
    150   if ($recv =~ /from mail\.company\.com \[ip\.add\.re\.ss\]\s*by localhost with POP3 \(fetchmail/) {
    151     $recvnum += 1;
    152     $recv = $headerlist{'Received'}[$recvnum];
    153   }
    154 
    155   if ($recv =~ /^by mx\d\.company\.com \(Postfix, from userid \d+\)/) {
    156     $recvnum++;
    157     $recv = $headerlist{'Received'}[$recvnum];
    158   }
    159 
    160 # le sigh.  gotta bypass a message if we can't parse the headers.  Outlook
    161 # does an admirable job of mangling things for us.  >:(
    162   if ($recv !~ /by mx\d\.company\.com \(Postfix\)/) {
    163     print "phtui:  $recv\n";
    164     next;
    165   }
    166 
    167 ##fixme
    168 # le sigh.  skip IP extraction on tagged spam reported as nonspam, since the real spam is a layer deeper.
    169 next if $recv =~ /from localhost by mfs\d with SpamAssassin/;
    170 
    171 # Postini puts the "real" received: header one layer further out - SA is configured to compensate for this so we do too
    172   #IP-Network                    64.18.0.0/20
    173   #IP-Network-Block              064.018.000.000 - 064.018.015.255
    174   #Org-Name                      Postini, Inc.
    175   if ($recv =~ /\[64\.18\.(?:[0-9]|1[0-5])\.\d+]\) by mx\d\.company\.com/) {
    176     $recv = $recv = $headerlist{'Received'}[++$recvnum];
    177 #Received: from source ([208.95.48.65]) (using TLSv1) by
    178 # exprod5mx230.postini.com ([64.18.4.10]) with SMTP; Fri, 10 Jul
    179     my ($tmprelayip) = ($recv =~ /from source \(\[([\d.]+)\]\) (?:\(using TLSv1\) )?by (?:exprod\dm[xo]b?|chipmx)\d+\.postini\.com/);
    180     $relayip = new NetAddr::IP $tmprelayip;
    181 
    182   } elsif ($recv =~ /\[137\.82\.45\.(?:[0-9]|1[0-5])\]\) by mx\d\.company\.com/) {
    183 # Customer with (spam)forwarding from UBC - enough to justify this code
    184   #IP-Network                    137.82.0.0/16
    185   #IP-Network-Block              137.082.000.000 - 137.082.255.255
    186   #Org-Name                      University of British Columbia
    187   # only 137.82.45.0/28 or so seem to be outbound relays (duh)
    188     $recv = $recv = $headerlist{'Received'}[++$recvnum];
    189 #Received: from bcnbib.gov.ar (200-42-22-14.dup.prima.net.ar [200.42.22.14])
    190 #  by mr4.mail-relay.ubc.ca (Postfix)
    191     my ($tmprelayip) = ($recv =~ /from \[?[a-zA-Z0-9._-]+\]? \([a-zA-Z0-9._-]+ \[([\d+.]+)\]\) by mr\d\.mail-relay\.ubc\.ca \(Postfix\)/);
    192     $relayip = new NetAddr::IP $tmprelayip;
    193 
    194   } else {
    195     my ($tmprelayip) = ($recv =~ /\[([\d+.]+)\]\) by mx\d\.company\.com/);
    196     $relayip = new NetAddr::IP $tmprelayip;
    197   }
    198 
    199 print "eep, no ip from manual extraction\n$recv\n" if !$relayip;
    200 print "SA vs manual extraction, relay IP mismatch: $sa_intip vs $relayip on\n\t$recv\n" if $sa_intip != $relayip;
    201 
    202 # Hotmail/Windows Live Mail may originate or relay spam, but we can't blacklist them
    203   #Received: from blu0-omc4-s23.blu0.hotmail.com (blu0-omc4-s23.blu0.hotmail.com
    204   # [65.55.111.162]) by mx2.company.com (Postfix)
    205 #  next if $recv =~ /from (?:bay|blu|col|snt)0-omc\d+-s\d+\.(?:bay|blu|col|snt)0\.hotmail\.com
    206 \((?:bay|blu|col|snt)0-omc\d+-s\d+\.(?:bay|blu|col|snt)0\.hotmail\.com \[65.5[2345].\d+\.\d+\]\) by mx\d\.company\.com/;
    207   #IP-Network                    65.52.0.0/14
    208   #IP-Network-Block              065.052.000.000 - 065.055.255.255
    209   #Org-Name                      Microsoft Corp
    210   my $hotmail1 = new NetAddr::IP "65.52.0.0/14";
    211 print "$.: $recv\n" if !defined ($relayip);
    212   next if $relayip->within($hotmail1);
    213 
    214 # AOL may originate or relay spam, but we can't blacklist them
    215   #Received: from omr-m33.mx.aol.com (omr-m33.mx.aol.com [64.12.143.145]) by
    216   # mx1.company.com (Postfix) with ESMTP id 7B9431C3255 for <webmaster@tyenet.com>;
    217   next if $recv =~ /from (?:omr|im[or])-[dm][ab]?\d+\.mx\.aol\.com \((?:omr|im[or])-[dm][ba]?\d+\.mx\.aol\.com \[[\d.]+\]\) by mx\d\.company\.com/;
    218 
    219 # Google may relay spam, GMail may originate it, but we can't blacklist them.
    220   #IP-Network                    209.85.128.0/17
    221   #IP-Network-Block              209.085.128.000 - 209.085.255.255
    222   #Org-Name                      Google Inc.
    223   next if $recv =~ /\[209\.85\.(?:1(?:2[89]|[3-9]\d)|2(?:[0-4]\d|5[0-5]))\.\d+\]\) by mx\d\.company\.com/;
    224   #OrgName:    Google Inc.
    225   #NetRange:   72.14.192.0 - 72.14.255.255
    226   #CIDR:       72.14.192.0/18
    227   next if $recv =~ /\[72\.14\.(?:19[2-9]|2(?:[0-4]\d|5[0-5]))\.\d+\]\) by mx\d\.company\.com/;
    228 
    229 # Yahoo! may ... yadda yadda yadda  (geeze they've got a whack of netblocks for mail...)
    230  #IP-Network                    98.136.0.0/14
    231   my $yahoo1 = new NetAddr::IP "98.136.0.0/14";
    232   next if $relayip->within($yahoo1);
    233  #IP-Network                    66.196.64.0/18
    234  #Org-Name                      Inktomi Corporation
    235  # Inktomi ~~ Yahoo!
    236   my $yahoo2 = new NetAddr::IP "66.196.64.0/18";
    237   next if $relayip->within($yahoo2);
    238  #IP-Network                    67.195.0.0/16
    239   my $yahoo3 = new NetAddr::IP "67.195.0.0/16";
    240   next if $relayip->within($yahoo3);
    241  #IP-Network                    69.147.64.0/18
    242   my $yahoo4 = new NetAddr::IP "69.147.64.0/18";
    243   next if $relayip->within($yahoo4);
    244  #IP-Network                    206.190.32.0/19
    245  #Org-Name                      Yahoo! Broadcast Services, Inc.
    246   my $yahoo5 = new NetAddr::IP "206.190.32.0/18";
    247   next if $relayip->within($yahoo5);
    248  #IP-Network                    68.142.192.0/18
    249  #Org-Name                      Inktomi Corporation
    250   my $yahoo6 = new NetAddr::IP "68.142.192.0/18";
    251   next if $relayip->within($yahoo6);
    252  #IP-Network                    216.252.96.0/19
    253   my $yahoo7 = new NetAddr::IP "216.252.96.0/19";
    254   next if $relayip->within($yahoo7);
    255  #inetnum:      124.83.128.0 - 124.83.255.255
    256   my $yahoo8 = new NetAddr::IP "124.83.128.0/17";
    257   next if $relayip->within($yahoo8);
    258  #inetnum:        217.146.184.0 - 217.146.191.47
    259   my $yahoo9 = new NetAddr::IP "217.146.184.0/21";
    260   next if $relayip->within($yahoo9);
    261  #inetnum:      124.108.96.0 - 124.108.111.255
    262   my $yahoo10 = new NetAddr::IP "124.108.96.0/20";
    263   next if $relayip->within($yahoo10);
    264  #IP-Network                    76.13.0.0/16
    265   my $yahoo11 = new NetAddr::IP "76.13.0.0/16";
    266   next if $relayip->within($yahoo11);
    267  #IP-Network                    68.180.128.0/17
    268   my $yahoo12 = new NetAddr::IP "68.180.128.0/17";
    269   next if $relayip->within($yahoo12);
    270  #IP-Network                    209.191.64.0/18
    271   my $yahoo13 = new NetAddr::IP "209.191.64.0/18";
    272   next if $relayip->within($yahoo13);
    273  #route:          212.82.104.0/21
    274   my $yahoo14 = new NetAddr::IP "212.82.104.0/21";
    275   next if $relayip->within($yahoo14);
    276  #IP-Network                    66.163.160.0/19
    277   my $yahoo15 = new NetAddr::IP "66.163.160.0/19";
    278   next if $relayip->within($yahoo15);
    279 
    280 # and the same goes for Bell Canada.  *le sigh*
    281 #IP-Network                    209.226.0.0/16
    282 #IP-Network-Block              209.226.000.000 - 209.226.255.255
    283 #Org-Name                      Bell Canada
    284 #Received: from tomts35-srv.bellnexxia.net (tomts35.bellnexxia.net
    285 # [209.226.175.109]) by mx2.company.com (Postfix) with ESMTP id B415C16752D for
    286 # <user@company.com>; Sat,  4 Jul 2009 10:48:24 -0400 (EDT)
    287 # hmm.  tomts\d(-srv)?.bellnexxia.net only seem to be in .175/24.  we'll just drop those ones for now...
    288 # especially since there appear to be hosted customers etc in the same ARIN allocation above.
    289   my $bell1 = new NetAddr::IP "209.226.175.0/24";
    290   next if $relayip->within($bell1);
    291   #IP-Network                    207.236.0.0/16
    292   # only listing a subsection - rDNS hosts look like Bell SMTP hardware
    293   my $bell2 = new NetAddr::IP "207.236.237.0/26";
    294   next if $relayip->within($bell2);
    295 
    296 # ... and your little dog too!
    297 #IP-Network                    208.82.236.0/22
    298 #IP-Network-Block              208.082.236.000 - 208.082.239.255
    299 #Org-Name                      Craigslist, Inc.
    300   my $craigslist1 = new NetAddr::IP "208.82.236.0/22";
    301   next if $relayip->within($craigslist1);
    302 
    303 # not gonna whitelist the whole enchilada... just the asmtpout0(11-30).mac.com
    304 # 17.148.16     011 -> 86       030 -> 105
    305 #IP-Network                    17.0.0.0/8
    306 #IP-Network-Block              017.000.000.000 - 017.255.255.255
    307 #Org-Name                      Apple Computer, Inc.
    308   my $apple1 = new NetAddr::IP "17.148.16.64/26";
    309   next if $relayip->within($apple1);
    310 
    311 # and Vodafone...
    312 # 212.183.156.227  (.227 through .230 have server rdns)
    313 #route:        212.183.128.0/19
    314 #descr:        Vodafone UK
    315 #inetnum:        212.183.156.0 - 212.183.156.255
    316 #descr:          Vodafone Limited
    317   my $voda1 = new NetAddr::IP "212.183.156.224/29";
    318   next if $relayip->within($voda1);
    319 
    320 # ooohhh, Eastlink wants to join the party
    321 #24.222.0.30
    322 #IP-Network                    24.222.0.0/16
    323 #IP-Network-Block              024.222.000.000 - 024.222.255.255
    324 #Org-Name                      Bragg Communications Incorporated
    325   my $eastlink1 = new NetAddr::IP "24.222.0.30";
    326   next if $relayip->within($eastlink1);
    327 
    328 # and now Cogeco
    329 #216.221.81.192
    330 #IP-Network                    216.221.64.0/19
    331 #IP-Network-Block              216.221.064.000 - 216.221.095.255
    332 #Org-Name                      Cogeco Telecom
    333 # only ignoring systems-looking IPs or blocks with mostly systems-looking IPs
    334   my $cogeco1 = new NetAddr::IP "216.221.81.192";
    335   next if $relayip->within($cogeco1);
    336   my $cogeco2 = new NetAddr::IP "216.221.81.96/30";
    337   next if $relayip->within($cogeco2);
    338 
    339 # and UAlberta
    340 #129.128.5.19
    341 #IP-Network                    129.128.0.0/16
    342 #IP-Network-Block              129.128.000.000 - 129.128.255.255
    343 #Org-Name                      University of Alberta
    344   my $ualberta1 = new NetAddr::IP "129.128.5.19";
    345   next if $relayip->within($ualberta1);
     205  my $relayip = new NetAddr::IP $untrusted[0]->{ip};
     206
     207  foreach my $block (@dontlistme) {
     208    next MSG if $relayip->within($block);
     209  }
    346210
    347211  $iplist{$relayip->addr}++ if $relayip;
    348 #  print "$recv\n";
    349 #  print "$relayip\n\n";
    350 #  print $imap->get_header($msgs[$i], "From"); print "\n";
    351 
    352 
    353 #  last if $i > 15;
     212
     213#  last if $i > 2;
    354214  sleep 1;
    355215} # IMAP message iteration
Note: See TracChangeset for help on using the changeset viewer.