Changeset 23 for trunk


Ignore:
Timestamp:
08/31/10 15:35:18 (14 years ago)
Author:
Kris Deugau
Message:

/trunk/dnsbl

Shuffle processing in DNSBL::export() for 1.5-10x speedup (depending on DB version, OS, Perl, phase of moon):

  • prepare some statement handles more globally, so we're not recreating them for each netblock (moved to new sub initexport())
  • pass the bitmask down the recursion chain instead of retrieving parent "islisted?" info for each netblock
  • remove complex JOIN on recursion loop so we don't wait so long for the DB to finish

Trim commented stale code.

Location:
trunk/dnsbl
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/dnsbl/DNSBL.pm

    r15 r23  
    125125
    126126
     127## DNSBL::initexport()
     128# Prepare a couple of statement handles for later processing in export().  Assists in ~3x speed increase.
     129my $parsth;
     130my $sthmoron;
     131sub initexport {
     132  $parsth = $dbh->prepare("SELECT count(i.ip),b.block,b.level,b.listme AS oobblock,o.listme AS ooborg ".
     133        "FROM iplist i INNER JOIN blocks b ON i.ip << b.block INNER JOIN orgs o ON b.orgid = o.orgid ".
     134        "WHERE b.block >>= ? ".
     135        "GROUP BY b.block,b.level,b.listme,o.listme ORDER BY b.block");
     136  $sthmoron = $dbh->prepare("SELECT ip,s4list FROM iplist WHERE ip << ? ORDER BY ip");
     137}
     138
     139
    127140## DNSBL::ipexists()
    128141# return report count if the IP has been reported, otherwise return undef
     
    259272  my $level = shift || 0;
    260273  my $container = shift || '0.0.0.0/0';
    261   my $oobblock = shift || 0;
    262   my $ooborg = shift || 0;
     274  my $bitmask = shift || 0;
    263275
    264276  if ($level > 3) {
     
    280292  my ($nblocks) = $sth->fetchrow_array();
    281293
     294  # need this for a bunch of things, may as well do it here
     295  my ($masklen) = ($container =~ m|/(\d+)$|);
     296
     297# Update the bitmask variable with the current block info as needed.
     298# Much faster than retrieving this data later (~3x faster!).
     299  my $listme;
     300  my $listorg;
     301  my $bcount;
     302  if ($container ne '0.0.0.0/0') {
     303    $sth = $dbh->prepare("SELECT count(*) FROM iplist WHERE ip << ?");
     304    $sth->execute($container);
     305    ($bcount) = $sth->fetchrow_array();
     306
     307    $sth = $dbh->prepare("SELECT b.listme,o.listme ".
     308        "FROM blocks b INNER JOIN orgs o ON b.orgid=o.orgid ".
     309        "WHERE b.block = ?");
     310    $sth->execute($container);
     311    ($listme,$listorg) = $sth->fetchrow_array();
     312
     313    $bitmask |= $bitfields{$level-1} if $bcount >= $autolist{$masklen};
     314    $bitmask |= $bitfields{block} if $listme;
     315    $bitmask |= $bitfields{org} if $listorg;
     316  }
     317
     318# hm.  can't seem to move this prepare elsewhere.  :(
    282319  if ($nblocks > 0) {
    283     my $sql = "SELECT b.block,b.listme,o.orgname,o.listme ".
    284         "FROM blocks b INNER JOIN orgs o ON b.orgid=o.orgid ".
    285         "WHERE b.level=$level and b.block << '$container' ORDER BY b.block, masklen(b.block) DESC";
    286     $sth = $dbh->prepare($sql);
    287     $sth->execute();
    288     while (my ($cidr,$listblock,$org,$listorg) = $sth->fetchrow_array()) {
    289       $self->export($listhosts,$mode,$level+1,$cidr,$listblock,$listorg);
     320    my $sthsubblocks = $dbh->prepare("SELECT block FROM blocks ".
     321        "WHERE level=? and block << ? ORDER BY block, masklen(block) DESC");
     322    $sthsubblocks->execute($level, $container);
     323    while (my ($cidr) = $sthsubblocks->fetchrow_array()) {
     324      $self->export($listhosts,$mode,$level+1,$cidr,$bitmask);
    290325    }
    291326  } # avoid checking content of subs if we don't have any
     
    303338  $level--;
    304339
    305   # need this for a bunch of things, may as well do it here
    306   my ($masklen) = ($container =~ m|/(\d+)$|);
    307 
    308 # Snag all parent block "is-it-listed?" data, and stuff it into a single
    309 # variable we can use later.  Much faster than retrieving this data
    310 # individually, for each octet iteration.
    311 
    312   my $mycount = 0;
    313   my $sql = "SELECT count(i.ip),b.block,b.level,b.listme AS oobblock,o.listme AS ooborg ".
    314         "FROM iplist i INNER JOIN blocks b ON i.ip << b.block INNER JOIN orgs o ON b.orgid = o.orgid ".
    315         "WHERE b.block >>= ? ".
    316         "GROUP BY b.block,b.level,b.listme,o.listme ORDER BY b.block";
    317   my $parsth = $dbh->prepare($sql);
    318   $parsth->execute($container);
    319   my $pdata = 0;
    320   while (my ($pcount,$p,$plev,$pblock,$porg) = $parsth->fetchrow_array) {
    321     my ($pmasklen) = ($p =~ m|\d+/(\d+)$|);
    322     $pdata |= $bitfields{$plev} if $pcount >= $autolist{$pmasklen};
    323     $pdata |= $bitfields{block} if $pblock;
    324     $pdata |= $bitfields{org} if $porg;
    325     $mycount = $pcount if $p eq $container;
    326   }
    327 
    328340  if ($mode eq 'cidr') {
    329     $listhosts->{$container} |= $pdata if $pdata && ($ooborg || $oobblock || ($mycount >= $autolist{$masklen}));
     341    $listhosts->{$container} |= $bitmask if $bitmask && ($listme || $listorg || ($bcount >= $autolist{$masklen}));
    330342  } else {
    331343  # if $cidr->masklen is <= 24, iterate on /24 boundaries for bulk sublisting
     
    333345  # if $cidr->masklen is <= 8, iterate on /8 boundaries for bulk sublisting
    334346
    335     if ($pdata) {
     347    if ($bitmask) {
    336348      my @blocksubs;
    337349      if ($masklen <= 30 && $masklen > 24) {
     
    340352          my $host = "$net$entry";
    341353          $listhosts->{$host} = 0 if !defined($listhosts->{$host});
    342           $listhosts->{$host} |= $pdata;
     354          $listhosts->{$host} |= $bitmask;
    343355        }
    344356      } elsif ($masklen <= 24 && $masklen > 16) {
     
    346358        for (my $entry = $octet; $entry < ($octet + $howmany[$masklen]); $entry++) {
    347359          my $twofour = "$net$entry.*";
    348           $listhosts->{$twofour} |= $pdata;
     360          $listhosts->{$twofour} |= $bitmask;
    349361        }
    350362      } elsif ($masklen <= 16 && $masklen > 8) {
     
    352364        for (my $entry = $octet; $entry < ($octet + $howmany[$masklen]); $entry++) {
    353365          my $sixteen = "$net$entry.*";
    354           $listhosts->{$sixteen} |= $pdata;
     366          $listhosts->{$sixteen} |= $bitmask;
    355367        }
    356368      } elsif ($masklen <= 8) {
     
    358370        for (my $entry = $octet; $entry < ($octet + $howmany[$masklen]); $entry++) {
    359371          my $eight = "$entry.*";
    360           $listhosts->{$eight} |= $pdata;
     372          $listhosts->{$eight} |= $bitmask;
    361373        }
    362374      }
    363375
    364 #print "DEBUG1: $container, ".(@blocksubs + 0)."\n";
    365 # this seems to be a BIG timesink...  execution time ~1:30 without, ~4:30 with
    366 #if (0){
    367 #    $sth = $dbh->prepare("select block,level,listme from blocks where block >> ?");
    368 #    my $sth2 = $dbh->prepare("select count(*) from iplist where ip << ?");
    369 #    foreach (@blocksubs) {
    370 #print "  DEBUG: $_ container-is-listed check\n";
    371 # collect info on container block(s)
    372 #      $sth->execute($container);
    373 #      while (my ($parent, $plev, $listme) = $sth->fetchrow_array()) {
    374 #       $sth2->execute($parent);
    375 #       my ($parlen) = ($parent =~ m|/(\d+)|);
    376 #       my ($parcount) = $sth2->fetchrow_array();
    377 #print "  DEBUG: $parent: $parlen, $parcount, $plev\n";
    378 #       $listhosts->{$_} |= $bitfields{$plev} if $parcount >= $autolist{$parlen};  #hmm.
    379 #       $listhosts->{$_} |= $bitfields{block} if $listme;
    380 #      }
    381 #    }
    382 #}
    383 
    384376    } # generate autolist entries for ips/octets not (yet) seen in reports
    385377
    386378  } # cidr vs classful mode
    387379
    388   $sth = $dbh->prepare("SELECT ip,s4list FROM iplist WHERE ip << ? ORDER BY ip");
    389   $sth->execute($container);
    390   while (my ($ip,$moron) = $sth->fetchrow_array()) {
    391     $listhosts->{$ip} |= $pdata;
     380  $sthmoron->execute($container);
     381  while (my ($ip,$moron) = $sthmoron->fetchrow_array()) {
     382    $listhosts->{$ip} |= $bitmask;
    392383    if ($moron) {
    393384      $listhosts->{$ip} = $bitfields{slist};
  • trunk/dnsbl/export-dnsbl

    r7 r23  
    2626#$dnsbl->export($ipref,$mode,1,'76.73.0.0/17');
    2727#$dnsbl->export($ipref,$mode,1,'174.36.0.0/15');
     28$dnsbl->initexport;
    2829$dnsbl->export($ipref,$mode);
    2930
Note: See TracChangeset for help on using the changeset viewer.