#!/usr/bin/perl
# Import a BIND zone file
##
# Copyright 2020 Kris Deugau <kdeugau@deepnet.cx>
# 
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version. 
# 
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
# 
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
##

use strict;
use warnings;
use Data::Dumper;

use lib '.';
use DNSDB;

my $dnsdb = new DNSDB;
my $doimport = 0;

#print Dumper(\%reverse_typemap);

local $dnsdb->{dbh}->{AutoCommit} = 0;
local $dnsdb->{dbh}->{RaiseError} = 1;

##fixme:  command arguments/flags to set these to alternate values
my $group = 1;
my $status = 1;
my $location = '';
# we'll update this with the actual serial number from the SOA record later
my $serial = time();

my $zname = shift @ARGV;
my $origzone = $zname;
die "usage: bind-import zonename\n" if !$zname;
my $rev = 'n';
my $zid;

my %amap;
my %namemap;
my %cmap;

##fixme:  this is wrong, BIND zone files are generally complete and we're adding.  merging records is an entire fridge full of worms.
##fixme:  for import, should arguably check for zone *non*existence
if ($zname =~ /\.arpa\.?$/ || $zname =~ m,^[\d./]+$,) {
  $rev = 'y';
  $zname = _zone2cidr($zname) if $zname =~ /\.arpa\.?$/;
  $zid = $dnsdb->revID($zname,':ANY:');
  if ($zid) {
    die "zone $origzone already present, not merging records\n";
#$zname = new NetAddr::IP $zname;
#    $zname = DNSDB::_ZONE($zname, 'ZONE', 'r', '.').($zname->{isv6} ? '.ip6.arpa' : '.in-addr.arpa');
  }
  $zid = $dnsdb->{dbh}->do("INSERT INTO revzones (revnet,group_id,status,default_location,zserial) VALUES (?,?,?,?,?) RETURNING rnds_id",
                undef, ($zname, $group, $status, $location, $serial));

} else {
  $zid = $dnsdb->domainID($zname,':ANY:');
  if ($zid) {
    die "zone $origzone already present, not merging records\n";
  }
  $zid = $dnsdb->{dbh}->do("INSERT INTO domains (domain,group_id,status,default_location,zserial) VALUES (?,?,?,?,?) RETURNING domain_id",
                undef, ($zname, $group, $status, $location, $serial));
}

die "error creating zone stub for $zname: ".$dnsdb->{dbh}->errstr if !$zid;


# still no sane way to expose a human-friendly view tag on the command line.
my $view = shift @ARGV;
$view = '' if !$view;

##fixme:  retrieve defttl from SOA record
my $zonettl = 900;
my $defttl = $zonettl;
my $origin = "$zname.";	# to append to unqualified names

# need to spin up a full state machine-ish thing, because BIND zone files are all about context
# see ch4, p56-72 in the grasshopper book
my $prevlabel = '';
my $curlabel = '';

my $i = 0;

while (my $rec = <>) {
  chomp $rec;
  next if $rec =~ /^\s*$/;
  next if $rec =~ /^\s*;/;	# comments
  next if $rec =~ /^\s*\)/;	# SOA closing (possibly other records too?)
				# arguably should do some more targeted voodoo when parsing the SOA details

  # skip stale records that have no value
  next if /^ip-192-168-1(12|20)-\d+/;
  next if /ip.add.re.\d+\s*$/;

#last if ++$i > 4;
print "($rec)\n";
  if (my ($macro,$mdetail) = ($rec =~ /^\s*\$(TTL|ORIGIN|INCLUDE|GENERATE)\s+(.+)/) ) {
    # macro sort of thing;  $TTL and $ORIGIN most common.  $INCLUDE is a thing, expect it to be rare in live use tho
    if ($macro eq 'TTL') {
      $mdetail =~ s/\s*;.+$//;
      if ($mdetail =~ /^\d+$/) {
        $defttl = $mdetail;
      } else {
        warn "invalid \$TTL: $rec\n";
      }
    } elsif ($macro eq 'ORIGIN') {
##fixme:  going to skip the stupid case of "$ORIGIN com." and the like that lie
# between . and the root domain we were told we're importing;  anyone using such
# a mess outside the root servers is clearly insane

# $ORIGIN supports cascading/nesting, by watching for fully-qualified names vs partial names.

print "origin ($mdetail)\n";
      if ($mdetail =~ /\.$/) {
        $origin = $mdetail;
      } else {
        # append current origin to unqualified origin
        $origin = "$mdetail.$origin";
      }

#      if ($mdetail eq '.' || $mdetail =~ /$zname\.$/ || $zname =~ /$mdetail\.$/) {
#        $origin = $mdetail;
#      } else {
#        # if we continue, we either use an $ORIGIN that's out of zone, or ignore it and potentially publish incorrect records.
#        die "bad \$ORIGIN: $_\n";
#      }

    }
    # not handling $INCLUDE or $GENERATE (altho the latter seems to be mostly a less-flexible version of the template types)
    next;
  }

  my $origrec = $rec;

  # leading whitespace indicates "same label as last record"
  if ($rec =~ /^\s/) {
    $curlabel = $prevlabel;
print "  found empty label, using previous label\n";
  } else {
    ($curlabel) = ($rec =~ /^([\w\@_.-]+)\s/);
  }

print "  found '$curlabel'\n";

  # magic name!
  $curlabel = "$zname." if $curlabel eq '@';

  # append $ORIGIN if name is not fully qualified.
  if ($curlabel !~ /\.$/) {
    $curlabel .= ".$origin";
  }
print "  expanded '$curlabel'\n";

  # check for zone scope.  skip bad records.
  if ($curlabel !~ /$zname.$/) {
    warn "bad record $origrec, maybe bad \$ORIGIN?\n";
last;
    next;
  }

  # trim the label, if any
  $rec =~ s/^([\w\@_.-]*)\s+//;

#  # records must begin in the first column, no leading whitespace
#  my ($name) = /^([\w\@_.-]+)\s/;

# foo IN A 1.2.3.4
#   IN A 2.3.4.5
# =
# foo.zone. IN A 1.2.3.4
# foo.zone. IN A 2.3.4.5

#  # "empty" label records inherit the previous label
#  # RRs start in the first column by definition, so leading whitespace indicates an inherited label
#  if (/^\s+/) {
#    # fatal error.  if there is no previous label, we can by definition not set
#    # the current label based on it.  this can only happen on the very first
#    # record, following records will *ALWAYS* have a previous label
#    die "bad first record ($_):  no previous label\n" if !$prevlabel;
#    $name = $prevlabel;
#  }

print "$i ($rec)\n";#\t$curlabel";


#  # append zone name to record name if missing AND not dot-terminated;
#  # this happens automagically for forward zones, but not reverse because Reasons.  (fixme?)
#  # suck up and deal with the error if the dot-termiated name is out of zone;  should be
#  # impossible with valid BIND zone file but...
#  if ($name !~ /\.$/) {
#    $name .= ".$zname" if $name !~ /$zname$/;
#  } else {
#    warn "skipping out-of-zone record:\n\t($_)\n" if $name !~ /$zname\.$/;
#    next;
#  }


  my $nc = 0;
  my $class = 'IN';
  my $ttl;
  my $type;
  my $badrec;
  my $curatom = 'class';

  eval {
    for (; $nc < 3; $nc++) {
      my ($atom) = ($rec =~ /^([\w\d.]+)\s/);
      # should be safe?
      last if !$atom;
      last if $type;
#print "nc:$nc: $atom\n";
      if ($atom =~ /^\d+$/) {
        if (defined($ttl)) {
          die "bad record ($origrec)\n";
#          warn "bad record ($origrec)\n";
#          $badrec = 1;
#          last;
        } else {
          if ($curatom ne 'class' && $curatom ne 'ttl') {
            die "bad record ($origrec)\n";
#            warn "bad record ($origrec)\n";
#            $badrec = 1;
#            last;
          }
          $curatom = 'ttl';
          $ttl = $atom;
        }
      }

      elsif ($atom =~ /^IN|CS|CH|HS$/) {
#print "a$nc: d2: atom [$atom]\n        $rec\n" if $i == $debugid;
        if ($atom =~ /CS|CH|HS/) {
          die "unsupported class $atom in record ($origrec)\n";
#          warn "unsupported class $atom in record ($origrec)\n";
#          $badrec = 1;
#          last;
        }
        $curatom = 'class';
        $class = $atom;
      }

      elsif ($atom =~ /^[A-Z]+/) {
#      print "dbg: type $atom\n";
        if ($reverse_typemap{$atom}) {
          $type = $atom;
        } else {
          die "unknown type $atom in record ($origrec)\n";
        }
      }
      $rec =~ s/^$atom\s*//;
    }
  };
  if ($@) {
    warn $@;
    next;
  }

  # set default TTL here so we can detect a TTL in the loop above
  $ttl = $defttl if !defined($ttl);

#next if $badrec;


##fixme:  drop curlabel?  not sure it's needed
#$curlabel = $name;
$prevlabel = $curlabel;
##todo:  BIND conflates a repeated label with repeating the TTL too.  Matter of opinion whether that's really correct or not.


## by convention the optional TTL leads the optional class, but they're apparently swappable.
#  my ($ttl) = /^(\d+)?\s/;
#  if (defined $ttl) {
#    # TTL may be zero
#    s/(\d+)?\s+//;
#  } else {
#    # Fall back to zone default TTL
#    $ttl = $zonettl;
#  }
#  my ($class) = /^(IN|CS|CH|HS|\d+)\s/;
#  if (defined $class) {
#    if ($class =~ /\d+/) {
#      
#    }
#    if ($class ne 'IN') {
#      warn "Non-Internet class ($class) records not supported:\n\t$origrec\n";
#      next;
#    }
#    s/(IN|CS|CH|HS)\s+//;
#  } else {
#    $class = 'IN';
#  }
#  my ($type) = /([A-Z-]+)\s/;
#  if (!$reverse_typemap{$type}) {
#    warn "Unknown type $type, skipping\n\t($rec)\n";
#    next;
#  }
#  s/([A-Z-]+)\s+//;
#  chomp;


  my $itype = $reverse_typemap{$type};
  my $rdata = $rec;

  # SOA is the only type that may span multiple lines.  Probably.  Note even AXFRed zones write multiline SOA records:
  #@       IN      SOA     test.example.invalid.   test.example.invalid.   (2020082500 7200 900 604800 3600)
  #        IN      NS      olddns.example.com.
  #        IN      MX      1 fred.foo.bar.invalid.
  #foo     IN      A       192.168.16.45
  # AXFR'ed zone file gets written as
  #$ORIGIN .
  #$TTL 3600       ; 1 hour
  #example.invalid         IN SOA  test.example.invalid. test.example.invalid. (
  #                                2020082500 ; serial
  #                                7200       ; refresh (2 hours)
  #                                900        ; retry (15 minutes)
  #                                604800     ; expire (1 week)
  #                                3600       ; minimum (1 hour)
  #                                )
  #                        NS      olddns.example.com.
  #                        MX      1 fred.foo.bar.invalid.
  #$ORIGIN example.invalid.
  #foo                     A       192.168.16.45

  if ($type eq 'SOA') {
    my ($ns, $adminmail) = ($rdata =~ /([\w.]+)\s+([\w.]+)\s+\(/);
    die "Can't parse gibberish SOAish record: $rec\n" if !$ns;
    $rdata =~ s/([\w.]+)\s+([\w.]+)\s+\(\s*//;

    # There are probably more efficient ways to do this but the SOA record
    # format is essentially character based, not line-based.
    # In theory the SOA serial etc may be spread over up to 5 lines, in any combination.

    # Parse fields from $rdata if present
    my @soabits;
    my @soafirst = split /\s+/, $rdata;
    while (my $f = shift @soafirst) {
      last if $f !~ /^\d/;
      push @soabits, $f;
    }

    # Read more lines if we don't have enough SOA fields filled
    while (scalar(@soabits) < 5) {
      my $tmp = <>;
      $tmp =~ s/^\s*//;
      my @tmpsoa = split /\s+/, $tmp;
      while (my $f = shift @tmpsoa) {
        last if $f !~ /^\d/;
        push @soabits, $f;  
      }
      if (scalar(@soabits) == 5) {
        last;
      }
    }
  } # SOA

  # Quotes may arguably be syntactically required, but they're not actually part of the record data
  elsif ($type eq 'TXT') {
    $rdata =~ s/^"//;
    $rdata =~ s/"$//;
  }

# temp hack for hosts file
elsif ($type eq 'A') {
#  if ($amap{$name}) {
#    print "urp:  dupe name $name $rdata\n";
#  } else {
    push @{$amap{$curlabel}}, $rdata;
#  }
  push @{$namemap{$rdata}}, $curlabel;
}
elsif ($type eq 'CNAME') {
  push @{$cmap{$rdata}}, $curlabel;
}

no warnings qw(uninitialized);
#print "parsed: '$name' '$class' '$ttl' '$type'->'$itype' '$rdata'\n";
#print;
#;imap   IN      900     CNAME   deepnet.cx.
##fixme:  not sure how to handle the case where someone leaves off the class.
  if ($doimport) {
    my ($code, $msg);
    if ($rev eq 'n') {
      ($code,$msg) = $dnsdb->addRec('n', $rev, $zid, \$curlabel, \$itype, \$rdata, $ttl);
    } else {
      ($code,$msg) = $dnsdb->addRec('n', $rev, $zid, \$rdata, \$itype, \$curlabel, $ttl);
    }
    print "$code: $msg\n";
  }
#  $i++;
}


#print Dumper \%amap;
#print Dumper \%namemap;
#print Dumper \%cmap;

foreach my $n (keys %amap) {
  foreach my $ip (@{$amap{$n}}) {
#print "$ip	$n\n";
    push @{$namemap{$ip}}, $n unless grep $n, @{$namemap{$ip}};
  }
}

foreach my $c (keys %cmap) {
  if ($amap{$c}) {
    print Dumper(\@{$amap{$c}});
  }
#  print $amap{$c};
}

# cname targ -> IP

#foreach my $ip (sort keys %namemap) {
#  print "$ip	".join(' ', @{$namemap{$ip}})."\n";
#}

$dnsdb->{dbh}->rollback;