#!/usr/bin/perl
# Convert a BIND zone file to a hosts file
##
# Copyright 2020 Kris Deugau <kdeugau@deepnet.cx>
# 
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version. 
# 
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
# 
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
##

use strict;
use warnings;
use Data::Dumper;

# push "the directory the script is in" into @INC
use FindBin;
use lib "$FindBin::RealBin/";

use DNSDB;

my $dnsdb = new DNSDB;
my $doimport = 0;

#print Dumper(\%reverse_typemap);

my $zname = shift @ARGV;
my $rev = 'n';
my $zid;

my %amap;
my %namemap;
my %cmap;

# this bit irrelevant for a hosts file
#if ($zname =~ /\.arpa\.?$/ || $zname =~ m,^[\d./]+$,) {
#  $rev = 'y';
#  $zname = _zone2cidr($zname) if $zname =~ /\.arpa\.?$/;
#  $zid = $dnsdb->revID($zname,':ANY:');
#  if ($zid) {
#    $zname = new NetAddr::IP $zname;
#    $zname = DNSDB::_ZONE($zname, 'ZONE', 'r', '.').($zname->{isv6} ? '.ip6.arpa' : '.in-addr.arpa');
#  }
#} else {
#  $zid = $dnsdb->domainID($zname,':ANY:');
#}
#
#die "zone $zname not on file\n" if !$zid;

# still no sane way to expose a human-friendly view tag on the command line.
my $view = shift @ARGV;
$view = '' if !$view;

##fixme:  retrieve defttl from SOA record
#my $zonettl = 900;
#my $defttl = $zonettl;
# need an ultimate fallback for this one
my $defttl = 900;
my $origin = "$zname.";   # to append to unqualified names
my $curlabel;
my $prevlabel;

my $i = 0;

# need to spin up a full state machine-ish thing, because BIND zone files are all about context
while (my $rec = <>) {
  chomp $rec;
  next if $rec =~ /^\s*$/;
  next if $rec =~ /^\s*;/;
  next if $rec =~ /^\s*\)/;	# SOA closing (possibly other records too?)
			# arguably should do some more targeted voodoo when parsing the SOA details
#print "$i: ($rec)\n";
#last if ++$i > 5;

  if (my ($macro,$mdetail) = ($rec =~ /^\s*\$(TTL|ORIGIN|INCLUDE)\s+(.+)/) ) {
    # macro sort of thing;  $TTL and $ORIGIN most common.  $INCLUDE is a thing, expect it to be rare in live use tho
    if ($macro eq 'TTL') {
      # irrelevant for a hosts file
    } elsif ($macro eq 'ORIGIN') {
#print "origin ($mdetail)\n";
      # $ORIGIN supports cascading/nesting, by watching for fully-qualified names vs partial names.
      if ($mdetail =~ /\.$/) {
        $origin = $mdetail;
      } else {
        # append current origin to unqualified origin
        $origin = "$mdetail.$origin";
      }
    }
##fixme: should arguably handle $INCLUDE
# probably NOT going to handle $GENERATE, since that type of record set is best handled in DNS
    next;
  }

  my $origrec = $rec;

##fixme:  convert to optional skipfile?
# skip stale records that have no value
#next if /^ip-\d+-\d+-\d+/;
#next if /^ip.pre.fix.\d+.static.colo/;

  # leading whitespace indicates "same label as last record"
  if ($rec =~ /^\s/) {
    $curlabel = $prevlabel;
  } else {
    ($curlabel) = ($rec =~ /^([\w\@_.-]+)\s/);
  }

  # magic name!
  $curlabel = "$zname." if $curlabel eq '@';

  # append $ORIGIN if name is not fully qualified.
  if ($curlabel !~ /\.$/) {
    $curlabel .= ".$origin";
  }

  # check for zone scope.  skip bad records.
  if ($curlabel !~ /$zname.$/) {
    warn "bad record $origrec, maybe bad \$ORIGIN?\n";
    next;
  }

  # trim the label, if any
  $rec =~ s/^([\w\@_.-]*)\s+//;

#print "r$i ($rec)\n\t$curlabel\n";

  my $nc = 0;
my $debugid = -1;
  my %seenatoms;
# we don't actually use these but we have to recognize them
my $class = 'IN';
# not preset as we need to detect whether it's present in the record
my $ttl;
#my $ttl = $defttl;
my $type;
  my $badrec;
  my $curatom = 'class';
##fixme:  maybe wrap this in an eval() instead of the warn/badrec/last bits?
  for (; $nc < 3; $nc++) {
    my ($atom) = ($rec =~ /^([\w\d.]+)\s/);
    # should be safe?
    last if !$atom;
#print "a$nc: l: $rec\n	$atom\n" if $i == $debugid;
    if ($atom =~ /^\d+$/) {
#print "a$nc: d: atom [$atom]\n	$rec\n" if $i == $debugid;
      if (defined($ttl)) {
        warn "bad record ($origrec)\n";
        $badrec = 1;
        last;
      } else {
        if ($curatom ne 'class' && $curatom ne 'ttl') {
          warn "bad record ($origrec)\n";
          $badrec = 1;
          last;
        }
        $curatom = 'ttl';
        $ttl = $atom;
      }
    }
    elsif ($atom =~ /^IN|CS|CH|HS$/) {
#print "a$nc: d2: atom [$atom]\n        $rec\n" if $i == $debugid;
      if ($atom =~ /CS|CH|HS/) {
        warn "unsupported class $atom in record ($origrec)\n";
        $badrec = 1;
        last;
      }
      $curatom = 'class';
      $class = $atom;
    }
    elsif ($atom =~ /^[A-Z]+/) {
#print "a$nc: d3a: probable type [$atom]\n" if $i == $debugid;
      if ($reverse_typemap{$atom}) {
#print "a$nc: d3b: atom [$atom]\n        $rec\n" if $i == $debugid;
        $type = $atom;
      } else {
        warn "unknown type $atom in record ($origrec)\n";
        $badrec = 1;
        last;
      }
    }
    $rec =~ s/^$atom\s*//;
#print "a$nc: next: $rec\n" if $i == $debugid;
  } # class/type/TTL loop

#last if $i > 15;
  next if $badrec;

#print Dumper(\%reverse_typemap);
$ttl = $defttl if !defined($ttl);

#print "class $class, ttl $ttl, type $type\n";
#last;

  my $itype = $reverse_typemap{$type};
#  s/([A-Z-]+)\s+//;
#  chomp;
  my $rdata = $rec;

  $prevlabel = $curlabel;

##fixme:  squish this down for this script since SOA records are irrelevant
  if ($type eq 'SOA') {
    my ($ns, $adminmail) = ($rdata =~ /([\w.]+)\s+([\w.]+)\s+\(/);
    die "Can't parse gibberish SOAish record: $_\n" if !$ns;
    $rdata =~ s/([\w.]+)\s+([\w.]+)\s+\(\s*//;

    # There are probably more efficient ways to do this but the SOA record
    # format is essentially character based, not line-based.
    # In theory the SOA serial etc may be spread over up to 5 lines, in any combination.

    # Parse fields from $rdata if present
    my @soabits;
    my @soafirst = split /\s+/, $rdata;
    while (my $f = shift @soafirst) {
      last if $f !~ /^\d/;
      push @soabits, $f;
    }

    # Read more lines if we don't have enough SOA fields filled
    while (scalar(@soabits) < 5) {
      my $tmp = <>;
      $tmp =~ s/^\s*//;
      my @tmpsoa = split /\s+/, $tmp;
      while (my $f = shift @tmpsoa) {
        last if $f !~ /^\d/;
        push @soabits, $f;
      }
      if (scalar(@soabits) == 5) {
        last;
      }
    }
  } # SOA

##fixme:  trim dupes if possible
  elsif ($type eq 'A') {
#    push @{$amap{$curlabel}}, $rdata;
#    push @{$namemap{$rdata}}, $curlabel;

    # need the name->IP map so we can reverse-map the CNAMEs on output
    $amap{$curlabel}{$rdata}++;
    $namemap{$rdata}{$curlabel}++;

#print "$origrec\n";
  } # A record

  elsif ($type eq 'CNAME') {
#    push @{$cmap{$rdata}}, $curlabel;
##todo:  expand $rdata with $origin if unqualified

    $cmap{$curlabel} = $rdata.($rdata =~ /\./ ? '' : ".$origin");
#print "$origrec\n";
  } # CNAME record


#  last if ++$i > 10;
} # <STDIN>


#print Dumper \%amap;
#foreach my $n (keys %amap) {
#  foreach my $ip (@{$amap{$n}}) {
##print "$ip\t$n\n";
#    push @{$namemap{$ip}}, $n unless grep $n, @{$namemap{$ip}};
#  }
#}

#print Dumper \%namemap;
#foreach my $ip (sort keys %namemap) {
#  print "$ip\t".join(' ', @{$namemap{$ip}})."\n";
#}

#print Dumper \%cmap;


foreach my $cn (keys %cmap) {
  print "$cn -> $cmap{$cn}\n";
#  warn "CNAME $cn out of zone\n" if !$namemap{$cn};
}
