#!/usr/bin/perl
# Convert a BIND zone file to a hosts file
##
# Copyright 2020 Kris Deugau <kdeugau@deepnet.cx>
# 
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version. 
# 
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
# 
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
##

use strict;
use warnings;
use Getopt::Long;

use Data::Dumper;

# push "the directory the script is in" into @INC
use FindBin;
use lib "$FindBin::RealBin/";

use DNSDB;

my @skipdefs;
my $skipfile;
my $dryrun = 0;

GetOptions(
        "skip=s" => \@skipdefs,
        "skipfile=s" => \$skipfile,
        "test|dry-run" => \$dryrun,
);

my $zname = shift @ARGV;

my $usage = "usage: bind2hosts zone [--skip pattern [--skip pattern2 ...]] [--skipfile file]
	zonename < zonefile

	--skip
		Specify a string to skip in the records.  If an IP-like string is
		used, and the zone is a reverse zone, it will also search for the
		octet-reversed form.  Specify multiple times to skip multiple
		different record patterns.
	--skip-file
		A file containing patterns to skip.  Patterns from the file and
		any --skip arguments are merged.
	zonename
		The name of the zone to import.  Required.

	Zone data will be read from STDIN.
";
if (!$zname) {
  die $usage;
}

if ($skipfile) {
  if (-f $skipfile) {
    open SKIP, "<$skipfile";
    while (<SKIP>) {
      chomp;
      push @skipdefs, $_;
    }
    close SKIP;
  } else {
    warn "skipfile $skipfile requested but it doesn't seem to exist.  Continuing.\n";
  } 
}  

my $rev = 'n';
my $zid;

my %amap;
my %namemap;
my %cmap;

my $dnsdb = new DNSDB;

##fixme:  retrieve defttl from SOA record
#my $zonettl = 900;
#my $defttl = $zonettl;
# need an ultimate fallback for this one
my $defttl = 900;
my $origin = "$zname.";   # to append to unqualified names
my $curlabel;
my $prevlabel;

my $i = 0;

# need to spin up a full state machine-ish thing, because BIND zone files are all about context
while (my $rec = <>) {
  chomp $rec;
  next if $rec =~ /^\s*$/;
  next if $rec =~ /^\s*;/;
  next if $rec =~ /^\s*\)/;	# SOA closing (possibly other records too?)
			# arguably should do some more targeted voodoo when parsing the SOA details
#print "$i: ($rec)\n";
#last if ++$i > 5;

  my $skipflag = 0;
  foreach (@skipdefs) {
#print "skipdbg: $_ =~ $rec\n" if $rec =~ /207/;
    if ($rec =~ /\Q$_\E/) {
      $skipflag = 1;
#      print "skip: $rec\n";
    }
  }
  next if $skipflag;

  if (my ($macro,$mdetail) = ($rec =~ /^\s*\$(TTL|ORIGIN|INCLUDE)\s+(.+)/) ) {
    # macro sort of thing;  $TTL and $ORIGIN most common.  $INCLUDE is a thing, expect it to be rare in live use tho
    if ($macro eq 'TTL') {
      # irrelevant for a hosts file
    } elsif ($macro eq 'ORIGIN') {
      # $ORIGIN supports cascading/nesting, by watching for fully-qualified names vs partial names.
      if ($mdetail =~ /\.$/) {
        $origin = $mdetail;
      } else {
        # append current origin to unqualified origin
        $origin = "$mdetail.$origin";
      }
    }
##fixme: should arguably handle $INCLUDE
# probably NOT going to handle $GENERATE, since that type of record set is best handled in DNS
    next;
  }

  # yay for special cases
  $origin = '' if $origin eq '.';

  my $origrec = $rec;

##fixme:  convert to optional skipfile?
# skip stale records that have no value
#next if /^ip-\d+-\d+-\d+/;
#next if /^ip.pre.fix.\d+.static.colo/;

  # leading whitespace indicates "same label as last record"
  if ($rec =~ /^\s/) {
    $curlabel = $prevlabel;
  } else {
    ($curlabel) = ($rec =~ /^([\w\@_.-]+)\s/);
  }

  # magic name!
  $curlabel = "$zname." if $curlabel eq '@';

  # append $ORIGIN if name is not fully qualified.
  if ($curlabel !~ /\.$/) {
    $curlabel .= ".$origin";
  }

  # check for zone scope.  skip bad records.
  if ($curlabel !~ /$zname.$/) {
    warn "bad record $origrec, maybe bad \$ORIGIN?\n";
    next;
  }

  # trim the label, if any
  $rec =~ s/^([\w\@_.-]*)\s+//;

  my $nc = 0;
  my %seenatoms;
  # we don't actually use these but we have to recognize them
  my $class = 'IN';
  # not preset as we need to detect whether it's present in the record
  my $ttl;
  my $type;
  my $badrec;
  my $curatom = 'class';

  # now that we've collected and trimmed off the record's label, unpack the class, TTL, and type.
  # class and TTL may be omitted, and may appear in either class,TTL or TTL,class order.
  eval {
    for (; $nc < 3; $nc++) {
      last if $type;	# short-circuit if we've got a type, further data is record-specific.
      my ($atom) = ($rec =~ /^([\w\d.]+)\s/);
      # should be safe?
      last if !$atom;
      if ($atom =~ /^\d+$/) {
        if (defined($ttl)) {
          # we already have a TTL, so another all-numeric field is invalid.
          die "bad record ($origrec)\n";
        } else {
          if ($curatom ne 'class' && $curatom ne 'ttl') {
            die "bad record ($origrec)\n";
          }
          $curatom = 'ttl';
          $ttl = $atom;
        }
      }
      elsif ($atom =~ /^IN|CS|CH|HS$/) {
        if ($atom =~ /CS|CH|HS/) {
          die "unsupported class $atom in record ($origrec)\n";
        }
        $curatom = 'class';
        $class = $atom;
      }
      elsif ($atom =~ /^[A-Z]+/) {
        # check against dnsadmin's internal list of known DNS types.
        if ($reverse_typemap{$atom}) {
          $type = $atom;
        } else {
          die "unknown type $atom in record ($origrec)\n";
        }
        $curatom = 'type';
      }
      $rec =~ s/^$atom\s*//;
    } # class/type/TTL loop
  };
  if ($@) {
    warn $@;
    next;
  }


  $ttl = $defttl if !defined($ttl);

  my $itype = $reverse_typemap{$type};
  my $rdata = $rec;

  $prevlabel = $curlabel;

##fixme:  squish this down for this script since SOA records are irrelevant
  if ($type eq 'SOA') {
    my ($ns, $adminmail) = ($rdata =~ /([\w.]+)\s+([\w.]+)\s+\(/);
    die "Can't parse gibberish SOAish record: '$rdata'/'$origrec'\n" if !$ns;
    $rdata =~ s/([\w.]+)\s+([\w.]+)\s+\(\s*//;

    # There are probably more efficient ways to do this but the SOA record
    # format is essentially character based, not line-based.
    # In theory the SOA serial etc may be spread over up to 5 lines, in any combination.

    # Parse fields from $rdata if present
    my @soabits;
    my @soafirst = split /\s+/, $rdata;
    while (my $f = shift @soafirst) {
      last if $f !~ /^\d/;
      push @soabits, $f;
    }

    # Read more lines if we don't have enough SOA fields filled
    while (scalar(@soabits) < 5) {
      my $tmp = <>;
      $tmp =~ s/^\s*//;
      my @tmpsoa = split /\s+/, $tmp;
      while (my $f = shift @tmpsoa) {
        last if $f !~ /^\d/;
        push @soabits, $f;
      }
      if (scalar(@soabits) == 5) {
        last;
      }
    }
  } # SOA

##fixme:  trim dupes if possible

  elsif ($type eq 'A') {
    # need the name->IP map so we can reverse-map the CNAMEs on output
#    $amap{$curlabel}{$rdata}++;
    push @{$amap{$curlabel}}, $rdata;
# why doesn't this work?  causes ALL cases of multi-named IPs to get skipped, not just duplicates.  O_o
#    push @{$namemap{$rdata}}, $curlabel unless grep $curlabel, @{$namemap{$rdata}};
#    push @{$namemap{$rdata}}, $curlabel;# unless grep $curlabel, @{$namemap{$rdata}};
    $namemap{$rdata}{$curlabel}++;

  } # A record

  elsif ($type eq 'CNAME') {
##todo:  expand $rdata with $origin if unqualified
    $cmap{$curlabel} = $rdata.($rdata =~ /\./ ? '' : ".$origin");
  } # CNAME record

  # all other record types are irrelevant for a hosts file

} # <STDIN>




#print Dumper \%cmap;

while (my ($cn, $targ) = each %cmap) {
#print "dbg: ".Dumper($targ);
  if (!$amap{$targ}) {
    if ($cmap{$targ}) {
warn "chained cname $cn => $targ\n";
      my $tmpcn = $targ;
      $targ = $cmap{$tmpcn};
warn "  chain target $cn => $tmpcn => $targ\n";
#      next if !$amap{$targ};
      if (!$amap{$targ}) {
        if ($cmap{$targ}) {
#print "  second chain?\n";
          $tmpcn = $targ;
          $targ = $cmap{$tmpcn};
        } else {
#print "not found\n";
next;
        }
      }
    } else {
      # skip depth-3 (?) CNAMES;  any such zone does not belong as a hosts file anyway
      warn "CNAME $cn => $targ not found\n";
      next;
    }
  }
#  print Dumper (\%{$amap{$cmap{$cn}}});
#  print "$cn -> $cmap{$cn}\n";
#  $amap{$cmap{$cn}}{$cn}++ if $cmap{$cn} =~ /$zname.$/ && $amap{$cmap{$cn}};
#  print "dangling CNAME $cn\n" if !$namemap{$cmap{$cn}};
#  print "$cn -> $cmap{$cn}\n";
#  warn "CNAME $cn out of zone\n" if !$namemap{$cn};
  my $targip = $amap{$targ}[0];
#print "$cn => $targ\n" if $targ =~ /(webftp|landing)/;
#print $targip;
#  push @{$namemap{$targip}}, $targ unless grep $targ, @{$namemap{$targip}};
  $namemap{$targip}{$cn}++;# unless grep $targ, @{$namemap{$targip}};
}

#print Dumper \%amap;
#foreach my $n (keys %amap) {
#  foreach my $ip (keys %{$amap{$n}}) {
#print "$ip\t$n\n";
#    push @{$namemap{$ip}}, $n unless grep $n, @{$namemap{$ip}};
#    $namemap{$ip}{$n}++;
#  }
#}

#print Dumper \%namemap;
foreach my $ip (sort keys %namemap) {
  print "$ip\t".join(' ', sort keys %{$namemap{$ip}})."\n";
}
