#!/usr/bin/perl
# Convert a BIND zone file to a hosts file
##
# Copyright 2020 Kris Deugau <kdeugau@deepnet.cx>
# 
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version. 
# 
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
# 
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
##

use strict;
use warnings;
use Getopt::Long;

use Data::Dumper;

# push "the directory the script is in" into @INC
use FindBin;
use lib "$FindBin::RealBin/";

use DNSDB;

my @skipdefs;
my $skipfile;
my $dryrun = 0;
# CNAME chain depth
my $maxdepth = 3;

GetOptions(
        "skip=s" => \@skipdefs,
        "skipfile=s" => \$skipfile,
        "test|dry-run" => \$dryrun,
);

my $zname = shift @ARGV;

my $usage = "usage: bind2hosts zone [--skip pattern [--skip pattern2 ...]] [--skipfile file]
	zonename < zonefile

	--skip
		Specify a string to skip in the records.  If an IP-like string is
		used, and the zone is a reverse zone, it will also search for the
		octet-reversed form.  Specify multiple times to skip multiple
		different record patterns.
	--skipfile
		A file containing patterns to skip.  Patterns from the file and
		any --skip arguments are merged.
	zonename
		The name of the zone to import.  Required.

	Zone data will be read from STDIN.
";
if (!$zname) {
  die $usage;
}

if ($skipfile) {
  if (-f $skipfile) {
    open SKIP, "<$skipfile";
    while (<SKIP>) {
      chomp;
      push @skipdefs, $_;
    }
    close SKIP;
  } else {
    warn "skipfile $skipfile requested but it doesn't seem to exist.  Continuing.\n";
  } 
}  

my $rev = 'n';
my $zid;

my %amap;
my %namemap;
my %cmap;

my $dnsdb = new DNSDB;

# need an ultimate fallback for this one
my $defttl = 900;
my $origin = "$zname.";   # to append to unqualified names
my $curlabel;
my $prevlabel;

my $i = 0;

# need to spin up a full state machine-ish thing, because BIND zone files are all about context
while (my $rec = <>) {
  chomp $rec;
  next if $rec =~ /^\s*$/;
  next if $rec =~ /^\s*;/;
  next if $rec =~ /^\s*\)/;	# SOA closing (possibly other records too?)
				# arguably should do some more targeted voodoo when parsing the SOA details

  my $skipflag = 0;
  foreach (@skipdefs) {
    if ($rec =~ /\Q$_\E/) {
      $skipflag = 1;
      # might want to do something with the skipped records someday
    }
  }
  next if $skipflag;

  if (my ($macro,$mdetail) = ($rec =~ /^\s*\$(TTL|ORIGIN|INCLUDE)\s+(.+)/) ) {
    # macro sort of thing;  $TTL and $ORIGIN most common.  $INCLUDE is a thing, expect it to be rare in live use tho
    if ($macro eq 'TTL') {
      # irrelevant for a hosts file
    } elsif ($macro eq 'ORIGIN') {
      # $ORIGIN supports cascading/nesting, by watching for fully-qualified names vs partial names.
      if ($mdetail =~ /\.$/) {
        $origin = $mdetail;
      } else {
        # append current origin to unqualified origin
        $origin = "$mdetail.$origin";
      }
    }
##fixme: should arguably handle $INCLUDE
# probably NOT going to handle $GENERATE, since that type of record set is best handled in DNS
    next;
  }

  # yay for special cases
  $origin = '' if $origin eq '.';

  my $origrec = $rec;

  # leading whitespace indicates "same label as last record"
  if ($rec =~ /^\s/) {
    $curlabel = $prevlabel;
  } else {
    ($curlabel) = ($rec =~ /^([\w\@_.-]+)\s/);
  }

  # magic name!
  $curlabel = "$zname." if $curlabel eq '@';

  # append $ORIGIN if name is not fully qualified.
  if ($curlabel !~ /\.$/) {
    $curlabel .= ".$origin";
  }

  # check for zone scope.  skip bad records.
  if ($curlabel !~ /$zname.$/) {
    warn "bad record $origrec, maybe bad \$ORIGIN?\n";
    next;
  }

  # trim the label, if any
  $rec =~ s/^([\w\@_.-]*)\s+//;

  # now that we've collected and trimmed off the record's label, unpack the class, TTL, and type.
  # class and TTL may be omitted, and may appear in either class,TTL or TTL,class order.
  my $nc = 0;
  my %seenatoms;
  # we don't actually use these but we have to recognize them
  my $class = 'IN';
  # not preset as we need to detect whether it's present in the record
  my $ttl;
  my $type;
  my $badrec;
  my $curatom = 'class';
  eval {
    for (; $nc < 3; $nc++) {
      last if $type;	# short-circuit if we've got a type, further data is record-specific.
      my ($atom) = ($rec =~ /^([\w\d.]+)\s/);
      # should be safe?
      last if !$atom;
      if ($atom =~ /^\d+$/) {
        if (defined($ttl)) {
          # we already have a TTL, so another all-numeric field is invalid.
          die "bad record ($origrec)\n";
        } else {
          if ($curatom ne 'class' && $curatom ne 'ttl') {
            die "bad record ($origrec)\n";
          }
          $curatom = 'ttl';
          $ttl = $atom;
        }
      }
      elsif ($atom =~ /^IN|CS|CH|HS$/) {
        if ($atom =~ /CS|CH|HS/) {
          die "unsupported class $atom in record ($origrec)\n";
        }
        $curatom = 'class';
        $class = $atom;
      }
      elsif ($atom =~ /^[A-Z\d-]+/) {
        # check against dnsadmin's internal list of known DNS types.
        if ($reverse_typemap{$atom}) {
          $type = $atom;
        } else {
          die "unknown type $atom in record ($origrec)\n";
        }
        $curatom = 'type';
      }
      $rec =~ s/^$atom\s*//;
    } # class/type/TTL loop
  };
  if ($@) {
    warn $@;
    next;
  }

  $ttl = $defttl if !defined($ttl);

  # Just In Case we need the original rdata after we've sliced off more pieces
  my $rdata = $rec;
  $prevlabel = $curlabel;

##fixme:  squish this down for this script since SOA records are irrelevant
  if ($type eq 'SOA') {
    my ($ns, $adminmail) = ($rdata =~ /([\w.]+)\s+([\w.]+)\s+\(/);
    die "Can't parse gibberish SOAish record: '$rdata'/'$origrec'\n" if !$ns;
    $rdata =~ s/([\w.]+)\s+([\w.]+)\s+\(\s*//;

    # There are probably more efficient ways to do this but the SOA record
    # format is essentially character based, not line-based.
    # In theory the SOA serial etc may be spread over "many" lines, bounded by ().

    # Parse fields from $rdata if present
    my @soabits;
    my @soafirst = split /\s+/, $rdata;
    while (my $f = shift @soafirst) {
      last if $f !~ /^\d/;
      push @soabits, $f;
    }

    # Read more lines if we don't have enough SOA fields filled
    while (scalar(@soabits) < 5) {
      my $tmp = <>;
      $tmp =~ s/^\s*//;
      my @tmpsoa = split /\s+/, $tmp;
      while (my $f = shift @tmpsoa) {
        last if $f !~ /^\d/;
        push @soabits, $f;
      }
      if (scalar(@soabits) == 5) {
        last;
      }
    }
  } # SOA

##fixme:  trim dupes if possible

  elsif ($type eq 'A') {
    # need the name->IP map so we can reverse-map the CNAMEs on output
    push @{$amap{$curlabel}}, $rdata;
    $namemap{$rdata}{$curlabel}++;
  } # A record

  elsif ($type eq 'CNAME') {
    $cmap{$curlabel} = $rdata.($rdata =~ /\./ ? '' : ".$origin");
  } # CNAME record

  # all other record types are irrelevant for a hosts file

} # <STDIN>

#print Dumper \%cmap;

# Walk the CNAME list and see if we can match the targets in-zone.
# Out-of-zone CNAMEs are out of scope for this conversion.
foreach my $cn (sort keys %cmap) {
  my $targ = $cmap{$cn};
#print "dbg: ".Dumper($targ);
  my @targlist;
#  push @targlist, $targ;  # mostly for error reporting
my $dangle = 0;

my $depth = 1;  # CNAME -> A

# check this as a loop for consistent fail/break conditions.  bonus:  may allow user choice for CNAME depth?
  for (; $dangle == 0; $depth++) {

#print "d:$depth  checking $cn -> $targ\n";
push @targlist, $targ;

  # Depth limit.  If made user-selectable should arguably set a hard
  # limit because deeply chained CNAMEs are Baaaaad, mmkaay?
  if ($depth >= $maxdepth) {
    warn "CNAMEs too deeply chained, skipping: $cn => ".join(' => ', @targlist)."\n";
    last;
  }

# break if CNAME target is in the A record list
  last if $amap{$targ};
  if ($cmap{$targ}) {
#     note the new target
    my $tmpcn = $targ;
    $targ = $cmap{$tmpcn};
#print "    chaining $tmpcn to new $targ\n";
  } else {
#     target is either out of zone or doesn't exist
    $dangle = 1;
    last;
  }


#warn "chained cname $cn => $targ\n";
    # CNAME to another CNAME
#$tmpcn => $targ\n";

#  last if $dangle;

#      if (!$amap{$targ}) {
#        if ($cmap{$targ}) {
#          $tmpcn = $targ;
#          $targ = $cmap{$tmpcn};
#push @targlist, $targ;
#warn "  chain target $cn => ".join(' => ', @targlist).
#	"\n";
#        } else {
#          warn "skipping dangling CNAME $cn => $targlist[0] => $targlist[1]\n";
#          next;
#        }
#      }
#    } else {
#      # skip depth-3 (?) CNAMES;  any such zone does not belong as a hosts file anyway
#      warn "skipping dangling CNAME $cn => $targ\n";
#      next;
#    }
#  }


  } # CNAME recursion loop

next if $dangle;

#print "    chain target $cn => ".join(' => ', @targlist)."\n";
  if ($amap{$targ}) {
    # absent any criteria, we use the first IP a name was associated with
    my $targip = $amap{$targ}[0];
    $namemap{$targip}{$cn}++;
  } else {
  }
} # each %cmap

#print Dumper \%amap;
#foreach my $n (keys %amap) {
#  foreach my $ip (keys %{$amap{$n}}) {
#print "$ip\t$n\n";
#    push @{$namemap{$ip}}, $n unless grep $n, @{$namemap{$ip}};
#    $namemap{$ip}{$n}++;
#  }
#}

#print Dumper \%namemap;
foreach my $ip (sort keys %namemap) {
  print "$ip\t".join(' ', sort keys %{$namemap{$ip}})."\n";
}
