Index: /trunk/bind2hosts
===================================================================
--- /trunk/bind2hosts	(revision 806)
+++ /trunk/bind2hosts	(revision 807)
@@ -33,4 +33,6 @@
 my $skipfile;
 my $dryrun = 0;
+# CNAME chain depth
+my $maxdepth = 3;
 
 GetOptions(
@@ -50,5 +52,5 @@
 		octet-reversed form.  Specify multiple times to skip multiple
 		different record patterns.
-	--skip-file
+	--skipfile
 		A file containing patterns to skip.  Patterns from the file and
 		any --skip arguments are merged.
@@ -84,7 +86,4 @@
 my $dnsdb = new DNSDB;
 
-##fixme:  retrieve defttl from SOA record
-#my $zonettl = 900;
-#my $defttl = $zonettl;
 # need an ultimate fallback for this one
 my $defttl = 900;
@@ -101,14 +100,11 @@
   next if $rec =~ /^\s*;/;
   next if $rec =~ /^\s*\)/;	# SOA closing (possibly other records too?)
-			# arguably should do some more targeted voodoo when parsing the SOA details
-#print "$i: ($rec)\n";
-#last if ++$i > 5;
+				# arguably should do some more targeted voodoo when parsing the SOA details
 
   my $skipflag = 0;
   foreach (@skipdefs) {
-#print "skipdbg: $_ =~ $rec\n" if $rec =~ /207/;
     if ($rec =~ /\Q$_\E/) {
       $skipflag = 1;
-#      print "skip: $rec\n";
+      # might want to do something with the skipped records someday
     }
   }
@@ -138,9 +134,4 @@
   my $origrec = $rec;
 
-##fixme:  convert to optional skipfile?
-# skip stale records that have no value
-#next if /^ip-\d+-\d+-\d+/;
-#next if /^ip.pre.fix.\d+.static.colo/;
-
   # leading whitespace indicates "same label as last record"
   if ($rec =~ /^\s/) {
@@ -167,4 +158,6 @@
   $rec =~ s/^([\w\@_.-]*)\s+//;
 
+  # now that we've collected and trimmed off the record's label, unpack the class, TTL, and type.
+  # class and TTL may be omitted, and may appear in either class,TTL or TTL,class order.
   my $nc = 0;
   my %seenatoms;
@@ -176,7 +169,4 @@
   my $badrec;
   my $curatom = 'class';
-
-  # now that we've collected and trimmed off the record's label, unpack the class, TTL, and type.
-  # class and TTL may be omitted, and may appear in either class,TTL or TTL,class order.
   eval {
     for (; $nc < 3; $nc++) {
@@ -204,5 +194,5 @@
         $class = $atom;
       }
-      elsif ($atom =~ /^[A-Z]+/) {
+      elsif ($atom =~ /^[A-Z\d-]+/) {
         # check against dnsadmin's internal list of known DNS types.
         if ($reverse_typemap{$atom}) {
@@ -221,10 +211,8 @@
   }
 
-
   $ttl = $defttl if !defined($ttl);
 
-  my $itype = $reverse_typemap{$type};
+  # Just In Case we need the original rdata after we've sliced off more pieces
   my $rdata = $rec;
-
   $prevlabel = $curlabel;
 
@@ -237,5 +225,5 @@
     # There are probably more efficient ways to do this but the SOA record
     # format is essentially character based, not line-based.
-    # In theory the SOA serial etc may be spread over up to 5 lines, in any combination.
+    # In theory the SOA serial etc may be spread over "many" lines, bounded by ().
 
     # Parse fields from $rdata if present
@@ -266,15 +254,9 @@
   elsif ($type eq 'A') {
     # need the name->IP map so we can reverse-map the CNAMEs on output
-#    $amap{$curlabel}{$rdata}++;
     push @{$amap{$curlabel}}, $rdata;
-# why doesn't this work?  causes ALL cases of multi-named IPs to get skipped, not just duplicates.  O_o
-#    push @{$namemap{$rdata}}, $curlabel unless grep $curlabel, @{$namemap{$rdata}};
-#    push @{$namemap{$rdata}}, $curlabel;# unless grep $curlabel, @{$namemap{$rdata}};
     $namemap{$rdata}{$curlabel}++;
-
   } # A record
 
   elsif ($type eq 'CNAME') {
-##todo:  expand $rdata with $origin if unqualified
     $cmap{$curlabel} = $rdata.($rdata =~ /\./ ? '' : ".$origin");
   } # CNAME record
@@ -284,46 +266,54 @@
 } # <STDIN>
 
-
-
-
 #print Dumper \%cmap;
 
-while (my ($cn, $targ) = each %cmap) {
+# Walk the CNAME list and see if we can match the targets in-zone.
+# Out-of-zone CNAMEs are out of scope for this conversion.
+#
+# Each alias is followed through %cmap, one hop per pass, until it reaches a
+# name with an A record in %amap.  Aliases that resolve are added to %namemap
+# under the first IP of the name they finally point at.
+foreach my $cn (sort keys %cmap) {
+  my $targ = $cmap{$cn};
 #print "dbg: ".Dumper($targ);
-  if (!$amap{$targ}) {
-    if ($cmap{$targ}) {
-warn "chained cname $cn => $targ\n";
-      my $tmpcn = $targ;
-      $targ = $cmap{$tmpcn};
-warn "  chain target $cn => $tmpcn => $targ\n";
-#      next if !$amap{$targ};
-      if (!$amap{$targ}) {
-        if ($cmap{$targ}) {
-#print "  second chain?\n";
-          $tmpcn = $targ;
-          $targ = $cmap{$tmpcn};
-        } else {
-#print "not found\n";
-next;
-        }
-      }
-    } else {
-      # skip depth-3 (?) CNAMES;  any such zone does not belong as a hosts file anyway
-      warn "CNAME $cn => $targ not found\n";
-      next;
-    }
-  }
-#  print Dumper (\%{$amap{$cmap{$cn}}});
-#  print "$cn -> $cmap{$cn}\n";
-#  $amap{$cmap{$cn}}{$cn}++ if $cmap{$cn} =~ /$zname.$/ && $amap{$cmap{$cn}};
-#  print "dangling CNAME $cn\n" if !$namemap{$cmap{$cn}};
-#  print "$cn -> $cmap{$cn}\n";
-#  warn "CNAME $cn out of zone\n" if !$namemap{$cn};
-  my $targip = $amap{$targ}[0];
-#print "$cn => $targ\n" if $targ =~ /(webftp|landing)/;
-#print $targip;
-#  push @{$namemap{$targip}}, $targ unless grep $targ, @{$namemap{$targip}};
-  $namemap{$targip}{$cn}++;# unless grep $targ, @{$namemap{$targip}};
-}
+  my @targlist;     # every name visited so far, in order, for the warning below
+  my $resolved = 0;  # set once the chain reaches a name with an A record
+
+  # $depth counts the hops taken from $cn;  depth 1 is plain CNAME -> A.
+  # The loop has no condition of its own:  every pass either ends the walk
+  # or moves $targ one hop along, and the depth limit bounds the hop count
+  # so a CNAME loop in the zone can't spin forever.
+  for (my $depth = 1; ; $depth++) {
+    push @targlist, $targ;
+
+    # Check for an A record before the depth limit.  A chain that resolves
+    # exactly at $maxdepth is still usable, and must not be reported as
+    # "skipping" and then mapped anyway.
+    if ($amap{$targ}) {
+      $resolved = 1;
+      last;
+    }
+
+    # Not an A record and not another CNAME:  the target is either out of
+    # zone or doesn't exist.  Nothing to map, so drop it quietly.
+    last if !$cmap{$targ};
+
+    # Depth limit.  If made user-selectable should arguably set a hard
+    # limit because deeply chained CNAMEs are Baaaaad, mmkaay?
+    if ($depth >= $maxdepth) {
+      warn "CNAMEs too deeply chained, skipping: $cn => ".join(' => ', @targlist)."\n";
+      last;
+    }
+
+    # CNAME to another CNAME;  note the new target and go around again
+    $targ = $cmap{$targ};
+  } # CNAME chain walk
+
+  next if !$resolved;
+
+  # absent any criteria, we use the first IP a name was associated with
+  my $targip = $amap{$targ}[0];
+  $namemap{$targip}{$cn}++;
+} # each %cmap
 
 #print Dumper \%amap;
