From: James Jacobs <jajacobs@odwin.ucsd.edu>
Subject: cio3
To: pmy2n@poe.acc.virginia.edu (Patrick M. Yott)
Date: Thu, 18 Sep 1997 8:17:06 PDT
Reply-To: jajacobs@odwin.ucsd.edu

patrick,

another for iassist ftp archive.

here is a newer version of cio ("cio3") that only works with perl5.
it has some added features including reading filenames on command line
or from stdin, and checking to make sure a file has newlines if no
lrl is specified.

----------------------------------------------------------------------
#!/opt/perl5/bin/perl

	# perl4.001 #!/usr/local/bin/perl
'di';
'ig00';
# The two quoted strings above are no-ops to perl (string constants in
# void context).  This file is also fed to nroff by sub Usage
# ("nroff -man $0"); the matching ".00;" line and the closing 'di near
# the end of the script are commented there as finishing the .ig and
# the diversion, which is how nroff skips the perl code and reaches the
# manual page that follows __END__.  Keep these lines as they are.

# $using_wrapman is set here but is not read anywhere else in this file.
$using_wrapman = 1;

# perl 5 version


######################################################################
# $Source: /ssdb/jajacobs/bin/RCS/cio3,v $
# $Author: jajacobs $
# $Revision: 1.1 $
# $Date: 97/09/18 08:08:23 $
# $State: Exp $
######################################################################

######################################################################
# main program.
#
# After option parsing (&setup) and input selection (&infile) either
#   - every file named on the command line is opened and reported on
#     in turn (the previous code opened each file but only ran the
#     analysis once, after the loop, so only the last file was
#     actually examined), or
#   - with -s, standard input is examined once.
######################################################################

&setup ;
&infile;
if ($infile) {

	# no file operands at all: show the usage examples and stop.
	if (!@ARGV) {
		print "############################
	NO INFILE! $file 

examples: 

   cio  datafile 
   cio < datafile -s
   cat datafile | cio  -s
   zcat compressed_datafile.Z | cio  -s



";
		exit;
	}

	while (@ARGV) {
		$file = shift @ARGV;

		if (! -e $file) {
			print "
ERROR: no file \"$file\"!  cio exiting.

";
			exit (1);
		}

		# three-argument open so that odd file names (leading ">",
		# trailing "|", ...) are never treated as a mode or a command.
		# $IN holds the handle name "IN" (see sub infile).
		open ($IN, '<', $file)
			|| die "could not open \"$file\": $!\n";

print "
############### EXAMINING FILE: $file ###############

";

		# per-file state that sub clean_up does not reset; clear it
		# here so that one file's leftovers cannot leak into the next
		# file's report.
		$remainder   = '';
		$chunk_count = 0;
		$cr          = 0;
		$bail        = 0;
		$e_bail      = 0;
		@bad_rec  = ();
		@bad_char = ();
		@lastline = ();
		@lastrec  = ();
		@lastcont = ();

		&get_record ;
		&last_recs;
		&end_rpt ;	# prints the report and calls &clean_up

		close ($IN);
	}

} else {

	# -s: a single pass over standard input.
	&get_record ;
	&check_for_bad_chars ;
	&last_recs;
	&end_rpt ;

}

sub setup {
######################################################################
# set up vars and parse the command line.
#
# sets the globals used by the rest of the script:
#	$chunk		bytes per read (the lrl when -l is given)
#	$width		display width ($width - 5 columns of data are shown)
#	$rec, $minrec, $maxrec, $n, $f, $a	counters / ring indexes
#	$max_bad, $count, $end, $e_rec		derived from -f, -c, -e
# the option values themselves end up in $opt_X (set by getopts).
#
# command line options:
#	c#	"count" show # lines at beginning and end 
#	e	$e_rec not used
#	f#	force cio to process # of "bad" records
#	h	show help screen
#	l	logical record length (no newlines)
#	m	show man page
#	n#	show every #th record
#	q	quick option: only first 5 and last 5 recs examined
#	s	take input from stdin
#	v 	version number
#
# exits after -h, -m, -v, and (status 1) on a bad option value.
######################################################################

	my ($pair, $flag, $val);

	$chunk = 32767 ;
	$width = 75;
	$rec = 0; $maxrec = 0; $minrec = 99999;
	$n = 1;
	$f = 1;
	$a = 0;
	$zero = "end record";
	$version = "2.1 (changes in progress)";

	# Getopt::Std is the perl5 replacement for the perl4 "getopts.pl"
	# library, which is no longer shipped with perl.  Called this way it
	# still sets the $opt_X globals in the caller's package.
	require Getopt::Std;
	Getopt::Std::getopts('n:l:f:c:e:hqmsv') ;

	if ($opt_h) {
		&help;
	}

	if ($opt_m) {
		&Usage();
	}

	# every flag that takes a value takes a whole number.  Rejecting
	# anything else here avoids "Illegal modulus zero" (-n) and a ring
	# buffer that never wraps (-c) later on.
	foreach $pair (['n', $opt_n], ['c', $opt_c], ['f', $opt_f],
		       ['e', $opt_e], ['l', $opt_l]) {
		($flag, $val) = @$pair;
		next unless defined $val;
		if ($val !~ /^\d+$/) {
			print "ERROR: bad value for -$flag flag: $val\n";
			exit (1);
		}
	}

	$max_bad = $opt_f ? $opt_f : 5;

	$count = $opt_c ? $opt_c : 5;
	$end   = $count + 1;		# ring buffer slots are 1 .. $end-1

	$e_rec = $opt_e ? $opt_e : 5;

	# -l: the record length must be 1 .. 32766.  "defined" rather than
	# truth so that "-l 0" is reported instead of silently ignored.
	if (defined $opt_l) {
		if ( $opt_l > 0  && $opt_l < 32767 ) {
			$chunk = $opt_l ;
		} else {
			print "ERROR: bad value for -l flag: $opt_l\n";
			exit (1);
		}
	}

	if ($opt_v) {
		print "
	cio version:    $version
	program name:   $basename$0
	perl version:   $]

";
		exit ;
	}

} # end of sub setup

sub infile {
	# Choose the input handle name kept in $IN.  Without -s the data
	# comes from files named on the command line: the handle is "IN"
	# and $infile is set to 1.  With -s it is "STDIN" and $infile is
	# left untouched.
	unless ($opt_s) {
		$IN = "IN" ;
		return $infile = 1;
	}
	return $IN = "STDIN";
} # end of sub infile



sub get_record {
######################################################################
# read the input ($IN) in chunks of $chunk bytes, cut it into records
# and hand each record (in $record, numbered by $rec) to
# &measure_record and &bad_chars.  &print_header is called once,
# before the first record.
#
# with -l every chunk read is one record.
# without -l records are newline delimited; text after the last
# newline of a chunk is carried over in $remainder and prefixed to the
# next chunk.  A final line that has no trailing newline is still
# counted as a record (it used to be dropped).
#
# exits with status 1 when there is no -l and a chunk has no newline,
# unless that chunk is the short tail of a file that did have newlines
# earlier on.
######################################################################

	my (@pieces, $piece);
	my $chunks_here = 0;	# chunks read by this call

	$remainder = '';	# never inherit a tail from an earlier input

	while (read ($IN, $tmp, $chunk) ) {
		$chunk_count++ ;
		$chunks_here++ ;

		if ($opt_l) {
			# fixed length records: one chunk is one record
			if ($rec == 0) {
				&print_header ;
			}
			$rec++ ;
			$record = $tmp;
			&measure_record ;
			&bad_chars ;
			next;
		}

		if (index ($tmp, "\n") < 0) {
			$length = length($tmp);

			# a short chunk after the first one is the end of the
			# file: keep it as (part of) the unterminated last line.
			if ($chunks_here > 1 && $length < $chunk) {
				$remainder .= $tmp;
				next;
			}

			print STDERR "
ERROR: No Newlines!  cio exiting.
	Chunk number $chunk_count was $length bytes, had no newline delimiter,
	and no lrl set.

	Use the -l flag to set the logical record length of files
	that have no newline characters.

	(cio reads in chunks of $chunk bytes and needs either a
	newline in each chunk or an lrl set to less than $chunk.
	for more information type: 
		$0 -m 
	for the man page.)
";
			exit (1);
		}

		if ($rec == 0) {
			&print_header ;
		}

		# the -1 limit keeps trailing empty fields, so the last piece
		# is always the text after the final newline (maybe empty).
		@pieces = split (/\n/, $remainder . $tmp, -1);
		$remainder = pop (@pieces);

		foreach $piece (@pieces) {
			$record = $piece;
			$rec++;
			&measure_record ;
			&bad_chars ;
		}

	} # end of while read

	# flush a last line that was not newline terminated
	if (!$opt_l && length ($remainder)) {
		if ($rec == 0) {
			&print_header ;
		}
		$record = $remainder;
		$remainder = '';
		$rec++;
		&measure_record ;
		&bad_chars ;
	}

} # end of sub get_record

sub measure_record {
######################################################################
# record the length of the current record ($record, number $rec),
# remember it in the "last records" ring and print it when it is one
# of the sample records.
######################################################################

	my $shown = $width - 5;		# columns of data that get displayed

	$len = length($record); 

	# running minimum and maximum record length
	$minrec = $len if ($len < $minrec);
	$maxrec = $len if ($len > $maxrec);

	# %length counts the records seen of each length.  The first time
	# a length turns up, note which record had it in $onerec[$len]
	# and bump $a, the number of distinct lengths.
	$length{$len}++;
	if ($length{$len} == 1) {
		$onerec[$len] = $tailing ? "last few" : $rec ;
		$a++;
	}

	# slot $n of the ring keeps the truncated text, the record number
	# and the end marker: '>' when the record was cut, '|' otherwise.
	$lastline[$n] = substr($record, 0, $shown);
	$lastrec[$n]  = $rec;
	$lastcont[$n] = (length($record) > $shown) ? '>' : '|';

	# print every -n'th record and each of the first $count records
	if ( ($opt_n && !($rec % $opt_n)) || $rec <= $count ) {
		$line = $lastline[$n];
		$cont = $lastcont[$n];
		write;
	}

	# advance the ring index, wrapping from $end-1 back to 1
	$n++;
	$n = 1 if ($n == $end);

}  # end of sub measure_record 


sub check_for_bad_chars {
	# Deliberately does nothing.  The main program still calls it in
	# -s mode; the per-record checks are done by sub bad_chars, which
	# sub get_record calls.
	return;
}



sub help {
	# Build the help screen ($version, $basename, $0 and $] are
	# interpolated), print it on standard output and leave the program
	# with exit status 1.
	my $screen = <<EOF;

cio version:    $version
program name:   $basename$0
perl version:   $]


This /usr/bin/perl script checks data files.  Reads files on command
line or reads from STDIN if given the -s flag.  Writes to STDOUT.  
Prints part of file for examination. Assumes datafile has newlines, 
unless -l flag used.  If -l flag is not used, checks to make sure 
there are newlines and bails if it cannot find them.
Reports:  - minimum and maximum record lengths,
          - number of records of each record length,
          - for records of unique record length reports the record number.
  
By default, prints first 70 columns of first 5 records.  (Inserts a "|" at 
the end of lines 70 or shorter and a ">" at the end of lines or record length 
71 and longer.   Also prints the last 5 records.
Flags:	
	-c#   "count" show # lines at beginning and end 
	-e#  specify number of last lines instead of last 5
 	-f#  force cio to processs # of "bad" records
	-h   prints this help screen.
	-l#  give lrecl for files with no newlines.
	-m   print man page.
	-n#  prints 1st 5 lines and every nth record. 
	-q   quick mode: prints only 5 lines, (opt.: first nth record) and rpt.
	-s   read from stdin
	-v   prints cio version.
 Examples:
   $basename$0  datafile
   $basename$0  datafile -q 
   $basename$0  datafile -q -n 500
   $basename$0  datafile -l80 -n 500
   $basename$0  datafile > reportfile
   zcat compressed_datafile.Z | $basename$0 -s
   zcat compressed_datafile.Z | $basename$0 -s -n 1000 | more
   cat datafile | $basename$0  -s
EOF

	print $screen;
	exit(1);

} #end of help.

######################################################################
# sub for writing last records
######################################################################
sub last_recs {
	# Print the most recent records kept in the ring @lastrec /
	# @lastline / @lastcont (slots 1 .. $end-1, filled by sub
	# measure_record), oldest first, through format STUFF2.  Nothing is
	# printed with -q.
	#
	# Only as many rows as records were actually read ($rec) are
	# written; the old code always wrote $end-1 rows and so showed
	# empty or stale slots for short inputs.

	my ($slots, $shown, $newest);

	return if ($opt_q);

	$slots = $end - 1;
	$shown = ($rec < $slots) ? $rec : $slots;

	# $n is the slot that would be written next, so the newest record
	# sits one slot behind it (wrapping from 1 back to $slots).
	$newest = ($n <= 1) ? $slots : $n - 1;

	# format STUFF2 reads slot $f: start at the oldest row to show.
	$f = $newest - $shown + 1;
	$f += $slots if ($f < 1);

	if ($opt_c) {
		print "\nLAST $opt_c RECORDS: \n\n";
	}
	else {
		print "\nLAST ", $end-1, " RECORDS:\n\n";
	}

	#################################################################
	# set up STUFF2 as output format for write.
	#################################################################
	$~ = "STUFF2";

	for ($z = 1; $z <= $shown; $z++) {
		write ;
		$f++;
		$f = 1 if ($f == $end);
	}

} #end of last_recs sub


sub end_rpt  {
######################################################################
#  do end report: bottom ruler, min/max record length, record count,
#  the table of record lengths, the notes on case, tabs, carriage
#  returns and bad characters, then reset the counters (&clean_up).
#
#  changes from the earlier version of this sub:
#   - the ruler label read "10 30 30"; it now matches print_header.
#   - the EBCDIC note no longer calls &clean_up and &end_rpt from
#     inside the report; that printed a second, empty report and
#     wiped the counters the rest of this report still needed.
#   - the length table is rebuilt from scratch on every call.
######################################################################

	my ($pair);

	##################################################
	# if we bailed because we were using opt_l
	# and we read more than 32768 bytes without encountering 
	# newline char, skip the ending printing...
	# call this the no_newline exclusion
	##################################################
if (!$no_newline) {

	$~ = 'STDOUT';

	##################################################
	# print bottom ruler, min and max rec lengths
	# and number of records checked or found
	##################################################

	print "------ +----*----+----*----+----*----+----*----+----*----+----*----+----*----+\n";
	print "                10        20        30        40        50        60 \n";

	print "\nMax rec len:	", $maxrec, "\n";
	print "Min rec len:	", $minrec, "\n";

	if ($opt_q||$bail||$e_bail) {
		print "Total number of records checked: ", $rec, "\n";
	} else {
		print "Total number of records: ", $rec, "\n" ;
	}
	print "\n";

	if ($cr) {
		print "------------------------------------------------\n";
		print "NOTE: This file contains $cr Carriage Returns.\n";
		print "------------------------------------------------\n";
	}

	######################################################################
	# @l gets one "length:count" string per distinct record length.
	# sorted by count (sub sortlengths) and reversed, the most frequent
	# record size prints first and the least frequent last.
	######################################################################

	@l = ();
	foreach $x (keys %length) {
		push (@l, "$x:$length{$x}");
	}

	@sorted = reverse sort sortlengths @l ;

	######################################################################
	# formats ONE and STDOUT read $s_count, $s_length and
	# $onerec[$s_length]; ONE is the wording for a length that occurs
	# exactly once.
	######################################################################

	foreach $pair (@sorted) {
		($s_length, $s_count) = split(/:/, $pair);
		$~ = ($s_count == 1) ? 'ONE' : 'STDOUT';
		write ;
	}
	@sorted = ();
	@l = ();

	############################################################
	# print notes on lowercase, uppercase and tabs if found
	############################################################
	print "\n";

	if ($lowercase) {
		print "NOTE: Data has lowercase characters beginning with record: $lowercase_rec:\n----\n$lowercase_\n----\n";
	}
	if ($uppercase) {
		print "NOTE: Data has uppercase characters beginning with record: $uppercase_rec:\n----\n$uppercase_\n----\n";
	}
	if ($tab) {
		print "NOTE: This file has tabs beginning in record $tab_rec: \n---\n$tab_\n---\n";
	}

	############################################################
	# warn when nearly everything looks like EBCDIC
	# ($total is checked so that the division can never fail)
	############################################################
	if ($e_count && !$e_bail && $total) {
		$e_percent = ($e_count / $total) * 100;
		if ($e_percent >= 90) {
			$bail = 1;
print "###########
NOTE: $e_percent% of the $total characters examined appear 
to be EBCDIC, not ASCII.  
If you wish to re-analyze this file and let cio process more records, 
use the -f flag to force cio to analze this file anyway.  
Use the -f flag to specify how many records to analyze before ceasing.\n";
		}
	}

	############################################################
	# print report on bad chars found, if any
	# (@bad_rec and @bad_char are filled from index 1 by bad_add)
	############################################################
	for ($g = 1; $g <= $bad_n; $g++) {
		printf ("NOTE: Record %d has a bad character: (octal:%3o)\n", $bad_rec[$g], $bad_char[$g]);
	}

	############################################################
	# print message if the file looks like ebcdic
	############################################################

	if ($e_count) {
		if ($e_count == 1) {
print "NOTE:	out of a total of $total characters checked,
	there is $e_count character that may be EBCDIC\n";
		} else {
print "NOTE:	out of a total of $total characters checked,
	there are $e_count characters that may be EBCDIC\n";
		}
	}

	############################################################
	# if no bad chars found, print good news
	############################################################

	if (!$bad_n && !$e_bail) {
		print "NOTE:  Good News!  cio didn't find any bad characters!\n";

		if ($cr) {
			print "...except for those $cr Carriage Returns!\n";
		}
	}

	############################################################
	# if we're reading files named on command line, 
	# print that we're done with this one.
	############################################################

	if ($file) {
print "
########## END OF REPORT FOR FILE $file ##########

";
	}

} #  end of no_newline exclusion

	&clean_up;

} # end of sub end_rpt 


######################################################################
# subroutine for sorting. 
######################################################################

sub sortlengths {
	# sort comparator for "length:count" strings: orders the sort
	# variables $a and $b numerically by the count, the part after
	# the colon.
	my $count_a = (split(/:/, $a))[1];
	my $count_b = (split(/:/, $b))[1];
	return $count_a <=> $count_b;
}


######################################################################
sub print_header {
	# Select format STUFF for the following write calls and print the
	# column-number line with the ruler underneath it.
	$~ = "STUFF";

	print join ("\n",
		"                10        20        30        40        50        60 ",
		"------ +----*----+----*----+----*----+----*----+----*----+----*----+----*----+",
		"");
}



###############################################################################
# sub clean_up resets values in case there is another file to examine.
###############################################################################

sub clean_up {
	# Put the option-derived limits and every per-input counter back to
	# its starting value so that another input can be examined.
	#
	# The earlier version emptied the hashes %onerec and %bad_char_rec,
	# but the script stores that data in the arrays @onerec and
	# @bad_char_rec, and it reset $cr_add although the counter that is
	# reported is $cr.  Both spellings are cleared here, together with
	# the state that was previously missed.

	# limits derived from the command line (same defaults as sub setup)
	$max_bad = $opt_f ? $opt_f : 5;
	$count   = $opt_c ? $opt_c : 5;
	$end     = $count + 1;
	$e_rec   = $opt_e ? $opt_e : 5;

	$width = 75;
	$rec = 0; $maxrec = 0; $minrec = 99999;
	$n = 1;
	$f = 1;
	$a = 0;		# number of distinct record lengths (see measure_record)

	$b = 0;
	$bad_n = 0;
	$bad_count = 0;
	$bail = 0;
	$chunk_count = 0;
	$cr = 0;
	$cr_add = 0;
	$e_bail = 0;
	$e_count = 0;
	$i = 0;
	$len = 0;
	$lowercase = 0;
	$lowercase_rec = 0;
	$lowercase_ = 0;
	$no_newline = 0;
	$one_bad = 0;
	$remainder = '';
	$s_count = 0;
	$s_length = 0;
	$tab = 0;
	$tab_rec = 0;
	$tab_ = 0;
	$total  = 0;
	$uppercase = 0;
	$uppercase_rec = 0;
	$uppercase_ = 0;

	# record length statistics
	%length = ();
	@onerec = ();
	%onerec = ();

	# bad character bookkeeping (filled by sub bad_add)
	@bad_char_rec = ();
	%bad_char_rec = ();
	@bad_rec = ();
	@bad_char = ();

	# ring of the last records shown by sub last_recs
	@lastline = ();
	@lastrec = ();
	@lastcont = ();

} #end of sub clean_up


sub bad_chars {
######################################################################
# examine the current record ($record, length $len, number $rec) for
# bad bytes.
#
#  - adds $len to $total, the number of characters examined.
#  - remembers the first record with a lowercase letter, an uppercase
#    letter and a tab.
#  - a carriage return (octal 015) is counted in $cr and is not
#    treated as a bad character.  (this used to call &cr_add, a sub
#    that does not exist in this script.)
#  - every other control byte (000-037 except tab), and every byte
#    from 177 to 377, is reported through &bad_add, once per record
#    for each distinct byte, in ascending byte order.
#  - bytes that are letters or digits in EBCDIC also bump $e_count.
#  - once $max_bad records have been read, sets $bail and $e_bail if
#    at least 90% of what was examined looks like EBCDIC.
######################################################################

	my (%present, $byte, $pattern);

	$text = "has a bad character: ";
	$total = $total + $len ;

	if (!$lowercase && $record =~ /[a-z]/) {
		$lowercase = 1;
		$lowercase_rec = $rec; 
		$lowercase_ = $record ;
	}

	if (!$uppercase && $record =~ /[A-Z]/) {
		$uppercase = 1;
		$uppercase_rec = $rec;
		$uppercase_ = $record ;
	}

	if (!$tab && $record =~ /\011/) {
		$tab = 1;
		$tab_rec = $rec;
		$tab_ = $record;
	}

	# one pass over the record collects the distinct suspicious bytes
	while ($record =~ /([\000-\010\012-\037\177-\377])/g) {
		$present{$1} = 1;
	}

	# single bytes sort in byte value order
	foreach $byte (sort keys %present) {

		if ($byte eq "\015") {
			$cr += ($record =~ tr/\015//);
			next;
		}

		# EBCDIC a-i, j-r, s-z, A-I, J-R, S-Z and 0-9
		if ($byte =~ tr/\201-\211\221-\231\242-\251\301-\311\321-\331\342-\351\360-\371//) {
			$e_count++;
		}

		# sub bad_add reads the offending byte from $&, so match it
		# against the record once more, as an octal escape.
		$pattern = sprintf ('\\%03o', ord ($byte));
		if ($record =~ /$pattern/) {
			&bad_add;
		}
	}

	# $total is still 0 while only empty records have been seen; the
	# check keeps the division below from failing in that case.
	if ($rec >= $max_bad && $total > 0) {
		$e_percent = ($e_count / $total) * 100;
		if ($e_percent >= 90) {
			$bail = 1;
			$e_bail = 1;
		}
	}

	# $one_bad is raised by bad_add; count this record once
	if ($one_bad) {
		$bad_count++;
		$one_bad = 0;
	}

} # end of sub bad_chars


######################################################################
# sub bad_add called by sub bad_chars
######################################################################
sub bad_add {
		$one_bad = 1;
		$bad_n++;
		$bad_char_rec[$bad_count] = $record;
		$stuff = unpack (C,$&);
 		$bad_rec[$bad_n] = $rec; 
		$bad_char[$bad_n] = $stuff ;

} # end of sub bad_add



######################################################################
# formats for writes:
######################################################################

# STDOUT: one row of the record length table in sub end_rpt, used for
# a length shared by several records: the count, the length and the
# $onerec entry for that length.
format STDOUT =
There are @>>>>>>>>>>> records of length @>>>>>>>>>. First one is: @<<<<<<<<<
$s_count, $s_length, $onerec[$s_length]
.

# ONE: the same row, worded for a length that occurs exactly once
# (sub end_rpt selects it when $s_count == 1).
format ONE =
There is  @>>>>>>>>>>> record  of length @>>>>>>>>>. Rec. number: @<<<<<<<<<
$s_count, $s_length, $onerec[$s_length]
.

# STUFF: a sample record as written by sub measure_record: the record
# number, then the truncated text followed by its end marker.
# Selected by sub print_header.
format STUFF =
@>>>>> |@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$rec, $line.$cont 
.

# STUFF2: the same layout fed from slot $f of the last-records ring;
# selected and written by sub last_recs.
format STUFF2 =
@>>>>> |@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$lastrec[$f], $lastline[$f].$lastcont[$f]
.

# STUFF3: not selected anywhere in the code visible in this file, and
# $LLine / $llastcont are not assigned there either.
format STUFF3 =
@>>>>> |@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$rec, $LLine.$llastcont
.


sub Usage {
	# Show the manual page embedded in this script by running it
	# through "nroff -man" and a pager.  Does not return: the process
	# is replaced by the shell pipeline, or dies if that cannot be
	# started (it used to fall through and carry on with the data
	# check).
	#
	# Any arguments are printed on STDERR as a message first, and the
	# user is asked to hit return before the page is shown.
	#
	# pager: $MANPAGER, else $PAGER, else what `which more` finds,
	# else /usr/bin/more.

	my ($pager, $script);

	local $, = ": ";	# joins the name and the message below

	$w_pager = `which more 2>/dev/null` ;
	$w_pager = '' unless defined $w_pager;
	chomp ($w_pager);	# drop the newline that which prints

	$pager = ($ENV{'MANPAGER'} || $ENV{'PAGER'} || $w_pager || '/usr/bin/more');

	if (@_) {
		print STDERR ($progName || $0), @_;
		print "\n";
		print "Hit <return> for manpage, interrupt to abort...";
		$_ = <STDIN>;
	}

	# single-quote the script path for the shell so that a path with
	# blanks or other shell characters still works.  The pager is left
	# alone because it may carry options of its own.
	($script = $0) =~ s/'/'\\''/g;

	exec ("nroff -man '$script' | $pager")
		or die "$0: could not run nroff to show the man page: $!\n";

}
###############################################################

    # These next few lines are legal in both Perl and nroff.

.00;                       # finish .ig
 
'di           \" finish diversion--previous line must be blank
.nr nl 0-1    \" fake up transition to first page again
.nr % 0         \" start at page 1
'; __END__ ##### From here on it's a standard manual page #####
.TH cio 1 "January 17, 1994"
.AT 3
.SH NAME
cio \- "Check It Out" -- examines datafiles and reports on contents. 
.SH SYNOPSIS
.B cio [-c#] [-e#] [-f#] [-hmqsv] [-l#] [-n#] [file ...]
.SH DESCRIPTION
.I cio

This /usr/bin/perl script checks a datafile and creates a report on
the contents of the datafile (see below). 

Cio assumes datafile has newlines, unless -l flag is used (see below).
The terms "record" and "line" are used synonymously in this
description of cio.

Cio reads the files named on the command line, or reads from stdin when the -s flag is given.

Cio creates a report on the datafile it examines and writes this
report to stdout.  The report includes: the minimum and maximum record 
lengths in a datafile, the number of records of each length, and, for 
records that have a unique length, it reports the record number. The 
report also prints part of file for examination. 

By default the report prints the first 70 columns of the first 5
records and the last 5 records.  The report prints the "|" at the 
end of each record of 70 or fewer characters and a ">" at the end of 
lines of record length 71 and longer.  Additional lines may be printed 
by using
the 
.B -e
and
.B -n 
flags (see below).

.SH OPTIONS
	-e#	Instead of including the last 5 records, the report
		includes the last # of records specified by the -e
		flag.

	-h	Prints short help message. 

	-l#	By default, cio assumes that a file being examined has
		a newline character delimiting each record.  For those
		files that do not have newline characters but that do
		have fixed length records, the -l flag followed by a
		number may be used to specify the "logical record length"  
		(lrl) The number following the -l is the record length used
		by cio to examine the file.  A number must be specified.

	-m	Prints this man page.

	-n#	The report includes the first 70 characters of the
		first 5 records automatically.  Additional records may be
		included by using the -n option followed by a number.
		Cio will then print every nth record in the file as
		specified by the number following the -n flag.  A
		number must be specified.

	-q	Large files may take a few minutes to process.  The -q
		flag is the "quick" option.  It produces a report with
		the first 5 records of the file, plus the first -n
		record (if specified) and reports record lengths based
		on its examination of the first n + 5 records only.

	-v  	Prints cio version number, program name, perl version.

.SH EXAMPLES

   cio -h

	Prints brief help message.

   cio -m 

	Prints this man page.

   cio < datafile

	This reads "datafile" and sends report to stdout. The report
	includes the first 70 characters of the first 5 records. 

   cio < datafile > reportfile

	As above, but writes report to "reportfile".

   cat datafile | cio 

	Since cio reads from stdin, it can take the output of any
	pipe.

   zcat compressed_datafile.Z | cio 

	As above, but reads data in "compressed_datafile.Z". 

   zcat compressed_datafile.Z | cio | more

	Naturally, reports can be piped through a pager such as
	"more".

   cio < datafile -n 500

	This reads "datafile" and sends report to stdout. The report
	includes the first 70 characters of the first 5 records and
	every 500th record (e.g., records 500, 1000, 1500, etc.) in 
	the file.

   cio < datafile -q 

	This reads "datafile" in the "quick" mode. The report is based
	on an examination of only the first 5 records of "datafile."
	The first 70 characters of records 1-5 are printed in the 
	report.

   cio < datafile -q -n 500

	This reads "datafile" in the "quick" mode. The report is based
	on an examination of only the first 5 records of "datafile."
	The first 70 characters of records 1-5 and record 500 are
	printed in the report.

   cio < datafile -l 80 -n 500

	"datafile" is assumed to be a datafile with no newline
	characters and a fixed record length ("logical record length")
	of 80 characters.  The report includes the first 70 characters 
	of the first 5 records and every 500th record in the file.

.SH ENVIRONMENT
MANPAGER and PAGER, in that order, are consulted to choose the pager used to display this man page (-m flag).
.SH FILES
None.
.SH AUTHOR
Jim Jacobs
.SH "SEE ALSO"

.SH DIAGNOSTICS

.SH BUGS



