#!/usr/bin/perl
'di';
'ig00';
#
# $Header$
#
# $Log$

$using_wrapman = 1;

##################################################################
# this perl script converts an osiris type 1 data dictionary
# written in ebcdic characters and binary values into all ascii
# characters.
# it includes a table that assigns all printable
# characters to an array; the order of the list is the
# order of the ebcdic character set.  values which have no
# assigned character in ebcdic are assigned ^.
##################################################################

#*******************************************************************************
# Name             : Usage
# Purpose          : Prints out usage message and bails
# Arguments        : 1 - String containing useful error message.
#                        When given, it is printed on STDERR (prefixed with
#                        the program name) and the user is asked to hit
#                        <return> before the manual page is shown.
# Return Value     : None.  The process is replaced by "nroff -man" piped
#                    into the pager; if that exec fails the script dies.
# Calls            : exec
# Globals Accessed : $progName (optional), %ENV (MANPAGER, PAGER), $0
# Notes            : All messages go to STDERR because STDOUT is normally
#                    redirected to the converted dictionary file.
#*******************************************************************************
sub Usage {
    my ($pager, $prog, $script, $reply);

    # Only change the output field separator for the duration of this sub.
    local ($,) = ": ";

    $pager = ($ENV{'MANPAGER'} || $ENV{'PAGER'} || '/usr/bin/more');

    if (@_) {
        # $progName is not assigned anywhere in the visible part of this
        # file, so fall back to the base name of the script.
        $prog = $progName;
        ($prog = $0) =~ s#.*/## unless (defined($prog) && length($prog));

        print STDERR $prog, @_;
        print STDERR "\n";
        print STDERR "Hit <return> for manpage, interrupt to abort...";
        $reply = <STDIN>;
    }

    # The command contains a pipe, so it is run by the shell; quote the
    # script path so that blanks or quotes in it cannot break the command.
    # $pager is left unquoted on purpose: it may carry its own options.
    ($script = $0) =~ s/'/'\\''/g;
    exec("nroff -man '$script' | $pager");

    # exec only returns on failure.
    die "$0: cannot exec nroff: $!\n";
}

#*******************************************************************************
# Name             : ParseArg
# Purpose          : Parses options from the command line (or a
#                    file...).  Expects arguments to be as specified
#                    above.  Leading "-" is optional -- this allows
#                    reading in of arguments from a format file.
# Arguments        : 1 - argument word.
# Return Value     : non-zero if illegal flag.
# Calls            : ReadFormat (if a format-file is specified).
# Globals Accessed : All of the above globals.
# Notes            : Hmmmm.....
#*******************************************************************************
# ParseArg(CMDLINE, ARGS...)
#   CMDLINE - true when ARGS come from the command line.  In that case a
#             word without a leading "-" is a filename and an unknown
#             option is an error.  When false, the leading "-" is optional
#             and unknown words are ignored.
#   ARGS    - the argument words to examine.
# Returns the list of filename arguments, in the order given (possibly
# empty).  Calls Usage (which does not return) for -h / -help and for an
# unknown command-line option.
sub ParseArg {
    my ($cmdline, @args) = @_;
    my @files = ();
    my ($arg, $had_hyphen);

  PARSEARGS:
    while (@args) {
        # Loop on the number of remaining words, not on the truth of the
        # word itself, so that an argument of "0" or "" does not end parsing.
        $arg = shift @args;
        next PARSEARGS unless defined $arg;

        # Strip one optional leading hyphen in both modes.
        $had_hyphen = ($arg =~ s/^-//);

        # If reading ARGV, a word without a leading hyphen is a filename.
        if ($cmdline && !$had_hyphen) {
            push(@files, $arg);
            next PARSEARGS;
        }

        # Parse for -help option
        &Usage() if ($arg =~ /^h(elp)?$/);

        # If we got to here on the command line, it's a flag (it had a
        # preceding minus) that is not a valid option.
        &Usage("Bad Option: $arg") if $cmdline;
    }

    return @files;
}

# MAIN begins here.

######################################################################
# Parse command line arguments.  Anything not beginning with a "-" (with
# the exception of "-" and "--") is taken to be a filename
######################################################################
@files = &ParseArg(1, @ARGV);

# If no files specified, make STDIN the input file.
@files = ("-") unless (@files);

##################################################################
# set up the array of characters for conversion from
# ebcdic to ascii.
##################################################################
# @ebcdic is indexed by EBCDIC byte value (0..255) and holds the ASCII
# character to print for it; '^' marks a value with no assigned
# printable character.  The table is laid out as 16 rows of 16 entries
# (two source lines per row) so that a position can be checked against
# an EBCDIC chart.  Entry 0x5E is ';' (it used to be a second ':',
# which turned every semicolon into a colon; ':' itself is 0x7A).
@ebcdic = (
    # 0x00 - 0x0F
    '^', '^', '^', '^', '^', "\t", '^', '^',
    '^', '^', '^', '^', '^', '^', '^', '^',
    # 0x10 - 0x1F
    '^', '^', '^', '^', '^', '^', '^', '^',
    '^', '^', '^', '^', '^', '^', '^', '^',
    # 0x20 - 0x2F
    '^', '^', '^', '^', '^', "\n", '^', '^',
    '^', '^', '^', '^', '^', '^', '^', '^',
    # 0x30 - 0x3F
    '^', '^', '^', '^', '^', '^', '^', '^',
    '^', '^', '^', '^', '^', '^', '^', '^',
    # 0x40 - 0x4F
    ' ', '^', '^', '^', '^', '^', '^', '^',
    '^', '^', '^', '.', '<', '(', '+', '|',
    # 0x50 - 0x5F
    '&', '^', '^', '^', '^', '^', '^', '^',
    '^', '^', '!', '$', '*', ')', ';', '~',
    # 0x60 - 0x6F
    '-', '/', '^', '^', '^', '^', '^', '^',
    '^', '^', '|', ',', '%', '_', '>', '?',
    # 0x70 - 0x7F
    '^', '^', '^', '^', '^', '^', '^', '^',
    '^', '`', ':', '#', '@', "'", '=', '"',
    # 0x80 - 0x8F
    '^', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
    'h', 'i', '^', '^', '^', '^', '^', '^',
    # 0x90 - 0x9F
    '^', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
    'q', 'r', '^', '^', '^', '^', '^', '^',
    # 0xA0 - 0xAF
    '^', '~', 's', 't', 'u', 'v', 'w', 'x',
    'y', 'z', '^', '^', '^', '[', '^', '^',
    # 0xB0 - 0xBF
    '^', '^', '^', '^', '^', '^', '^', '^',
    '^', '^', '^', '^', '^', ']', '^', '^',
    # 0xC0 - 0xCF
    '{', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
    'H', 'I', '^', '^', '^', '^', '^', '^',
    # 0xD0 - 0xDF
    '}', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', '^', '^', '^', '^', '^', '[',
    # 0xE0 - 0xEF
    "\\", '^', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', '^', '^', '^', '^', '^', ']',
    # 0xF0 - 0xFF
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', '^', '^', '^', '^', '^', '^',
);

##################################################################
# create some arrays that we'll use later to print
# character part of file.  each array lists the (0-based) byte
# offsets within an 80-byte record that are printed as text.
##################################################################
@chars1 = (0);
@chars2 = (6..29);
@chars3 = (40..46);
@chars4 = (47..54, 58..62, 71..74);

# number of records processed so far; 0 means "next record is the first".
$recno = 0;

##################################################################
# here's a ruler for the top of the file for debugging.
##################################################################
#printf "00000000011111111112222222222333333333344444444445555555555666666666677777777778\n12345678901234567890123456789012345678901234567890123456789012345678901234567890\n----.----*----.----*----.----*----.----*----.----*----.----*----.----*----.----*\n";

#*******************************************************************************
# Name             : EbcdicText
# Purpose          : Translates selected bytes of a record to ASCII text.
# Arguments        : 1 - reference to the array of byte values of a record.
#                    2.. - the (0-based) offsets to translate, in output order.
# Return Value     : the translated characters joined into one string.
# Globals Accessed : @ebcdic
#*******************************************************************************
sub EbcdicText {
    my ($bytes, @positions) = @_;
    return join('', map { $ebcdic[ $bytes->[$_] ] } @positions);
}

#*******************************************************************************
# Name             : FormatVariableRecord
# Purpose          : Builds the ASCII line for one 80-byte variable
#                    descriptor record (every record but the first).
# Arguments        : 1 - the raw 80-byte record.
# Return Value     : the output line, including the trailing newline.
# Globals Accessed : @chars1, @chars2, @chars3, @chars4 (via EbcdicText: @ebcdic)
# Notes            : The unpack template consumes exactly 80 bytes:
#                    2 + 2 + 1 + 1 + 24 + 2 + 2 + 1 + 2 + 43.  Elements
#                    2-4 and 29-32 of the result are the binary numbers
#                    that get printed; everything else is printed through
#                    the character table, byte by byte.
#*******************************************************************************
sub FormatVariableRecord {
    my ($record) = @_;
    my @fields = unpack('C2 n C C C24 n n C n C43', $record);
    my @bytes  = unpack('C80', $record);

    return join('',
        EbcdicText(\@bytes, @chars1),
        sprintf("%5lu %1u %1u ", @fields[2, 3, 4]),
        EbcdicText(\@bytes, @chars2),
        sprintf("%5lu %5lu %1u %5lu ", @fields[29, 30, 31, 32]),
        EbcdicText(\@bytes, @chars3),
        " ",
        EbcdicText(\@bytes, @chars4),
        "\n");
}

#*******************************************************************************
# Name             : FormatDescriptorRecord
# Purpose          : Builds the ASCII line for the first record of the
#                    dictionary, which has a different structure: three
#                    32-bit big-endian numbers at the start of the record.
# Arguments        : 1 - the raw record.
# Return Value     : the output line, including the trailing newline.
# Notes            : The line is blank-padded to 80 columns because the
#                    codebook in the manual page at the end of this file
#                    documents columns 15-80 as blank and column 81 as LF.
#*******************************************************************************
sub FormatDescriptorRecord {
    my ($record) = @_;
    my @fields = unpack('N N N', $record);
    return sprintf("%-80s\n", sprintf("%-4lu %-4lu %-4lu ", @fields[0, 1, 2]));
}

##################################################################
# process all the records: read 80 bytes at a time into $record.
# the first record (recno = 0) is the dictionary descriptor and is
# processed differently because it has a different structure; all
# other records are variable descriptors whose binary numbers are
# printed as ints and whose characters are converted from ebcdic
# to ascii.
##################################################################

# The input is binary; make sure no I/O layer translates any bytes.
binmode(STDIN);

while (1) {
    my $got = read(STDIN, $record, 80);

    # read returns undef on error; report it instead of treating it
    # like a normal end of file.
    unless (defined $got) {
        warn "$0: read error on STDIN: $!\n";
        last;
    }
    last if $got == 0;

    # A short read can only happen at end of file.  Unpacking it would
    # produce a garbage line, so report it and stop.
    if ($got < 80) {
        warn "$0: ignoring trailing partial record ($got bytes)\n";
        last;
    }

    print $recno ? FormatVariableRecord($record)
                 : FormatDescriptorRecord($record);
    $recno++;
}

###############################################################
# the rest of this file is the man page.  it can be
# viewed by calling the program with the -h flag (i.e., o2a -h)
# or by sending this file through nroff -man.
###############################################################

    # These next few lines are legal in both Perl and nroff.
    # For Perl they are a numeric constant and a multi-line string in
    # void context followed by __END__; for nroff they end the ignored
    # region and the diversion that were opened at the top of the file.
    # Each of them has to start in column 1 of its own line.

.00;                       # finish .ig

'di           \" finish diversion--previous line must be blank
.nr nl 0-1    \" fake up transition to first page again
.nr % 0         \" start at page 1
'; __END__ ##### From here on it's a standard manual page #####
.TH O2A 1 "October 22, 1993"
.AT 3
.SH NAME
o2a \- Osiris Dictionary to Ascii converter
.SH SYNOPSIS
.B o2a
[-h] < osiris_file_in_ebcdic > converted_file_in_ascii
.SS OPTIONS
.TP 1.25i
.B -h
prints this help message using nroff -man.
.SH DESCRIPTION
.B o2a
is a perl(1) script that takes an
.B Osiris Type-1 Data Dictionary File
written in EBCDIC characters and binary numbers and converts it to an
.B OSIRIS Data Dictionary
written in ASCII characters only.  O2a takes input from STDIN and
outputs to STDOUT.
.B OSIRIS Dictionaries
are used to describe OSIRIS Data Files.
.B OSIRIS Data Files
are simple text files with one record (line) per record.  The OSIRIS
Dictionary describes the structure of the Data File and the location,
size, name and other information about each variable in the Data File.
OSIRIS Type-1 Dictionaries are written with EBCDIC characters and
certain numbers encoded in binary format.  The OSIRIS software and
certain other statistical software can read these Dictionary Files
directly, enabling the analyst to easily describe the Data File to the
statistical software without having to type in the variable locations
and descriptions.  Unfortunately, not all software can successfully
read the OSIRIS Type-1 Dictionary.  O2a can be used to convert an
OSIRIS Type-1 Dictionary File to ASCII characters.  A
.B Codebook,
provided below, describes the layout of the ASCII output of o2a so
that it can be used as a Dictionary with any software capable of
reading ASCII characters.
.SH CODEBOOK
The Codebook on the following pages gives the layout of the
.B OSIRIS Dictionary
in ASCII that is output by o2a.
Note that each record (line) in the Dictionary has 80 characters
followed by a linefeed character (decimal 10 -- noted as "LF" in the
codebook).  Note also that the Dictionary consists of a one line
.B "Dictionary Descriptor Record"
followed by multiple
.B "Variable Descriptor Records,"
one line for each variable in the Data File.  The Codebook below
describes these two kinds of records that are output by o2a.  The
Dictionary can be read with standard statistical software, if desired.

.I Type-1 OSIRIS Dictionary -- ASCII Version

.B Dictionary Descriptor Record

 Column      Content
 ---------------------------------------------------------
 1           1
 2-5         (blank)
 6-9         First Variable Number
 10          (blank)
 11-14       Last Variable Number
 15-80       (blank)
 81          LF

.B Variable Descriptor Records

 Column      Content
 ---------------------------------------------------------
 1           T
 2-6         Variable Number
 7           (blank)
 8           Character type and storage mode
               0: Numeric, character mode
               1: Alphabetic, character mode
               2: Numeric, fixed-point binary mode
               3: Numeric, floating-point binary
 9           (blank)
 10          Variable type
               0: Single response
               1: Multiple response
 11          (blank)
 12-35       Variable Name (or blanks)
 36-40       Starting location of the variables within
             each data record
 41          (blank)
 42-46       Field width of one (or the only) response
               1-9: numeric variables
               1-255: alphabetic variables
 47          (blank)
 48          Number of decimal places
               0-9: numeric variables
               0: alphabetic variables
 49          (blank)
 50-54       Number of responses.  If the variable has more
             than 1 response, columns 8-48 describe the first
             response and the remaining responses are assumed
             to be in adjacent fields of the same description.
 55          (blank)
 56-62       First missing data code (or blanks)
 63          (blank)
 64-70       Second missing data code (or blanks)
 71          (blank)
 72-75       Reference value (or blanks)
 76-77       (blanks)
 78-80       Study ID (or blanks)
 81          LF

Note that a Type-1
.B data file
has fixed-length records, i.e., the record length is equal to the sum
of the widths of all of the response fields of all the variables.
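.SH ENVIRONMENT
MANPAGER, PAGER: the pager used to display this manual page for the
-h option (MANPAGER is tried first; the default is /usr/bin/more).
No other environment variables are used.
.SH FILES
None.
.SH AUTHOR
Jim Jacobs, University of California, San Diego.
jajacobs@ucsd.edu
.SH BUGS
No known bugs.  Please inform the author of any.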