Loading ...
Sorry, an error occurred while loading the content.

re: Splitting/extracting into temp files from a large master file

Expand Messages
  • baba_babuskha
    Hi, Below is my script and a sample input file that you must cut and paste into a separate file test.txt . The script worked great when the ID were all
    Message 1 of 1 , Oct 5, 2006
    • 0 Attachment
      Hi,

      Below is my script and a sample input file that you must cut and
      paste into a separate file "test.txt". The script worked great when
      the ID were all different numbers but now they can be the same and
      the only thing that distinguishes them from the others is what
      follows the "term". I want to be able to check to see if there is
      a "term" in the string_array and if so grab everything within the
      term="grab text" up to the closing angle bracket and pass that into
      the file being created.

      For example:

      <tag id="1(1)"> would output a external file called

      test-1-1.tmp


      <tag id="1(1) term="test"> would output a external file called

      test-1-1-test.tmp


      <tag id="1(1)" term="test two"> would output a external file called

      test-1-1-test-two.tmp


      <tag id="1(1)" term="last test"> would output a external file called

      test-1-1-last-test.tmp


      How would I go about this with my script below?

      Thanks much


      Syntax: perl test.pl tag tag c:\temp test


      #!/usr/bin/perl -w

      # - START PASTE
      #
      # Cut and past the below into an external file test.txt
      # <tag id="1(1)">
      #
      # AAA
      #
      # </tag>
      #
      # <tag id="1(1) term="test">
      #
      # BBB includes test
      #
      # </tag>
      #
      # <tag id="1(1)" term="test two">
      #
      # CCC includes test two
      #
      # </tag>
      #
      # <tag id="1(1)" term="last test">
      #
      # DDD includes last test
      # </tag>
      #
      # - END PASTE

      require 5.000;
      use Env;
      use strict;
      use Getopt::Long;

      # Pass filename parameter
      my $filename = $ARGV[0];
      my $starttag = $ARGV[1];
      my $endtag = $ARGV[2];
      my $outdir = $ARGV[3];
      my $ref_filename = $ARGV[4];


      #my $sf_filename;
      my $outfile;
      my $string;
      my $write=1;
      my @string_array;

      open (FILE, $filename) or die "cannot open $filename: $!\n";
      # For testing
      # print " Input: $filename\n";
      # print " Start: $starttag\n";
      # print " End: $endtag\n";
      # print "Destination (.tmp): $outdir\n\n";
      while (<FILE>) {
      if ($_ =~ /^<$starttag id=/) {
      $write=0;
      $string = $_;
      get_out_file_name();
      remove_old_file();
      }
      if ($write==0) {
      open (NEWFILE, ">>$outdir\\$ref_filename-$outfile")
      or die "cannot open $outdir\\$ref_filename-$outfile: $!
      \n";
      print NEWFILE $_;
      }
      if ($_ =~ /^<\/$endtag>$/) {
      $write=1;
      }
      }
      close NEWFILE;
      close FILE;

      #---------------------------------
      # subroutines
      #---------------------------------
      sub get_out_file_name {
      @string_array=split(/"+/, $string);


      print "STRING_ARRAY: @string_array\n";

      $outfile = $string_array[1] . ".tmp";

      print "OUTFILE: $outfile\n";

      $outfile=~s/\)\(/-/;
      $outfile=~s/\(/-/;
      $outfile=~s/\)/-/;
      $outfile=~s/\./_/;
      $outfile=~s/\-.tmp/.tmp/;
      $outfile=~s/-\(/-/;
      $outfile=~s/\).tmp/.tmp/;
      $outfile=~s/_tmp/.tmp/;
      $outfile=~s/\)\(/-/;
      $outfile=~s/\)\(/-/;
      $outfile=~s/\./_/;
      $outfile=~s/_tmp/.tmp/;
      $outfile=~s/ /_/;
      # ($ref_filename, undef) = split(/\./, $filename);
      @string_array=undef;}

      sub remove_old_file {
      print "Removing: $outdir\\$ref_filename-$outfile\n" if (-
      e "$outdir\\$ref_filename-$outfile");
      print " Output: $outdir\\$ref_filename-$outfile\n";
      if (-e "$outdir\\$ref_filename-$outfile") {
      unlink "$outdir\\$ref_filename-$outfile" };
      }
      #----------------------------------------------------------
    Your message has been successfully submitted and would be delivered to recipients shortly.