|
NAME MCE::Shared::Handle - Handle helper class VERSION This document describes MCE::Shared::Handle version 1.876 DESCRIPTION A handle helper class for use as a standalone or managed by MCE::Shared. SYNOPSIS # non-shared or local construction for use by a single process # shorter, mce_open is an alias for MCE::Shared::Handle::open use MCE::Shared::Handle; MCE::Shared::Handle->open( my $fh, "<", "bio.fasta" ) or die "open error: $!"; MCE::Shared::Handle::open my $fh, "<", "bio.fasta" or die "open error: $!"; mce_open my $fh, "<", "bio.fasta" or die "open error: $!"; # construction for sharing with other threads and processes # shorter, mce_open is an alias for MCE::Shared::open use MCE::Shared; MCE::Shared->open( my $fh, "<", "bio.fasta" ) or die "open error: $!"; MCE::Shared::open my $fh, "<", "bio.fasta" or die "open error: $!"; mce_open my $fh, "<", "bio.fasta" or die "open error: $!"; # example, output is serialized, not garbled use MCE::Hobo; use MCE::Shared; mce_open my $ofh, ">>", \*STDOUT or die "open error: $!"; mce_open my $ifh, "<", "file.log" or die "open error: $!"; sub parallel { $/ = "\n"; # can set the input record separator while (my $line = <$ifh>) { printf {$ofh} "[%5d] %s", $., $line; } } MCE::Hobo->create( \&parallel ) for 1 .. 4; $_->join() for MCE::Hobo->list(); # handle functions my $bool = eof($ifh); my $off = tell($ifh); my $fd = fileno($ifh); my $char = getc($ifh); my $line = readline($ifh); binmode $ifh; seek $ifh, 10, 0; read $ifh, my($buf), 80; print {$ofh} "foo\n"; printf {$ofh} "%s\n", "bar"; open $ofh, ">>", \*STDERR; syswrite $ofh, "shared handle to STDERR\n"; close $ifh; close $ofh; API DOCUMENTATION MCE::Shared::Handle->new ( ) Called by MCE::Shared for constructing a shared-handle object. open ( filehandle, expr ) open ( filehandle, mode, expr ) open ( filehandle, mode, reference ) In version 1.007 and later, constructs a new object by opening the file whose filename is given by "expr", and associates it with "filehandle". 
When omitting error checking at the application level, MCE::Shared emits a message and stops if open fails. # non-shared or local construction for use by a single process use MCE::Shared::Handle; MCE::Shared::Handle->open( my $fh, "<", "file.log" ) or die "$!"; MCE::Shared::Handle::open my $fh, "<", "file.log" or die "$!"; mce_open my $fh, "<", "file.log" or die "$!"; # ditto # construction for sharing with other threads and processes use MCE::Shared; MCE::Shared->open( my $fh, "<", "file.log" ) or die "$!"; MCE::Shared::open my $fh, "<", "file.log" or die "$!"; mce_open my $fh, "<", "file.log" or die "$!"; # ditto mce_open ( filehandle, expr ) mce_open ( filehandle, mode, expr ) mce_open ( filehandle, mode, reference ) Native Perl-like syntax to open a file for reading: # mce_open is exported by MCE::Shared or MCE::Shared::Handle. # It creates a shared file handle with MCE::Shared present # or a non-shared handle otherwise. mce_open my $fh, "< input.txt" or die "open error: $!"; mce_open my $fh, "<", "input.txt" or die "open error: $!"; mce_open my $fh, "<", \*STDIN or die "open error: $!"; and for writing: mce_open my $fh, "> output.txt" or die "open error: $!"; mce_open my $fh, ">", "output.txt" or die "open error: $!"; mce_open my $fh, ">", \*STDOUT or die "open error: $!"; CHUNK IO Starting with "MCE::Shared" v1.007, chunk IO is possible for both non-shared and shared handles. Chunk IO is enabled by the trailing 'k' or 'm' for read size. Also, chunk IO supports the special "\n>"-like record separator. That anchors ">" at the start of the line. Workers receive record(s) beginning with ">" and ending with "\n". # non-shared handle --------------------------------------------- use MCE::Shared::Handle; mce_open my $fh, '<', 'bio.fasta' or die "open error: $!"; # shared handle ------------------------------------------------- use MCE::Shared; mce_open my $fh, '<', 'bio.fasta' or die "open error: $!"; # 'k' or 'm' indicates kibiBytes (KiB) or mebiBytes (MiB) respectively. 
# Read continues reading until reaching the record separator or EOF. # Optionally, one may specify the record separator. $/ = "\n>"; while ( read($fh, my($buf), '2k') ) { print "# chunk number: $.\n"; print "$buf\n"; } $. contains the chunk_id above or the record_number below. "readline($fh)" or "<$fh>" may be used for reading a single record. while ( my $buf = <$fh> ) { print "# record number: $.\n"; print "$buf\n"; } The following provides a parallel demonstration. Workers receive the next chunk from the shared-manager process where the actual read takes place. MCE::Shared also works with "threads", "forks", and likely other parallel modules. use MCE::Hobo; # (change to) use threads; (or) use forks; use MCE::Shared; use feature qw( say ); my $pattern = 'something'; my $hugefile = 'somehuge.log'; my $result = MCE::Shared->array(); mce_open my $fh, "<", $hugefile or die "open error: $!"; sub task { # the trailing 'k' or 'm' for size enables chunk IO while ( read $fh, my( $slurp_chunk ), "640k" ) { my $chunk_id = $.; # process chunk only if a match is found; ie. fast scan # optionally, comment out the if statement and closing brace if ( $slurp_chunk =~ /$pattern/m ) { my @matches; while ( $slurp_chunk =~ /([^\n]+\n)/mg ) { my $line = $1; # save $1 to not lose the value push @matches, $line if ( $line =~ /$pattern/ ); } $result->push( @matches ) if @matches; } } } MCE::Hobo->create('task') for 1 .. 4; # do something else MCE::Hobo->waitall(); say $result->len(); For comparison, the same thing using "MCE::Flow". MCE workers read the file directly when given a plain path, so will have lesser overhead. However, the run time is similar if one were to pass a file handle instead to mce_flow_f. The benefit of chunk IO is from lesser IPC for the shared-manager process (above). Likewise, for the mce-manager process (below). 
use MCE::Flow; use feature qw( say ); my $pattern = 'something'; my $hugefile = 'somehuge.log'; my @result = mce_flow_f { max_workers => 4, chunk_size => '640k', use_slurpio => 1, }, sub { my ( $mce, $slurp_ref, $chunk_id ) = @_; # process chunk only if a match is found; ie. fast scan # optionally, comment out the if statement and closing brace if ( $$slurp_ref =~ /$pattern/m ) { my @matches; while ( $$slurp_ref =~ /([^\n]+\n)/mg ) { my $line = $1; # save $1 to not lose the value push @matches, $line if ( $line =~ /$pattern/ ); } MCE->gather( @matches ) if @matches; } }, $hugefile; say scalar( @result ); CREDITS Implementation inspired by Tie::StdHandle. LIMITATIONS Perl must have IO::FDPass for constructing a shared "condvar" or "queue" while the shared-manager process is running. For platforms where IO::FDPass isn't possible, construct "condvar" and "queue" before other classes. On systems without "IO::FDPass", the manager process is delayed until sharing other classes or started explicitly. use MCE::Shared; my $has_IO_FDPass = $INC{'IO/FDPass.pm'} ? 1 : 0; my $cv = MCE::Shared->condvar(); my $que = MCE::Shared->queue(); MCE::Shared->start() unless $has_IO_FDPass; Regarding mce_open, "IO::FDPass" is needed for constructing a shared-handle from a non-shared handle not yet available inside the shared-manager process. The workaround is to have the non-shared handle made before the shared-manager is started. Passing a file by reference is fine for the three STD* handles. # The shared-manager knows of \*STDIN, \*STDOUT, \*STDERR. mce_open my $shared_in, "<", \*STDIN; # ok mce_open my $shared_out, ">>", \*STDOUT; # ok mce_open my $shared_err, ">>", \*STDERR; # ok mce_open my $shared_fh1, "<", "/path/to/sequence.fasta"; # ok mce_open my $shared_fh2, ">>", "/path/to/results.log"; # ok mce_open my $shared_fh, ">>", \*NON_SHARED_FH; # requires IO::FDPass The IO::FDPass module is known to work reliably on most platforms. 
Install version 1.1 or later to be rid of the limitations described above. perl -MIO::FDPass -le "print 'Cheers! Perl has IO::FDPass.'" INDEX MCE, MCE::Hobo, MCE::Shared AUTHOR Mario E. Roy, <marioeroy AT gmail DOT com>
Visit the GSP FreeBSD Man Page Interface. |