Perl Tutorial - Practical Extraction and Reporting Language (Perl)
Please leave a remark at the bottom of each page with your useful suggestion.
Table of Contents
- Perl Introduction
- Perl Program Startup
- Perl Regular Expressions
- Perl Array Program
- Perl Basic Program
- Perl Subroutine / Function Program
- Perl XML Program
- Perl String Program
- Perl Statement Program
- Perl Network Program
- Perl Hash Program
- Perl File Handling Program
- Perl Data Type Program
- Perl Database Program
- Perl Class Program
- Perl CGI Program
- Perl GUI Program
- Perl Report Program
Perl XML Program
Check error in SAX parser
use XML::Parser;
$currentLine = 0;
$parser = new XML::Parser(Handlers => {Start => \&start_handler,
End => \&end_handler,
Char => \&char_handler,
Proc => \&proc_handler,
XMLDecl => \&XMLDecl_handler,
Final => \&final_handler});
$file = "yourName.xml";
eval {
$parser->parsefile($file);
};
if($@) {
print "Error in $file: " . (substr $@, 0, index($@, ", byte")) . "\n";
exit(1);
};
sub XMLDecl_handler
{
$xmlString[$currentLine++] = "<?xml version=\"$_[1]\"?>";
}
sub start_handler
{
$xmlString[$currentLine] = $indent . "<$_[1]";
for ($i = 2; $i <= $#_ - 1; $i += 2){
$xmlString[$currentLine] .= " " . $_[$i] . "=\"". $_[$i + 1] . "\"";
}
$xmlString[$currentLine++] .= ">";
$indent .= " ";
}
sub end_handler
{
$indent = substr($indent, 0, length($indent) - 4);
$xmlString[$currentLine++] = $indent . "</$_[1]>";
}
sub char_handler
{
if($_[1] =~ /[^ \n\t\r]/g) {
$xmlString[$currentLine++] = $indent . "$_[1]";
}
}
sub proc_handler
{
$xmlString[$currentLine++] = "<?$_[1] $_[2]?>";
}
sub final_handler
{
for ($i = 0; $i < $currentLine; $i++){
print $xmlString[$i] . "\n";
}
}
Check node name in SAX paser
use XML::Parser;
$parser = new XML::Parser(Handlers => {Start => \&start_handler, Char => \&char_handler});
$parser->parsefile("yourXML.xml");
$customer = 0;
$data_ok = 0;
sub start_handler
{
$data_ok = 0;
if ($_[1] eq "tagName"){
$customer++;
}
if ($customer == 2){
if($_[1] eq "tagName2"){
$data_ok++;
}
}
}
sub char_handler
{
if(($_[1] =~ /[^ \n\t\r]/g) && $data_ok){
print "First name: $_[1]\n";
}
}
Converting a comma separated list data source to XML
#!/perl/bin/perl
use warnings;
use strict;
print <<'HEADER';
Content-Type: text/xml
<?xml version = "1.0"?>
HEADER
print( "<contacts>\n\n" );
open( NAMES, "names.txt" ) or die ( "Error opening names.txt" );
while ( <NAMES> ) {
chomp;
# escape any characters not allowed in XML content.
s/&/&/;
s/</</;
s/>/>/;
s/"/"/;
s/'/'/;
my ( $last, $first ) = split( /, / );
print( " <contact>\n",
" <LastName>$last</LastName>\n",
" <FirstName>$first</FirstName>\n",
" </contact>\n\n" );
}
close( NAMES );
print( "</contacts>\n" );
#File: names.txt
# Jack, John
# Jason, Sue
# Jodd, Bob
Register handlers to SAX parser
use XML::Parser;
$currentLine = 0;
$parser = new XML::Parser(Handlers => {Start => \&start_handler,
End => \&end_handler,
Char => \&char_handler,
Proc => \&proc_handler,
XMLDecl => \&XMLDecl_handler,
Final => \&final_handler});
$parser->parsefile("yourXML.xml");
sub XMLDecl_handler
{
$xmlString[$currentLine++] = "<?xml version=\"$_[1]\"?>";
}
sub start_handler
{
$xmlString[$currentLine] = $indent . "<$_[1]";
for ($i = 2; $i <= $#_ - 1; $i += 2){
$xmlString[$currentLine] .= " " . $_[$i] . "=\"". $_[$i + 1] . "\"";
}
$xmlString[$currentLine++] .= ">";
$indent .= " ";
}
sub end_handler
{
$indent = substr($indent, 0, length($indent) - 4);
$xmlString[$currentLine++] = $indent . "</$_[1]>";
}
sub char_handler
{
if($_[1] =~ /[^ \n\t\r]/g) {
$xmlString[$currentLine++] = $indent . "$_[1]";
}
}
sub proc_handler
{
$xmlString[$currentLine++] = "<?$_[1] $_[2]?>";
}
sub final_handler
{
for ($i = 0; $i < $currentLine; $i++){
print $xmlString[$i] . "\n";
}
}
SAX parser handler
use XML::Parser;
$currentLine = 0;
$parser = new XML::Parser(Handlers => {Start => \&start_handler,
End => \&end_handler,
Char => \&char_handler,
Proc => \&proc_handler,
XMLDecl => \&XMLDecl_handler,
Final => \&final_handler});
$parser->parsefile("yourXML.xml");
sub XMLDecl_handler
{
$xmlString[$currentLine++] = "<?xml version=\"$_[1]\"?>";
}
sub start_handler
{
$xmlString[$currentLine] = $indent . "<$_[1]";
for ($i = 2; $i <= $#_ - 1; $i += 2){
$xmlString[$currentLine] .= " " . $_[$i] . "=\"". $_[$i + 1] . "\"";
}
$xmlString[$currentLine++] .= ">";
$indent .= " ";
}
sub end_handler
{
$indent = substr($indent, 0, length($indent) - 4);
$xmlString[$currentLine++] = $indent . "</$_[1]>";
}
sub char_handler
{
if($_[1] =~ /[^ \n\t\r]/g) {
$xmlString[$currentLine++] = $indent . "$_[1]";
}
}
sub proc_handler
{
$xmlString[$currentLine++] = "<?$_[1] $_[2]?>";
}
sub final_handler
{
for ($i = 0; $i < $currentLine; $i++){
print $xmlString[$i] . "\n";
}
}
Stream XML::Parser
use XML::Parser;
my $parser = XML::Parser->new( Handlers => {Start=>\&handle_start,End=>\&handle_end,});
$parser->parsefile( "yourXML.xml" );
my @element_stack; # remember which elements are open
sub handle_start {
my( $expat, $element, %attrs ) = @_;
my $line = $expat->current_line;
print "$element starting on # $line!\n";
push( @element_stack, { element=>$element, line=>$line });
if( %attrs ) {
print "Attributes:\n";
while( my( $key, $value ) = each( %attrs )) {
print "\t$key => $value\n";
}
}
}
sub handle_end {
my( $expat, $element ) = @_;
my $element_record = pop( @element_stack );
print "$element started on # ", $$element_record{ line };
}
The XML::Parser module provides a framework for parsing XML.
#With XML data, the main conditions include:
#The start of an XML tag
#The end of an XML tag
#The data between the start and end of an XML tag
#The start of the XML document
#The end of the XML document
#Parameters Passed to Your XML Callback Routines
#Routine Parameters
#Start XML::Parser object reference, element name, attribute, value.
#End XML::Parser object reference, element name
#Char XML::Parser object reference, text data
#Init XML::Parser object reference
#Final XML::Parser object reference
#!/usr/bin/perl -w
use XML::Parser;
$filename = 'yourXML.xml';
print "Opening $filename\n";
$parser = new XML::Parser(Handlers => {Start => \&tag_start,
End => \&tag_end,
Char => \&tag_char_data} );
$parser->parsefile($filename);
# Handles the start of a tag.
sub tag_start {
# Use shift to pull off elements.
my($obj) = shift;
my($elem) = shift;
my(%attrs) = @_;
print "<$elem ";
my(@keys) = keys( %attrs );
my($key);
foreach $key (@keys) {
print " $key=$attrs{$key} ";
}
print ">\n";
}
# Handles the end of a tag.
sub tag_end {
# Use shift to pull off elements.
my($obj) = shift;
my($elem) = shift;
print "</$elem>\n";
}
# Handles character data between the
# start and end of a tag.
sub tag_char_data {
# Use shift to pull off elements.
my($obj) = shift;
my($data) = shift;
# Note: no need for \n here in most documents.
print "$data";
}
Using XML::Parser to parse xml file
use XML::Parser;
my $xmlfile = "yourXML.xml";
my $parser = XML::Parser->new( ErrorContext => 2 );
eval { $parser->parsefile( $xmlfile ); };
if( $@ ) {
$@ =~ s/at \/.*?$//s; # remove module line number
print STDERR "\nERROR in '$file':\n$@\n";
} else {
print STDERR "'$file' is well-formed\n";
}
Using XML:Simple to read and store the document
use XML::Simple;
my $simple = XML::Simple->new(); # initialize the object
my $tree = $simple->XMLin( './data.xml' ); # read, store document
print "The user prefers the font " . $tree->{ font }->{ name } . " at " . $tree->{ font }->{ size } . " points.\n";
XML::Parasr style: Tree
use XML::Parser;
$parser = new XML::Parser( Style => 'Tree' );
my $tree = $parser->parsefile( shift @ARGV );
# serialize the structure
use Data::Dumper;
print Dumper( $tree );