perl读取EXCEL文件输出到XML_perl读取excel文件
perl读取EXCEL文件输出到XML由刀豆文库小编整理,希望给你工作、学习、生活带来方便,猜你可能喜欢“perl读取excel文件”。
#!/usr/bin/perl
# Read an Excel workbook whose path is given as the first command-line
# argument and print an XML document describing its rows.
use strict;
use warnings;
use utf8;

use Spreadsheet::ParseExcel;
use XML::Writer;

binmode(STDIN,  ':encoding(utf8)');
binmode(STDOUT, ':encoding(utf8)');
binmode(STDERR, ':encoding(utf8)');

# The input file name is the first command-line argument.
my $file = $ARGV[0];
die "Usage: $0 <excel-file>\n"
    unless defined $file && length $file;

# Fail with a readable message instead of a cryptic dereference error
# when the workbook cannot be parsed.
my $book = Spreadsheet::ParseExcel::Workbook->Parse($file);
die "Cannot parse Excel file '$file'\n"
    unless $book && ref $book->{Worksheet} eq 'ARRAY';

my @sheets = @{ $book->{Worksheet} };
# Copy every populated cell into the two-dimensional array @ary,
# addressed as $ary[$row][$col] with the indices reported by the worksheet.
# NOTE(review): all worksheets write into the same @ary, so a later sheet
# overwrites an earlier one at the same position -- confirm that input
# workbooks are expected to carry a single sheet.
my @ary = ();
foreach my $sheet (@sheets) {
    my ($minRow, $maxRow) = $sheet->row_range();
    my ($minCol, $maxCol) = $sheet->col_range();
    foreach my $row ($minRow .. $maxRow) {
        foreach my $col ($minCol .. $maxCol) {
            my $cell = $sheet->get_cell($row, $col);
            next unless $cell;    # no cell object at this position
            $ary[$row][$col] = $cell->value;
        }
    }
}
# Emit the XML document: <aaaa><webs> ... one <web> per row of @ary ...
# followed by a fixed <web id="aaaa1"> element holding the properties.
my $writer = XML::Writer->new(DATA_MODE => 1, DATA_INDENT => 2);
$writer->xmlDecl('UTF-8', 'yes');
$writer->startTag('aaaa');
$writer->startTag('webs');

# Current server date, appended to every generated id so ids from
# different days do not collide.  Zero-padded with a four-digit year:
# localtime() returns the year as an offset from 1900 and a 0-based month,
# and unpadded concatenation would make e.g. Jan 11 and Nov 1 identical.
my (undef, undef, undef, $day, $mon, $year) = localtime(time);
my $date = sprintf('%04d%02d%02d', $year + 1900, $mon + 1, $day);

my $web_id = 'news';
foreach my $new_row (0 .. $#ary) {
    # A row index with no cells at all is treated as an empty row.
    my $cells    = $ary[$new_row] || [];
    my $web_name = "";

    # Undefined cells are written as empty attribute values.
    my $site_url  = defined $cells->[2] ? $cells->[2] : '';
    my $event_url = $cells->[3];
    my $event_min = defined $cells->[4] ? $cells->[4] : '';

    $writer->startTag(
        'web',
        'id'            => "$new_row$web_id$date",
        'url'           => "$site_url",
        'time_interval' => "500",
    );

    $writer->startTag('trans');
    $writer->startTag('tran', 'id' => "source");
    $writer->characters($web_name);
    $writer->endTag('tran');
    $writer->endTag('trans');

    $writer->startTag('urls');
    $writer->startTag('url', 'id' => "com.aaaa.bigdata.news_child_url");
    $writer->endTag('url');
    $writer->endTag('urls');

    # Only rows with a value in column 3 get an <events> section; its
    # 'max' attribute is the value of column 5 plus one.
    if ($event_url) {
        my $num = (defined $cells->[5] ? $cells->[5] : 0) + 1;
        $writer->startTag('events');
        $writer->startTag(
            'event',
            'id'  => "url",
            'url' => "$event_url",
            'min' => "$event_min",
            'max' => "$num",
        );
        $writer->endTag('event');
        $writer->endTag('events');
    }

    $writer->endTag('web');
}

# Fixed trailing element describing the target database and the properties.
# NOTE(review): connection details, including the password, are hard-coded
# here and end up in the generated file -- consider supplying them externally.
$writer->startTag('web', 'id' => "aaaa1");
$writer->startTag(
    'properties',
    'db_ip'    => "192.168.1.11",
    'db_name'  => "",
    'db_user'  => "root",
    'db_pawd'  => "123456",
    'db_table' => 'test',
    'parser'   => "FALSE",
);
foreach my $property_id ("CHECK_title", "content", "publish_time", "news_url") {
    $writer->startTag('property', 'id' => $property_id);
    $writer->endTag('property');
}
# The crawl timestamp property gets its body from zlg_time().
$writer->startTag('property', 'id' => "crawl_datetime");
zlg_time($writer);
$writer->endTag('property');
$writer->endTag('properties');
$writer->endTag('web');

$writer->endTag('webs');
$writer->endTag('aaaa');
$writer->end;
# zlg_time($writer)
#
# Writes the body of the crawl_datetime property: a CDATA section holding
# a "return <expression>" snippet on its own line.
#
#   $writer - the XML::Writer object to write to (taken from the argument
#             list, so the sub no longer depends on a file-level variable).
#
# Returns nothing.
sub zlg_time {
    my ($writer) = @_;

    # Emitted verbatim into the XML.
    # NOTE(review): this looks like a Python expression evaluated by the
    # consumer of the generated file -- confirm.  It needs plain ASCII
    # double quotes; the typographic quotes were a copy/paste artifact.
    my $time = 'time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())';

    # The "<![CDATA[" opener and the "\n" escape were lost from the original
    # string literal.  cdata() writes the section markers itself, so the
    # angle brackets are not entity-escaped the way characters() would.
    $writer->cdata("\nreturn $time\n");
    return;
}

exit(0);
另外再加一个 Shell 脚本,用来调用上面的 Perl 程序。
该脚本接收两个参数:参数1为输出的 XML 文件名,参数2为输入的 Excel 文件名。
#!/bin/bash
# Usage: <this-script> <output-xml-file> <input-excel-file>
#
# Runs the Perl converter on the Excel file, appends its output to the
# XML file, then post-processes that file in place with sed.
dest_sa=$1
cs=$2

# Quote the arguments so file names containing spaces survive.
perl /home/url/xls.pl "$cs" >> "$dest_sa"

# NOTE(review): the sed expression was garbled in the listing ('s//>/g').
# It presumably restores the angle brackets that the XML writer escaped as
# &lt; / &gt; -- confirm against the original script.  Be aware that this
# also un-escapes any such entities inside attribute values.
sed -i 's/&lt;/</g;s/&gt;/>/g' "$dest_sa"