User:Philh-591/sandbox

My sandbox

In connection with No Silver Bullet this attempts to separate the essential code of a program from the accidental code.

program source

 * 1) !/usr/bin/perl

=pod

=begin comment

Grab page https://www.ecb.europa.eu/stats/exchange/eurofxref/html/eurofxref-graph-gbp.en.html as text and extract lines Inverse.push({ date: new Date(2016,0,4), rate: 1.3548 }); It's javascript, so no reason to parse HTML.

Jan 2018, reporting HTTP request sent, awaiting response... 301 Moved Permanently Location: /stats/policy_and_exchange_rates/euro_reference_exchange_rates/html/eurofxref-graph-gbp.en.html [following] but redirects automatically (wget). To change and test sometime (or to XML). New page is    https://www.ecb.europa.eu/stats/policy_and_exchange_rates/euro_reference_exchange_rates/html/eurofxref-graph-gbp.en.html

Grab ~phweb/ExchangeRate/eurofxref-graph-gbp.en.html for testing.

Construct page(s) GBP-EUR- .html for specified year(s) or default to last year (Jan-Mar) or this year (Apr-Dec). Warn on page if year incomplete.

Construct legible, clippable page(s) with HTML::Element and emit it/them.

The XML alternative page was at    https://www.ecb.europa.eu/stats/policy_and_exchange_rates/euro_reference_exchange_rates/html/gbp.xml If using this, in the CompactData -> DataSet -> Series element extract all the  elements, attributes TIME_PERIOD="YYYY-MM-DD", OBS_VALUE="0.nnnnn" and invert OBS_VALUE (observation value) to 1. This is now at    https://www.ecb.europa.eu/stats/policy_and_exchange_rates/euro_reference_exchange_rates/html/gbp.xml so I seem to have already found this relocation.

=end comment

=cut

use File::Slurp; use Date::Format;

my $sigterm = 0; $SIG{TERM} = $SIG{INT} = sub { $sigterm = 1 };

my $filenamepattern = '"GBP-EUR-$year"'; my $now = time; my $thisyear = time2str( '%Y', $now); my $actualyear = $thisyear; my $month = time2str( '%L', $now); {    my $year = $thisyear; $year--; my $filename = eval $filenamepattern; $filename .= '.html'; # If modified date of last year's file is further in the past # than 2 Jan this year (day in year - 2). if ( -M $filename > ( time2str( '%j', time) - 2 ) ) { $thisyear-- if $month < 4; } }
 * 1) Default year
 * 1) Stick with this year if last year's file already exists and is dated beyond
 * 2) 2 Jan of this year.

my @years; for my $y (@ARGV) { if ($y < 1999 or $y > $actualyear) { printf( "Invalid year %s \n", $y); }   else { push @years, $y; } } push @years, $thisyear if $#years < 0;   # Default year if none specified.
 * 1) Specified years

print( join( ' ', @years), "\n");

my $URL = 'https://www.ecb.europa.eu/stats/policy_and_exchange_rates/euro_reference_exchange_rates/html/gbp.xml';

my $data = qx/wget -O - $URL/ ; die 'Rates not found : ', $? unless $data; die 'terminated' if $sigterm;


 * 1) my $localfile = '/home/phweb/ExchangeRate/test/gbp.2018.xml';
 * 2) my $data = read_file( $localfile);

my $rates;


 * 1) Using HTML::[Element|TreeBuilder] rather than XML::[Element|TreeBuilder]
 * 2) will force all tagnames and attribute names to lower case.

use HTML::TreeBuilder; use HTML::Element; use HTML::PrettyPrinter; use Time::ParseDate;

my $tree = HTML::TreeBuilder->new_from_content( $data); $tree->ignore_unknown( 0); $tree->parse_content( $data);

my @observations = $tree->look_down( _tag => 'Obs',    OBS_STATUS => "A", OBS_CONF => "F");

for my $observation (@observations) {    $observation->attr( 'time_period') =~ /(\d{4})-(\d{2})-(\d{2})/; my ( $year, $month, $day ) = ( $1, $2, $3 ); # or zero prefixes, especially on days, cause trouble. $_ = 0 + $_ for ($year, $month, $day); $month--;  # to zero base my $rate = sprintf( '%0.4f', 1/$observation->attr( 'obs_value')); $rates->{$year}->{$month}->{$day} = $rate; }

my $style = HTML::Element->new( 'style', type => 'text/css'); $style->push_content( <<endstyle ); table.year, tr.year { border: 2px solid green; width: 95%; margin: 1pc 2.5%; } table.month, tr.month { border: 2px solid blue; width: 95%; margin: 1pc 2.5%; } table.day { border: 1px solid teal; } table.day td.day { border: 1px solid aqua; width: 2em; text-align: right; } table.day td.data { border: 1px solid aqua; width: 6em; text-align: right; } th.year, td.year { border: 1px solid green; width: 25%; } tr.week { border: 2px solid lime; } th.month, td.month { border: 1px solid blue; width: 12%; } th.day { border: 2px solid blue; width: 9em; text-align: center} td.day { width: 9em; text-align: right } col { width: 6em; } h1, h2 { text-align: center } endstyle

my $NBSP = "\N{NO-BREAK SPACE}";

my $weekdays = [ 'thead', [ 'tr', { class => 'week' }, map { [ 'th', { class => 'day' }, $_ ] } qw( Mon Tue Wed Thu Fri Sat Sun ) ] ];

my $weekpattern = join( '.', ('(..)') x 7 ); my $weekmatch = qr/$weekpattern/;

my $doctype = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '. '"http://www.w3.org/TR/html4/strict.dtd">';

for my $year (@years) { my $filename = eval $filenamepattern; my ($htmlfilename, $txtfilename) = ( $filename.'.html', $filename.'.txt' ); my $title = "GBP-EUR - $year";

my @text; push @text, $title."\n";


 * 1) TODO:
 * 2) next if file modification date is past 2 Jan of the following year.
 * 3) and remark to that effect.

# See HTMLcalendar.pl for details. my $head = [ 'head', [ 'meta', { charset => 'utf-8' } ], [ 'title', $title ], $style ];

# An HTML::Element so content can be pushed into it later. my $yeartable = HTML::Element->new_from_lol(        [ 'table', { class => 'year' },             [ 'caption', "GBP to EUR rates for $year as ",                 [ 'a', { href => $URL }, 'published by the ECB' ] ] ] );

my $body = [ 'body', [ 'h1', $title ], $yeartable ]; my $document = HTML::Element->new_from_lol( [ 'html', $head, $body ]);

sub makedaycell { my ($celldata, $month, $year) = @_; my $day = 0 + $celldata; my $rate = '-'; $celldata =~ s/ /$NBSP/g; if ($day > 0) { $month--;   # Index from 0 on rates page. $rate = $rates->{$year}->{$month}->{$day}; }       my $daytable = [ 'table', { class => 'day' }, [ 'tr', [ 'td', { class => 'day' }, $celldata ], [ 'td', { class => 'data' }, $rate ] ] ]; return [ 'td', { class => 'day' }, $daytable ]; }

sub makedaytextcell { my ($celldata, $month, $year) = @_; my $day = 0 + $celldata; if ($day > 0) { $month--;   # Index from 0 on rates page. $rate = $rates->{$year}->{$month}->{$day}; $rate = '     ' unless $rate; return "[$celldata - $rate ]"; }       else { #             [.. - x.xxxx ] return " $celldata          "; }     }

for my $month ( 1 .. 12 ) {       my @weekrows; # Start week on Monday. Old (cal) format, so days in horizontal lines. my @weeks = qx/ ncal -bh -m $month $year/; $weeks[0] =~ /(\w+)/; $monthname = $1;

push @text, $monthname."\n"; my $textweekdays = $weeks[1]; # Set weekdays (not weekend) to pattern for rates. $textweekdays =~ s/([A-Z][a-z]) ([^-])/[$1 - x.xxxx ] $2/ for 1 .. 5;       push @text, $textweekdays."\n";

for my $week ( @weeks[ 2 .. $#weeks ] ) { my @weekdates = $week =~ $weekmatch; push @weekrows, [ 'tr', map { makedaycell( $_, $month, $year) } @weekdates ];

my $textweek = $week; # Iterate over @weekdates, insert with rate if numeric or insert # with padding if not numeric or no rate. $textweek = join( ' ',                (map{ makedaytextcell( $_, $month, $year); } @weekdates[0..4]),                (map{ $_; } @weekdates[5..6]) )."\n"; push @text, $textweek; }        $yeartable->push_content( HTML::Element->new_from_lol( [ 'tr', [ 'td', [ 'table', { class => 'month' }, [ 'caption', $monthname ], $weekdays, [ 'tbody', @weekrows ] ] ] ] ) ); }

my $hpp = HTML::PrettyPrinter->new(        linelength => 80, tabify => 0, quote_attr => 1, allow_forced_nl => 1 ); $hpp->set_force_nl( 1, qw( head body table tr )); write_file( $htmlfilename, $doctype, "\n", @{$hpp->format( $document)});

write_file( $txtfilename, @text, "\n");

} 1;