#!/usr/bin/perl -w

use strict;
use Text::Iconv;

# Target code page: the document is converted to this encoding and its
# charset declaration is rewritten to name it.
my $cp = "ISO-8859-2";

Text::Iconv->raise_error(1);	# die on bad encoding!

# Slurp the whole document from the files named on the command line (or
# from STDIN).  join() yields an empty string rather than undef when there
# is no input, so the pattern matches below never see an undefined value.
my $html = join '', <>;

# Build a converter from the charset declared in the document to $cp.
# $c stays undefined when no declaration is found; the conversion step
# further down is skipped in that case.  The whitespace after "text/html;"
# is matched with \s+ so that detection agrees with the pattern used later
# to rewrite the charset declaration.
my $c;
if ($html =~ m!CONTENT="text/html;\s+charset=([^"]+)"!i) {
	$c = Text::Iconv->new($1, $cp);
}

# Strip presentational noise from the markup (presumably the output of a
# StarOffice/OpenOffice HTML export, judging by SDFIELD -- confirm).

# Drop LANG attributes and all opening/closing FONT tags.
$html =~ s/\s+LANG="[^"]+"//gi;
$html =~ s/<\/*FONT[^>]*>//gi;

# Decode decimal character references into single characters, except:
#  - 34, 38, 60, 62 (" & < >): decoding them would inject live markup;
#  - anything above 255: chr() would produce a wide character, which
#    upgrades the whole buffer and breaks the byte-oriented conversion and
#    output that follow.  Those references are left as they are.
# NOTE(review): references 128-255 become the raw byte of that value, which
# is only the intended character if the source charset agrees with Latin-1
# at that position -- confirm against real input.
$html =~ s{&#(\d+);}{
	($1 > 255 || $1 == 34 || $1 == 38 || $1 == 60 || $1 == 62)
		? "&#$1;"
		: chr($1)
}gie;

# Remove STYLE attributes that carry only a default value, and SDFIELD
# elements that have no content.
$html =~ s/\s+STYLE="margin-bottom: 0in"//gi;
$html =~ s/\s+STYLE="line-height: 100%"//gi;
$html =~ s/<(SDFIELD)[^>]*><\/\1>//gi;

# Remove individual declarations from inside the remaining STYLE
# attributes, then drop any STYLE attribute that ended up empty.
$html =~ s/(STYLE="[^"]*)text-indent:\s+\d+cm(;\s+)*/$1/gi;
$html =~ s/(STYLE="[^"]*)line-height:\s+\d+%;*/$1/gi;
$html =~ s/(STYLE="[^"]*)widows:\s+\d+;*/$1/gi;
$html =~ s/(STYLE="[^"]*)orphans:\s+\d+;*/$1/gi;
$html =~ s/STYLE="\s*"\s*//gi;

# Remove paragraphs that contain nothing but whitespace and <br> tags.
$html =~ s,<p[^>]*>(?:\s*<br>\s*)*</p>,,gi;

# Remove WIDTH attributes from TABLE and TD tags.  The optional "%" keeps a
# percentage width such as width="50%" from being cut off after the digits,
# which used to leave a stray %" inside the tag.
$html =~ s,(<(?:table|td)[^>]*)width="*\d+%?"*,$1,gi;

# Re-encode the document when a source charset was detected.
if ($c) {
	# Test for definedness rather than truth, so that a result which is
	# defined but false (empty string or "0") is not treated as a failure.
	$html = $c->convert($html);
	defined $html or die "can't convert codepage!";

	# Make the charset declaration name the new code page.  This is done
	# only after an actual conversion, so an unconverted document is
	# never relabelled.
	$html =~ s,"(text/html;\s+charset=)\S+?","$1$cp",gi;
}

# Replace the s-caron entities with their single-byte ISO-8859-2 codes
# (0xB9 = small, 0xA9 = capital).  Hex escapes keep the bytes independent
# of the encoding this source file is saved in.  These values are only
# correct while $cp is ISO-8859-2.
$html =~ s/&scaron;/\xB9/g;
$html =~ s/&Scaron;/\xA9/g;

print $html;
