devdaily home | apple | java | perl | unix | directory | blog

What this is

This file is included in the DevDaily.com "Perl Source Code Warehouse" project. The intent of this project is to help you "Learn Perl by Example" TM.

Other links

The source code

#!/usr/local/bin/perl

$VERSION = "1.12";

# CGI code.pl
# Version 1.11
# Part of "WWW Cyrillic Encoding Suite"
# Get docs and newest version from
#	http://www.neystadt.org/cyrillic/
#
# Copyright (c) 1997-98, John Neystadt 
# You may install this script on your web site for free
# To obtain permision for redistribution or any other usage
#	contact john@neystadt.org.
#
# Drop me a line if you deploy this script on your site.

# This script translates WEB pages from one Russian code to another.
# Developed by Leonid Neishtadt (http://www.neystadt.org/leonid/)
# e-mail: leonid@neystadt.org
#
# Currently the following codes are supported:
# DOS (alternate) code page CP866 (dos)
# Windows code page CP1251 (win).
# UNIX code KOI8-r (koi8 or nocs for supressing charset Metatag),
# ISO-8859-5 (iso),
# Macintosh (mac),
# Volapuk (transliteration) (vol) - only as output code.
# AUTO - auto selection of output encoding according to platform where browser 
#        runs (Windows, UNIX, MACINTOSH, OS/2)
#
# Usage: Copy this script into cgi-bin directory,
#        refer to it as ..../cgi-bin/code.pl/"tab"/"URL to be translated"
# where "tab" is one of the above encodings or 'rus' for displaying menu 
# with available codes.
# It is also can be coded as 'fromcode-tocode' for explicit definition of
# the original file encoding. 
# "URL" is absolute URL from the server root (Don't forget to set $path).
# or full URL like http://cnn.com. 
# All relative references from this page to other WEB pages will be also
# translated through the same code table (isn't supported yet for full URLs).
#
# Source encoding is taken from Metatag like:
# 
# The tag is changed during translation or deleted for 'vol' and 'nocs'.
# If the tag is absent default encoding is taken from variable $defcode.
#
# It is recommended that you put  on all 
# your pages, and choose only destination encoding in urls. Do no worry for
# old buggy browsers which can't display correctly pages with this meta-tag
# NOCS encoding converts page to koi8 and deletes the meta-tag
#
# READABLE URLS
# -------------
# If you use APPACHE you can add the lines similar to those to your webserver 
# configuration files:
#
# ScriptAlias /koi8       /home/www/neystadt/cgi-bin/code.pl/koi8
# ScriptAlias /win        /home/www/neystadt/cgi-bin/code.pl/win
# ScriptAlias /dos        /home/www/neystadt/cgi-bin/code.pl/dos
# ScriptAlias /mac        /home/www/neystadt/cgi-bin/code.pl/mac
# ScriptAlias /iso        /home/www/neystadt/cgi-bin/code.pl/iso
# ScriptAlias /vol        /home/www/neystadt/cgi-bin/code.pl/vol
# ScriptAlias /lat        /home/www/neystadt/cgi-bin/code.pl/vol
# ScriptAlias /nocs       /home/www/neystadt/cgi-bin/code.pl/nocs
#
# From now you will be able to translate urls like http://www.neystadt.org/russia/
# simply by prefixing the url with encoding: http://www.neystadt.org/koi8/russia/ 
# or http://www.neystadt.org/lat/russia/.
# 
# Note that code.pl automatically finds index.html if directory names is given 
# (like in example above). The index file name can be changed by $IndexFileName
# parameters in the script.


=head1 NAME

code.pl - CGI script to convert on-the-fly html pages across cyrillic charsets

=cut

use Convert::Cyrillic;
use LWP::UserAgent;
use HTTP::Headers::UserAgent;

$path="..";     # <==== path from cgi-bin to the server root.
$defcode="WIN"; # <==== default source encoding
$maxsize=500000; # maximum file size
$IndexFileName = 'index.html';
$UserAgent=$ENV{HTTP_USER_AGENT};
$scrname=$ENV{SCRIPT_NAME};
$file=$ENV{PATH_INFO};
$file=~s/^$scrname//;
$file=~s/\+/ /go;
$file=~s/%(..)/pack("c",hex($1))/ge;
if ($file=~/[\.\/\\]([^\.\/\\]+)$/o) {$ext=lc($1);} else {$ext='html';}
$file=~s%^\/([^\/]*)%%o;
$lang=uc($+);
if ($lang eq 'RUS') {
	print "Content-type: text/html\n
	

Select Russian encoding:

"; goto end; } if ($lang=~/(.*)-(.*)/o) { $charset=$1; $lang=$2; } if (!(',ISO,KOI8,KOI,DOS,WIN,VOL,MAC,NOCS,AUTO,' =~ /,$lang,/i)) { $err = "Unsupported code - $lang"; goto error; } $file =~ s|http:/([^/])|http://$1|oi; # Some vers of Ms-IIS merge '//' into '/' in Urls if ($file =~ s|^/(http://)|$1|oi) { $url=$ENV {'QUERY_STRING'}; if ($url) { $url= "?" . $url; } $url = $file . $url; my $ua = new LWP::UserAgent; $ua->agent("code.pl/1.2 " . $ua->agent); $ua->from ('leonid@neystadt.org'); my $req = new HTTP::Request (GET => $url); my $res = $ua->request ($req); if (!$res->is_success) { my $err = $res->error_as_HTML(); print <<"EOF"; Content-Type: text/html

Failure

Failed to retrive url: $url. Remote server returned the following reponse:
$err EOF goto end; } $type = $res->content_type; $buffer = $res->content; #neystadt::http_rtr::Http_Retrieve ($url, $buffer, $hdrs); #$hdrs=~/Content-Type: (.*)\n/io; $type = $1; } else { if ($file=~/cgi-bin/io) { $err = "Incorrect file name"; goto error; } $file = "$path$file"; if (-d $file) { $file = "$file/$IndexFileName"; $ext = 'htm'; } if (open In,"$file") { binmode In; read (In, $buffer, $maxsize); close In; } else { print "Content-type: text/html HTTP Error

Error: 404 Not Found


The requested URI $file does not exist.
"; goto end; } } if ($lang=~/auto/io){ $platform = HTTP::Headers::UserAgent::GetPlatform ($UserAgent); $lang='koi'; $lang='win' if $platform=~/WIN/io; $lang='mac' if $platform eq 'MAC'; $lang='koi' if $platform eq 'UNIX'; $lang='dos' if $platform eq 'OS2'; $lang='nocs' if $platform eq 'Linux'; } $newcharset = "koi8-r" if $lang=~/koi|nocs/io; $newcharset = "windows-1251" if $lang=~/win/io; $newcharset = "x-mac-cyrillic" if $lang=~/mac/io; $newcharset = "ibm866" if $lang=~/dos/io; $newcharset = "ISO-8859-5" if $lang=~/iso/io; if ($buffer=~s/<\s*META\s+HTTP-EQUIV\s*=\s*"?Content-Type"?\s+CONTENT\s*=\s*"?(.*);\s+charset\s*=\s*(.*)"?\s*>//io) { $type=$1; $charset=$2 if !$charset; if ($lang=~/nocs|vol/io){ $buffer=~s/<\s*META\s+HTTP-EQUIV\s*=\s*"?Content-Type"?\s+CONTENT\s*=\s*"?(.*);\s+charset\s*=\s*(.*)"?\s*>//io; } } else { $type="text/html" if $ext eq 'html' || $ext eq 'htm'; $type="text/plain" if $ext eq 'txt'; $type="image/gif" if $ext eq 'gif'; $type="image/jpeg" if $ext eq 'jpg' || $ext eq 'jpeg'; } $lang="koi8" if $lang=~/nocs/io; $type="text/html" if !$type; $slang=$defcode; $slang="KOI8" if $charset=~/koi/io; $slang="WIN" if $charset=~/1251/io; $slang="ISO" if $charset=~/iso/io; $slang="DOS" if $charset=~/alt/io; $slang="MAC" if $charset=~/mac/io; # translate the page $buffer = Convert::Cyrillic::cstocs ($slang,$lang,$buffer) if $type =~ /text/o; if ($hdrs) { binmode STDOUT; print $hdrs; } else { print("Content-type: $type\n\n"); binmode STDOUT; } print $buffer; goto end; error: ermsg($err); end:; sub ermsg { if (!$sw) {$sw=1; print "Content-type: text/plain\n\n";} print "@_[0]\n"; } __END__ =head1 DESCRIPTION See the comments on the top of the script. =head1 PREREQUISITES This script requires the C, C and C modules available from CPAN. =pod OSNAMES any =pod SCRIPT CATEGORIES CGI =cut




Copyright 1998-2008 Alvin Alexander
All Rights Reserved.
 
devdaily.com is based in louisville, kentucky, and this web site is hosted by godaddy.com