diff options
author | Anil Madhavapeddy <avsm@cvs.openbsd.org> | 2004-06-22 04:01:52 +0000 |
---|---|---|
committer | Anil Madhavapeddy <avsm@cvs.openbsd.org> | 2004-06-22 04:01:52 +0000 |
commit | 07f9a9d13ea0b7f12cf124c7c25c9b01c96c9369 (patch) | |
tree | 63b3492941bfdd52a92247669895596bbb90b758 /gnu/usr.bin/lynx/test | |
parent | 48e173e619472dce9fa16a21cb6fb6ac6a9e3d24 (diff) |
update to lynx 2.8.5rel.1
tested todd@,naddy@. millert@ deraadt@ ok
Diffstat (limited to 'gnu/usr.bin/lynx/test')
-rw-r--r-- | gnu/usr.bin/lynx/test/koi8-r.html | 321 | ||||
-rw-r--r-- | gnu/usr.bin/lynx/test/quickbrown.html | 103 | ||||
-rw-r--r-- | gnu/usr.bin/lynx/test/sgml.html | 13 | ||||
-rw-r--r-- | gnu/usr.bin/lynx/test/unicode.html | 3 | ||||
-rw-r--r-- | gnu/usr.bin/lynx/test/utf-8-demo.html | 216 |
5 files changed, 649 insertions, 7 deletions
diff --git a/gnu/usr.bin/lynx/test/koi8-r.html b/gnu/usr.bin/lynx/test/koi8-r.html new file mode 100644 index 00000000000..d6792195afb --- /dev/null +++ b/gnu/usr.bin/lynx/test/koi8-r.html @@ -0,0 +1,321 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<HTML> +<HEAD> +<TITLE>Test of the KOI8-R symbols</TITLE> +</HEAD> +<BODY> +<PRE> + + This table prepared from KOI8-R.TXT available at ftp.unicode.org + + ftp://ftp.unicode.org/MAPPINGS/VENDORS/MISC/KOI8-R.TXT + (if doing ftp, try cd Public/MAPPINGS/VENDORS/MISC) + + +original comment: + +# +# Name: KOI8-R (RFC1489) to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 18 August 1999 +# Authors: Helmut Richter <richter@lrz.de> +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# KOI8-R characters map into Unicode. The underlying document is the +# mapping described in RFC 1489. No statements are made as to whether +# this mapping is the same as the mapping defined as "Code Page 878" +# with some vendors. 
+# +# Format: Three tab-separated columns +# Column #1 is the KOI8-R code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in KOI8-R order. +# +# Version history +# 1.0 version: created. +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 "�" # NULL +0x01 0x0001 "" # START OF HEADING +0x02 0x0002 "" # START OF TEXT +0x03 0x0003 "" # END OF TEXT +0x04 0x0004 "" # END OF TRANSMISSION +0x05 0x0005 "" # ENQUIRY +0x06 0x0006 "" # ACKNOWLEDGE +0x07 0x0007 "" # BELL +0x08 0x0008 "" # BACKSPACE +0x09 0x0009 "	" # HORIZONTAL TABULATION +0x0A 0x000A "
" # LINE FEED +0x0B 0x000B "" # VERTICAL TABULATION +0x0C 0x000C "" # FORM FEED +0x0D 0x000D "
" # CARRIAGE RETURN +0x0E 0x000E "" # SHIFT OUT +0x0F 0x000F "" # SHIFT IN +0x10 0x0010 "" # DATA LINK ESCAPE +0x11 0x0011 "" # DEVICE CONTROL ONE +0x12 0x0012 "" # DEVICE CONTROL TWO +0x13 0x0013 "" # DEVICE CONTROL THREE +0x14 0x0014 "" # DEVICE CONTROL FOUR +0x15 0x0015 "" # NEGATIVE ACKNOWLEDGE +0x16 0x0016 "" # SYNCHRONOUS IDLE +0x17 0x0017 "" # END OF TRANSMISSION BLOCK +0x18 0x0018 "" # CANCEL +0x19 0x0019 "" # END OF MEDIUM +0x1A 0x001A "" # SUBSTITUTE +0x1B 0x001B "" # ESCAPE +0x1C 0x001C "" # FILE SEPARATOR +0x1D 0x001D "" # GROUP SEPARATOR +0x1E 0x001E "" # RECORD SEPARATOR +0x1F 0x001F "" # UNIT SEPARATOR +0x20 0x0020 " " # SPACE +0x21 0x0021 "!" # EXCLAMATION MARK +0x22 0x0022 """ # QUOTATION MARK +0x23 0x0023 "#" # NUMBER SIGN +0x24 0x0024 "$" # DOLLAR SIGN +0x25 0x0025 "%" # PERCENT SIGN +0x26 0x0026 "&" # AMPERSAND +0x27 0x0027 "'" # APOSTROPHE +0x28 0x0028 "(" # LEFT PARENTHESIS +0x29 0x0029 ")" # RIGHT PARENTHESIS +0x2A 0x002A "*" # ASTERISK +0x2B 0x002B "+" # PLUS SIGN +0x2C 0x002C "," # COMMA +0x2D 0x002D "-" # HYPHEN-MINUS +0x2E 0x002E "." # FULL STOP +0x2F 0x002F "/" # SOLIDUS +0x30 0x0030 "0" # DIGIT ZERO +0x31 0x0031 "1" # DIGIT ONE +0x32 0x0032 "2" # DIGIT TWO +0x33 0x0033 "3" # DIGIT THREE +0x34 0x0034 "4" # DIGIT FOUR +0x35 0x0035 "5" # DIGIT FIVE +0x36 0x0036 "6" # DIGIT SIX +0x37 0x0037 "7" # DIGIT SEVEN +0x38 0x0038 "8" # DIGIT EIGHT +0x39 0x0039 "9" # DIGIT NINE +0x3A 0x003A ":" # COLON +0x3B 0x003B ";" # SEMICOLON +0x3C 0x003C "<" # LESS-THAN SIGN +0x3D 0x003D "=" # EQUALS SIGN +0x3E 0x003E ">" # GREATER-THAN SIGN +0x3F 0x003F "?" 
# QUESTION MARK +0x40 0x0040 "@" # COMMERCIAL AT +0x41 0x0041 "A" # LATIN CAPITAL LETTER A +0x42 0x0042 "B" # LATIN CAPITAL LETTER B +0x43 0x0043 "C" # LATIN CAPITAL LETTER C +0x44 0x0044 "D" # LATIN CAPITAL LETTER D +0x45 0x0045 "E" # LATIN CAPITAL LETTER E +0x46 0x0046 "F" # LATIN CAPITAL LETTER F +0x47 0x0047 "G" # LATIN CAPITAL LETTER G +0x48 0x0048 "H" # LATIN CAPITAL LETTER H +0x49 0x0049 "I" # LATIN CAPITAL LETTER I +0x4A 0x004A "J" # LATIN CAPITAL LETTER J +0x4B 0x004B "K" # LATIN CAPITAL LETTER K +0x4C 0x004C "L" # LATIN CAPITAL LETTER L +0x4D 0x004D "M" # LATIN CAPITAL LETTER M +0x4E 0x004E "N" # LATIN CAPITAL LETTER N +0x4F 0x004F "O" # LATIN CAPITAL LETTER O +0x50 0x0050 "P" # LATIN CAPITAL LETTER P +0x51 0x0051 "Q" # LATIN CAPITAL LETTER Q +0x52 0x0052 "R" # LATIN CAPITAL LETTER R +0x53 0x0053 "S" # LATIN CAPITAL LETTER S +0x54 0x0054 "T" # LATIN CAPITAL LETTER T +0x55 0x0055 "U" # LATIN CAPITAL LETTER U +0x56 0x0056 "V" # LATIN CAPITAL LETTER V +0x57 0x0057 "W" # LATIN CAPITAL LETTER W +0x58 0x0058 "X" # LATIN CAPITAL LETTER X +0x59 0x0059 "Y" # LATIN CAPITAL LETTER Y +0x5A 0x005A "Z" # LATIN CAPITAL LETTER Z +0x5B 0x005B "[" # LEFT SQUARE BRACKET +0x5C 0x005C "\" # REVERSE SOLIDUS +0x5D 0x005D "]" # RIGHT SQUARE BRACKET +0x5E 0x005E "^" # CIRCUMFLEX ACCENT +0x5F 0x005F "_" # LOW LINE +0x60 0x0060 "`" # GRAVE ACCENT +0x61 0x0061 "a" # LATIN SMALL LETTER A +0x62 0x0062 "b" # LATIN SMALL LETTER B +0x63 0x0063 "c" # LATIN SMALL LETTER C +0x64 0x0064 "d" # LATIN SMALL LETTER D +0x65 0x0065 "e" # LATIN SMALL LETTER E +0x66 0x0066 "f" # LATIN SMALL LETTER F +0x67 0x0067 "g" # LATIN SMALL LETTER G +0x68 0x0068 "h" # LATIN SMALL LETTER H +0x69 0x0069 "i" # LATIN SMALL LETTER I +0x6A 0x006A "j" # LATIN SMALL LETTER J +0x6B 0x006B "k" # LATIN SMALL LETTER K +0x6C 0x006C "l" # LATIN SMALL LETTER L +0x6D 0x006D "m" # LATIN SMALL LETTER M +0x6E 0x006E "n" # LATIN SMALL LETTER N +0x6F 0x006F "o" # LATIN SMALL LETTER O +0x70 0x0070 "p" # LATIN SMALL LETTER P +0x71 
0x0071 "q" # LATIN SMALL LETTER Q +0x72 0x0072 "r" # LATIN SMALL LETTER R +0x73 0x0073 "s" # LATIN SMALL LETTER S +0x74 0x0074 "t" # LATIN SMALL LETTER T +0x75 0x0075 "u" # LATIN SMALL LETTER U +0x76 0x0076 "v" # LATIN SMALL LETTER V +0x77 0x0077 "w" # LATIN SMALL LETTER W +0x78 0x0078 "x" # LATIN SMALL LETTER X +0x79 0x0079 "y" # LATIN SMALL LETTER Y +0x7A 0x007A "z" # LATIN SMALL LETTER Z +0x7B 0x007B "{" # LEFT CURLY BRACKET +0x7C 0x007C "|" # VERTICAL LINE +0x7D 0x007D "}" # RIGHT CURLY BRACKET +0x7E 0x007E "~" # TILDE +0x7F 0x007F "" # DELETE +0x80 0x2500 "─" # BOX DRAWINGS LIGHT HORIZONTAL +0x81 0x2502 "│" # BOX DRAWINGS LIGHT VERTICAL +0x82 0x250C "┌" # BOX DRAWINGS LIGHT DOWN AND RIGHT +0x83 0x2510 "┐" # BOX DRAWINGS LIGHT DOWN AND LEFT +0x84 0x2514 "└" # BOX DRAWINGS LIGHT UP AND RIGHT +0x85 0x2518 "┘" # BOX DRAWINGS LIGHT UP AND LEFT +0x86 0x251C "├" # BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0x87 0x2524 "┤" # BOX DRAWINGS LIGHT VERTICAL AND LEFT +0x88 0x252C "┬" # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0x89 0x2534 "┴" # BOX DRAWINGS LIGHT UP AND HORIZONTAL +0x8A 0x253C "┼" # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0x8B 0x2580 "▀" # UPPER HALF BLOCK +0x8C 0x2584 "▄" # LOWER HALF BLOCK +0x8D 0x2588 "█" # FULL BLOCK +0x8E 0x258C "▌" # LEFT HALF BLOCK +0x8F 0x2590 "▐" # RIGHT HALF BLOCK +0x90 0x2591 "░" # LIGHT SHADE +0x91 0x2592 "▒" # MEDIUM SHADE +0x92 0x2593 "▓" # DARK SHADE +0x93 0x2320 "⌠" # TOP HALF INTEGRAL +0x94 0x25A0 "■" # BLACK SQUARE +0x95 0x2219 "∙" # BULLET OPERATOR +0x96 0x221A "√" # SQUARE ROOT +0x97 0x2248 "≈" # ALMOST EQUAL TO +0x98 0x2264 "≤" # LESS-THAN OR EQUAL TO +0x99 0x2265 "≥" # GREATER-THAN OR EQUAL TO +0x9A 0x00A0 " " # NO-BREAK SPACE +0x9B 0x2321 "⌡" # BOTTOM HALF INTEGRAL +0x9C 0x00B0 "°" # DEGREE SIGN +0x9D 0x00B2 "²" # SUPERSCRIPT TWO +0x9E 0x00B7 "·" # MIDDLE DOT +0x9F 0x00F7 "÷" # DIVISION SIGN +0xA0 0x2550 "═" # BOX DRAWINGS DOUBLE HORIZONTAL +0xA1 0x2551 "║" # BOX DRAWINGS DOUBLE VERTICAL +0xA2 0x2552 "╒" # BOX DRAWINGS 
DOWN SINGLE AND RIGHT DOUBLE +0xA3 0x0451 "ё" # CYRILLIC SMALL LETTER IO +0xA4 0x2553 "╓" # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +0xA5 0x2554 "╔" # BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xA6 0x2555 "╕" # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0xA7 0x2556 "╖" # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0xA8 0x2557 "╗" # BOX DRAWINGS DOUBLE DOWN AND LEFT +0xA9 0x2558 "╘" # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xAA 0x2559 "╙" # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0xAB 0x255A "╚" # BOX DRAWINGS DOUBLE UP AND RIGHT +0xAC 0x255B "╛" # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0xAD 0x255C "╜" # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0xAE 0x255D "╝" # BOX DRAWINGS DOUBLE UP AND LEFT +0xAF 0x255E "╞" # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xB0 0x255F "╟" # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xB1 0x2560 "╠" # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xB2 0x2561 "╡" # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xB3 0x0401 "Ё" # CYRILLIC CAPITAL LETTER IO +0xB4 0x2562 "╢" # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0xB5 0x2563 "╣" # BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xB6 0x2564 "╤" # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0xB7 0x2565 "╥" # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +0xB8 0x2566 "╦" # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xB9 0x2567 "╧" # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xBA 0x2568 "╨" # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xBB 0x2569 "╩" # BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xBC 0x256A "╪" # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0xBD 0x256B "╫" # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0xBE 0x256C "╬" # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xBF 0x00A9 "©" # COPYRIGHT SIGN +0xC0 0x044E "ю" # CYRILLIC SMALL LETTER YU +0xC1 0x0430 "а" # CYRILLIC SMALL LETTER A +0xC2 0x0431 "б" # CYRILLIC SMALL LETTER BE +0xC3 0x0446 "ц" # CYRILLIC SMALL LETTER TSE +0xC4 0x0434 "д" # CYRILLIC SMALL LETTER DE +0xC5 0x0435 "е" # CYRILLIC SMALL LETTER IE +0xC6 
0x0444 "ф" # CYRILLIC SMALL LETTER EF +0xC7 0x0433 "г" # CYRILLIC SMALL LETTER GHE +0xC8 0x0445 "х" # CYRILLIC SMALL LETTER HA +0xC9 0x0438 "и" # CYRILLIC SMALL LETTER I +0xCA 0x0439 "й" # CYRILLIC SMALL LETTER SHORT I +0xCB 0x043A "к" # CYRILLIC SMALL LETTER KA +0xCC 0x043B "л" # CYRILLIC SMALL LETTER EL +0xCD 0x043C "м" # CYRILLIC SMALL LETTER EM +0xCE 0x043D "н" # CYRILLIC SMALL LETTER EN +0xCF 0x043E "о" # CYRILLIC SMALL LETTER O +0xD0 0x043F "п" # CYRILLIC SMALL LETTER PE +0xD1 0x044F "я" # CYRILLIC SMALL LETTER YA +0xD2 0x0440 "р" # CYRILLIC SMALL LETTER ER +0xD3 0x0441 "с" # CYRILLIC SMALL LETTER ES +0xD4 0x0442 "т" # CYRILLIC SMALL LETTER TE +0xD5 0x0443 "у" # CYRILLIC SMALL LETTER U +0xD6 0x0436 "ж" # CYRILLIC SMALL LETTER ZHE +0xD7 0x0432 "в" # CYRILLIC SMALL LETTER VE +0xD8 0x044C "ь" # CYRILLIC SMALL LETTER SOFT SIGN +0xD9 0x044B "ы" # CYRILLIC SMALL LETTER YERU +0xDA 0x0437 "з" # CYRILLIC SMALL LETTER ZE +0xDB 0x0448 "ш" # CYRILLIC SMALL LETTER SHA +0xDC 0x044D "э" # CYRILLIC SMALL LETTER E +0xDD 0x0449 "щ" # CYRILLIC SMALL LETTER SHCHA +0xDE 0x0447 "ч" # CYRILLIC SMALL LETTER CHE +0xDF 0x044A "ъ" # CYRILLIC SMALL LETTER HARD SIGN +0xE0 0x042E "Ю" # CYRILLIC CAPITAL LETTER YU +0xE1 0x0410 "А" # CYRILLIC CAPITAL LETTER A +0xE2 0x0411 "Б" # CYRILLIC CAPITAL LETTER BE +0xE3 0x0426 "Ц" # CYRILLIC CAPITAL LETTER TSE +0xE4 0x0414 "Д" # CYRILLIC CAPITAL LETTER DE +0xE5 0x0415 "Е" # CYRILLIC CAPITAL LETTER IE +0xE6 0x0424 "Ф" # CYRILLIC CAPITAL LETTER EF +0xE7 0x0413 "Г" # CYRILLIC CAPITAL LETTER GHE +0xE8 0x0425 "Х" # CYRILLIC CAPITAL LETTER HA +0xE9 0x0418 "И" # CYRILLIC CAPITAL LETTER I +0xEA 0x0419 "Й" # CYRILLIC CAPITAL LETTER SHORT I +0xEB 0x041A "К" # CYRILLIC CAPITAL LETTER KA +0xEC 0x041B "Л" # CYRILLIC CAPITAL LETTER EL +0xED 0x041C "М" # CYRILLIC CAPITAL LETTER EM +0xEE 0x041D "Н" # CYRILLIC CAPITAL LETTER EN +0xEF 0x041E "О" # CYRILLIC CAPITAL LETTER O +0xF0 0x041F "П" # CYRILLIC CAPITAL LETTER PE +0xF1 0x042F "Я" # CYRILLIC CAPITAL LETTER YA +0xF2 
0x0420 "Р" # CYRILLIC CAPITAL LETTER ER +0xF3 0x0421 "С" # CYRILLIC CAPITAL LETTER ES +0xF4 0x0422 "Т" # CYRILLIC CAPITAL LETTER TE +0xF5 0x0423 "У" # CYRILLIC CAPITAL LETTER U +0xF6 0x0416 "Ж" # CYRILLIC CAPITAL LETTER ZHE +0xF7 0x0412 "В" # CYRILLIC CAPITAL LETTER VE +0xF8 0x042C "Ь" # CYRILLIC CAPITAL LETTER SOFT SIGN +0xF9 0x042B "Ы" # CYRILLIC CAPITAL LETTER YERU +0xFA 0x0417 "З" # CYRILLIC CAPITAL LETTER ZE +0xFB 0x0428 "Ш" # CYRILLIC CAPITAL LETTER SHA +0xFC 0x042D "Э" # CYRILLIC CAPITAL LETTER E +0xFD 0x0429 "Щ" # CYRILLIC CAPITAL LETTER SHCHA +0xFE 0x0427 "Ч" # CYRILLIC CAPITAL LETTER CHE +0xFF 0x042A "Ъ" # CYRILLIC CAPITAL LETTER HARD SIGN +</PRE> +</BODY> +</HTML> diff --git a/gnu/usr.bin/lynx/test/quickbrown.html b/gnu/usr.bin/lynx/test/quickbrown.html new file mode 100644 index 00000000000..e3207222f42 --- /dev/null +++ b/gnu/usr.bin/lynx/test/quickbrown.html @@ -0,0 +1,103 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<HTML> +<HEAD> +<TITLE>Markus Kuhn's quick-brown-fox UTF-8 demo</TITLE> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1"> +<LINK REV="made" HREF="mailto:dickey@invisible-island.net"> +</HEAD> + +<BODY> +<pre> +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn <mkuhn@acm.org> -- 1998-11-30 + +This file was UTF-8 encoded. 
+ + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce qui lui + permet de penser à la cænogenèse de l'être dont il est question dans la + cause ambiguë entendue à Moÿ, dans un capharnaüm qui, pense-t-il, diminue + çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Hebrew (iw) +----------- + + דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה? + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. 
+ +</pre> +See also: +<ul> +<li><a href="http://www.columbia.edu/kermit/utf8.html" + >http://www.columbia.edu/kermit/utf8.html</a> +<li><a href="http://www.kernel.org/" + >http://www.kernel.org/</a> +<li><a href="http://www.unicode.org/" + >http://www.unicode.org/</a> +<br>and +<li><a href="http://www.cl.cam.ac.uk/~mgk25/ucs/examples/TeX.txt" + >http://www.cl.cam.ac.uk/~mgk25/ucs/examples/TeX.txt</a> +<li><a href="http://www.cl.cam.ac.uk/~mgk25/ucs/wgl4.txt" + >http://www.cl.cam.ac.uk/~mgk25/ucs/wgl4.txt</a> +</ul> +</BODY> +</HTML> diff --git a/gnu/usr.bin/lynx/test/sgml.html b/gnu/usr.bin/lynx/test/sgml.html index da3b0277452..9442534582c 100644 --- a/gnu/usr.bin/lynx/test/sgml.html +++ b/gnu/usr.bin/lynx/test/sgml.html @@ -1,3 +1,4 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <HTML> <HEAD> <TITLE>Test of some Unicode symbols enclosed as SGML entity names</TITLE> @@ -48,14 +49,14 @@ original comment: --> -This test illuminating SGML character entities implementation in your browser. +This test illuminates SGML character entities implementation in your browser. We sort the entities according to unicode numbers. -You should see visible character if your display character set support it +You should see visible character if your display character set supports it or some substitution string picked up from src/chrtrans/def7_uni.tbl. If you see &somename; - this name is not implemented yet, -you may search for &. (Sorry, ISOgrk4 which hold a dot in its name +you may search for &. (Sorry, ISOgrk4 which holds a dot in its name seems to be nonvisible for most browsers. Keep in mind that -this table is much wider than in HTML 4.0 draft). +this table is much wider than in the HTML 4.0 draft). Leonid Pauzner. @@ -405,9 +406,9 @@ this table is much wider than in HTML 4.0 draft). 
0x03B3 &b.gamma; ISOgrk4 # GREEK SMALL LETTER GAMMA 0x03B3 γ ISOgrk3 # GREEK SMALL LETTER GAMMA 0x03B3 &ggr; ISOgrk1 # GREEK SMALL LETTER GAMMA +0x03B4 &b.delta; ISOgrk4 # GREEK SMALL LETTER DELTA 0x03B4 δ ISOgrk3 # GREEK SMALL LETTER DELTA 0x03B4 &dgr; ISOgrk1 # GREEK SMALL LETTER DELTA -0x03B4 &b.delta; ISOgrk4 # GREEK SMALL LETTER DELTA 0x03B5 &b.epsi; ISOgrk4 # GREEK SMALL LETTER EPSILON 0x03B5 &b.epsis; ISOgrk4 # GREEK SMALL LETTER EPSILON 0x03B5 &b.epsiv; ISOgrk4 # GREEK SMALL LETTER EPSILON @@ -633,7 +634,7 @@ this table is much wider than in HTML 4.0 draft). 0x2041 ⁁ ISOpub # CARET INSERTION POINT 0x2043 ⁃ ISOpub # HYPHEN BULLET 0x2044 ⁄ HTMLsymbol # FRACTION SLASH -0x20AC € new # EURO SIGN +0x20AC € new # EURO SIGN 0x20DB ⃛ ISOtech # COMBINING THREE DOTS ABOVE 0x20DC ⃜ ISOtech # COMBINING FOUR DOTS ABOVE 0x2105 ℅ ISOpub # CARE OF diff --git a/gnu/usr.bin/lynx/test/unicode.html b/gnu/usr.bin/lynx/test/unicode.html index 07ca28c109f..7abcd1a2232 100644 --- a/gnu/usr.bin/lynx/test/unicode.html +++ b/gnu/usr.bin/lynx/test/unicode.html @@ -1,3 +1,4 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <HTML> <HEAD> <TITLE>Test of some Unicode symbols in numeric character reference form</TITLE> @@ -526,7 +527,7 @@ or the internal browser's implementation is broken. 
0x2041 ⁁ # CARET INSERTION POINT 0x2043 ⁃ # HYPHEN BULLET 0x2044 ⁄ # FRACTION SLASH -0x20AC € # EURO SIGN +0x20AC € # EURO SIGN 0x20DB ⃛ # COMBINING THREE DOTS ABOVE 0x20DC ⃜ # COMBINING FOUR DOTS ABOVE 0x2105 ℅ # CARE OF diff --git a/gnu/usr.bin/lynx/test/utf-8-demo.html b/gnu/usr.bin/lynx/test/utf-8-demo.html new file mode 100644 index 00000000000..d792903f304 --- /dev/null +++ b/gnu/usr.bin/lynx/test/utf-8-demo.html @@ -0,0 +1,216 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<HTML> +<HEAD> +<TITLE>Markus Kuhn's UTF-8 demo</TITLE> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1"> +<LINK REV="made" HREF="mailto:dickey@invisible-island.net"> +</HEAD> + +<BODY> +<pre> +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] <mkuhn@acm.org> — 1999-08-20 + + +The ASCII compatible UTF-8 encoding of ISO 10646 and Unicode +plain-text files is defined in RFC 2279 and in ISO 10646-1 Annex R. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and Sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), + + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (A ⇔ B), + + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ € 14.95 │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω 
ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. 
+ Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' 
and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + +</pre> +</BODY> +</HTML> |