[PATCH] console utf-8 mode fixes

From: Adam Tlałka
Date: Fri Aug 04 2006 - 06:10:24 EST


Description: patch for drivers/char/vt.c

Fixed utf-8 mode so that alternate charset modes always work according
to the control sequences interpreted in the do_con_trol function, while
preserving backward compatibility with US-ASCII and VT100 semigraphics.

Malformed utf-8 sequences are represented as sequences of replacement glyphs, original codes, or '?' as a last resort.

unicode-xterm, gnome-terminal, konsole and other terminal emulators
in utf-8 mode respect the acsc, enacs and rmacs sequences. I also found that some important system programs (from the Debian distribution) use acsc in utf-8 mode - dselect, aptitude and w3m, for example.

Signed-off-by: Adam Tla/lka <atlka@xxxxxxxxx>


Regards
--
Adam Tlałka mailto:atlka@xxxxxxxxx ^v^ ^v^ ^v^
System & Network Administration Group - - - ~~~~~~
Computer Center, Gdańsk University of Technology, Poland
PGP public key: finger atlka@xxxxxxxxxxxxxxxxx
--- vt_orig.c 2006-08-03 08:34:40.000000000 +0200
+++ vt.c 2006-08-03 09:12:21.000000000 +0200
@@ -63,6 +63,13 @@
*
* Removed console_lock, enabled interrupts across all console operations
* 13 March 2001, Andrew Morton
+ *
+ * Fixed UTF-8 mode so alternate charset modes always work according
+ * to control sequences interpreted in do_con_trol function
+ * preserving backward VT100 semigraphics compatibility,
+ * malformed UTF sequences represented as sequences of replacement glyphs,
+ * original codes or '?' as a last resort if replacement glyph is undefined
+ * by Adam Tla/lka <atlka@xxxxxxxxx>, Aug 2006
*/

#include <linux/module.h>
@@ -1991,17 +1998,23 @@ static int do_con_write(struct tty_struc
/* Do no translation at all in control states */
if (vc->vc_state != ESnormal) {
tc = c;
- } else if (vc->vc_utf) {
+ } else if (vc->vc_utf && !vc->vc_disp_ctrl) {
/* Combine UTF-8 into Unicode */
- /* Incomplete characters silently ignored */
+ /* Malformed sequences as sequences of replacement glyphs */
+rescan_last_byte:
if(c > 0x7f) {
- if (vc->vc_utf_count > 0 && (c & 0xc0) == 0x80) {
- vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f);
- vc->vc_utf_count--;
- if (vc->vc_utf_count == 0)
- tc = c = vc->vc_utf_char;
- else continue;
+ if (vc->vc_utf_count) {
+ if ((c & 0xc0) == 0x80) {
+ vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f);
+ if (--vc->vc_utf_count) {
+ vc->vc_npar++;
+ continue;
+ }
+ tc = c = vc->vc_utf_char;
+ } else
+ goto replacement_glyph;
} else {
+ vc->vc_npar = 0;
if ((c & 0xe0) == 0xc0) {
vc->vc_utf_count = 1;
vc->vc_utf_char = (c & 0x1f);
@@ -2018,14 +2031,15 @@ static int do_con_write(struct tty_struc
vc->vc_utf_count = 5;
vc->vc_utf_char = (c & 0x01);
} else
- vc->vc_utf_count = 0;
+ goto replacement_glyph;
continue;
}
} else {
+ if (vc->vc_utf_count)
+ goto replacement_glyph;
tc = c;
- vc->vc_utf_count = 0;
}
- } else { /* no utf */
+ } else { /* no utf or alternate charset mode */
tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c];
}

@@ -2040,31 +2054,33 @@ static int do_con_write(struct tty_struc
* direct-to-font zone in UTF-8 mode.
*/
ok = tc && (c >= 32 ||
- (!vc->vc_utf && !(((vc->vc_disp_ctrl ? CTRL_ALWAYS
- : CTRL_ACTION) >> c) & 1)))
+ !(vc->vc_disp_ctrl ? (CTRL_ALWAYS >> c) & 1 :
+ vc->vc_utf || ((CTRL_ACTION >> c) & 1)))
&& (c != 127 || vc->vc_disp_ctrl)
&& (c != 128+27);

if (vc->vc_state == ESnormal && ok) {
/* Now try to find out how to display it */
tc = conv_uni_to_pc(vc, tc);
- if ( tc == -4 ) {
+ if (tc & ~charmask) {
+ if ( tc == -4 ) {
/* If we got -4 (not found) then see if we have
defined a replacement character (U+FFFD) */
- tc = conv_uni_to_pc(vc, 0xfffd);
-
- /* One reason for the -4 can be that we just
- did a clear_unimap();
- try at least to show something. */
- if (tc == -4)
- tc = c;
- } else if ( tc == -3 ) {
- /* Bad hash table -- hope for the best */
- tc = c;
- }
- if (tc & ~charmask)
- continue; /* Conversion failed */
+replacement_glyph:
+ tc = conv_uni_to_pc(vc, 0xfffd);
+ if (!(tc & ~charmask))
+ goto display_glyph;
+ } else if ( tc != -3 )
+ continue; /* nothing to display */
+ /* no hash table or no replacement --
+ * hope for the best */
+ if ( c & ~charmask )
+ tc = '?';
+ else
+ tc = c;
+ }

+display_glyph:
if (vc->vc_need_wrap || vc->vc_decim)
FLUSH
if (vc->vc_need_wrap) {
@@ -2088,6 +2104,15 @@ static int do_con_write(struct tty_struc
vc->vc_x++;
draw_to = (vc->vc_pos += 2);
}
+ if (vc->vc_utf_count) {
+ if (vc->vc_npar) {
+ vc->vc_npar--;
+ goto display_glyph;
+ }
+ vc->vc_utf_count = 0;
+ c = orig;
+ goto rescan_last_byte;
+ }
continue;
}
FLUSH