/* OS/2 REXX: check various UCS transformation formats and BOCU-1 */ signal on novalue name TRAP ; signal on syntax name TRAP signal on failure name TRAP ; signal on halt name TRAP TEST. = 0 TEST.0.1 = 'UTF-32' ; TEST.0.2 = 'UTF-16' TEST.0.3 = 'UTF-8 ' ; TEST.0.4 = 'UTF-7 ' TEST.0.5 = 'UTF-4 ' ; TEST.0.6 = 'UTF-1 ' TEST.0.7 = 'BOCU-1' if 1 then call UCS.4 /* all code points (2**21 -2048) */ if 0 then call MES.1 /* "BOM" + 335 MES-1 code points */ if 0 then call LEGACY /* codepage 437, 858, 1252, nnnn */ exit 0 UCS.4: procedure expose TEST. /* test 2**21 -2048 code points, */ do N = 0 to x2d( '10FFFF' ) /* CAVEAT, this takes some time: */ if x2d( 'D800' ) <= N & N < x2d( 'E000' ) then iterate N SRC = x2c( d2x( N, 8 )) ; TEST.9.1 = TEST.9.1 + 4 DST = UTF16O( SRC ) ; if SRC <> UTF16I( DST ) then leave N TEST.9.2 = TEST.9.2 + length( DST ) DST = UTF32I( SRC ) ; if SRC <> UTF32O( DST ) then leave N TEST.9.3 = TEST.9.3 + length( DST ) DST = UTF7.O( SRC ) ; if SRC <> UTF7.I( DST ) then leave N TEST.9.4 = TEST.9.4 + length( DST ) DST = UTF4.O( SRC ) ; if SRC <> UTF4.I( DST ) then leave N TEST.9.5 = TEST.9.5 + length( DST ) DST = UTF1.O( SRC ) ; if SRC <> UTF1.I( DST ) then leave N TEST.9.6 = TEST.9.6 + length( DST ) DST = BOCU.O( SRC ) ; if SRC <> BOCU.I( DST ) then leave N TEST.9.7 = TEST.9.7 + length( DST ) call charout /**/, N d2c( 13 ) end N if N = x2d( '110000' ) then say ' PASS u+' || right( d2x( N - 1 ), 6, 0 ) else say ' FAIL u+' || right( d2x( N + 1 ), 6, 0 ) return 0 LEGACY: procedure expose TEST. /* test various legacy codepages */ TEST = 'n/a n/a 819 858 923 1252 MAC' do CASE = 3 to words( TEST ) /* for tests 1 and 2 see MES.1() */ PAGE = word( TEST, CASE ) ; TEST.CASE.0 = PAGE SRC = '' ; T = '' do N = 0 to 127 ; SRC = SRC || x2c( d2x( N, 8 )) ; end N select when PAGE = 819 then do /* ISO 8859-1 (Latin-1) */ do N = 128 to 255 ; T = T d2x( N ) ; end N /* 80-FF */ end /* -------------------------- */ when PAGE = 858 then do /* PC-multilingual-850+euro */ T = T ' C7 FC E9 E2 E4 E0 E5 E7' /* 80 */ T = T ' EA EB E8 EF EE EC C4 C5' /* 88 */ T = T ' C9 E6 C6 F4 F6 F2 FB F9' /* 90 */ T = T ' FF D6 DC F8 A3 D8 D7 192' /* 98 */ T = T ' E1 ED F3 FA F1 D1 AA BA' /* A0 */ T = T ' BF AE AC BD BC A1 AB BB' /* A8 */ T = T '2591 2592 2593 2502 2524 C1 C2 C0' /* B0 */ T = T ' A9 2563 2551 2557 255D A2 A5 2510' /* B8 */ T = T '2514 2534 252C 251C 2500 253C E3 C3' /* C0 */ T = T '255A 2554 2569 2566 2560 2550 256C A4' /* C8 */ T = T ' F0 D0 CA CB C8 20AC CD CE' /* D0 */ T = T ' CF 2518 250C 2588 2584 A6 CC 2580' /* D8 */ T = T ' D3 DF D4 D2 F5 D5 B5 FE' /* E0 */ T = T ' DE DA DB D9 FD DD AF B4' /* E8 */ T = T ' AD B1 2017 BE B6 A7 F7 B8' /* F0 */ T = T ' B0 A8 B7 B9 B3 B2 25A0 A0' /* F8 */ /* 0xD5 850: u+0131 small dotless i, 858: u+20AC Euro */ end /* -------------------------- */ when PAGE = 923 then do /* ISO 8859-15 (Latin-9) */ do N = 128 to 159 ; T = T d2x( N ) ; end N /* 80-9F */ T = T ' A0 A1 A2 A3 20AC A5 160 A7' /* A0 */ T = T ' 161 A9 AA AB AC AD AE AF' /* A8 */ T = T ' B0 B1 B2 B3 17D B5 B6 B7' /* B0 */ T = T ' 17E B9 BA BB 152 153 178 BF' /* B8 */ do N = 192 to 255 ; T = T d2x( N ) ; end N /* C0-FF */ end /* -------------------------- */ when PAGE = 1252 then do /* windows-1252 */ T = T '20AC 81 201A 192 201E 2026 2020 2021' /* 80 */ T = T ' 2C6 2030 160 2039 152 8D 17D 8F' /* 88 */ T = T ' 90 2018 2019 201C 201D 2022 2013 2014' /* 90 */ T = T ' 2DC 2122 161 203A 153 9D 17E 17F' /* 98 */ do N = 160 to 255 ; T = T d2x( N ) ; end N /* A0-FF */ end /* -------------------------- */ when PAGE = 1257 then do /* windows-1257 (12 u+FFFD ?) */ T = T '20AC FFFD 201A FFFD 201E 2026 2020 2021' /* 80 */ T = T 'FFFD 2030 FFFD 2039 FFFD A8 2C7 B8' /* 88 */ T = T 'FFFD 2018 2019 201C 201D 2022 2013 2014' /* 90 */ T = T 'FFFD 2122 FFFD 203A FFFD AF 2DB FFFD' /* 98 */ T = T ' A0 FFFD A2 A3 A4 FFFD A6 A7' /* A0 */ T = T ' D8 A9 156 AB AC AD AE C6' /* A8 */ T = T ' B0 B1 B2 B3 B4 B5 B6 B7' /* B0 */ T = T ' F8 B9 157 BB BC BD BE E6' /* B8 */ T = T ' 104 12E 100 106 C4 C5 118 112' /* C0 */ T = T ' 10C C9 179 116 122 136 12A 13B' /* C8 */ T = T ' 160 143 145 D3 14C D5 D6 D7' /* D0 */ T = T ' 172 141 15A 16A DC 17B 17D DF' /* D8 */ T = T ' 105 12F 101 107 E4 E5 119 113' /* E0 */ T = T ' 10D E9 17A 117 123 137 12B 13C' /* E8 */ T = T ' 161 144 146 F3 14D F5 F6 F7' /* F0 */ T = T ' 173 142 15B 16B FC 17C 17E 2D9' /* F8 */ end /* -------------------------- */ when PAGE = 'MAC' then do /* Macintosh */ T = T ' C4 C5 C7 C9 D1 D6 DC E1' /* 80 */ T = T ' E0 E2 E4 E3 E5 E7 E9 E8' /* 88 */ T = T ' EA EB ED EC EE EF F1 F3' /* 90 */ T = T ' F2 F4 F6 F5 FA F9 FB FC' /* 98 */ T = T '2020 B0 A2 A3 A7 2022 B6 DF' /* A0 */ T = T ' AE A9 2122 B4 A8 2260 C6 D8' /* A8 */ T = T '221E B1 2264 2265 A5 B5 2202 2211' /* B0 */ T = T '220F 3C0 222B AA BA 3A9 E6 F8' /* B8 */ T = T ' BF A1 AC 221A 192 2248 2206 AB' /* C0 */ T = T ' BB 2026 A0 C0 C3 D5 152 153' /* C8 */ T = T '2013 2014 201C 201D 2018 2019 F7 25CA' /* D0 */ T = T ' FF 178 2044 20AC 2039 203A FB01 FB02' /* D8 */ T = T '2021 B7 201A 201E 2030 C2 CA C1' /* E0 */ T = T ' CB C8 CD CE CF CC D3 D4' /* E8 */ T = T 'F8FF D2 DA DB D9 131 2C6 2DC' /* F0 */ T = T ' AF 2D8 2D9 2DA B8 2DD 2DB 2C7' /* F8 */ /* 0xBD old u+2126 Ohm : new u+03A9 Omega */ /* 0xDB old u+00A4 currency symbol : new u+20AC Euro */ /* 0xF0 old u+2665 black heart suit: new u+F8FF priv. */ end /* -------------------------- */ end /* otherwise force REXX error 40 */ do N = 1 to words( T ) SRC = SRC || x2c( right( word( T, N ), 8, 0 )) end N SRC = STIR( SRC, 1 ) ; TEST.CASE.1 = length( SRC ) if CHECK( CASE, 1, SRC, /* == */ SRC ) then exit TRAP( CASE ) DST = UTF16O( SRC ) ; TEST.CASE.2 = length( DST ) if CHECK( CASE, 2, SRC, UTF16I( DST )) then exit TRAP( CASE ) DST = UTF32I( SRC ) ; TEST.CASE.3 = length( DST ) if CHECK( CASE, 3, SRC, UTF32O( DST )) then exit TRAP( CASE ) DST = UTF7.O( SRC ) ; TEST.CASE.4 = length( DST ) if CHECK( CASE, 4, SRC, UTF7.I( DST )) then exit TRAP( CASE ) DST = UTF4.O( SRC ) ; TEST.CASE.5 = length( DST ) if CHECK( CASE, 5, SRC, UTF4.I( DST )) then exit TRAP( CASE ) DST = UTF1.O( SRC ) ; TEST.CASE.6 = length( DST ) if CHECK( CASE, 6, SRC, UTF1.I( DST )) then exit TRAP( CASE ) DST = BOCU.O( SRC ) ; TEST.CASE.7 = length( DST ) if CHECK( CASE, 7, SRC, BOCU.I( DST )) then exit TRAP( CASE ) end return MES.1: procedure expose TEST. /* table of 335 MES-1 characters */ MES = x2c( '0000FEFF' ) ; S.0 = 0020 ; E.0 = 007E S.1 = 00A0 ; E.1 = 0113 ; S.2 = 0116 ; E.2 = 012B S.3 = 012E ; E.3 = 014D ; S.4 = 0150 ; E.4 = 017E S.5 = 02C7 ; E.5 = 02C7 ; S.6 = 02D8 ; E.6 = 02DB S.7 = 02DD ; E.7 = 02DD ; S.8 = 2015 ; E.8 = 2015 S.9 = 2018 ; E.9 = 2019 ; S.10 = 201C ; E.10 = 201D S.11 = 20AC ; E.11 = 20AC ; S.12 = 2122 ; E.12 = 2122 S.13 = 2126 ; E.13 = 2126 ; S.14 = 215B ; E.14 = 215E S.15 = 2190 ; E.15 = 2193 ; S.16 = 266A ; E.16 = 266A do N = 0 to 16 do U = x2d( S.N ) to x2d( E.N ) MES = MES || x2c( d2x( U, 8 )) end U end N SRC = STIR( MES, 21 ) /* ----------------------------- */ TEST.1.1 = left( SRC, 4 ) ; TEST.2.1 = length( SRC ) if CHECK( 2, 1, SRC, /* == */ SRC ) then exit TRAP( 'failure' ) DST = UTF16O( SRC ) /* ----------------------------- */ TEST.1.2 = left( DST, 2 ) ; TEST.2.2 = length( DST ) if CHECK( 2, 2, SRC, UTF16I( DST )) then exit TRAP( 'failure' ) DST = UTF32I( SRC ) /* ----------------------------- */ TEST.1.3 = left( DST, 3 ) ; TEST.2.3 = length( DST ) if CHECK( 2, 3, SRC, UTF32O( DST )) then exit TRAP( 'failure' ) DST = UTF7.O( SRC ) /* ----------------------------- */ TEST.1.4 = left( DST, 5 ) ; TEST.2.4 = length( DST ) if CHECK( 2, 4, SRC, UTF7.I( DST )) then exit TRAP( 'failure' ) DST = UTF4.O( SRC ) /* ----------------------------- */ TEST.1.5 = left( DST, 5 ) ; TEST.2.5 = length( DST ) if CHECK( 2, 5, SRC, UTF4.I( DST )) then exit TRAP( 'failure' ) DST = UTF1.O( SRC ) /* ----------------------------- */ TEST.1.6 = left( DST, 3 ) ; TEST.2.6 = length( DST ) if CHECK( 2, 6, SRC, UTF1.I( DST )) then exit TRAP( 'failure' ) DST = BOCU.O( SRC ) /* ----------------------------- */ TEST.1.7 = left( DST, 3 ) ; TEST.2.7 = length( DST ) if CHECK( 2, 7, SRC, BOCU.I( DST )) then exit TRAP( 'failure' ) if 0 then return /* if 1 skip slow BOCU-1 details */ W. = 0 ; W.3 = copies( 9, digits()) do N = 1 to 128 SRC = STIR( MES, N ) ; DST = BOCU.O( SRC ) LEN = length( DST ) if SRC \== BOCU.I( DST ) then exit TRAP( CHECK( 0, 7, SRC, BOCU.I( DST ))) if W.1 < LEN then do ; W.1 = LEN ; W.2 = N ; end if W.3 > LEN then do ; W.3 = LEN ; W.4 = N ; end end N say 'B(' || right( W.2, 3 ) || ') ' W.1 say 'B(' || right( W.4, 3 ) || ') ' W.3 return 0 STIR: procedure /* stir SRC sequence for BOCU-1: */ parse arg SRC, GAP POS = 4 * ( length( SRC ) / 4 - GAP ) DST = '' /* move n-th character from SRC */ do N = 1 while SRC <> '' /* to DST (for realistic BOCU-1) */ POS = ( POS + 4 * GAP ) // length( SRC ) DST = DST || substr( SRC, POS + 1, 4 ) SRC = left( SRC, POS ) || substr( SRC, POS + 5 ) end N return DST CHECK: procedure expose TEST. /* progress (or error) indicator */ parse arg CASE, N, WANT, GOT ; LINE = TEST.0.N if TEST.1.N <> 0 then LINE = LINE left( c2x( TEST.1.N ), 10 ) if GOT == WANT then do do L = 2 to 8 if TEST.L.N <> 0 then LINE = LINE right( TEST.L.N, 4 ) end L if TEST.9.N <> 0 then LINE = LINE right( TEST.9.N, 8 ) L = 0 end else do LINE = LINE 'error in case' CASE do L = 1 to length( WANT ) by 4 if substr( WANT, L, 4 ) \== substr( GOT, L, 4 ) then do LINE = LINE 'wanted' c2x( substr( WANT, L, 4 )) LINE = LINE 'but got' c2x( substr( GOT, L, 4 )) leave L end end L L = 1 end say LINE ; return L UTF32I: procedure /* UTF-32BE to UTF-8 encoder */ parse arg SRC ; DST = '' do while 4 <= length( SRC ) /* split next UTF-32BE from SRC */ parse var SRC CHR 5 SRC ; CHR = c2d( CHR ) if CHR <= 127 then do DST = DST || d2c( CHR ) ; iterate end BIN = reverse( x2b( d2x( CHR ))) CHR = '' do LEN = 2 until verify( substr( BIN, 8 - LEN ), 0 ) = 0 CHR = CHR || left( BIN, 6, 0 ) || 01 BIN = substr( BIN, 7 ) end LEN BIN = CHR || left( BIN, 8 - LEN, 0 ) || copies( 1, LEN ) DST = DST || x2c( b2x( reverse( BIN ))) end if sign( length( SRC )) then DST = DST || SUB return DST UTF32O: procedure /* UTF-8 to UTF-32BE decoder */ U.2 = xrange( x2c( '80' ), x2c( 'BF' )) SUB = x2c( '0000FFFD' ) ; DST = '' parse arg SRC ; LOS = length( SRC ) do while LOS > 0 parse var SRC LB 2 SRC ; LOS = LOS - 1 LB = c2d( LB ) ; TOP = 0 if LB < 128 then do DST = DST || x2c( d2x( LB, 8 )) ; iterate end if LOS > 0 then TOP = c2d( left( SRC, 1 )) % 16 select /* for CESU remove both LB = 237 */ when LB < 192 then LEN = -0 /* trail bytes */ when LB < 194 then LEN = -1 /* bad C0 + C1 */ when LB < 224 then LEN = +1 when LB = 224 & TOP = 8 then LEN = -2 /* E08x is bad */ when LB = 224 & TOP = 9 then LEN = -2 /* E09x is bad */ when LB = 237 & TOP = 10 then LEN = -2 /* EDAx is bad */ when LB = 237 & TOP = 11 then LEN = -2 /* EDBx is bad */ when LB < 240 then LEN = +2 when LB = 240 & TOP = 8 then LEN = -3 /* F08x is bad */ when LB < 244 then LEN = +3 when LB = 244 & TOP = 8 then LEN = +3 /* F48x is ok. */ when LB < 248 then LEN = -3 /* bad F4 - F7 */ when LB < 252 then LEN = -4 /* bad F8 - FB */ when LB < 254 then LEN = -5 /* bad FC + FD */ otherwise LEN = -0 /* bad FE + FF */ end BAD = ( LEN <= 0 ) ; LEN = abs( LEN ) if LOS < LEN then do BAD = 1 ; LEN = LOS end TOP = left( SRC, LEN ) ; SRC = substr( SRC, LEN + 1 ) TMP = verify( TOP, U.2 ) ; LOS = LOS - LEN if TMP > 0 then do /* eat plausible trailing bytes: */ BAD = 1 ; SRC = substr( TOP, TMP ) || SRC LOS = length( SRC ) /* but keep possible valid input */ end /* bytes for the next iteration */ if BAD = 0 then do /* at this point input is valid: */ LB = x2b( d2x( LB )) ; LEN = verify( LB, 1 ) - 2 LB = copies( 0, LEN ) || right( LB, 6 - LEN ) do until TOP == '' TMP = x2b( c2x( left( TOP, 1 ))) LB = LB || right( TMP, 6 ) TOP = substr( TOP, 2 ) end TOP = b2x( strip( LB, 'L', 0 )) DST = DST || x2c( right( TOP, 8, 0 )) end else DST = DST || SUB end return DST UTF16I: procedure /* UTF-16BE to UTF-32BE decoder */ parse arg SRC ; LO = x2d( 'D800' ) DST = '' ; HI = x2d( 'DC00' ) do while 2 <= length( SRC ) /* next UTF-16 or low surrogate */ parse var SRC L 3 SRC ; L = c2d( L ) select when LO > L then DST = DST || x2c( d2x( L, 8 )) when 57344 <= L then DST = DST || x2c( d2x( L, 8 )) when HI <= L then DST = DST || x2c( '0000FFFD' ) when length( SRC ) < 2 then SRC = '?' otherwise /* length < 2: no high surrogate */ L = L - LO + 64 ; parse var SRC R 3 SRC R = c2d( R ) - HI if 0 <= R & R < 57344 - HI then DST = DST || x2c( d2x( L * 1024 + R, 8 )) else DST = DST || x2c( '0000FFFD' ) end end if sign( length( SRC )) then DST = DST || x2c( '0000FFFD' ) return DST UTF16O: procedure /* UTF-32BE to UTF-16BE encoder */ parse arg SRC ; LO = x2d( 'D800' ) DST = '' ; HI = x2d( 'DC00' ) do while 4 <= length( SRC ) /* split next UTF-32BE from SRC */ parse var SRC L 3 R 5 SRC ; L = c2d( L ) - 1 if L < 0 | 15 < L then do if 15 < L then R = x2c( 'FFFD' ) DST = DST || R ; iterate end R = c2d( R ) ; L = L * 64 + R % 1024 R = R // 1024 ; L = x2c( d2x( LO + L, 4 )) R = x2c( d2x( HI + R, 4 )) ; DST = DST || L || R end if sign( length( SRC )) then DST = DST || x2c( 'FFFD' ) return DST UTF7.I: procedure /* UTF-7 to UTF-32BE decoder */ B64 = 'abcdefghijklmnopqrstuvwxyz' B64 = translate( B64 ) || B64 || '0123456789+/' parse arg SRC ; DST = '' do while length( SRC ) > 0 parse var SRC TOP 2 SRC if TOP <> '+' then do DST = DST || right( TOP, 4, x2c( 0 )) iterate end if abbrev( SRC, '-' ) then do SRC = substr( SRC, 2 ) ; DST = DST || x2c( '0000002B' ) iterate /* decode '+-' as '+' = u+002B */ end TMP = verify( SRC || '-', B64 ) if TMP = 1 then do /* '+' before non-B64 is invalid */ DST = DST || x2c( '0000FFFD' ) iterate end TOP = left( SRC, TMP - 1 ) ; SRC = substr( SRC, TMP ) TMP = '' do until TOP == '' /* decode B64 chars.s after '+' */ parse var TOP POS 2 TOP POS = d2x( pos( POS, B64 ) - 1 ) TMP = TMP || right( x2b( POS ), 6, 0 ) end POS = length( TMP ) /* RFC 1642 UTF-7 B64 has no pad */ TOP = x2c( b2x( left( TMP, POS - POS // 8 ))) ERR = ( POS // 8 = 6 ) ; POS = length( TOP ) if POS // 2 then do /* note extraneous B64 char. ERR */ ERR = 1 ; TOP = left( TOP, POS - 1 ) end /* note odd number of octets ERR */ do until TOP == '' /* process even number of octets */ parse var TOP TMP 3 TOP ; TMP = c2d( TMP ) select when TMP < 55296 then DST = DST || x2c( d2x( TMP, 8 )) when 57343 < TMP then DST = DST || x2c( d2x( TMP, 8 )) when 56319 < TMP then DST = DST || x2c( '0000FFFD' ) when TOP == '' then DST = DST || x2c( '0000FFFD' ) otherwise /* got low surrogate, handle U32 */ POS = 1024 * ( TMP - 55296 + 64 ) parse var TOP TMP 3 TOP TMP = c2d( TMP ) - 56320 if 0 <= TMP & TMP < 1024 then DST = DST || x2c( d2x( POS + TMP, 8 )) else DST = DST || x2c( '0000FFFD' ) end /* no high surrogate: use u+FFFD */ end /* after B64 problems add u+FFFD */ if ERR then DST = DST || x2c( '0000FFFD' ) if abbrev( SRC, '-' ) then SRC = substr( SRC, 2 ) end return DST UTF7.O: procedure /* UTF-32BE to UTF-7 encoder */ B64 = 'abcdefghijklmnopqrstuvwxyz' B64 = translate( B64 ) || B64 || '0123456789+/' parse arg SRC ; DST = '' B16 = '' do while 4 <= length( SRC ) /* split next UTF-32BE from SRC */ parse var SRC CHR 5 SRC ; TMP = c2d( CHR ) select /* special cases '\', '~', etc. */ when TMP <= 8 then CHR = right( CHR, 2 ) when TMP <= 10 then CHR = right( CHR, 1 ) when TMP <= 12 then CHR = right( CHR, 2 ) when TMP = 13 then CHR = right( CHR, 1 ) when TMP <= 31 then CHR = right( CHR, 2 ) when TMP <= 91 then CHR = right( CHR, 1 ) when TMP = 92 then CHR = right( CHR, 2 ) when TMP <= 125 then CHR = right( CHR, 1 ) when TMP < 55296 then CHR = right( CHR, 2 ) when TMP < 57344 then CHR = x2c( 'FFFD' ) when TMP < 65536 then CHR = right( CHR, 2 ) otherwise CHR = UTF16O( CHR ) end if length( CHR ) > 1 then do B16 = B16 || CHR ; TMP = length( SRC ) select /* collect UTF-16 in buffer B16 */ when 4 <= TMP then iterate when 1 <= TMP then B16 = B16 || x2c( 'FFFD' ) otherwise nop /* but output B16 at end of SRC, */ end /* CHR = '' triggers sanity '-' */ SRC = '' ; CHR = '' end if B16 \== '' then do /* output B16 before UTF-7 ASCII */ DST = DST || '+' /* '+' (excl. '+-') starts a B64 */ B16 = x2b( c2x( B16 )) /* '-' or non-B64 terminates B64 */ TMP = ( length( B16 ) / 4 ) // 3 B16 = B16 || copies( '00', TMP ) do while B16 <> '' parse var B16 N 7 B16 ; N = x2d( b2x( N )) DST = DST || substr( B64, N + 1, 1 ) end /* add '-' also if next is a '-' */ if sign( pos( CHR, B64 )) | abbrev( '-', CHR ) then DST = DST || '-' end if CHR = '+' then DST = DST || '+-' else DST = DST || CHR end if sign( length( SRC )) then DST = DST || '+//0-' return DST UTF4.I: procedure /* UTF-4 to UTF-32BE decoder */ parse arg SRC ; DST = '' do while length( SRC ) > 0 parse var SRC LB 2 SRC ; LB = c2d( LB ) if LB <= 127 | ( 160 <= LB & LB <= 255 ) then do DST = DST || x2c( d2x( LB, 8 )) ; iterate end LB = LB - 128 ; CHR = 0 NX = c2x( left( SRC, LB )) ; SRC = substr( SRC, LB + 1 ) do N = 2 to 2 * LB by 2 CHR = CHR * 16 + x2d( substr( NX, N, 1 )) end N DST = DST || x2c( d2x( CHR, 8 )) end return DST UTF4.O: procedure /* UTF-32BE to UTF-4 encoder */ parse arg SRC ; DST = '' do while 4 <= length( SRC ) /* split next UTF-32BE from SRC */ parse var SRC CHR 5 SRC ; CHR = c2d( CHR ) if CHR <= 127 | ( 160 <= CHR & CHR <= 255 ) then do DST = DST || d2c( CHR ) ; iterate end CHR = d2x( CHR ) ; LEN = length( CHR ) DST = DST || d2c( 128 + LEN ) do N = 1 to LEN DST = DST || d2c( 144 + x2d( substr( CHR, N, 1 ))) end N end if sign( length( SRC )) then DST = DST || x2c( '948F8F8F8D' ) return DST UTF1.I: procedure /* UTF-1 to UTF-32BE decoder */ parse arg SRC ; W.1 = x2d( 4016 ) DST = '' ; W.2 = x2d( 38E2E ) do while length( SRC ) > 0 parse var SRC LB 2 SRC ; LB = c2d( LB ) if LB < 160 then do DST = DST || x2c( d2x( LB, 8 )) ; iterate end select when LB = 160 then do T = 1 ; CHR = 66 ; LB = 0 end when LB < 246 then do T = 1 ; CHR = 256 ; LB = LB - 161 end when LB < 252 then do T = 2 ; CHR = W.1 ; LB = LB - 246 end otherwise T = 4 ; CHR = W.2 ; LB = LB - 252 end CHR = CHR + LB * ( 190 ** T ) do N = T - 1 to 0 by -1 parse var SRC LB 2 SRC ; L = c2d( LB ) select /* accept trailing 21..7E/A0..FF */ when 160 <= L then L = L - 66 when 33 <= L & L < 127 then L = L - 33 otherwise /* reject trailing 00..20/7F..9F */ CHR = x2d( 'FFFD' ) SRC = LB || SRC ; leave N end CHR = CHR + L * ( 190 ** N ) end N select /* accept A0..D7FF, E000..10FFFF */ when 1114111 < CHR then DST = DST || x2c( '0000FFFD' ) when 57344 <= CHR then DST = DST || x2c( d2x( CHR, 8 )) when 55296 <= CHR then DST = DST || x2c( '0000FFFD' ) when 160 <= CHR then DST = DST || x2c( d2x( CHR, 8 )) otherwise DST = DST || x2c( '0000FFFD' ) end /* 7F..9F never arrive here, but */ end /* 00..7E (bad UTF-1 A021..A07E) */ return DST UTF1.O: procedure /* UTF-32BE to UTF-1 encoder */ parse arg SRC ; W.1 = x2d( 4016 ) DST = '' ; W.2 = x2d( 38E2E ) do while 4 <= length( SRC ) /* split next UTF-32BE from SRC */ parse var SRC CHR 5 SRC ; CHR = c2d( CHR ) if CHR < 256 then do if 160 <= CHR then DST = DST || d2c( 160 ) DST = DST || d2c( CHR ) ; iterate end select when CHR < W.1 then do T = 1 ; L = 161 ; CHR = CHR - 256 end when CHR < W.2 then do T = 2 ; L = 246 ; CHR = CHR - W.1 end otherwise T = 4 ; L = 252 ; CHR = CHR - W.2 end DST = DST || d2c( L + CHR % ( 190 ** T )) do N = T - 1 to 0 by -1 /* trailing bytes 21..7E, A0..FF */ L = ( CHR % ( 190 ** N )) // 190 if L < 94 then DST = DST || d2c( L + 33 ) else DST = DST || d2c( L + 66 ) end N end if sign( length( SRC )) then DST = DST || x2c( 'F765AD' ) return DST BOCU.I: procedure /* BOCU-1 to UTF-32BE decoder */ parse arg SRC /* (TRAP if invalid trail bytes) */ PREV = 64 ; DST = '' /* (RD1) */ do while length( SRC ) > 0 parse var SRC LB 2 SRC ; LB = c2d( LB ) if LB <= 32 | LB = 255 then do /* (RD2) */ if LB <> 32 then PREV = 64 /* (RD3) */ if LB < 255 then DST = DST || x2c( d2x( LB, 8 )) iterate /* reset state if 255 here (RD6) */ end select /* single RD4 or multi-byte RD5: */ when 254 = LB then do T = 3 ; DIFF = 187660 ; LB = LB - 254 end when 251 <= LB then do T = 2 ; DIFF = 10513 ; LB = LB - 251 end when 208 <= LB then do T = 1 ; DIFF = 64 ; LB = LB - 208 end when 80 <= LB then do T = 0 ; DIFF = 0 ; LB = LB - 144 end when 37 <= LB then do T = 1 ; DIFF = -64 ; LB = LB - 80 end when 34 <= LB then do T = 2 ; DIFF = -10513 ; LB = LB - 37 end when 33 = LB then do T = 3 ; DIFF = -187660 ; LB = LB - 34 end end /* otherwise force REXX error 40 */ CHR = LB * ( 243 ** T ) + DIFF + PREV do N = 1 to T parse var SRC LB 2 SRC /* missing trail bytes => empty, */ LB = c2d( LB ) /* empty => 0, causes TRAP below */ select when 33 <= LB & LB < 256 then LB = LB - 13 when 28 <= LB & LB <= 31 then LB = LB - 12 when 16 <= LB & LB <= 25 then LB = LB - 10 when 1 <= LB & LB <= 6 then LB = LB - 1 otherwise exit TRAP( 'bad trail byte' x2d( LB )) end CHR = CHR + LB * ( 243 ** ( T - N )) end N DST = DST || x2c( d2x( CHR, 8 )) PREV = BOCU.5( CHR ) end return DST BOCU.O: procedure /* UTF-32BE to BOCU-1 encoder */ parse arg SRC PREV = 64 ; DST = '' /* R1 */ do while 0 < length( SRC ) /* split next UTF-32BE from SRC */ parse var SRC CHR 5 SRC if length( CHR ) = 4 then CHR = c2d( CHR ) else CHR = x2d( 'FFFD' ) if CHR <= 32 then do /* C0 control or space, R2 or R3 */ if CHR < 32 then PREV = 64 /* R3 */ DST = DST || d2c( CHR ) ; iterate /* R2 */ end DIFF = CHR - PREV /* R4 */ PREV = BOCU.5( CHR ) /* R5 */ TAIL = '' /* R4.1 handled with R4.2 - R4.6 */ select /* R4.2 base LEAD bytes and R4.3 */ when 1114111 < DIFF then exit TRAP( 10FFFF ) when 187660 <= DIFF then do /* 3, FE, 2DD0C */ T = 3 ; DIFF = DIFF - 187660 ; LEAD = 254 end when 10513 <= DIFF then do /* 2, FB, 2911 */ T = 2 ; DIFF = DIFF - 10513 ; LEAD = 251 end when 64 <= DIFF then do /* 1, D0, 64 */ T = 1 ; DIFF = DIFF - 64 ; LEAD = 208 end when -64 <= DIFF then do /* 0, 90, 0 */ T = 0 ; LEAD = 144 end when -10513 <= DIFF then do /* 1, 50, -64 */ T = 1 ; DIFF = DIFF + 64 ; LEAD = 80 end when -187660 <= DIFF then do /* 2, 25, -2911 */ T = 2 ; DIFF = DIFF + 10513 ; LEAD = 37 end when -1114111 <= DIFF then do /* 3, 22, -2DDC0 */ T = 3 ; DIFF = DIFF + 187660 ; LEAD = 34 end end /* otherwise force REXX error 40 */ do N = 1 to T /* determine trail bytes (R4.4): */ M = DIFF // 243 ; DIFF = DIFF % 243 if M < 0 then do /* non-negative modulo (R4.4a) */ M = M + 243 ; DIFF = DIFF - 1 end select /* avoid 00, 07..0F, 1A..1B, 20: */ when M <= 5 then TAIL = d2c( M + 1 ) || TAIL when M <= 15 then TAIL = d2c( M + 10 ) || TAIL when M <= 19 then TAIL = d2c( M + 12 ) || TAIL when M <= 242 then TAIL = d2c( M + 13 ) || TAIL end /* otherwise force REXX error 40 */ end N DST = DST || d2c( LEAD + DIFF ) || TAIL /* R4.5 and R4.6 */ end return DST BOCU.5: /* Hiragana etc. not tested here */ select when arg( 1 ) < x2d( '3040' ) then nop /* Hiragana (R5a) */ when arg( 1 ) < x2d( '30A0' ) then return x2d( '3070' ) when arg( 1 ) < x2d( '4E00' ) then nop /* Unihan (R5b) */ when arg( 1 ) < x2d( '9FA6' ) then return x2d( '7711' ) when arg( 1 ) < x2d( 'AC00' ) then nop /* Hangul (R5c) */ when arg( 1 ) < x2d( 'D7A4' ) then return x2d( 'C1D1' ) otherwise nop end return arg( 1 ) - ( arg( 1 ) // 128 ) + 64 /* middle of page */ /* see , (c) F. Ellermann */ TRAP: /* select REXX exception handler */ call trace 'O' ; trace N /* don't trace interactive */ parse source TRAP /* source on separate line */ TRAP = x2c( 0D ) || right( '+++', 10 ) TRAP || x2c( 0D0A ) TRAP = TRAP || right( '+++', 10 ) /* = standard trace prefix */ TRAP = TRAP strip( condition( 'c' ) 'trap:' condition( 'd' )) select when wordpos( condition( 'c' ), 'ERROR FAILURE' ) > 0 then do if condition( 'd' ) > '' /* need an additional line */ then TRAP = TRAP || x2c( 0D0A ) || right( '+++', 10 ) TRAP = TRAP '(RC' rc || ')' /* any system error codes */ if condition( 'c' ) = 'FAILURE' then rc = -3 end when wordpos( condition( 'c' ), 'HALT SYNTAX' ) > 0 then do if condition( 'c' ) = 'HALT' then rc = 4 if condition( 'd' ) > '' & condition( 'd' ) <> rc then do if condition( 'd' ) <> errortext( rc ) then do TRAP = TRAP || x2c( 0D0A ) || right( '+++', 10 ) TRAP = TRAP errortext( rc ) end /* future condition( 'd' ) */ end /* may use errortext( rc ) */ else TRAP = TRAP errortext( rc ) rc = -rc /* rc < 0: REXX error code */ end when condition( 'c' ) = 'NOVALUE' then rc = -2 /* dubious */ when condition( 'c' ) = 'NOTREADY' then rc = -1 /* dubious */ otherwise /* force non-zero whole rc */ if datatype( value( 'RC' ), 'W' ) = 0 then rc = 1 if rc = 0 then rc = 1 if condition() = '' then TRAP = TRAP arg( 1 ) end /* direct: TRAP( message ) */ TRAP = TRAP || x2c( 0D0A ) || format( sigl, 6 ) signal on syntax name TRAP.SIGL /* throw syntax error 3... */ if 0 < sigl & sigl <= sourceline() /* if no handle for source */ then TRAP = TRAP '*-*' strip( sourceline( sigl )) else TRAP = TRAP '+++ (source line unavailable)' TRAP.SIGL: /* ...catch syntax error 3 */ if abbrev( right( TRAP, 2 + 6 ), x2c( 0D0A )) then do TRAP = TRAP '+++ (source line unreadable)' ; rc = -rc end select when 0 then do /* in pipes STDERR: output */ parse version TRAP.REXX . . /* REXX/Personal: \dev\con */ signal on syntax name TRAP.FAIL if TRAP.REXX = 'REXXSAA' /* fails if no more handle */ then call lineout 'STDERR' , TRAP else call lineout '\dev\con', TRAP end when 0 then do /* OS/2 PM: RxMessageBox() */ signal on syntax name TRAP.FAIL call RxMessageBox , /* fails if not in PMREXX */ translate( TRAP, ' ', x2c( 0D )), , 'CANCEL', 'WARNING' end /* replace any CR by blank */ otherwise say TRAP ; trace ?L /* interactive Label trace */ end if condition() = 'SIGNAL' then signal TRAP.EXIT TRAP.CALL: return rc /* continue after CALL ON */ TRAP.FAIL: say TRAP ; rc = 0 - rc /* force TRAP error output */ TRAP.EXIT: exit rc /* exit for any SIGNAL ON */