11module QC
2- # Copyright 2011 Keith Rarick
2+ # encoding: UTF-8
3+ #
4+ # Copyright 2011, 2012 Keith Rarick
35#
46# Permission is hereby granted, free of charge, to any person obtaining a copy
57# of this software and associated documentation files (the "Software"), to deal
@@ -261,6 +263,12 @@ def abbrev(s)
261263 def unquote ( q )
262264 q = q [ 1 ...-1 ]
263265 a = q . dup # allocate a big enough string
266+ rubydoesenc = false
267+ # In ruby >= 1.9, a[w] is a codepoint, not a byte.
268+ if a . class . method_defined? ( :force_encoding )
269+ a . force_encoding ( 'UTF-8' )
270+ rubydoesenc = true
271+ end
264272 r , w = 0 , 0
265273 while r < q . length
266274 c = q [ r ]
@@ -298,7 +306,12 @@ def unquote(q)
298306 end
299307 end
300308 end
301- w += ucharenc ( a , w , uchar )
309+ if rubydoesenc
310+ a [ w ] = '' << uchar
311+ w += 1
312+ else
313+ w += ucharenc ( a , w , uchar )
314+ end
302315 else
303316 raise Error , "invalid escape char #{ q [ r ] } in \" #{ q } \" "
304317 end
@@ -308,6 +321,8 @@ def unquote(q)
308321 # Copy anything else byte-for-byte.
309322 # Valid UTF-8 will remain valid UTF-8.
310323 # Invalid UTF-8 will remain invalid UTF-8.
324+ # In ruby >= 1.9, c is a codepoint, not a byte,
325+ # in which case this is still what we want.
311326 a [ w ] = c
312327 r += 1
313328 w += 1
@@ -442,6 +457,10 @@ def strenc(s)
442457 t = StringIO . new
443458 t . putc ( ?")
444459 r = 0
460+
461+ # In ruby >= 1.9, s[r] is a codepoint, not a byte.
462+ rubydoesenc = s . class . method_defined? ( :encoding )
463+
445464 while r < s . length
446465 case s [ r ]
447466 when ?" then t . print ( '\\"' )
@@ -456,21 +475,13 @@ def strenc(s)
456475 case true
457476 when Spc <= c && c <= ?~
458477 t . putc ( c )
459- when true
478+ when rubydoesenc
479+ u = c . ord
480+ surrenc ( t , u )
481+ else
460482 u , size = uchardec ( s , r )
461483 r += size - 1 # we add one more at the bottom of the loop
462- if u < 0x10000
463- t . print ( '\\u' )
464- hexenc4 ( t , u )
465- else
466- u1 , u2 = unsubst ( u )
467- t . print ( '\\u' )
468- hexenc4 ( t , u1 )
469- t . print ( '\\u' )
470- hexenc4 ( t , u2 )
471- end
472- else
473- # invalid byte; skip it
484+ surrenc ( t , u )
474485 end
475486 end
476487 r += 1
@@ -480,6 +491,20 @@ def strenc(s)
480491 end
481492
482493
# Writes the backslash-u escape sequence(s) for the integer u to
# the stream t.
#
# t - output stream; must respond to print (and whatever hexenc4
#     needs from it).
# u - integer code value to escape.
#
# A value below 0x10000 is written as a single '\u' prefix followed
# by whatever hexenc4 emits for it. A larger value is first split in
# two by unsubst (presumably into a UTF-16 surrogate pair -- confirm
# against unsubst), and each half is written as its own '\u' escape,
# first half first.
def surrenc(t, u)
  tail = u
  unless u < 0x10000
    # Too large for one escape: emit the leading half now and
    # leave the trailing half for the shared code below.
    lead, tail = unsubst(u)
    t.print('\\u')
    hexenc4(t, lead)
  end
  t.print('\\u')
  hexenc4(t, tail)
end
506+
507+
483508 def hexenc4 ( t , u )
484509 t . putc ( Hex [ ( u >>12 ) &0xf ] )
485510 t . putc ( Hex [ ( u >>8 ) &0xf ] )
0 commit comments