1+ //
2+ // SocketFixUTF8.swift
3+ // Socket.IO-Swift
4+ //
5+ // Created by Erik Little on 3/16/15.
6+ // Permission is hereby granted, free of charge, to any person obtaining a copy
7+ // of this software and associated documentation files (the "Software"), to deal
8+ // in the Software without restriction, including without limitation the rights
9+ // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+ // copies of the Software, and to permit persons to whom the Software is
11+ // furnished to do so, subject to the following conditions:
12+ //
13+ // The above copyright notice and this permission notice shall be included in
14+ // all copies or substantial portions of the Software.
15+ //
16+ // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+ // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+ // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+ // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+ // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+ // THE SOFTWARE.
23+ //
24+ // Adapted from: https://github.com/durbrow/fix-double-utf8.swift
25+
26+ import Foundation
27+
/// Cache used by `lookup`: maps the two-scalar string "base + combining
/// scalar" to the scalar that `lookup` resolved it to (misses are cached too).
var memoizer: [String: UnicodeScalar] = [:]
29+
/// Resolves `base` followed by `combi` to a single scalar in the range
/// U+0080...U+00FF.
///
/// The two scalars are joined into a string and compared (with `String`'s
/// `==`) against the one-scalar string of every candidate in that range; the
/// first candidate that compares equal wins. When no candidate matches,
/// U+FFFD (the Unicode replacement character) is returned instead.
///
/// Every outcome, including a miss, is stored in `memoizer` under the joined
/// string, so a repeated pair is answered from the cache without rescanning.
func lookup(base: UnicodeScalar, combi: UnicodeScalar) -> UnicodeScalar {
    let key = "\(base)\(combi)"

    // Fast path: this pair has been resolved before.
    if let cached = memoizer[key] {
        return cached
    }

    // Start from the "no match" answer and overwrite it on the first hit.
    var resolved = UnicodeScalar(0xFFFD) // Unicode replacement character

    for code in 0x80...0xFF {
        let candidate = UnicodeScalar(code)

        if String(candidate) == key {
            resolved = candidate
            break
        }
    }

    memoizer[key] = resolved
    return resolved
}
50+
/// Attempts to repair, in place, a string whose UTF-8 bytes were each decoded
/// as a separate character ("double-encoded" UTF-8).
///
/// Every scalar below U+0100 is collected as one raw byte. A scalar at or
/// above U+0100 is treated as a possible combining character: it is merged
/// with the previously collected byte through `lookup`, and the merged scalar
/// replaces that byte. The collected bytes are then decoded as UTF-8 and
/// `name` is overwritten only when decoding reports no error.
///
/// `name` is left unchanged when:
///   * it contains only ASCII scalars,
///   * it contains U+0000 (the bytes are decoded as a NUL-terminated C
///     string, so an embedded NUL would truncate the result),
///   * a scalar >= U+0100 appears first or cannot be merged into one byte,
///   * the collected bytes are not well-formed UTF-8.
func fixDoubleUTF8(inout name: String) {
    var isASCII = true
    var y = [UInt8]()

    for ch in name.unicodeScalars {
        // An embedded NUL would end the C string early further down and
        // silently drop everything after it; refuse to touch such input
        // rather than truncate it.
        if ch.value == 0 {
            return
        }

        if ch.value < 0x80 {
            y.append(UInt8(ch))
            continue
        }
        isASCII = false

        if ch.value < 0x100 {
            y.append(UInt8(ch))
            continue
        }

        // might be a combining character that, when combined with the
        // preceding character, maps to a code point that fits in one byte
        if y.isEmpty {
            return
        }

        let last = y.removeLast()
        let repl = lookup(UnicodeScalar(last), ch)

        // the replacement needs to fit in a single byte
        // (`lookup` answers U+FFFD when nothing matched)
        if repl.value >= 0x100 {
            return
        }

        y.append(UInt8(repl))
    }

    // Pure ASCII cannot be double-encoded; nothing to repair.
    if isASCII {
        return
    }

    y.append(0) // null terminator

    return y.withUnsafeBufferPointer {
        let cstr = UnsafePointer<CChar>($0.baseAddress) // typecast from uint8_t * to char *
        let rslt = String.fromCStringRepairingIllFormedUTF8(cstr) // -> (String?, hadError: Bool)
        if let str = rslt.0 {
            // Only accept the result when the bytes were valid UTF-8 as-is;
            // a "repaired" decode means the input was not double-encoded.
            if !rslt.hadError {
                name = str
            }
        }

        return
    }
}
0 commit comments