encoding, refactor: use icu built-in encoding for iconv if icu built-… · fibjs/fibjs@4cb10eb · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Commit 4cb10eb

Browse files
committed
encoding, refactor: use ICU's built-in converters for iconv when ICU supports the requested encoding.
1 parent 5e16d74 commit 4cb10eb

File tree

2 files changed

+175
-103
lines changed

2 files changed

+175
-103
lines changed

fibjs/src/encoding/encoding_iconv.cpp

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ static size_t iconv(iconv_t cd, const char** inbuf, size_t* inbytesleft,
4747
#include "object.h"
4848
#include "encoding_iconv.h"
4949
#include "ifs/encoding.h"
50-
50+
#include <unicode/include/unicode/ucnv.h>
5151
namespace fibjs {
5252

5353
DECLARE_MODULE(iconv);
@@ -92,9 +92,25 @@ void encoding_iconv::open(const char* charset)
9292

9393
result_t encoding_iconv::encode(exlib::string data, exlib::string& retVal)
9494
{
95+
if (data.empty()) {
96+
retVal.clear();
97+
return 0;
98+
}
99+
95100
if (ucs_encode(data, retVal) == 0)
96101
return 0;
97102

103+
int32_t _sz;
104+
UErrorCode errorCode = U_ZERO_ERROR;
105+
106+
_sz = ucnv_convert(m_charset.c_str(), "utf-8", NULL, 0, data.c_str(), data.length(), &errorCode);
107+
if (_sz) {
108+
retVal.resize(_sz);
109+
errorCode = U_ZERO_ERROR;
110+
ucnv_convert(m_charset.c_str(), "utf-8", retVal.c_buffer(), _sz, data.c_str(), data.length(), &errorCode);
111+
return 0;
112+
}
113+
98114
if (!m_iconv_en) {
99115
m_iconv_en = iconv_open(m_charset.c_str(), "utf-8");
100116
if (m_iconv_en == (iconv_t)(-1)) {
@@ -135,6 +151,11 @@ result_t encoding_iconv::encode(exlib::string data, obj_ptr<Buffer_base>& retVal
135151

136152
result_t encoding_iconv::decode(const char* data, size_t sz, exlib::string& retVal)
137153
{
154+
if (sz == 0) {
155+
retVal.clear();
156+
return 0;
157+
}
158+
138159
if (ucs_decode(data, sz, retVal) == 0)
139160
return 0;
140161

@@ -146,6 +167,17 @@ result_t encoding_iconv::decode(const char* data, size_t sz, exlib::string& retV
146167
}
147168
}
148169

170+
int32_t _sz;
171+
UErrorCode errorCode = U_ZERO_ERROR;
172+
173+
_sz = ucnv_convert("utf-8", m_charset.c_str(), NULL, 0, data, sz, &errorCode);
174+
if (_sz) {
175+
retVal.resize(_sz);
176+
errorCode = U_ZERO_ERROR;
177+
ucnv_convert("utf-8", m_charset.c_str(), retVal.c_buffer(), _sz, data, sz, &errorCode);
178+
return 0;
179+
}
180+
149181
exlib::string strBuf;
150182

151183
strBuf.resize(sz * 2);
@@ -210,6 +242,13 @@ bool encoding_iconv::is_encoding(exlib::string charset)
210242
if (is_ucs_encoding(charset))
211243
return true;
212244

245+
UErrorCode err = U_ZERO_ERROR;
246+
UConverter* icu_ec = ucnv_open(charset.c_str(), &err);
247+
if (icu_ec) {
248+
ucnv_close(icu_ec);
249+
return true;
250+
}
251+
213252
void* iconv_ec = iconv_open(charset.c_str(), "utf-8");
214253
if (iconv_ec != (iconv_t)(-1)) {
215254
iconv_close((iconv_t)iconv_ec);

test/encoding_test.js

Lines changed: 135 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -328,111 +328,144 @@ describe('encoding', () => {
328328
}
329329
});
330330

331-
it('iconv ucs2', () => {
332-
for (var i = 0; i < 0xd800; i++) {
333-
var s = String.fromCharCode(i);
334-
var buf = iconv.encode('utf16le', s);
335-
var n = buf.readUInt16LE();
336-
assert.equal(i, n);
337-
assert.equal(iconv.decode('utf16le', buf), s);
338-
}
331+
describe("iconv", () => {
332+
it('ucs2', () => {
333+
for (var i = 0; i < 0xd800; i++) {
334+
var s = String.fromCharCode(i);
335+
var buf = iconv.encode('utf16le', s);
336+
var n = buf.readUInt16LE();
337+
assert.equal(i, n);
338+
assert.equal(iconv.decode('utf16le', buf), s);
339+
}
339340

340-
for (var i = 0; i < 0xd800; i++) {
341-
var s = String.fromCharCode(i);
342-
var buf = iconv.encode('utf16be', s);
343-
var n = buf.readUInt16BE();
344-
assert.equal(i, n);
345-
assert.equal(iconv.decode('utf16be', buf), s);
346-
}
341+
for (var i = 0; i < 0xd800; i++) {
342+
var s = String.fromCharCode(i);
343+
var buf = iconv.encode('utf16be', s);
344+
var n = buf.readUInt16BE();
345+
assert.equal(i, n);
346+
assert.equal(iconv.decode('utf16be', buf), s);
347+
}
347348

348-
assert.equal(new Buffer([0xc8]).toString(), '\ufffd');
349-
assert.equal(Buffer.from('3DD84DDC', 'hex').toString('utf16le'), '👍');
350-
});
349+
assert.equal(new Buffer([0xc8]).toString(), '\ufffd');
350+
assert.equal(Buffer.from('3DD84DDC', 'hex').toString('utf16le'), '👍');
351+
});
352+
353+
var datas = [
354+
[
355+
0x7f,
356+
"7f000000",
357+
"7f000000"
358+
],
359+
[
360+
0x80,
361+
"80000000",
362+
"80000000"
363+
],
364+
[
365+
0x7ff,
366+
"ff070000",
367+
"ff070000"
368+
],
369+
[
370+
0x800,
371+
"00080000",
372+
"00080000"
373+
],
374+
[
375+
0xffff,
376+
"ffff0000",
377+
"ffff0000"
378+
],
379+
[
380+
0x10000,
381+
"00000100",
382+
"00000100"
383+
],
384+
[
385+
0x10ffff,
386+
"ffff1000",
387+
"ffff1000"
388+
],
389+
[
390+
0x110000,
391+
"00001100",
392+
"00001100"
393+
],
394+
[
395+
0x1fffff,
396+
"ffff1f00",
397+
"ffff1f00"
398+
],
399+
[
400+
0x200000,
401+
"00002000",
402+
"00002000"
403+
],
404+
[
405+
0x3ffffff,
406+
"ffffff03",
407+
"bfff0000ffdf0000"
408+
],
409+
[
410+
0x4000000,
411+
"00000004",
412+
"c0ff000000dc0000"
413+
]
414+
];
351415

352-
var datas = [
353-
[
354-
0x7f,
355-
"7f000000",
356-
"7f000000"
357-
],
358-
[
359-
0x80,
360-
"80000000",
361-
"80000000"
362-
],
363-
[
364-
0x7ff,
365-
"ff070000",
366-
"ff070000"
367-
],
368-
[
369-
0x800,
370-
"00080000",
371-
"00080000"
372-
],
373-
[
374-
0xffff,
375-
"ffff0000",
376-
"ffff0000"
377-
],
378-
[
379-
0x10000,
380-
"00000100",
381-
"00000100"
382-
],
383-
[
384-
0x10ffff,
385-
"ffff1000",
386-
"ffff1000"
387-
],
388-
[
389-
0x110000,
390-
"00001100",
391-
"00001100"
392-
],
393-
[
394-
0x1fffff,
395-
"ffff1f00",
396-
"ffff1f00"
397-
],
398-
[
399-
0x200000,
400-
"00002000",
401-
"00002000"
402-
],
403-
[
404-
0x3ffffff,
405-
"ffffff03",
406-
"bfff0000ffdf0000"
407-
],
408-
[
409-
0x4000000,
410-
"00000004",
411-
"c0ff000000dc0000"
412-
]
413-
];
414-
415-
it('iconv ucs2 multi', () => {
416-
datas.forEach(d => {
417-
var buf = Buffer.alloc(4);
418-
buf.writeUInt32LE(d[0]);
419-
var s = iconv.decode('utf32le', buf);
420-
var buf2 = Buffer.alloc(s.length * 2);
421-
buf2.writeUInt16LE(s.charCodeAt(0));
422-
if (s.length > 1)
423-
buf2.writeUInt16LE(s.charCodeAt(1), 2);
424-
assert.equal(iconv.decode('utf16le', buf2), s);
425-
});
426-
});
416+
it('ucs2 multi', () => {
417+
datas.forEach(d => {
418+
var buf = Buffer.alloc(4);
419+
buf.writeUInt32LE(d[0]);
420+
var s = iconv.decode('utf32le', buf);
421+
var buf2 = Buffer.alloc(s.length * 2);
422+
buf2.writeUInt16LE(s.charCodeAt(0));
423+
if (s.length > 1)
424+
buf2.writeUInt16LE(s.charCodeAt(1), 2);
425+
assert.equal(iconv.decode('utf16le', buf2), s);
426+
});
427+
});
428+
429+
it('ucs4', () => {
430+
datas.forEach(d => {
431+
var buf = Buffer.alloc(4);
432+
buf.writeUInt32LE(d[0]);
433+
var s = iconv.decode('utf32le', buf);
434+
var buf1 = iconv.encode('utf32le', s);
435+
assert.deepEqual(buf.hex(), d[1]);
436+
assert.deepEqual(buf1.hex(), d[2]);
437+
});
438+
});
439+
440+
const builtin_datas = [
441+
{
442+
"name": "gbk",
443+
"text": "你好",
444+
"hex": "c4e3bac3"
445+
},
446+
{
447+
"name": "big5",
448+
"text": "你好",
449+
"hex": "a741a66e"
450+
},
451+
{
452+
"name": "shift_jis",
453+
"text": "こんにちは",
454+
"hex": "82b182f182c982bf82cd"
455+
},
456+
{
457+
"name": "euc-kr",
458+
"text": "안녕하세요",
459+
"hex": "bec8b3e7c7cfbcbcbfe4"
460+
}
461+
];
427462

428-
it('iconv ucs4', () => {
429-
datas.forEach(d => {
430-
var buf = Buffer.alloc(4);
431-
buf.writeUInt32LE(d[0]);
432-
var s = iconv.decode('utf32le', buf);
433-
var buf1 = iconv.encode('utf32le', s);
434-
assert.deepEqual(buf.hex(), d[1]);
435-
assert.deepEqual(buf1.hex(), d[2]);
463+
it("builtin codec", () => {
464+
for (var d of builtin_datas) {
465+
var buf = iconv.encode(d.name, d.text);
466+
assert.equal(buf.hex(), d.hex);
467+
assert.equal(iconv.decode(d.name, buf), d.text);
468+
}
436469
});
437470
});
438471

@@ -755,7 +788,7 @@ describe('encoding', () => {
755788
});
756789

757790
it('test for Map', () => {
758-
var tmp = {a: 12, b: [2, 3, 5], c: true};
791+
var tmp = { a: 12, b: [2, 3, 5], c: true };
759792
var map = new Map(Object.entries(tmp));
760793
assert.deepEqual(tmp, msgpack.decode(msgpack.encode(map)));
761794
assert.isObject(msgpack.decode(msgpack.encode(map)));

0 commit comments

Comments
 (0)
0