| 14 |
liveuser |
1 |
/*!
 * UTF-8 Validation Code originally from:
 * ws: a node.js websocket client
 * Copyright(c) 2015 Einar Otto Stangvik <einaros@gmail.com>
 * MIT Licensed
 */
|
|
|
7 |
|
|
|
8 |
#include <v8.h>
|
|
|
9 |
#include <node.h>
|
|
|
10 |
#include <node_version.h>
|
|
|
11 |
#include <node_buffer.h>
|
|
|
12 |
#include <node_object_wrap.h>
|
|
|
13 |
#include <stdlib.h>
|
|
|
14 |
#include <wchar.h>
|
|
|
15 |
#include <stdio.h>
|
|
|
16 |
#include "nan.h"
|
|
|
17 |
|
|
|
18 |
using namespace v8;
|
|
|
19 |
using namespace node;
|
|
|
20 |
|
|
|
21 |
/*
 * Code point limits used when checking decoded UTF-8 sequences.
 * Each expansion is fully parenthesized so the macro behaves as a single
 * operand wherever it is substituted.
 */
#define UNI_SUR_HIGH_START   (static_cast<uint32_t>(0xD800))     /* first surrogate code point */
#define UNI_SUR_LOW_END      (static_cast<uint32_t>(0xDFFF))     /* last surrogate code point */
#define UNI_REPLACEMENT_CHAR (static_cast<uint32_t>(0x0000FFFD))
#define UNI_MAX_LEGAL_UTF32  (static_cast<uint32_t>(0x0010FFFF)) /* highest accepted code point */
|
|
|
25 |
|
|
|
26 |
/*
 * Indexed by the first byte of a sequence; yields how many additional bytes
 * are expected to follow it.  Bytes 0x00-0xBF map to 0, 0xC0-0xDF to 1,
 * 0xE0-0xEF to 2, 0xF0-0xF7 to 3, 0xF8-0xFB to 4 and 0xFC-0xFF to 5.
 * The table only classifies the byte; whether the resulting sequence is
 * actually legal is decided separately by isLegalUTF8().
 */
static const uint8_t trailingBytesForUTF8[256] = {
    /* 0x00 - 0x1F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x20 - 0x3F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x40 - 0x5F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x60 - 0x7F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x80 - 0x9F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xA0 - 0xBF */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xC0 - 0xDF */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xE0 - 0xFF */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
|
|
|
36 |
|
|
|
37 |
/*
 * Indexed by the number of trailing bytes.  The decoder adds the raw bytes of
 * a sequence together (shifting by 6 between bytes) and then subtracts the
 * entry selected here, which removes the accumulated marker bits of the lead
 * byte and of every continuation byte in one step.  For example
 * 0x00003080 == (0xC0 << 6) + 0x80 and
 * 0x000E2080 == (0xE0 << 12) + (0x80 << 6) + 0x80.
 */
static const uint32_t offsetsFromUTF8[6] = {
    0x00000000u, /* 0 trailing bytes */
    0x00003080u, /* 1 trailing byte  */
    0x000E2080u, /* 2 trailing bytes */
    0x03C82080u, /* 3 trailing bytes */
    0xFA082080u, /* 4 trailing bytes */
    0x82082080u  /* 5 trailing bytes */
};
|
|
|
41 |
|
|
|
42 |
/*
 * Checks whether the `length` bytes starting at `source` form one legal
 * UTF-8 sequence.
 *
 * source: first byte of the candidate sequence; bytes are only read when
 *         length is between 1 and 4.
 * length: number of bytes in the candidate sequence.
 *
 * Returns 1 when the sequence is legal, 0 otherwise.  Lengths outside 1..4
 * are always rejected (RFC 3629 makes 5 and 6 byte forms illegal).
 */
static int isLegalUTF8(const uint8_t *source, const int length)
{
    if (length < 1 || length > 4) return 0;

    const uint8_t lead = source[0];

    /* Third and fourth bytes, when present, must be plain continuation bytes. */
    for (int idx = length - 1; idx >= 2; --idx) {
        const uint8_t tail = source[idx];
        if (tail < 0x80 || tail > 0xBF) return 0;
    }

    if (length >= 2) {
        const uint8_t second = source[1];

        /*
         * The second byte must always be a continuation byte.  The lower
         * bound is enforced for every lead byte, including 0xED and 0xF4,
         * so that inputs such as ED 41 80 are rejected.
         */
        if (second < 0x80 || second > 0xBF) return 0;

        /* Some lead bytes narrow the allowed range of the second byte. */
        switch (lead) {
            case 0xE0: if (second < 0xA0) return 0; break; /* overlong 3-byte form */
            case 0xED: if (second > 0x9F) return 0; break; /* surrogates D800..DFFF */
            case 0xF0: if (second < 0x90) return 0; break; /* overlong 4-byte form */
            case 0xF4: if (second > 0x8F) return 0; break; /* above 0x10FFFF */
            default: break;
        }
    }

    /* Continuation bytes and the overlong leads 0xC0/0xC1 cannot start a sequence. */
    if (lead >= 0x80 && lead < 0xC2) return 0;

    /* Lead bytes above 0xF4 would encode values beyond 0x10FFFF. */
    if (lead > 0xF4) return 0;

    return 1;
}
|
|
|
69 |
|
|
|
70 |
int is_valid_utf8 (size_t len, char *value)
|
|
|
71 |
{
|
|
|
72 |
/* is the string valid UTF-8? */
|
|
|
73 |
for (unsigned int i = 0; i < len; i++) {
|
|
|
74 |
uint32_t ch = 0;
|
|
|
75 |
uint8_t extrabytes = trailingBytesForUTF8[(uint8_t) value[i]];
|
|
|
76 |
|
|
|
77 |
if (extrabytes + i >= len)
|
|
|
78 |
return 0;
|
|
|
79 |
|
|
|
80 |
if (isLegalUTF8 ((uint8_t *) (value + i), extrabytes + 1) == 0) return 0;
|
|
|
81 |
|
|
|
82 |
switch (extrabytes) {
|
|
|
83 |
case 5 : ch += (uint8_t) value[i++]; ch <<= 6;
|
|
|
84 |
case 4 : ch += (uint8_t) value[i++]; ch <<= 6;
|
|
|
85 |
case 3 : ch += (uint8_t) value[i++]; ch <<= 6;
|
|
|
86 |
case 2 : ch += (uint8_t) value[i++]; ch <<= 6;
|
|
|
87 |
case 1 : ch += (uint8_t) value[i++]; ch <<= 6;
|
|
|
88 |
case 0 : ch += (uint8_t) value[i];
|
|
|
89 |
}
|
|
|
90 |
|
|
|
91 |
ch -= offsetsFromUTF8[extrabytes];
|
|
|
92 |
|
|
|
93 |
if (ch <= UNI_MAX_LEGAL_UTF32) {
|
|
|
94 |
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)
|
|
|
95 |
return 0;
|
|
|
96 |
} else {
|
|
|
97 |
return 0;
|
|
|
98 |
}
|
|
|
99 |
}
|
|
|
100 |
|
|
|
101 |
return 1;
|
|
|
102 |
}
|
|
|
103 |
|
|
|
104 |
class Validation : public ObjectWrap
|
|
|
105 |
{
|
|
|
106 |
public:
|
|
|
107 |
|
|
|
108 |
static void Initialize(v8::Handle<v8::Object> target)
|
|
|
109 |
{
|
|
|
110 |
Nan::HandleScope scope;
|
|
|
111 |
Local<FunctionTemplate> t = Nan::New<FunctionTemplate>(New);
|
|
|
112 |
t->InstanceTemplate()->SetInternalFieldCount(1);
|
|
|
113 |
Nan::SetMethod(t, "isValidUTF8", Validation::IsValidUTF8);
|
|
|
114 |
Nan::Set(target, Nan::New<String>("Validation").ToLocalChecked(), t->GetFunction());
|
|
|
115 |
}
|
|
|
116 |
|
|
|
117 |
protected:
|
|
|
118 |
|
|
|
119 |
static NAN_METHOD(New)
|
|
|
120 |
{
|
|
|
121 |
Nan::HandleScope scope;
|
|
|
122 |
Validation* validation = new Validation();
|
|
|
123 |
validation->Wrap(info.This());
|
|
|
124 |
info.GetReturnValue().Set(info.This());
|
|
|
125 |
}
|
|
|
126 |
|
|
|
127 |
static NAN_METHOD(IsValidUTF8)
|
|
|
128 |
{
|
|
|
129 |
Nan::HandleScope scope;
|
|
|
130 |
if (!Buffer::HasInstance(info[0])) {
|
|
|
131 |
return Nan::ThrowTypeError("First argument needs to be a buffer");
|
|
|
132 |
}
|
|
|
133 |
Local<Object> buffer_obj = info[0]->ToObject();
|
|
|
134 |
char *buffer_data = Buffer::Data(buffer_obj);
|
|
|
135 |
size_t buffer_length = Buffer::Length(buffer_obj);
|
|
|
136 |
info.GetReturnValue().Set(is_valid_utf8(buffer_length, buffer_data) == 1 ? Nan::True() : Nan::False());
|
|
|
137 |
}
|
|
|
138 |
};
|
|
|
139 |
#if !NODE_VERSION_AT_LEAST(0,10,0)
|
|
|
140 |
extern "C"
|
|
|
141 |
#endif
|
|
|
142 |
void init (Handle<Object> target)
|
|
|
143 |
{
|
|
|
144 |
Nan::HandleScope scope;
|
|
|
145 |
Validation::Initialize(target);
|
|
|
146 |
}
|
|
|
147 |
|
|
|
148 |
NODE_MODULE(validation, init)
|