Subversion Repositories php-qbpwcf

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 liveuser 1
/*!
2
 * UTF-8 Validation Code originally from:
3
 * ws: a node.js websocket client
4
 * Copyright(c) 2015 Einar Otto Stangvik <einaros@gmail.com>
5
 * MIT Licensed
6
 */
7
 
8
#include <v8.h>
9
#include <node.h>
10
#include <node_version.h>
11
#include <node_buffer.h>
12
#include <node_object_wrap.h>
13
#include <stdlib.h>
14
#include <wchar.h>
15
#include <stdio.h>
16
#include "nan.h"
17
 
18
using namespace v8;
19
using namespace node;
20
 
21
/*
 * Code-point limits used by is_valid_utf8().
 *
 * Each body is fully parenthesized so the macro expands to a single primary
 * expression (e.g. `sizeof UNI_MAX_LEGAL_UTF32` parses as intended).
 */
#define UNI_SUR_HIGH_START   ((uint32_t) 0xD800)      /* lowest code point rejected as a surrogate */
#define UNI_SUR_LOW_END      ((uint32_t) 0xDFFF)      /* highest code point rejected as a surrogate */
#define UNI_REPLACEMENT_CHAR ((uint32_t) 0x0000FFFD)  /* U+FFFD; not referenced elsewhere in this file */
#define UNI_MAX_LEGAL_UTF32  ((uint32_t) 0x0010FFFF)  /* largest code point accepted */
25
 
26
/*
 * Indexed by the first byte of a sequence; yields the number of bytes that
 * are expected to follow it. Bytes 0x80-0xC1 and 0xF5-0xFF still get an
 * entry here; they are rejected later by isLegalUTF8(), not by this table.
 */
static const uint8_t trailingBytesForUTF8[256] = {
  /* 0x00-0x1F */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 0x20-0x3F */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 0x40-0x5F */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 0x60-0x7F */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 0x80-0x9F */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 0xA0-0xBF */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 0xC0-0xDF: one trailing byte */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xE0-0xEF: two; 0xF0-0xF7: three; 0xF8-0xFB: four; 0xFC-0xFF: five */
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
36
 
37
/*
 * Indexed by the number of trailing bytes. is_valid_utf8() sums the raw bytes
 * of a sequence (shifting left by 6 between bytes) and then subtracts the
 * matching entry, which cancels the marker bits of the lead byte and of every
 * continuation byte. Example for one trailing byte:
 *   (0xC0 << 6) + 0x80 == 0x3080.
 */
static const uint32_t offsetsFromUTF8[6] = {
  0x00000000, /* 0 trailing bytes */
  0x00003080, /* 1 trailing byte  */
  0x000E2080, /* 2 trailing bytes */
  0x03C82080, /* 3 trailing bytes */
  0xFA082080, /* 4 trailing bytes */
  0x82082080  /* 5 trailing bytes */
};
41
 
42
/*
 * Checks whether the `length` bytes starting at `source` form one well-formed
 * UTF-8 sequence.
 *
 * source: pointer to the first (lead) byte; `length` bytes must be readable.
 * length: total sequence length including the lead byte. Anything outside
 *         1..4 is rejected (RFC 3629 makes 5- and 6-byte forms illegal).
 *
 * Returns 1 when the sequence is legal, 0 otherwise.
 *
 * The function does not verify that `length` matches what the lead byte
 * announces; the caller derives `length` from the lead byte.
 */
static int isLegalUTF8(const uint8_t *source, const int length)
{
  if (length < 1 || length > 4) return 0;

  const uint8_t lead = source[0];

  /* 0x80-0xBF are bare continuation bytes, 0xC0/0xC1 only encode overlong
     forms, and nothing above 0xF4 can start a legal sequence. */
  if (lead >= 0x80 && lead < 0xC2) return 0;
  if (lead > 0xF4) return 0;

  if (length == 1) return 1;

  /* Every trailing byte must be a continuation byte (0x80-0xBF). This
     includes the second byte: checking only its upper bound would let
     sequences such as ED 7F 80 or F4 7F 80 80 slip through. */
  for (int k = 1; k < length; ++k) {
    if (source[k] < 0x80 || source[k] > 0xBF) return 0;
  }

  /* Some lead bytes narrow the permitted range of the second byte. */
  const uint8_t second = source[1];
  switch (lead) {
    case 0xE0: if (second < 0xA0) return 0; break; /* overlong 3-byte form */
    case 0xED: if (second > 0x9F) return 0; break; /* U+D800..U+DFFF       */
    case 0xF0: if (second < 0x90) return 0; break; /* overlong 4-byte form */
    case 0xF4: if (second > 0x8F) return 0; break; /* above U+10FFFF       */
    default:   break;
  }

  return 1;
}
69
 
70
int is_valid_utf8 (size_t len, char *value)
71
{
72
  /* is the string valid UTF-8? */
73
  for (unsigned int i = 0; i < len; i++) {
74
    uint32_t ch = 0;
75
    uint8_t  extrabytes = trailingBytesForUTF8[(uint8_t) value[i]];
76
 
77
    if (extrabytes + i >= len)
78
      return 0;
79
 
80
    if (isLegalUTF8 ((uint8_t *) (value + i), extrabytes + 1) == 0) return 0;
81
 
82
    switch (extrabytes) {
83
      case 5 : ch += (uint8_t) value[i++]; ch <<= 6;
84
      case 4 : ch += (uint8_t) value[i++]; ch <<= 6;
85
      case 3 : ch += (uint8_t) value[i++]; ch <<= 6;
86
      case 2 : ch += (uint8_t) value[i++]; ch <<= 6;
87
      case 1 : ch += (uint8_t) value[i++]; ch <<= 6;
88
      case 0 : ch += (uint8_t) value[i];
89
    }
90
 
91
    ch -= offsetsFromUTF8[extrabytes];
92
 
93
    if (ch <= UNI_MAX_LEGAL_UTF32) {
94
      if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)
95
        return 0;
96
    } else {
97
      return 0;
98
    }
99
  }
100
 
101
  return 1;
102
}
103
 
104
class Validation : public ObjectWrap
105
{
106
public:
107
 
108
  static void Initialize(v8::Handle<v8::Object> target)
109
  {
110
    Nan::HandleScope scope;
111
    Local<FunctionTemplate> t = Nan::New<FunctionTemplate>(New);
112
    t->InstanceTemplate()->SetInternalFieldCount(1);
113
    Nan::SetMethod(t, "isValidUTF8", Validation::IsValidUTF8);
114
    Nan::Set(target, Nan::New<String>("Validation").ToLocalChecked(), t->GetFunction());
115
  }
116
 
117
protected:
118
 
119
  static NAN_METHOD(New)
120
  {
121
    Nan::HandleScope scope;
122
    Validation* validation = new Validation();
123
    validation->Wrap(info.This());
124
    info.GetReturnValue().Set(info.This());
125
  }
126
 
127
  static NAN_METHOD(IsValidUTF8)
128
  {
129
    Nan::HandleScope scope;
130
    if (!Buffer::HasInstance(info[0])) {
131
      return Nan::ThrowTypeError("First argument needs to be a buffer");
132
    }
133
    Local<Object> buffer_obj = info[0]->ToObject();
134
    char *buffer_data = Buffer::Data(buffer_obj);
135
    size_t buffer_length = Buffer::Length(buffer_obj);
136
    info.GetReturnValue().Set(is_valid_utf8(buffer_length, buffer_data) == 1 ? Nan::True() : Nan::False());
137
  }
138
};
139
#if !NODE_VERSION_AT_LEAST(0,10,0)
140
extern "C"
141
#endif
142
void init (Handle<Object> target)
143
{
144
  Nan::HandleScope scope;
145
  Validation::Initialize(target);
146
}
147
 
148
NODE_MODULE(validation, init)