<?php
namespace Ratchet\WebSocket\Encoding;

/**
 * This class handles encoding validation
 */
class Validator {
    /**
     * DFA state reached when every byte consumed so far forms complete, valid sequences.
     */
    const UTF8_ACCEPT = 0;

    /**
     * DFA state reached on an invalid byte; every transition out of it leads back to itself.
     */
    const UTF8_REJECT = 1;

    /**
     * Incremental UTF-8 validator with constant memory consumption (minimal state).
     *
     * Implements the algorithm "Flexible and Economical UTF-8 Decoder" by
     * Bjoern Hoehrmann (http://bjoern.hoehrmann.de/utf-8/decoder/dfa/).
     *
     * Layout, as used by isUtf8():
     *  - entries 0..255 map a byte value to its character class (0..11);
     *  - entries from 256 on map (state * 16 + character class) to the next state.
     *
     * @var int[]
     */
    protected static $dfa = array(
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 00..1f
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 20..3f
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 40..5f
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 60..7f
        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, # 80..9f
        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, # a0..bf
        8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, # c0..df
        0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, # e0..ef
        0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, # f0..ff
        0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, # s0..s0
        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, # s1..s2
        1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, # s3..s4
        1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, # s5..s6
        1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, # s7..s8
    );

    /**
     * Lookup if mbstring is available
     * @var bool
     */
    private $hasMbString = false;

    /**
     * Lookup if iconv is available
     * @var bool
     */
    private $hasIconv = false;

    /**
     * Record once which of the optional extensions are loaded so the
     * validation methods do not have to query the engine on every call.
     */
    public function __construct() {
        $this->hasMbString = extension_loaded('mbstring');
        $this->hasIconv    = extension_loaded('iconv');
    }

    /**
     * Check whether a string is validly encoded in the given encoding.
     *
     * 'UTF-8' is always verified (see isUtf8()). Any other encoding is checked
     * with mbstring when loaded, otherwise with an iconv round-trip. When
     * neither extension is loaded a non-UTF-8 encoding cannot be checked and
     * the string is accepted as-is.
     *
     * @param string $str     The value to check the encoding
     * @param string $against The type of encoding to check against
     * @return bool
     */
    public function checkEncoding($str, $against) {
        if ('UTF-8' === $against) {
            return $this->isUtf8($str);
        }

        if ($this->hasMbString) {
            return mb_check_encoding($str, $against);
        } elseif ($this->hasIconv) {
            // Converting to the same encoding while dropping illegal bytes
            // leaves the string untouched only if it was valid to begin with.
            return ($str === iconv($against, "{$against}//IGNORE", $str));
        }

        // Best effort: nothing available to validate with.
        return true;
    }

    /**
     * Check whether a string is complete, well-formed UTF-8.
     *
     * An available extension is used as a quick pre-check; the DFA is then
     * always run over every byte, so the verdict is the same whether or not
     * mbstring/iconv are loaded.
     *
     * @param string $str The value to validate
     * @return bool True only if no byte is rejected and the string does not
     *              end in the middle of a multi-byte sequence
     */
    protected function isUtf8($str) {
        if ($this->hasMbString) {
            if (false === mb_check_encoding($str, 'UTF-8')) {
                return false;
            }
        } elseif ($this->hasIconv) {
            if ($str !== iconv('UTF-8', 'UTF-8//IGNORE', $str)) {
                return false;
            }
        }

        $dfa   = static::$dfa;
        $state = static::UTF8_ACCEPT;

        for ($i = 0, $len = strlen($str); $i < $len; $i++) {
            // Byte -> character class, then (state, class) -> next state.
            $state = $dfa[256 + ($state << 4) + $dfa[ord($str[$i])]];

            if (static::UTF8_REJECT === $state) {
                return false;
            }
        }

        // Any state other than ACCEPT here means the input stopped part-way
        // through a multi-byte sequence, which is not valid UTF-8.
        return static::UTF8_ACCEPT === $state;
    }
}