summaryrefslogtreecommitdiff
path: root/include/utf8/ord.php
blob: a333f960114b102a0618abc78e316ac8e07e53ff (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
<?php

/**
* @version $Id: ord.php,v 1.4 2006/09/11 15:22:54 harryf Exp $
* @package utf8
* @subpackage strings
*/

/**
* UTF-8 aware alternative to ord
* Returns the unicode ordinal for a character
* @param string UTF-8 encoded character
* @return int unicode ordinal for the character
* @see http://www.php.net/ord
* @see http://www.php.net/manual/en/function.ord.php#46267
*/
function utf8_ord($chr)
{
	$ord0 = ord($chr);

	if ($ord0 >= 0 && $ord0 <= 127)
		return $ord0;

	if (!isset($chr{1}))
	{
		trigger_error('Short sequence - at least 2 bytes expected, only 1 seen');
		return false;
	}

	$ord1 = ord($chr{1});
	if ($ord0 >= 192 && $ord0 <= 223)
		return ($ord0 - 192) * 64 + ($ord1 - 128);

	if (!isset($chr{2}))
	{
		trigger_error('Short sequence - at least 3 bytes expected, only 2 seen');
		return false;
	}

	$ord2 = ord($chr{2});
	if ($ord0 >= 224 && $ord0 <= 239)
		return ($ord0-224)*4096 + ($ord1-128)*64 + ($ord2-128);

	if (!isset($chr{3}))
	{
		trigger_error('Short sequence - at least 4 bytes expected, only 3 seen');
		return false;
	}

	$ord3 = ord($chr{3});
	if ($ord0>=240 && $ord0<=247)
		return ($ord0-240)*262144 + ($ord1-128)*4096 + ($ord2-128)*64 + ($ord3-128);

	if (!isset($chr{4}))
	{
		trigger_error('Short sequence - at least 5 bytes expected, only 4 seen');
		return false;
	}

	$ord4 = ord($chr{4});
	if ($ord0>=248 && $ord0<=251)
		return ($ord0-248)*16777216 + ($ord1-128)*262144 + ($ord2-128)*4096 + ($ord3-128)*64 + ($ord4-128);

	if (!isset($chr{5}))
	{
		trigger_error('Short sequence - at least 6 bytes expected, only 5 seen');
		return false;
	}

	if ($ord0>=252 && $ord0<=253)
		return ($ord0-252) * 1073741824 + ($ord1-128)*16777216 + ($ord2-128)*262144 + ($ord3-128)*4096  + ($ord4-128)*64 + (ord($c{5})-128);

	if ($ord0 >= 254 && $ord0 <= 255)
	{
		trigger_error('Invalid UTF-8 with surrogate ordinal '.$ord0);
		return false;
	}
}