<?php
// XdbSearcher.php
//
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by an Apache2.0-style
// license that can be found in the LICENSE file.
//
// @Author Lion <chenxin619315@gmail.com>
// @Date 2022/06/21
  /**
   * Searcher for ip2region "xdb" binary databases.
   *
   * A searcher works in one of three modes, chosen at construction time:
   *  - file only:     every lookup reads the vector index, the segment index
   *                   and the region data from the file handle;
   *  - vector index:  the vector index is supplied as a binary string, the
   *                   segment index and region data are read from the file;
   *  - content buffer: the whole xdb content is supplied as a binary string
   *                   and no file is opened at all.
   *
   * All multi-byte integers are decoded as little-endian (see getLong/getShort).
   */
  class XdbSearcher
  {
      // length in bytes of the header block at the start of the xdb content
      const HeaderInfoLength = 256;
      // the vector index is a VectorIndexRows x VectorIndexCols table keyed by
      // the first and the second byte of the ip address
      const VectorIndexRows = 256;
      const VectorIndexCols = 256;
      // bytes per vector index entry: 4 bytes start pointer + 4 bytes end pointer
      const VectorIndexSize = 8;
      // bytes per segment index entry:
      // 4 bytes start ip + 4 bytes end ip + 2 bytes data length + 4 bytes data pointer
      const SegmentIndexSize = 14;

      // xdb file handle (null in content buffer mode and after close())
      private $handle = null;

      // header info (not populated by any method of this class)
      private $header = null;

      // number of file reads done by the last search() call
      private $ioCount = 0;

      // vector index in binary string.
      // string decode will be faster than the map based Array.
      private $vectorIndex = null;

      // xdb content buffer
      private $contentBuff = null;

      // ---
      // static function to create searcher

      /**
       * Create a searcher that reads everything from the xdb file.
       *
       * @param string|null $dbFile path of the xdb file, null for the default file
       * @return XdbSearcher
       * @throws Exception if the file can not be opened
       */
      public static function newWithFileOnly($dbFile)
      {
          return new XdbSearcher($dbFile, null, null);
      }

      /**
       * Create a searcher that uses a preloaded vector index and reads the
       * segment index and the region data from the xdb file.
       *
       * @param string|null $dbFile path of the xdb file, null for the default file
       * @param string|null $vIndex binary vector index, e.g. from loadVectorIndexFromFile()
       * @return XdbSearcher
       * @throws Exception if the file can not be opened
       */
      public static function newWithVectorIndex($dbFile, $vIndex)
      {
          return new XdbSearcher($dbFile, $vIndex);
      }

      /**
       * Create a searcher that works on an in-memory copy of the xdb content.
       *
       * @param string $cBuff the whole xdb content, e.g. from loadContentFromFile()
       * @return XdbSearcher
       * @throws Exception
       */
      public static function newWithBuffer($cBuff)
      {
          return new XdbSearcher(null, null, $cBuff);
      }

      // --- End of static creator

      /**
       * initialize the xdb searcher
       *
       * @param string|null $dbFile      path of the xdb file; when null (and no
       *                                 content buffer is given) the file
       *                                 'ip2region.xdb' next to this source file is used
       * @param string|null $vectorIndex optional binary vector index
       * @param string|null $cBuff       optional whole xdb content; takes
       *                                 precedence over $dbFile and $vectorIndex
       * @throws Exception if the xdb file can not be opened
       */
      public function __construct($dbFile = null, $vectorIndex = null, $cBuff = null)
      {
          // check the content buffer first.
          // The loose comparison is deliberate: an empty buffer is treated
          // exactly like a missing one.
          if ($cBuff != null) {
              $this->vectorIndex = null;
              $this->contentBuff = $cBuff;
              return;
          }

          // load the default data file (by Anyon)
          if (is_null($dbFile)) {
              $dbFile = __DIR__ . DIRECTORY_SEPARATOR . 'ip2region.xdb';
          }

          // open the xdb binary file
          $handle = fopen($dbFile, 'rb');
          if ($handle === false) {
              // the message has to be formatted here: the second constructor
              // argument of Exception is the integer error code, not a format argument
              throw new Exception(sprintf("failed to open xdb file '%s'", $dbFile));
          }

          $this->handle = $handle;
          // normalize an empty vector index to null so search() can test with !==
          $this->vectorIndex = ($vectorIndex != null) ? $vectorIndex : null;
      }

      /**
       * Close the xdb file handle, if any. Safe to call more than once.
       */
      public function close()
      {
          // is_resource() is false for an already closed handle
          if (is_resource($this->handle)) {
              fclose($this->handle);
          }
          $this->handle = null;
      }

      /**
       * @return int number of file reads done by the last search() call
       */
      public function getIOCount()
      {
          return $this->ioCount;
      }

      /**
       * find the region info for the specified ip address
       *
       * @param string|int $ip dotted ip string or its already converted long value
       * @return string|null the region data, null if no segment covers the ip
       *                     or the region data could not be read
       * @throws Exception for an invalid ip string or an unreadable index block
       */
      public function search($ip)
      {
          // check and convert the string ip to a 4-bytes long
          if (is_string($ip)) {
              $t = self::ip2long($ip);
              if ($t === null) {
                  throw new Exception("invalid ip address `$ip`");
              }
              $ip = $t;
          }

          // reset the global counter
          $this->ioCount = 0;

          // locate the segment index block based on the vector index:
          // row = first byte of the ip, column = second byte of the ip
          $il0 = ($ip >> 24) & 0xFF;
          $il1 = ($ip >> 16) & 0xFF;
          $idx = $il0 * self::VectorIndexCols * self::VectorIndexSize + $il1 * self::VectorIndexSize;
          if ($this->vectorIndex !== null) {
              $sPtr = self::getLong($this->vectorIndex, $idx);
              $ePtr = self::getLong($this->vectorIndex, $idx + 4);
          } else {
              // read the vector index entry from the content buffer or the file
              $buff = $this->read(self::HeaderInfoLength + $idx, self::VectorIndexSize);
              if ($buff === null) {
                  throw new Exception("failed to read vector index at {$idx}");
              }
              $sPtr = self::getLong($buff, 0);
              $ePtr = self::getLong($buff, 4);
          }

          // binary search the segment index to get the region info.
          // $ePtr points at the last entry, so $h is an inclusive upper bound.
          $dataLen = 0;
          $dataPtr = null;
          $l = 0;
          $h = (int)(($ePtr - $sPtr) / self::SegmentIndexSize);
          while ($l <= $h) {
              $m = ($l + $h) >> 1;
              $p = $sPtr + $m * self::SegmentIndexSize;

              // read the segment index
              $buff = $this->read($p, self::SegmentIndexSize);
              if ($buff === null) {
                  throw new Exception("failed to read segment index at {$p}");
              }

              $sip = self::getLong($buff, 0);
              if ($ip < $sip) {
                  $h = $m - 1;
                  continue;
              }

              $eip = self::getLong($buff, 4);
              if ($ip > $eip) {
                  $l = $m + 1;
                  continue;
              }

              // $sip <= $ip <= $eip: this is the segment we are looking for
              $dataLen = self::getShort($buff, 8);
              $dataPtr = self::getLong($buff, 10);
              break;
          }

          // match nothing interception (a zero pointer or an empty region
          // is reported as "not found" as well)
          if ($dataPtr === null || $dataPtr == 0 || $dataLen == 0) {
              return null;
          }

          // load and return the region data (null if it could not be read)
          return $this->read($dataPtr, $dataLen);
      }

      /**
       * read specified bytes from the specified index
       *
       * @param int $offset absolute byte offset in the xdb content
       * @param int $len    number of bytes to read
       * @return string|null exactly $len bytes, or null if they are not available
       */
      private function read($offset, $len)
      {
          // check the in-memory buffer first
          if ($this->contentBuff !== null) {
              $buff = substr($this->contentBuff, $offset, $len);
              // a truncated buffer must not reach getLong()/getShort()
              if ($buff === false || strlen($buff) != $len) {
                  return null;
              }
              return $buff;
          }

          // no usable file handle, e.g. after close()
          if (!is_resource($this->handle)) {
              return null;
          }

          // read from the file
          if (fseek($this->handle, $offset) == -1) {
              return null;
          }

          $this->ioCount++;
          $buff = fread($this->handle, $len);
          if ($buff === false || strlen($buff) != $len) {
              return null;
          }

          return $buff;
      }

      // --- static util functions ----

      /**
       * convert a string ip to long
       *
       * @param string $ip dotted ip string
       * @return int|string|null the long value (an unsigned decimal string for
       *                         negative values on 32 bit builds), null if invalid
       */
      public static function ip2long($ip)
      {
          $ip = ip2long($ip);
          if ($ip === false) {
              return null;
          }

          // convert signed int to unsigned int if on 32 bit operating system
          if ($ip < 0 && PHP_INT_SIZE == 4) {
              $ip = sprintf("%u", $ip);
          }

          return $ip;
      }

      /**
       * read a 4bytes long from a byte buffer (little-endian)
       *
       * @param string $b   byte buffer
       * @param int    $idx offset of the first byte
       * @return int|string the value (an unsigned decimal string for negative
       *                    values on 32 bit builds)
       */
      public static function getLong($b, $idx)
      {
          $val = (ord($b[$idx])) | (ord($b[$idx + 1]) << 8)
              | (ord($b[$idx + 2]) << 16) | (ord($b[$idx + 3]) << 24);

          // convert signed int to unsigned int if on 32 bit operating system
          if ($val < 0 && PHP_INT_SIZE == 4) {
              $val = sprintf("%u", $val);
          }

          return $val;
      }

      /**
       * read a 2bytes short from a byte buffer (little-endian)
       *
       * @param string $b   byte buffer
       * @param int    $idx offset of the first byte
       * @return int
       */
      public static function getShort($b, $idx)
      {
          return ((ord($b[$idx])) | (ord($b[$idx + 1]) << 8));
      }

      /**
       * load header info from a specified file handle
       *
       * @param resource $handle readable, seekable handle of an xdb file
       * @return array|null decoded header fields, null on seek/read failure
       */
      public static function loadHeader($handle)
      {
          if (fseek($handle, 0) == -1) {
              return null;
          }

          $buff = fread($handle, self::HeaderInfoLength);
          // read bytes length checking
          if ($buff === false || strlen($buff) != self::HeaderInfoLength) {
              return null;
          }

          // return the decoded header info
          return [
              'version'       => self::getShort($buff, 0),
              'indexPolicy'   => self::getShort($buff, 2),
              'createdAt'     => self::getLong($buff, 4),
              'startIndexPtr' => self::getLong($buff, 8),
              'endIndexPtr'   => self::getLong($buff, 12)
          ];
      }

      /**
       * load header info from the specified xdb file path
       *
       * @param string $dbFile path of the xdb file
       * @return array|null decoded header fields, null on failure
       */
      public static function loadHeaderFromFile($dbFile)
      {
          $handle = fopen($dbFile, 'rb');
          if ($handle === false) {
              return null;
          }

          $header = self::loadHeader($handle);
          fclose($handle);
          return $header;
      }

      /**
       * load vector index from a file handle
       *
       * @param resource $handle readable, seekable handle of an xdb file
       * @return string|null the binary vector index, null on seek/read failure
       */
      public static function loadVectorIndex($handle)
      {
          if (fseek($handle, self::HeaderInfoLength) == -1) {
              return null;
          }

          // one VectorIndexSize bytes entry per (row, col) pair; this is the
          // same entry size search() uses to compute offsets into the index
          $rLen = self::VectorIndexRows * self::VectorIndexCols * self::VectorIndexSize;
          $buff = fread($handle, $rLen);
          if ($buff === false || strlen($buff) != $rLen) {
              return null;
          }

          return $buff;
      }

      /**
       * load vector index from a specified xdb file path
       *
       * @param string $dbFile path of the xdb file
       * @return string|null the binary vector index, null on failure
       */
      public static function loadVectorIndexFromFile($dbFile)
      {
          $handle = fopen($dbFile, 'rb');
          if ($handle === false) {
              return null;
          }

          $vIndex = self::loadVectorIndex($handle);
          fclose($handle);
          return $vIndex;
      }

      /**
       * load the xdb content from a file handle
       *
       * @param resource $handle readable, seekable handle of an xdb file
       * @return string|null the whole content, null on failure or for an empty file
       */
      public static function loadContent($handle)
      {
          // determine the content size by seeking to the end
          if (fseek($handle, 0, SEEK_END) == -1) {
              return null;
          }

          $size = ftell($handle);
          // fread() rejects a zero length, so an empty file is reported as a failure
          if ($size === false || $size < 1) {
              return null;
          }

          // seek to the head for reading
          if (fseek($handle, 0) == -1) {
              return null;
          }

          $buff = fread($handle, $size);
          // read length checking
          if ($buff === false || strlen($buff) != $size) {
              return null;
          }

          return $buff;
      }

      /**
       * load the xdb content from a file path
       *
       * @param string $dbFile path of the xdb file
       * @return string|null the whole content, null on failure
       */
      public static function loadContentFromFile($dbFile)
      {
          $str = file_get_contents($dbFile, false);
          return ($str === false) ? null : $str;
      }

      /**
       * @return float the current unix timestamp in milliseconds
       */
      public static function now()
      {
          return (microtime(true) * 1000);
      }
  }