sphinxapi.php 46 KB


  1. <?php
  2. //
  3. // $Id$
  4. //
  5. //
  6. // Copyright (c) 2001-2011, Andrew Aksyonoff
  7. // Copyright (c) 2008-2011, Sphinx Technologies Inc
  8. // All rights reserved
  9. //
  10. // This program is free software; you can redistribute it and/or modify
  11. // it under the terms of the GNU General Public License. You should have
  12. // received a copy of the GPL license along with this program; if you
  13. // did not, you can find it at http://www.gnu.org/
  14. //
  15. /////////////////////////////////////////////////////////////////////////////
  16. // PHP version of Sphinx searchd client (PHP API)
  17. /////////////////////////////////////////////////////////////////////////////
  18. /// known searchd commands
  define ( 'SEARCHD_COMMAND_SEARCH',     0 );
  define ( 'SEARCHD_COMMAND_EXCERPT',    1 );
  define ( 'SEARCHD_COMMAND_UPDATE',     2 );
  define ( 'SEARCHD_COMMAND_KEYWORDS',   3 );
  define ( 'SEARCHD_COMMAND_PERSIST',    4 );
  define ( 'SEARCHD_COMMAND_STATUS',     5 );
  define ( 'SEARCHD_COMMAND_FLUSHATTRS', 7 );

  /// current client-side command implementation versions
  define ( 'VER_COMMAND_SEARCH',     0x119 );
  define ( 'VER_COMMAND_EXCERPT',    0x103 );
  define ( 'VER_COMMAND_UPDATE',     0x102 );
  define ( 'VER_COMMAND_KEYWORDS',   0x100 );
  define ( 'VER_COMMAND_STATUS',     0x100 );
  define ( 'VER_COMMAND_QUERY',      0x100 );
  define ( 'VER_COMMAND_FLUSHATTRS', 0x100 );

  /// known searchd status codes
  define ( 'SEARCHD_OK',      0 );
  define ( 'SEARCHD_ERROR',   1 );
  define ( 'SEARCHD_RETRY',   2 );
  define ( 'SEARCHD_WARNING', 3 );

  /// known match modes
  define ( 'SPH_MATCH_ALL',       0 );
  define ( 'SPH_MATCH_ANY',       1 );
  define ( 'SPH_MATCH_PHRASE',    2 );
  define ( 'SPH_MATCH_BOOLEAN',   3 );
  define ( 'SPH_MATCH_EXTENDED',  4 );
  define ( 'SPH_MATCH_FULLSCAN',  5 );
  define ( 'SPH_MATCH_EXTENDED2', 6 ); // extended engine V2 (TEMPORARY, WILL BE REMOVED)

  /// known ranking modes (ext2 only)
  define ( 'SPH_RANK_PROXIMITY_BM25', 0 ); ///< default mode, phrase proximity major factor and BM25 minor one
  define ( 'SPH_RANK_BM25',           1 ); ///< statistical mode, BM25 ranking only (faster but worse quality)
  define ( 'SPH_RANK_NONE',           2 ); ///< no ranking, all matches get a weight of 1
  define ( 'SPH_RANK_WORDCOUNT',      3 ); ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
  define ( 'SPH_RANK_PROXIMITY',      4 );
  define ( 'SPH_RANK_MATCHANY',       5 );
  define ( 'SPH_RANK_FIELDMASK',      6 );
  define ( 'SPH_RANK_SPH04',          7 );
  define ( 'SPH_RANK_EXPR',           8 );
  define ( 'SPH_RANK_TOTAL',          9 ); ///< used as the exclusive upper bound when validating a ranker id

  /// known sort modes
  define ( 'SPH_SORT_RELEVANCE',     0 );
  define ( 'SPH_SORT_ATTR_DESC',     1 );
  define ( 'SPH_SORT_ATTR_ASC',      2 );
  define ( 'SPH_SORT_TIME_SEGMENTS', 3 );
  define ( 'SPH_SORT_EXTENDED',      4 );
  define ( 'SPH_SORT_EXPR',          5 );

  /// known filter types
  define ( 'SPH_FILTER_VALUES',     0 );
  define ( 'SPH_FILTER_RANGE',      1 );
  define ( 'SPH_FILTER_FLOATRANGE', 2 );

  /// known attribute types
  define ( 'SPH_ATTR_INTEGER',   1 );
  define ( 'SPH_ATTR_TIMESTAMP', 2 );
  define ( 'SPH_ATTR_ORDINAL',   3 );
  define ( 'SPH_ATTR_BOOL',      4 );
  define ( 'SPH_ATTR_FLOAT',     5 );
  define ( 'SPH_ATTR_BIGINT',    6 );
  define ( 'SPH_ATTR_STRING',    7 );
  define ( 'SPH_ATTR_MULTI',     0x40000001 );
  define ( 'SPH_ATTR_MULTI64',   0x40000002 );

  /// known grouping functions
  define ( 'SPH_GROUPBY_DAY',      0 );
  define ( 'SPH_GROUPBY_WEEK',     1 );
  define ( 'SPH_GROUPBY_MONTH',    2 );
  define ( 'SPH_GROUPBY_YEAR',     3 );
  define ( 'SPH_GROUPBY_ATTR',     4 );
  define ( 'SPH_GROUPBY_ATTRPAIR', 5 );
  86. // important properties of PHP's integers:
  87. // - always signed (one bit short of PHP_INT_SIZE)
  88. // - conversion from string to int is saturated
  89. // - float is double
  90. // - div converts arguments to floats
  91. // - mod converts arguments to ints
  92. // the packing code below works as follows:
  93. // - when we got an int, just pack it
  94. // if performance is a problem, this is the branch users should aim for
  95. //
  96. // - otherwise, we got a number in string form
  97. // this might be due to different reasons, but we assume that this is
  98. // because it didn't fit into PHP int
  99. //
  100. // - factor the string into high and low ints for packing
  101. // - if we have bcmath, then it is used
  102. // - if we don't, we have to do it manually (this is the fun part)
  103. //
  104. // - x64 branch does factoring using ints
  105. // - x32 (ab)uses floats, since we can't fit unsigned 32-bit number into an int
  106. //
  107. // unpacking routines are pretty much the same.
  108. // - return ints if we can
  109. // - otherwise format number into a string
  110. /// pack 64-bit signed
  // Packs a signed 64-bit number (int or numeric string) into 8 bytes,
  // high 32-bit word first, each word big-endian ("NN").
  function sphPackI64 ( $v )
  {
      assert ( is_numeric($v) );

      // 64-bit PHP: a native int holds the whole value
      if ( PHP_INT_SIZE>=8 )
      {
          $num = (int)$v;
          $upper = $num>>32;
          $lower = $num&0xFFFFFFFF;
          return pack ( "NN", $upper, $lower );
      }

      // 32-bit PHP, native int: high word is just the sign extension
      if ( is_int($v) )
      {
          $signWord = ( $v < 0 ) ? -1 : 0;
          return pack ( "NN", $signWord, $v );
      }

      // 32-bit PHP, numeric string, bcmath available
      if ( function_exists("bcmul") )
      {
          // map negatives onto their two's complement (add 2^64)
          if ( bccomp ( $v, 0 ) == -1 )
              $v = bcadd ( "18446744073709551616", $v );

          $upper = bcdiv ( $v, "4294967296", 0 );
          $lower = bcmod ( $v, "4294967296" );

          // conversion to float is intentional; int would lose 31st bit
          return pack ( "NN", (float)$upper, (float)$lower );
      }

      // 32-bit PHP, numeric string, no bcmath:
      // split the decimal text 13 digits from the right and recombine with floats
      $cut = max ( 0, strlen($v) - 13 );
      $tail = abs ( (float)substr ( $v, $cut ) );
      $head = abs ( (float)substr ( $v, 0, $cut ) );

      $mixed = $tail + $head*1316134912.0; // (10 ^ 13) % (1 << 32) = 1316134912
      $carry = floor ( $mixed/4294967296.0 );
      $lower = $mixed - ( $carry*4294967296.0 );
      $upper = $head*2328.0 + $carry; // (10 ^ 13) / (1 << 32) = 2328

      // negate the magnitude word by word when the input was negative
      if ( $v<0 )
      {
          if ( $lower==0 )
          {
              $upper = 4294967296.0 - $upper;
          }
          else
          {
              $upper = 4294967295.0 - $upper;
              $lower = 4294967296.0 - $lower;
          }
      }

      return pack ( "NN", $upper, $lower );
  }
  152. /// pack 64-bit unsigned
  // Packs an unsigned 64-bit number (int or numeric string) into 8 bytes,
  // high 32-bit word first, each word big-endian ("NN").
  function sphPackU64 ( $v )
  {
      assert ( is_numeric($v) );

      if ( PHP_INT_SIZE>=8 )
      {
          // 64-bit PHP
          assert ( $v>=0 );

          // native int: shift and mask
          if ( is_int($v) )
          {
              $upper = $v>>32;
              $lower = $v&0xFFFFFFFF;
              return pack ( "NN", $upper, $lower );
          }

          // numeric string, bcmath available
          if ( function_exists("bcmul") )
          {
              $upper = bcdiv ( $v, 4294967296, 0 );
              $lower = bcmod ( $v, 4294967296 );
              return pack ( "NN", $upper, $lower );
          }

          // numeric string, no bcmath: split the decimal text 13 digits
          // from the right and recombine using integer arithmetic
          $cut = max ( 0, strlen($v) - 13 );
          $tail = (int)substr ( $v, $cut );
          $head = (int)substr ( $v, 0, $cut );

          $mixed = $tail + $head*1316134912;
          $lower = $mixed % 4294967296;
          $upper = $head*2328 + (int)($mixed/4294967296);
          return pack ( "NN", $upper, $lower );
      }

      // 32-bit PHP, native int: high word is always zero
      if ( is_int($v) )
          return pack ( "NN", 0, $v );

      // 32-bit PHP, numeric string, bcmath available
      if ( function_exists("bcmul") )
      {
          $upper = bcdiv ( $v, "4294967296", 0 );
          $lower = bcmod ( $v, "4294967296" );

          // conversion to float is intentional; int would lose 31st bit
          return pack ( "NN", (float)$upper, (float)$lower );
      }

      // 32-bit PHP, numeric string, no bcmath: same split, done in floats
      $cut = max ( 0, strlen($v) - 13 );
      $tail = (float)substr ( $v, $cut );
      $head = (float)substr ( $v, 0, $cut );

      $mixed = $tail + $head*1316134912.0;
      $carry = floor ( $mixed / 4294967296.0 );
      $lower = $mixed - ( $carry * 4294967296.0 );
      $upper = $head*2328.0 + $carry;
      return pack ( "NN", $upper, $lower );
  }
  199. // unpack 64-bit unsigned
  // Decodes 8 bytes (two big-endian 32-bit words, high word first) as an
  // unsigned 64-bit number. Returns an int when the value fits, otherwise
  // a decimal string.
  function sphUnpackU64 ( $v )
  {
      $words = array_values ( unpack ( "N*N*", $v ) );
      $upper = $words[0];
      $lower = $words[1];

      if ( PHP_INT_SIZE>=8 )
      {
          // because php 5.2.2 to 5.2.5 may hand back negative words here
          if ( $upper<0 )
              $upper += (1<<32);
          if ( $lower<0 )
              $lower += (1<<32);

          // fits into a signed native int
          if ( $upper<=2147483647 )
              return ($upper<<32) + $lower;

          // too large for a native int, bcmath available
          if ( function_exists("bcmul") )
              return bcadd ( $lower, bcmul ( $upper, "4294967296" ) );

          // too large for a native int, no bcmath:
          // work in base 100000 and glue the decimal digits together
          $base = 100000;
          $major = ((int)($upper / $base) << 32) + (int)($lower / $base);
          $minor = (($upper % $base) << 32) + ($lower % $base);
          if ( $minor>$base )
          {
              $major += (int)($minor / $base);
              $minor = $minor % $base;
          }

          if ( $major==0 )
              return $minor;
          return sprintf ( "%d%05d", $major, $minor );
      }

      // 32-bit PHP, value fits in the low word
      if ( $upper==0 )
      {
          if ( $lower>0 )
              return $lower;
          return sprintf ( "%u", $lower );
      }

      $upper = sprintf ( "%u", $upper );
      $lower = sprintf ( "%u", $lower );

      // 32-bit PHP, bcmath available
      if ( function_exists("bcmul") )
          return bcadd ( $lower, bcmul ( $upper, "4294967296" ) );

      // 32-bit PHP, no bcmath: float arithmetic in base 10^7
      $upper = (float)$upper;
      $lower = (float)$lower;

      $quot = floor ( $upper/10000000.0 );
      $rem = $upper - $quot*10000000.0;
      $mixed = $lower + $rem*4967296.0;
      $mixedQuot = floor ( $mixed/10000000.0 );
      $minor = $mixed - $mixedQuot*10000000.0;
      $major = $quot*4294967296.0 + $rem*429.0 + $mixedQuot;

      $major = sprintf ( "%.0f", $major );
      $minor = sprintf ( "%07.0f", $minor );
      if ( $major=="0" )
          return sprintf( "%.0f", (float)$minor );
      return $major . $minor;
  }
  253. // unpack 64-bit signed
  // Decodes 8 bytes (two big-endian 32-bit words, high word first) as a
  // signed 64-bit number. Returns an int when the value fits, otherwise
  // a decimal string.
  function sphUnpackI64 ( $v )
  {
      $words = array_values ( unpack ( "N*N*", $v ) );
      $upper = $words[0];
      $lower = $words[1];

      // 64-bit PHP: recombine directly
      if ( PHP_INT_SIZE>=8 )
      {
          // because php 5.2.2 to 5.2.5 may hand back negative words here
          if ( $upper<0 )
              $upper += (1<<32);
          if ( $lower<0 )
              $lower += (1<<32);

          return ($upper<<32) + $lower;
      }

      // 32-bit PHP, small non-negative value
      if ( $upper==0 )
      {
          if ( $lower>0 )
              return $lower;
          return sprintf ( "%u", $lower );
      }
      // 32-bit PHP, small negative value
      elseif ( $upper==-1 )
      {
          if ( $lower<0 )
              return $lower;
          return sprintf ( "%.0f", $lower - 4294967296.0 );
      }

      // general case: for negatives, invert both words and remember to add one
      $sign = "";
      $plusOne = 0;
      if ( $upper<0 )
      {
          $upper = ~$upper;
          $lower = ~$lower;
          $plusOne = 1;
          $sign = "-";
      }

      $upper = sprintf ( "%u", $upper );
      $lower = sprintf ( "%u", $lower );

      // 32-bit PHP, bcmath available
      if ( function_exists("bcmul") )
          return $sign . bcadd ( bcadd ( $lower, bcmul ( $upper, "4294967296" ) ), $plusOne );

      // 32-bit PHP, no bcmath: float arithmetic in base 10^7
      $upper = (float)$upper;
      $lower = (float)$lower;

      $quot = floor ( $upper/10000000.0 );
      $rem = $upper - $quot*10000000.0;
      $mixed = $lower + $rem*4967296.0;
      $mixedQuot = floor ( $mixed/10000000.0 );
      $minor = $mixed - $mixedQuot*10000000.0 + $plusOne;
      $major = $quot*4294967296.0 + $rem*429.0 + $mixedQuot;

      // the added one may overflow the low 7-digit group
      if ( $minor==10000000 )
      {
          $minor = 0;
          $major += 1;
      }

      $major = sprintf ( "%.0f", $major );
      $minor = sprintf ( "%07.0f", $minor );
      if ( $major=="0" )
          return $sign . sprintf( "%.0f", (float)$minor );
      return $sign . $major . $minor;
  }
  // Normalises a 32-bit value that came back signed into its unsigned form:
  // a string on 32-bit PHP, an int on 64-bit PHP.
  function sphFixUint ( $value )
  {
      // x32 route, workaround php signed/unsigned handling
      if ( PHP_INT_SIZE<8 )
          return sprintf ( "%u", $value );

      // x64 route, workaround broken unpack() in 5.2.2+
      if ( $value<0 )
          $value += (1<<32);
      return $value;
  }
  326. /// sphinx searchd client class
  327. class SphinxClient
  328. {
  var $_host; ///< searchd host (default is "localhost")
  var $_port; ///< searchd port (default is 9312)
  var $_path; ///< unix-socket URL ("unix://...") set by SetServer(), or false/"" when host:port is used
  var $_socket; ///< connection handle reused by _Connect() while it is not false (initialised to false)
  var $_offset; ///< how many records to seek from result-set start (default is 0)
  var $_limit; ///< how many records to return from result-set starting at offset (default is 20)
  var $_mode; ///< query matching mode (default is SPH_MATCH_ALL)
  var $_weights; ///< per-field weights (default is 1 for all fields)
  var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE)
  var $_sortby; ///< attribute to sort by (default is "")
  var $_min_id; ///< min ID to match (default is 0, which means no limit)
  var $_max_id; ///< max ID to match (default is 0, which means no limit)
  var $_filters; ///< search filters
  var $_groupby; ///< group-by attribute name
  var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with)
  var $_groupsort; ///< group-by sorting clause (to sort groups in result set with)
  var $_groupdistinct;///< group-by count-distinct attribute
  var $_maxmatches; ///< max matches to retrieve
  var $_cutoff; ///< cutoff to stop searching at (default is 0)
  var $_retrycount; ///< distributed retries count
  var $_retrydelay; ///< distributed retries delay
  var $_anchor; ///< geographical anchor point
  var $_indexweights; ///< per-index weights
  var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
  var $_rankexpr; ///< ranking mode expression (for SPH_RANK_EXPR)
  var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit)
  var $_fieldweights; ///< per-field-name weights
  var $_overrides; ///< per-query attribute values overrides
  var $_select; ///< select-list (attributes or expressions, with optional aliases)
  var $_error; ///< last error message
  var $_warning; ///< last warning message
  var $_connerror; ///< connection error vs remote error flag
  var $_reqs; ///< requests array for multi-query
  var $_mbenc; ///< stored mbstring encoding
  var $_arrayresult; ///< whether $result["matches"] should be a hash or an array
  var $_timeout; ///< connect timeout
  363. /////////////////////////////////////////////////////////////////////////////
  364. // common stuff
  365. /////////////////////////////////////////////////////////////////////////////
  366. /// create a new client object and fill defaults
  // Initialises every client setting to its default value.
  function __construct ()
  {
      // connection target and connection state
      $this->_host = "localhost";
      $this->_port = 9312;
      $this->_path = false;
      $this->_socket = false;

      // result window and matching
      $this->_offset = 0;
      $this->_limit = 20;
      $this->_mode = SPH_MATCH_ALL;
      $this->_weights = array ();

      // sorting
      $this->_sort = SPH_SORT_RELEVANCE;
      $this->_sortby = "";

      // document id range and attribute filters
      $this->_min_id = 0;
      $this->_max_id = 0;
      $this->_filters = array ();

      // grouping
      $this->_groupby = "";
      $this->_groupfunc = SPH_GROUPBY_DAY;
      $this->_groupsort = "@group desc";
      $this->_groupdistinct= "";

      // search limits and distributed retries
      $this->_maxmatches = 1000;
      $this->_cutoff = 0;
      $this->_retrycount = 0;
      $this->_retrydelay = 0;

      // geo anchor, weights, ranking
      $this->_anchor = array ();
      $this->_indexweights= array ();
      $this->_ranker = SPH_RANK_PROXIMITY_BM25;
      $this->_rankexpr = "";
      $this->_maxquerytime= 0;
      $this->_fieldweights= array();
      $this->_overrides = array();
      $this->_select = "*";

      // per-reply fields (for single-query case)
      $this->_error = "";
      $this->_warning = "";
      $this->_connerror = false;

      // requests storage (for multi-query case)
      $this->_reqs = array ();

      // client-side helpers
      $this->_mbenc = "";
      $this->_arrayresult = false;
      $this->_timeout = 0;
  }
  // Closes the stored socket, if there is one, when the client is destroyed.
  function __destruct()
  {
      if ( $this->_socket === false )
          return;

      fclose ( $this->_socket );
  }
  413. /// get last error message (string)
  // Accessor for the stored error message ("" until one is recorded).
  function GetLastError ()
  {
      $message = $this->_error;
      return $message;
  }
  418. /// get last warning message (string)
  // Accessor for the stored warning message ("" until one is recorded).
  function GetLastWarning ()
  {
      $message = $this->_warning;
      return $message;
  }
  423. /// get last error flag (to tell network connection errors from searchd errors or broken responses)
  // Accessor for the connection-error flag set by _Send() and _Connect().
  function IsConnectError()
  {
      $flag = $this->_connerror;
      return $flag;
  }
  428. /// set searchd host name (string) and port (integer)
  // Selects the searchd endpoint.
  //
  // $host is either a host name, an absolute unix-socket path (starts with
  // "/"), or a full "unix://..." URL. For socket targets only the path is
  // stored and $port is ignored.
  // $port is the TCP port; 0 (the default) selects the standard port 9312
  // instead of storing an unusable port 0.
  function SetServer ( $host, $port = 0 )
  {
      assert ( is_string($host) );

      // substr() instead of $host[0]: an empty host must not raise an
      // "uninitialized string offset" warning
      if ( substr ( $host, 0, 1 )=='/' )
      {
          $this->_path = 'unix://' . $host;
          return;
      }
      if ( substr ( $host, 0, 7 )=="unix://" )
      {
          $this->_path = $host;
          return;
      }

      assert ( is_int($port) );
      $port = (int)$port;

      $this->_host = $host;
      // keep the documented default when the caller omits the port
      $this->_port = ( $port==0 ) ? 9312 : $port;
      $this->_path = '';
  }
  447. /// set server connection timeout (0 to remove)
  // Stores the connect timeout handed to fsockopen() by _Connect();
  // values <= 0 make _Connect() call fsockopen() without a timeout argument.
  function SetConnectTimeout ( $timeout )
  {
      assert ( is_numeric($timeout) );

      $this->_timeout = $timeout;
  }
  // Writes $length bytes of $data to $handle.
  // Returns true when everything was written; otherwise records a
  // connection error and returns false.
  function _Send ( $handle, $data, $length )
  {
      if ( !feof($handle) && fwrite ( $handle, $data, $length ) === $length )
          return true;

      $this->_error = 'connection unexpectedly closed (timed out?)';
      $this->_connerror = true;
      return false;
  }
  463. /////////////////////////////////////////////////////////////////////////////
  464. /// enter mbstring workaround mode
  // When mbstring overloads the string functions (func_overload bit 2),
  // remembers the current internal encoding and switches to latin1.
  function _MBPush ()
  {
      $this->_mbenc = "";

      $overloaded = ini_get ( "mbstring.func_overload" ) & 2;
      if ( !$overloaded )
          return;

      $this->_mbenc = mb_internal_encoding();
      mb_internal_encoding ( "latin1" );
  }
  474. /// leave mbstring workaround mode
  // Restores the internal encoding saved by _MBPush(), if one was saved.
  function _MBPop ()
  {
      if ( !$this->_mbenc )
          return;

      mb_internal_encoding ( $this->_mbenc );
  }
  480. /// connect to searchd server
  // Opens a connection to searchd and performs the version handshake.
  //
  // Reuses the stored socket while it is still alive. Returns the stream
  // handle on success; on failure sets $this->_error and returns false.
  // $this->_connerror is set when the socket cannot be opened or written to.
  function _Connect ()
  {
      if ( $this->_socket!==false )
      {
          // we are in persistent connection mode, so we have a socket
          // however, need to check whether it's still alive
          if ( !@feof ( $this->_socket ) )
              return $this->_socket;

          // force reopen
          $this->_socket = false;
      }

      $errno = 0;
      $errstr = "";
      $this->_connerror = false;

      // a unix-socket path, when set, takes precedence over host:port
      if ( $this->_path )
      {
          $host = $this->_path;
          $port = 0;
      }
      else
      {
          $host = $this->_host;
          $port = $this->_port;
      }

      if ( $this->_timeout<=0 )
          $fp = @fsockopen ( $host, $port, $errno, $errstr );
      else
          $fp = @fsockopen ( $host, $port, $errno, $errstr, $this->_timeout );

      if ( !$fp )
      {
          if ( $this->_path )
              $location = $this->_path;
          else
              $location = "{$this->_host}:{$this->_port}";

          $errstr = trim ( $errstr );
          $this->_error = "connection to $location failed (errno=$errno, msg=$errstr)";
          $this->_connerror = true;
          return false;
      }

      // send my version
      // this is a subtle part. we must do it before (!) reading back from searchd.
      // because otherwise under some conditions (reported on FreeBSD for instance)
      // TCP stack could throttle write-write-read pattern because of Nagle.
      if ( !$this->_Send ( $fp, pack ( "N", 1 ), 4 ) )
      {
          fclose ( $fp );
          $this->_error = "failed to send client protocol version";
          return false;
      }

      // check version
      // a short or failed read is treated as version 0 instead of being fed
      // to unpack(), which would raise an undefined-offset warning
      $v = 0;
      $reply = fread ( $fp, 4 );
      if ( is_string($reply) && strlen($reply)==4 )
      {
          list(,$v) = unpack ( "N*", $reply );
          $v = (int)$v;
      }
      if ( $v<1 )
      {
          fclose ( $fp );
          $this->_error = "expected searchd protocol version 1+, got version '$v'";
          return false;
      }

      return $fp;
  }
  541. /// get and check response packet from searchd server
  // Reads one reply packet from $fp and validates it.
  //
  // The packet starts with an 8-byte header: status (16 bit), command
  // version (16 bit) and body length (32 bit). Returns the body (without
  // the warning text for SEARCHD_WARNING replies), or false with
  // $this->_error set. $client_ver is the client-side command version; an
  // older server version only produces a warning.
  function _GetResponse ( $fp, $client_ver )
  {
      $response = "";
      $len = 0;
      // defaults so later code never touches undefined variables
      $status = -1;
      $ver = 0;

      $header = fread ( $fp, 8 );
      if ( is_string($header) && strlen($header)==8 )
      {
          list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
          $left = $len;
          while ( $left>0 && !feof($fp) )
          {
              $chunk = fread ( $fp, min ( 8192, $left ) );
              // explicit comparison: a chunk consisting of "0" is valid data
              // and must not be discarded by a truthiness test
              if ( $chunk!==false && $chunk!=="" )
              {
                  $response .= $chunk;
                  $left -= strlen($chunk);
              }
          }
      }

      // not in persistent mode: this connection was opened for one request
      if ( $this->_socket === false )
          fclose ( $fp );

      // check response
      // compare lengths rather than truthiness so a body of "0" is accepted
      $read = strlen ( $response );
      if ( $read==0 || $read!=$len )
      {
          $this->_error = $len
              ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
              : "received zero-sized searchd response";
          return false;
      }

      // check status
      if ( $status==SEARCHD_WARNING )
      {
          // body is: warning length, warning text, then the actual payload
          list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
          $this->_warning = substr ( $response, 4, $wlen );
          return substr ( $response, 4+$wlen );
      }
      if ( $status==SEARCHD_ERROR )
      {
          $this->_error = "searchd error: " . substr ( $response, 4 );
          return false;
      }
      if ( $status==SEARCHD_RETRY )
      {
          $this->_error = "temporary searchd error: " . substr ( $response, 4 );
          return false;
      }
      if ( $status!=SEARCHD_OK )
      {
          $this->_error = "unknown status code '$status'";
          return false;
      }

      // check version
      if ( $ver<$client_ver )
      {
          $this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
              $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
      }

      return $response;
  }
  602. /////////////////////////////////////////////////////////////////////////////
  603. // searching
  604. /////////////////////////////////////////////////////////////////////////////
  605. /// set offset and count into result set,
  606. /// and optionally set max-matches and cutoff limits
  // Stores the result window ($offset, $limit). $max and $cutoff are stored
  // only when positive; otherwise the current values are kept.
  function SetLimits ( $offset, $limit, $max=0, $cutoff=0 )
  {
      assert ( is_int($offset) );
      assert ( is_int($limit) );
      assert ( $offset>=0 );
      assert ( $limit>0 );
      assert ( $max>=0 );

      $this->_offset = $offset;
      $this->_limit = $limit;

      if ( $max>0 )
      {
          $this->_maxmatches = $max;
      }
      if ( $cutoff>0 )
      {
          $this->_cutoff = $cutoff;
      }
  }
  621. /// set maximum query time, in milliseconds, per-index
  622. /// integer, 0 means "do not limit"
  // Stores the per-query time limit; $max must be a non-negative integer.
  function SetMaxQueryTime ( $max )
  {
      assert ( is_int($max) );
      assert ( $max>=0 );

      $this->_maxquerytime = $max;
  }
  629. /// set matching mode
  // Stores the matching mode; $mode must equal one of the SPH_MATCH_* constants.
  function SetMatchMode ( $mode )
  {
      // loose in_array() mirrors a chain of == comparisons
      assert ( in_array ( $mode, array (
          SPH_MATCH_ALL,
          SPH_MATCH_ANY,
          SPH_MATCH_PHRASE,
          SPH_MATCH_BOOLEAN,
          SPH_MATCH_EXTENDED,
          SPH_MATCH_FULLSCAN,
          SPH_MATCH_EXTENDED2 ) ) );

      $this->_mode = $mode;
  }
  641. /// set ranking mode
  // Stores the ranker id (0 <= $ranker < SPH_RANK_TOTAL) together with the
  // ranking expression string.
  function SetRankingMode ( $ranker, $rankexpr="" )
  {
      assert ( $ranker>=0 && $ranker<SPH_RANK_TOTAL );
      assert ( is_string($rankexpr) );

      $this->_rankexpr = $rankexpr;
      $this->_ranker = $ranker;
  }
  649. /// set matches sorting mode
  // Stores the sort mode and its sort-by clause. $sortby must be non-empty
  // for every mode except SPH_SORT_RELEVANCE.
  function SetSortMode ( $mode, $sortby="" )
  {
      // loose in_array() mirrors a chain of == comparisons
      assert ( in_array ( $mode, array (
          SPH_SORT_RELEVANCE,
          SPH_SORT_ATTR_DESC,
          SPH_SORT_ATTR_ASC,
          SPH_SORT_TIME_SEGMENTS,
          SPH_SORT_EXTENDED,
          SPH_SORT_EXPR ) ) );
      assert ( is_string($sortby) );
      assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );

      $this->_sortby = $sortby;
      $this->_sort = $mode;
  }
  664. /// bind per-field weights by order
  665. /// DEPRECATED; use SetFieldWeights() instead
  // Stores the positional per-field weights; every entry must be an integer.
  function SetWeights ( $weights )
  {
      assert ( is_array($weights) );
      foreach ( $weights as $w )
      {
          assert ( is_int($w) );
      }

      $this->_weights = $weights;
  }
  673. /// bind per-field weights by name
  // Stores per-field weights as a map of field name (string) => weight (int).
  function SetFieldWeights ( $weights )
  {
      assert ( is_array($weights) );
      foreach ( $weights as $fieldName=>$fieldWeight )
      {
          assert ( is_string($fieldName) );
          assert ( is_int($fieldWeight) );
      }

      $this->_fieldweights = $weights;
  }
  684. /// bind per-index weights by name
  // Stores per-index weights as a map of index name (string) => weight (int).
  function SetIndexWeights ( $weights )
  {
      assert ( is_array($weights) );
      foreach ( $weights as $indexName=>$indexWeight )
      {
          assert ( is_string($indexName) );
          assert ( is_int($indexWeight) );
      }

      $this->_indexweights = $weights;
  }
  695. /// set IDs range to match
  696. /// only match records if document ID is between $min and $max (inclusive)
  // Stores the document ID bounds; both must be numeric and $min<=$max.
  function SetIDRange ( $min, $max )
  {
      assert ( is_numeric($min) );
      assert ( is_numeric($max) );
      assert ( $min<=$max );

      $this->_max_id = $max;
      $this->_min_id = $min;
  }
  705. /// set values set filter
  706. /// only match records where $attribute value is in given set
  // Appends a value-set filter on $attribute. $values must be a non-empty
  // array of numerics; anything else is ignored once assertions are off.
  function SetFilter ( $attribute, $values, $exclude=false )
  {
      assert ( is_string($attribute) );
      assert ( is_array($values) );
      assert ( count($values) );

      // nothing to add without a non-empty array
      if ( !is_array($values) || !count($values) )
          return;

      foreach ( $values as $item )
      {
          assert ( is_numeric($item) );
      }

      $this->_filters[] = array (
          "type"=>SPH_FILTER_VALUES,
          "attr"=>$attribute,
          "exclude"=>$exclude,
          "values"=>$values );
  }
  719. /// set range filter
  720. /// only match records if $attribute value is between $min and $max (inclusive)
  // Appends a numeric range filter on $attribute; requires $min<=$max.
  function SetFilterRange ( $attribute, $min, $max, $exclude=false )
  {
      assert ( is_string($attribute) );
      assert ( is_numeric($min) );
      assert ( is_numeric($max) );
      assert ( $min<=$max );

      $entry = array (
          "type"=>SPH_FILTER_RANGE,
          "attr"=>$attribute,
          "exclude"=>$exclude,
          "min"=>$min,
          "max"=>$max );
      $this->_filters[] = $entry;
  }
  729. /// set float range filter
  730. /// only match records if $attribute value is between $min and $max (inclusive)
  // Appends a float range filter on $attribute; both bounds must be floats
  // and $min<=$max.
  function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )
  {
      assert ( is_string($attribute) );
      assert ( is_float($min) );
      assert ( is_float($max) );
      assert ( $min<=$max );

      $entry = array (
          "type"=>SPH_FILTER_FLOATRANGE,
          "attr"=>$attribute,
          "exclude"=>$exclude,
          "min"=>$min,
          "max"=>$max );
      $this->_filters[] = $entry;
  }
  739. /// setup anchor point for geosphere distance calculations
  740. /// required to use @geodist in filters and sorting
  741. /// latitude and longitude must be in radians
  // Stores the anchor point: the names of the latitude/longitude attributes
  // and the anchor coordinates (floats).
  function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
  {
      assert ( is_string($attrlat) );
      assert ( is_string($attrlong) );
      assert ( is_float($lat) );
      assert ( is_float($long) );

      $this->_anchor = array (
          "attrlat"=>$attrlat,
          "attrlong"=>$attrlong,
          "lat"=>$lat,
          "long"=>$long );
  }
  750. /// set grouping attribute and function
  // Stores the group-by attribute, the grouping function (one of the
  // SPH_GROUPBY_* constants) and the group sorting clause.
  function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )
  {
      assert ( is_string($attribute) );
      assert ( is_string($groupsort) );
      // loose in_array() mirrors a chain of == comparisons
      assert ( in_array ( $func, array (
          SPH_GROUPBY_DAY,
          SPH_GROUPBY_WEEK,
          SPH_GROUPBY_MONTH,
          SPH_GROUPBY_YEAR,
          SPH_GROUPBY_ATTR,
          SPH_GROUPBY_ATTRPAIR ) ) );

      $this->_groupsort = $groupsort;
      $this->_groupfunc = $func;
      $this->_groupby = $attribute;
  }
  765. /// set count-distinct attribute for group-by queries
  // Stores the attribute name used for count-distinct in group-by queries.
  function SetGroupDistinct ( $attribute )
  {
      assert ( is_string($attribute) );

      $this->_groupdistinct = $attribute;
  }
  771. /// set distributed retries count and delay
  // Stores the retry count and delay; both must be non-negative integers.
  function SetRetries ( $count, $delay=0 )
  {
      assert ( is_int($count) && $count>=0 );
      assert ( is_int($delay) && $delay>=0 );

      $this->_retrydelay = $delay;
      $this->_retrycount = $count;
  }
  779. /// set result set format (hash or array; hash by default)
  780. /// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
  // Stores the boolean flag that selects array vs hash layout for matches.
  function SetArrayResult ( $arrayresult )
  {
      assert ( is_bool($arrayresult) );

      $this->_arrayresult = $arrayresult;
  }
  786. /// set attribute values override
  787. /// there can be only one override per attribute
  788. /// $values must be a hash that maps document IDs to attribute values
  // Registers (or replaces) the override for $attrname. $attrtype must be
  // one of the integer, timestamp, bool, float or bigint attribute types.
  function SetOverride ( $attrname, $attrtype, $values )
  {
      $allowedTypes = array ( SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT );

      assert ( is_string ( $attrname ) );
      assert ( in_array ( $attrtype, $allowedTypes ) );
      assert ( is_array ( $values ) );

      // keyed by attribute name, so a later call replaces an earlier one
      $this->_overrides[$attrname] = array (
          "attr"=>$attrname,
          "type"=>$attrtype,
          "values"=>$values );
  }
  796. /// set select-list (attributes or expressions), SQL-like syntax
  // Stores the select-list string.
  function SetSelect ( $select )
  {
      assert ( is_string ( $select ) );

      $this->_select = $select;
  }
  802. //////////////////////////////////////////////////////////////////////////////
  803. /// clear all filters (for multi-queries)
  // Drops every stored filter and the geo anchor.
  function ResetFilters ()
  {
      $this->_anchor = array();
      $this->_filters = array();
  }
  809. /// clear groupby settings (for multi-queries)
  // Puts the four group-by settings back to their constructor defaults.
  function ResetGroupBy ()
  {
      $this->_groupdistinct= "";
      $this->_groupsort = "@group desc";
      $this->_groupfunc = SPH_GROUPBY_DAY;
      $this->_groupby = "";
  }
  817. /// clear all attribute value overrides (for multi-queries)
  // Drops every stored attribute override.
  function ResetOverrides ()
  {
      $empty = array ();
      $this->_overrides = $empty;
  }
  822. //////////////////////////////////////////////////////////////////////////////
  823. /// connect to searchd server, run given search query through given indexes,
  824. /// and return the search results
  // Runs a single query: queues it with AddQuery(), executes it with
  // RunQueries() and returns the first result. Returns false when
  // RunQueries() does not return an array or the result status is
  // SEARCHD_ERROR.
  function Query ( $query, $index="*", $comment="" )
  {
      assert ( empty($this->_reqs) );

      $this->AddQuery ( $query, $index, $comment );
      $results = $this->RunQueries ();
      $this->_reqs = array (); // just in case it failed too early

      // probably network error; error message should be already filled
      if ( !is_array($results) )
          return false;

      $first = $results[0];
      $this->_error = $first["error"];
      $this->_warning = $first["warning"];

      return ( $first["status"]==SEARCHD_ERROR ) ? false : $first;
  }
  840. /// helper to pack floats in network byte order
  // Returns the 4-byte single-precision encoding of $f in big-endian order.
  function _PackFloat ( $f )
  {
      // encode in machine order, reread those bytes as a machine-order
      // 32-bit integer, then emit that integer big-endian
      $asInt = unpack ( "L*", pack ( "f", $f ) );
      return pack ( "N", $asInt[1] );
  }
  /// add query to multi-query batch
  ///
  /// serializes the current client settings together with $query, $index and
  /// $comment into one request blob and appends it to the pending requests list;
  /// returns index into results array from RunQueries() call
  function AddQuery ( $query, $index="*", $comment="" )
  {
      // mbstring workaround
      $this->_MBPush ();

      // offset, limit, match mode, ranker (and the ranking expression, if any)
      $body = pack ( "NNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker );
      if ( $this->_ranker==SPH_RANK_EXPR )
          $body .= pack ( "N", strlen($this->_rankexpr) ) . $this->_rankexpr;

      // (deprecated) sort mode, sort-by clause, query itself
      $body .= pack ( "N", $this->_sort );
      $body .= pack ( "N", strlen($this->_sortby) ) . $this->_sortby;
      $body .= pack ( "N", strlen($query) ) . $query;

      // weights
      $body .= pack ( "N", count($this->_weights) );
      foreach ( $this->_weights as $w )
          $body .= pack ( "N", (int)$w );

      // indexes
      $body .= pack ( "N", strlen($index) ) . $index;

      // id64 range marker, followed by the id64 range itself
      $body .= pack ( "N", 1 );
      $body .= sphPackU64 ( $this->_min_id );
      $body .= sphPackU64 ( $this->_max_id );

      // filters
      $body .= pack ( "N", count($this->_filters) );
      foreach ( $this->_filters as $flt )
      {
          $ftype = $flt["type"];
          $body .= pack ( "N", strlen($flt["attr"]) ) . $flt["attr"];
          $body .= pack ( "N", $ftype );

          if ( $ftype==SPH_FILTER_VALUES )
          {
              $body .= pack ( "N", count($flt["values"]) );
              foreach ( $flt["values"] as $fval )
                  $body .= sphPackI64 ( $fval );

          } elseif ( $ftype==SPH_FILTER_RANGE )
          {
              $body .= sphPackI64 ( $flt["min"] );
              $body .= sphPackI64 ( $flt["max"] );

          } elseif ( $ftype==SPH_FILTER_FLOATRANGE )
          {
              $body .= $this->_PackFloat ( $flt["min"] );
              $body .= $this->_PackFloat ( $flt["max"] );

          } else
          {
              assert ( 0 && "internal error: unhandled filter type" );
          }

          $body .= pack ( "N", $flt["exclude"] );
      }

      // group-by clause, max-matches count, group-sort clause, cutoff count
      $body .= pack ( "N", $this->_groupfunc );
      $body .= pack ( "N", strlen($this->_groupby) ) . $this->_groupby;
      $body .= pack ( "N", $this->_maxmatches );
      $body .= pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort;
      $body .= pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay );
      $body .= pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct;

      // anchor point: a 0/1 presence flag, then attribute names and coordinates
      if ( !empty($this->_anchor) )
      {
          $body .= pack ( "N", 1 );
          $body .= pack ( "N", strlen($this->_anchor["attrlat"]) ) . $this->_anchor["attrlat"];
          $body .= pack ( "N", strlen($this->_anchor["attrlong"]) ) . $this->_anchor["attrlong"];
          $body .= $this->_PackFloat ( $this->_anchor["lat"] );
          $body .= $this->_PackFloat ( $this->_anchor["long"] );
      } else
      {
          $body .= pack ( "N", 0 );
      }

      // per-index weights
      $body .= pack ( "N", count($this->_indexweights) );
      foreach ( $this->_indexweights as $iname=>$iweight )
      {
          $body .= pack ( "N", strlen($iname) ) . $iname;
          $body .= pack ( "N", $iweight );
      }

      // max query time
      $body .= pack ( "N", $this->_maxquerytime );

      // per-field weights
      $body .= pack ( "N", count($this->_fieldweights) );
      foreach ( $this->_fieldweights as $fname=>$fweight )
      {
          $body .= pack ( "N", strlen($fname) ) . $fname;
          $body .= pack ( "N", $fweight );
      }

      // comment
      $body .= pack ( "N", strlen($comment) ) . $comment;

      // attribute overrides
      $body .= pack ( "N", count($this->_overrides) );
      foreach ( $this->_overrides as $ovr )
      {
          $otype = $ovr["type"];
          $body .= pack ( "N", strlen($ovr["attr"]) ) . $ovr["attr"];
          $body .= pack ( "NN", $otype, count($ovr["values"]) );

          foreach ( $ovr["values"] as $docid=>$oval )
          {
              assert ( is_numeric($docid) );
              assert ( is_numeric($oval) );

              $body .= sphPackU64 ( $docid );
              if ( $otype==SPH_ATTR_FLOAT )
                  $body .= $this->_PackFloat ( $oval );
              elseif ( $otype==SPH_ATTR_BIGINT )
                  $body .= sphPackI64 ( $oval );
              else
                  $body .= pack ( "N", $oval );
          }
      }

      // select-list
      $body .= pack ( "N", strlen($this->_select) ) . $this->_select;

      // mbstring workaround
      $this->_MBPop ();

      // store request to requests array; its position is the caller's result index
      $this->_reqs[] = $body;
      return count($this->_reqs)-1;
  }
  /// connect to searchd, run queries batch, and return an array of result sets
  ///
  /// returns false (with the error message set when no queries were added) on
  /// failure; on success the pending requests list is cleared
  function RunQueries ()
  {
      if ( empty($this->_reqs) )
      {
          $this->_error = "no queries defined, issue AddQuery() first";
          return false;
      }

      // mbstring workaround
      $this->_MBPush ();

      $fp = $this->_Connect();
      if ( !$fp )
      {
          $this->_MBPop ();
          return false;
      }

      // glue all pending requests together and prepend the command header;
      // the announced length covers the two extra header ints plus the requests
      $nreqs = count($this->_reqs);
      $body = join ( "", $this->_reqs );
      $len = 8+strlen($body);
      $packet = pack ( "nnNNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, 0, $nreqs ) . $body;

      // send query
      if ( !$this->_Send ( $fp, $packet, $len+8 ) )
      {
          $this->_MBPop ();
          return false;
      }

      // get response
      $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH );
      if ( !$response )
      {
          $this->_MBPop ();
          return false;
      }

      // query sent ok; we can reset reqs now
      $this->_reqs = array ();

      // parse and return response (the parser issues the matching _MBPop() call)
      return $this->_ParseSearchResponse ( $response, $nreqs );
  }
  /// parse and return search query (or queries) response
  ///
  /// $response is the raw reply body and $nreqs the number of result sets expected;
  /// returns an array of result sets, each with "error", "warning" and "status",
  /// plus (unless the query failed) "fields", "attrs", "matches", "total",
  /// "total_found", "time" and "words" as found in the reply.
  /// calls _MBPop() before returning
  function _ParseSearchResponse ( $response, $nreqs )
  {
      $p = 0; // current position
      $max = strlen($response); // max position for checks, to protect against broken responses

      $results = array ();
      for ( $ires=0; $ires<$nreqs && $p<$max; $ires++ )
      {
          $results[] = array ( "error"=>"", "warning"=>"" );
          $res =& $results[$ires];

          // extract status
          list(,$status) = unpack ( "N*", substr ( $response, $p, 4 ) );
          $p += 4;
          $res["status"] = $status;

          if ( $status!=SEARCHD_OK )
          {
              // any non-OK status is followed by a length-prefixed message
              list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) );
              $p += 4;
              $message = substr ( $response, $p, $len );
              $p += $len;

              if ( $status!=SEARCHD_WARNING )
              {
                  // hard error; nothing else is read for this result set
                  $res["error"] = $message;
                  continue;
              }
              $res["warning"] = $message;
          }

          // read schema: field names
          $fields = array ();
          list(,$nfields) = unpack ( "N*", substr ( $response, $p, 4 ) );
          $p += 4;
          while ( $nfields-->0 && $p<$max )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) );
              $p += 4;
              $fields[] = substr ( $response, $p, $len );
              $p += $len;
          }
          $res["fields"] = $fields;

          // read schema: attribute name => type
          $attrs = array ();
          list(,$nattrs) = unpack ( "N*", substr ( $response, $p, 4 ) );
          $p += 4;
          while ( $nattrs-->0 && $p<$max )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) );
              $p += 4;
              $attrname = substr ( $response, $p, $len );
              $p += $len;
              list(,$attrtype) = unpack ( "N*", substr ( $response, $p, 4 ) );
              $p += 4;
              $attrs[$attrname] = $attrtype;
          }
          $res["attrs"] = $attrs;

          // read match count and the 64-bit document id flag
          list(,$count) = unpack ( "N*", substr ( $response, $p, 4 ) );
          $p += 4;
          list(,$id64) = unpack ( "N*", substr ( $response, $p, 4 ) );
          $p += 4;

          // read matches
          $idx = -1;
          while ( $count-->0 && $p<$max )
          {
              // index into result array
              $idx++;

              // parse document id and weight
              if ( $id64 )
              {
                  $doc = sphUnpackU64 ( substr ( $response, $p, 8 ) );
                  $p += 8;
                  list(,$weight) = unpack ( "N*", substr ( $response, $p, 4 ) );
                  $p += 4;
              } else
              {
                  list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
                      substr ( $response, $p, 8 ) ) );
                  $p += 8;
                  $doc = sphFixUint($doc);
              }
              $weight = sprintf ( "%u", $weight );

              // create match entry (positional in array-result mode, keyed by document id otherwise)
              if ( $this->_arrayresult )
                  $res["matches"][$idx] = array ( "id"=>$doc, "weight"=>$weight );
              else
                  $res["matches"][$doc]["weight"] = $weight;

              // parse and create attributes
              $attrvals = array ();
              foreach ( $attrs as $attrname=>$attrtype )
              {
                  if ( $attrtype==SPH_ATTR_BIGINT )
                  {
                      // handle 64bit ints
                      $attrvals[$attrname] = sphUnpackI64 ( substr ( $response, $p, 8 ) );
                      $p += 8;

                  } elseif ( $attrtype==SPH_ATTR_FLOAT )
                  {
                      // handle floats: read the big-endian bits, reinterpret them as a machine float
                      list(,$uval) = unpack ( "N*", substr ( $response, $p, 4 ) );
                      $p += 4;
                      list(,$fval) = unpack ( "f*", pack ( "L", $uval ) );
                      $attrvals[$attrname] = $fval;

                  } else
                  {
                      // handle everything else as unsigned ints; for multi-value and string
                      // attributes this first dword is a count/length rather than the value
                      list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) );
                      $p += 4;

                      if ( $attrtype==SPH_ATTR_MULTI )
                      {
                          $attrvals[$attrname] = array ();
                          $nvalues = $val;
                          while ( $nvalues-->0 && $p<$max )
                          {
                              list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) );
                              $p += 4;
                              $attrvals[$attrname][] = sphFixUint($val);
                          }

                      } elseif ( $attrtype==SPH_ATTR_MULTI64 )
                      {
                          $attrvals[$attrname] = array ();
                          // the count is consumed two at a time, one 8-byte value per step
                          for ( $nvalues=$val; $nvalues>0 && $p<$max; $nvalues-=2 )
                          {
                              $val = sphUnpackU64 ( substr ( $response, $p, 8 ) );
                              $p += 8;
                              // FIXME (from original author): values are stored as strings,
                              // presumably to match what sphFixUint yields for 32-bit MVA values
                              $attrvals[$attrname][] = strval( $val );
                          }

                      } elseif ( $attrtype==SPH_ATTR_STRING )
                      {
                          $attrvals[$attrname] = substr ( $response, $p, $val );
                          $p += $val;

                      } else
                      {
                          $attrvals[$attrname] = sphFixUint($val);
                      }
                  }
              }

              if ( $this->_arrayresult )
                  $res["matches"][$idx]["attrs"] = $attrvals;
              else
                  $res["matches"][$doc]["attrs"] = $attrvals;
          }

          // totals, elapsed time (sent in msec, reported in seconds), and word count
          list ( $total, $total_found, $msecs, $words ) =
              array_values ( unpack ( "N*N*N*N*", substr ( $response, $p, 16 ) ) );
          $p += 16;
          $res["total"] = sprintf ( "%u", $total );
          $res["total_found"] = sprintf ( "%u", $total_found );
          $res["time"] = sprintf ( "%.3f", $msecs/1000 );

          // per-word statistics
          while ( $words-->0 && $p<$max )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) );
              $p += 4;
              $word = substr ( $response, $p, $len );
              $p += $len;
              list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) );
              $p += 8;
              $res["words"][$word] = array (
                  "docs"=>sprintf ( "%u", $docs ),
                  "hits"=>sprintf ( "%u", $hits ) );
          }
      }

      $this->_MBPop ();
      return $results;
  }
  /////////////////////////////////////////////////////////////////////////////
  // excerpts generation
  /////////////////////////////////////////////////////////////////////////////

  /// connect to searchd server, and generate excerpts (snippets)
  /// of given documents for given query. returns false on failure,
  /// an array of snippets on success
  ///
  /// $docs is an array of document body strings, $index the index name,
  /// $words the query to highlight, and $opts an optional array of options;
  /// any option that is missing (or null) gets the default listed below
  function BuildExcerpts ( $docs, $index, $words, $opts=array() )
  {
      assert ( is_array($docs) );
      assert ( is_string($index) );
      assert ( is_string($words) );
      assert ( is_array($opts) );

      $this->_MBPush ();

      if (!( $fp = $this->_Connect() ))
      {
          $this->_MBPop();
          return false;
      }

      /////////////////
      // fixup options
      /////////////////

      $defaults = array (
          "before_match"      => "<b>",
          "after_match"       => "</b>",
          "chunk_separator"   => " ... ",
          "limit"             => 256,
          "limit_passages"    => 0,
          "limit_words"       => 0,
          "around"            => 5,
          "exact_phrase"      => false,
          "single_passage"    => false,
          "use_boundaries"    => false,
          "weight_order"      => false,
          "query_mode"        => false,
          "force_all_words"   => false,
          "start_passage_id"  => 1,
          "load_files"        => false,
          "html_strip_mode"   => "index",
          "allow_empty"       => false,
          "passage_boundary"  => "none",
          "emit_zones"        => false );
      foreach ( $defaults as $name=>$value )
          if ( !isset($opts[$name]) )
              $opts[$name] = $value;

      /////////////////
      // build request
      /////////////////

      // v.1.2 req
      $flagbits = array (
          "exact_phrase"      => 2,
          "single_passage"    => 4,
          "use_boundaries"    => 8,
          "weight_order"      => 16,
          "query_mode"        => 32,
          "force_all_words"   => 64,
          "load_files"        => 128,
          "allow_empty"       => 256,
          "emit_zones"        => 512 );
      $flags = 1; // remove spaces
      foreach ( $flagbits as $name=>$bit )
          if ( $opts[$name] )
              $flags |= $bit;

      $req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags
      $req .= pack ( "N", strlen($index) ) . $index; // req index
      $req .= pack ( "N", strlen($words) ) . $words; // req words

      // options
      $req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"];
      $req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"];
      $req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"];
      $req .= pack ( "NN", (int)$opts["limit"], (int)$opts["around"] );
      $req .= pack ( "NNN", (int)$opts["limit_passages"], (int)$opts["limit_words"], (int)$opts["start_passage_id"] ); // v.1.2
      $req .= pack ( "N", strlen($opts["html_strip_mode"]) ) . $opts["html_strip_mode"];
      $req .= pack ( "N", strlen($opts["passage_boundary"]) ) . $opts["passage_boundary"];

      // documents
      $ndocs = count($docs);
      $req .= pack ( "N", $ndocs );
      foreach ( $docs as $doc )
      {
          assert ( is_string($doc) );
          $req .= pack ( "N", strlen($doc) ) . $doc;
      }

      ////////////////////////////
      // send query, get response
      ////////////////////////////

      $len = strlen($req);
      $req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header
      if ( !( $this->_Send ( $fp, $req, $len+8 ) ) ||
           !( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT ) ) )
      {
          $this->_MBPop ();
          return false;
      }

      //////////////////
      // parse response
      //////////////////

      // the reply is one length-prefixed string per submitted document
      $pos = 0;
      $res = array ();
      $rlen = strlen($response);
      for ( $i=0; $i<$ndocs; $i++ )
      {
          // make sure the 4-byte length prefix is there before decoding it
          if ( $pos+4 > $rlen )
          {
              $this->_error = "incomplete reply";
              $this->_MBPop ();
              return false;
          }
          list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;

          // ... and that the announced snippet fits into what was received
          if ( $pos+$len > $rlen )
          {
              $this->_error = "incomplete reply";
              $this->_MBPop ();
              return false;
          }
          $res[] = $len ? substr ( $response, $pos, $len ) : "";
          $pos += $len;
      }

      $this->_MBPop ();
      return $res;
  }
  /////////////////////////////////////////////////////////////////////////////
  // keyword generation
  /////////////////////////////////////////////////////////////////////////////

  /// connect to searchd server, and generate keyword list for a given query
  /// returns false on failure,
  /// an array of words on success
  ///
  /// every returned entry has "tokenized" and "normalized" keys; when $hits
  /// is true it also carries "docs" and "hits" counters.
  /// a reply that ends before all announced data was read fails with
  /// "incomplete reply"
  function BuildKeywords ( $query, $index, $hits )
  {
      assert ( is_string($query) );
      assert ( is_string($index) );
      assert ( is_bool($hits) );

      $this->_MBPush ();

      if (!( $fp = $this->_Connect() ))
      {
          $this->_MBPop();
          return false;
      }

      /////////////////
      // build request
      /////////////////

      // v.1.0 req
      $req = pack ( "N", strlen($query) ) . $query; // req query
      $req .= pack ( "N", strlen($index) ) . $index; // req index
      $req .= pack ( "N", (int)$hits );

      ////////////////////////////
      // send query, get response
      ////////////////////////////

      $len = strlen($req);
      $req = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len ) . $req; // add header
      if ( !( $this->_Send ( $fp, $req, $len+8 ) ) ||
           !( $response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS ) ) )
      {
          $this->_MBPop ();
          return false;
      }

      //////////////////
      // parse response
      //////////////////

      $pos = 0;
      $res = array ();
      $rlen = strlen($response);

      // word count; bail out if even that is not there
      if ( $rlen < 4 )
      {
          $this->_error = "incomplete reply";
          $this->_MBPop ();
          return false;
      }
      list(,$nwords) = unpack ( "N*", substr ( $response, $pos, 4 ) );
      $pos += 4;

      for ( $i=0; $i<$nwords; $i++ )
      {
          // two length-prefixed strings per word; every read is bounds-checked
          // up front so that a truncated reply is never decoded past its end
          $entry = array ();
          foreach ( array ( "tokenized", "normalized" ) as $form )
          {
              if ( $pos+4 > $rlen )
              {
                  $this->_error = "incomplete reply";
                  $this->_MBPop ();
                  return false;
              }
              list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
              $pos += 4;

              if ( $pos+$len > $rlen )
              {
                  $this->_error = "incomplete reply";
                  $this->_MBPop ();
                  return false;
              }
              $entry[$form] = $len ? substr ( $response, $pos, $len ) : "";
              $pos += $len;
          }

          // optional per-word statistics
          if ( $hits )
          {
              if ( $pos+8 > $rlen )
              {
                  $this->_error = "incomplete reply";
                  $this->_MBPop ();
                  return false;
              }
              list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
              $pos += 8;
              $entry["docs"] = $ndocs;
              $entry["hits"] = $nhits;
          }

          $res[] = $entry;
      }

      $this->_MBPop ();
      return $res;
  }
  /// prefix every special character in $string with a backslash
  /// (the backslash itself included) and return the result
  function EscapeString ( $string )
  {
      // the backslash must stay first: str_replace() applies the pairs in order,
      // and the backslashes added for later characters must not be doubled again
      $special = array ( '\\', '(',')','|','-','!','@','~','"','&', '/', '^', '$', '=' );

      $escaped = array ();
      foreach ( $special as $ch )
          $escaped[] = '\\' . $ch;

      return str_replace ( $special, $escaped, $string );
  }
  /////////////////////////////////////////////////////////////////////////////
  // attribute updates
  /////////////////////////////////////////////////////////////////////////////

  /// batch update given attributes in given rows in given indexes
  /// returns amount of updated documents (0 or more) on success, or -1 on failure
  ///
  /// $attrs is a list of attribute names; $values maps a document id to the list
  /// of new values, one per attribute (each value being an array of ints when
  /// $mva is true, and a plain int otherwise)
  function UpdateAttributes ( $index, $attrs, $values, $mva=false )
  {
      // verify everything
      assert ( is_string($index) );
      assert ( is_bool($mva) );

      assert ( is_array($attrs) );
      foreach ( $attrs as $name )
          assert ( is_string($name) );

      assert ( is_array($values) );
      foreach ( $values as $docid=>$row )
      {
          assert ( is_numeric($docid) );
          assert ( is_array($row) );
          assert ( count($row)==count($attrs) );
          foreach ( $row as $cell )
          {
              if ( !$mva )
              {
                  assert ( is_int($cell) );
                  continue;
              }
              assert ( is_array($cell) );
              foreach ( $cell as $item )
                  assert ( is_int($item) );
          }
      }

      // build request
      $this->_MBPush ();

      // index name, then every attribute name along with its MVA flag
      $mvaflag = $mva ? 1 : 0;
      $req = pack ( "N", strlen($index) ) . $index;
      $req .= pack ( "N", count($attrs) );
      foreach ( $attrs as $name )
      {
          $req .= pack ( "N", strlen($name) ) . $name;
          $req .= pack ( "N", $mvaflag );
      }

      // rows: document id followed by its values
      $req .= pack ( "N", count($values) );
      foreach ( $values as $docid=>$row )
      {
          $req .= sphPackU64 ( $docid );
          foreach ( $row as $cell )
          {
              if ( $mva )
              {
                  // value count first, then the values themselves
                  $req .= pack ( "N", count($cell) );
                  foreach ( $cell as $item )
                      $req .= pack ( "N", $item );
              } else
              {
                  $req .= pack ( "N", $cell );
              }
          }
      }

      // connect, send query, get response
      $fp = $this->_Connect();
      if ( !$fp )
      {
          $this->_MBPop ();
          return -1;
      }

      $len = strlen($req);
      $req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header
      if ( !$this->_Send ( $fp, $req, $len+8 ) )
      {
          $this->_MBPop ();
          return -1;
      }

      $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE );
      if ( !$response )
      {
          $this->_MBPop ();
          return -1;
      }

      // parse response: the first dword is returned as the updated documents count
      list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) );
      $this->_MBPop ();
      return $updated;
  }
  /////////////////////////////////////////////////////////////////////////////
  // persistent connections
  /////////////////////////////////////////////////////////////////////////////

  /// connect, send the persist command, and remember the socket in $this->_socket
  /// returns true on success; false if a socket is already stored (error message
  /// is set to 'already connected'), or if connecting or sending fails
  function Open()
  {
      if ( $this->_socket !== false )
      {
          $this->_error = 'already connected';
          return false;
      }

      $fp = $this->_Connect();
      if ( !$fp )
          return false;

      // command, command version = 0, body length = 4, body = 1
      $packet = pack ( "nnNN", SEARCHD_COMMAND_PERSIST, 0, 4, 1 );
      $sent = $this->_Send ( $fp, $packet, 12 );
      if ( !$sent )
          return false;

      $this->_socket = $fp;
      return true;
  }
  /// close the socket stored by Open() and forget it
  /// returns true on success, false (with error message 'not connected')
  /// if no socket is stored
  function Close()
  {
      if ( $this->_socket !== false )
      {
          fclose ( $this->_socket );
          $this->_socket = false;
          return true;
      }

      $this->_error = 'not connected';
      return false;
  }
  //////////////////////////////////////////////////////////////////////////
  // status
  //////////////////////////////////////////////////////////////////////////

  /// connect to searchd and request its status
  /// returns false on failure; on success, an array of rows, each row being
  /// an array of column strings as sent by the server
  ///
  /// a reply that ends before all announced cells were read fails with
  /// "incomplete reply"
  function Status ()
  {
      $this->_MBPush ();
      if (!( $fp = $this->_Connect() ))
      {
          $this->_MBPop();
          return false;
      }

      $req = pack ( "nnNN", SEARCHD_COMMAND_STATUS, VER_COMMAND_STATUS, 4, 1 ); // len=4, body=1
      if ( !( $this->_Send ( $fp, $req, 12 ) ) ||
           !( $response = $this->_GetResponse ( $fp, VER_COMMAND_STATUS ) ) )
      {
          $this->_MBPop ();
          return false;
      }

      // the reply starts with the row and column counts
      $rlen = strlen($response);
      if ( $rlen < 8 )
      {
          $this->_error = "incomplete reply";
          $this->_MBPop ();
          return false;
      }
      list ( $rows, $cols ) = array_values ( unpack ( "N*N*", substr ( $response, 0, 8 ) ) );
      $p = 8;

      // ... followed by rows*cols length-prefixed strings, row by row
      $res = array();
      for ( $i=0; $i<$rows; $i++ )
          for ( $j=0; $j<$cols; $j++ )
      {
          if ( $p+4 > $rlen )
          {
              $this->_error = "incomplete reply";
              $this->_MBPop ();
              return false;
          }
          list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) );
          $p += 4;

          if ( $p+$len > $rlen )
          {
              $this->_error = "incomplete reply";
              $this->_MBPop ();
              return false;
          }
          $res[$i][] = $len ? substr ( $response, $p, $len ) : "";
          $p += $len;
      }

      $this->_MBPop ();
      return $res;
  }
  //////////////////////////////////////////////////////////////////////////
  // flush
  //////////////////////////////////////////////////////////////////////////

  /// connect to searchd and send the flush-attributes command
  /// returns the tag value from the 4-byte reply on success, -1 on failure
  /// (a reply of any other length also sets the error message)
  function FlushAttributes ()
  {
      $this->_MBPush ();

      $fp = $this->_Connect();
      if ( !$fp )
      {
          $this->_MBPop();
          return -1;
      }

      $packet = pack ( "nnN", SEARCHD_COMMAND_FLUSHATTRS, VER_COMMAND_FLUSHATTRS, 0 ); // len=0
      if ( !$this->_Send ( $fp, $packet, 8 ) )
      {
          $this->_MBPop ();
          return -1;
      }

      $response = $this->_GetResponse ( $fp, VER_COMMAND_FLUSHATTRS );
      if ( !$response )
      {
          $this->_MBPop ();
          return -1;
      }

      // the reply must be exactly one dword
      if ( strlen($response)!=4 )
      {
          $this->_error = "unexpected response length";
          $this->_MBPop ();
          return -1;
      }

      list(,$tag) = unpack ( "N*", $response );
      $this->_MBPop ();
      return $tag;
  }
  1466. }
  1467. //
  1468. // $Id$
  1469. //