Rev 19 | Rev 24 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 9 | jblyberg | 1 | <?php |
| 22 | jblyberg | 2 | /** |
| 3 | * Locum is a software library that abstracts ILS functionality into a |
||
| 4 | * catalog discovery layer for use with such things as bolt-on OPACs like |
||
| 5 | * SOPAC. |
||
| 6 | * @package Locum |
||
| 7 | * @author John Blyberg |
||
| 8 | */ |
||
| 9 | jblyberg | 9 | |
| 10 | require_once('locum.php'); |
||
| 11 | |||
| 22 | jblyberg | 12 | /** |
| 13 | * This class is the server component of Locum. It is separated from the client piece because the functionality |
||
| 14 | * in this class should never need to be used in any front-end pieces. This class does all the harvesting and |
||
| 15 | * data preparation. |
||
| 16 | */ |
||
| 9 | jblyberg | 17 | class locum_server extends locum { |
| 18 | |||
/**
 * Initiates the harvest of bib records from the catalog.
 *
 * The inclusive range $start..$end is passed to import_bibs(). When the pcntl
 * extension is loaded, harvest_with_children is enabled in the harvest_config
 * section and the range holds at least twice as many records as max_children,
 * the range is split into contiguous chunks and each chunk is imported by a
 * forked child process. Otherwise the whole range is imported in-process.
 *
 * @param int $start Bib number to start with
 * @param int $end Bib number to end with (inclusive)
 * @param boolean $quiet quietly harvest or not. Default: TRUE (not used by this function)
 * @return int|null 0 when $start is greater than $end, otherwise nothing
 */
public function harvest_bibs($start, $end, $quiet = TRUE) {

  if ($start > $end) { return 0; }

  // Array keys must be quoted strings: bare words are undefined constants.
  $harvest_cfg = $this->locum_config['harvest_config'];
  $num_children = isset($harvest_cfg['max_children']) ? (int) $harvest_cfg['max_children'] : 0;
  $with_children = !empty($harvest_cfg['harvest_with_children']);

  // Both ends of the range are harvested, hence the + 1.
  $num_to_process = ($end - $start) + 1;

  // $num_children > 0 also protects the division below.
  $fork_harvest = extension_loaded('pcntl')
    && $with_children
    && ($num_children > 0)
    && ($num_to_process >= (2 * $num_children));

  if (!$fork_harvest) {
    self::import_bibs($start, $end);
    return;
  }

  $increment = (int) ceil($num_to_process / $num_children);
  $spawned = 0;

  for ($i = 0; $i < $num_children; ++$i) {
    $chunk_start = $start + ($i * $increment);
    // Rounding the increment up can leave the last slot(s) with nothing to do.
    if ($chunk_start > $end) { break; }
    // Never scan past the requested end of the range.
    $chunk_end = min($chunk_start + $increment - 1, $end);

    $pid = pcntl_fork();
    if ($pid == -1) {
      // The fork failed; this chunk is only reported, it is not harvested.
      parent::putlog("Unable to spawn harvester: ($i)", 5);
      continue;
    }

    if ($pid) {
      // Parent: remember how many children have to be reaped later.
      ++$spawned;
      parent::putlog("Spawning child harvester to scan records $chunk_start - $chunk_end. PID is $pid ..");
      continue;
    }

    // Child: import the assigned chunk, report, and terminate.
    sleep(1);
    $result = self::import_bibs($chunk_start, $chunk_end);
    parent::putlog("Child process complete. Scanned records $chunk_start - $chunk_end. Imported $result[imported] records and skipped $result[skipped] ..", 2);
    exit($i + 1);
  }

  // Reap exactly the children that were actually started.
  while ($spawned > 0) {
    pcntl_waitpid(-1, $status);
    --$spawned;
  }
  parent::putlog("Harvest complete!", 3);
}
||
| 67 | |||
/**
 * Does the actual import of bib records. Called by the harvester.
 * It uses start and end parameters because this function can potentially be called by a
 * child process.
 *
 * Every bib number in the inclusive range is scraped through the connector
 * ($this->locum_cntl->scrape_bib()). Records the connector cannot return, and
 * records whose INSERT fails, are counted as skipped.
 *
 * @param int $start Bib number to start with
 * @param int $end Bib number to end with (inclusive)
 * @return array Array of information about the bibs imported: 'skipped' and 'imported' counts
 */
public function import_bibs($start, $end) {

  $process_report = array('skipped' => 0, 'imported' => 0);

  $db = MDB2::connect($this->dsn);
  if (PEAR::isError($db)) {
    parent::putlog('Unable to connect to the database: ' . $db->getMessage(), 5);
    return $process_report;
  }

  // Names of the placeholders used by the bib_items INSERT below.
  $valid_vals = array('bnum', 'author', 'addl_author', 'title', 'title_medium', 'edition', 'series', 'callnum', 'pub_info', 'pub_year', 'stdnum', 'lccn', 'descr', 'notes', 'subjects_ser', 'lang', 'loc_code', 'mat_code', 'cover_img', 'bib_created', 'bib_lastupdate', 'bib_prevupdate', 'bib_revs');

  // The statements do not change per record, so prepare them once.
  $bib_insert = $db->prepare('INSERT INTO bib_items VALUES (:bnum, :author, :addl_author, :title, :title_medium, :edition, :series, :callnum, :pub_info, :pub_year, :stdnum, :lccn, :descr, :notes, :subjects_ser, :lang, :loc_code, :mat_code, :cover_img, NOW(), :bib_created, :bib_lastupdate, :bib_prevupdate, :bib_revs, \'1\')', NULL, MDB2_PREPARE_MANIP);
  $subj_insert = $db->prepare('INSERT INTO bib_items_subject VALUES (?, ?)', array('integer', 'text'), MDB2_PREPARE_MANIP);
  if (PEAR::isError($bib_insert) || PEAR::isError($subj_insert)) {
    parent::putlog('Unable to prepare the bib import statements', 5);
    $db->disconnect();
    return $process_report;
  }

  for ($i = $start; $i <= $end; $i++) {
    $bib = $this->locum_cntl->scrape_bib($i);

    if ($bib == FALSE) {
      $process_report['skipped']++;
      continue;
    }

    // NOTE(review): assumes the connector puts the subject list last in the bib array - confirm.
    $subj = array_pop($bib);

    // Start from a clean slate for every record so that a field missing from
    // this bib can never inherit the value of the previously imported bib.
    $bib_values = array_fill_keys($valid_vals, NULL);
    foreach ($bib as $bkey => $bval) {
      if (in_array($bkey, $valid_vals, TRUE)) { $bib_values[$bkey] = $bval; }
    }
    $bib_values['subjects_ser'] = serialize($subj);

    $affrows = $bib_insert->execute($bib_values);
    if (PEAR::isError($affrows)) {
      parent::putlog("Unable to import record # $i: " . $affrows->getMessage(), 5);
      $process_report['skipped']++;
      continue;
    }

    if (is_array($subj) && count($subj)) {
      foreach ($subj as $subj_heading) {
        $subj_insert->execute(array($bib['bnum'], $subj_heading));
      }
    }
    $process_report['imported']++;
  }

  $bib_insert->free();
  $subj_insert->free();
  $db->disconnect();
  return $process_report;
}
||
| 117 | |||
/**
 * Scans existing imported bibs for changes or weeds and makes the appropriate changes.
 *
 * Reads every bnum / bib_lastupdate pair from locum_facet_heap and hands them
 * to update_bib(). When the pcntl extension is loaded, harvest_with_children is
 * enabled and there are at least twice as many records as max_children, the
 * list is sliced and each slice is checked by a forked child process.
 * Otherwise the whole list is checked in-process.
 *
 * @param boolean $quiet Run this function silently. Default: TRUE (not used by this function)
 */
public function verify_bibs($quiet = TRUE) {

  parent::putlog("Collecting current data keys ..");
  $db = MDB2::connect($this->dsn);
  if (PEAR::isError($db)) {
    parent::putlog('Unable to connect to the database: ' . $db->getMessage(), 5);
    return;
  }
  $init_result = $db->query('SELECT bnum, bib_lastupdate FROM locum_facet_heap');
  $init_bib_arr = PEAR::isError($init_result) ? $init_result : $init_result->fetchAll(MDB2_FETCHMODE_ASSOC);
  $db->disconnect();
  if (PEAR::isError($init_bib_arr)) {
    parent::putlog('Unable to collect data keys: ' . $init_bib_arr->getMessage(), 5);
    return;
  }

  // bnum => last update date, the shape update_bib() expects.
  $bib_arr = array();
  foreach ($init_bib_arr as $init_bib_arr_vals) {
    $bib_arr[$init_bib_arr_vals['bnum']] = $init_bib_arr_vals['bib_lastupdate'];
  }
  parent::putlog("Finished collecting data keys.");

  $num_to_process = count($bib_arr);
  if ($num_to_process == 0) { return; }

  $harvest_cfg = $this->locum_config['harvest_config'];
  $num_children = isset($harvest_cfg['max_children']) ? (int) $harvest_cfg['max_children'] : 0;
  $with_children = !empty($harvest_cfg['harvest_with_children']);

  // $num_children > 0 also protects the division below.
  $fork_verify = extension_loaded('pcntl')
    && $with_children
    && ($num_children > 0)
    && ($num_to_process >= (2 * $num_children));

  if (!$fork_verify) {
    // No child processes available or wanted: verify everything right here.
    self::update_bib($bib_arr);
    parent::putlog("Verification complete!", 3);
    return;
  }

  $increment = (int) ceil($num_to_process / $num_children);
  $spawned = 0;

  for ($i = 0; $i < $num_children; ++$i) {
    $split_offset = $i * $increment;
    // Rounding the increment up can leave the last slot(s) with nothing to do.
    if ($split_offset >= $num_to_process) { break; }

    $pid = pcntl_fork();
    if ($pid == -1) {
      // The fork failed; this slice is only reported, it is not verified.
      parent::putlog("Unable to spawn harvester: ($i)", 5);
      continue;
    }

    if ($pid) {
      // Parent: remember how many children have to be reaped later.
      ++$spawned;
      parent::putlog("Spawning child harvester to verify records. PID is $pid ..");
      continue;
    }

    // Child: verify the assigned slice (bnum keys preserved), report, and terminate.
    sleep(1);
    $bib_arr_sliced = array_slice($bib_arr, $split_offset, $increment, TRUE);
    $num_bibs = count($bib_arr_sliced);
    $tmp = self::update_bib($bib_arr_sliced);
    parent::putlog("Child process complete. Checked $num_bibs records, updated $tmp[updated] records, retired $tmp[retired] records.", 2);
    exit($i + 1);
  }

  // Reap exactly the children that were actually started.
  while ($spawned > 0) {
    pcntl_waitpid(-1, $status);
    --$spawned;
  }
  parent::putlog("Verification complete!", 3);
}
||
| 181 | |||
/**
 * Does the actual update of the bib record if something has changed.
 * This function is called by verify_bibs()
 *
 * Each bnum is re-scraped through the connector. A bib the connector cannot
 * return is marked inactive and its subject rows are removed. A bib whose
 * bib_lastupdate differs from the stored date is rewritten together with its
 * subject rows.
 *
 * @param array $bib_arr Array of bibs like: key => val is bnum => last update date
 * @return array Array of # updated and # retired ('updated' and 'retired' keys)
 */
public function update_bib($bib_arr) {

  $updated = 0;
  $retired = 0;

  $db = MDB2::connect($this->dsn);
  if (PEAR::isError($db)) {
    parent::putlog('Unable to connect to the database: ' . $db->getMessage(), 5);
    return array('retired' => $retired, 'updated' => $updated);
  }

  // Fields copied from the scraped bib; subjects_ser is added separately below.
  $valid_vals = array('bib_created', 'bib_lastupdate', 'bib_prevupdate', 'bib_revs', 'lang', 'loc_code', 'mat_code', 'author', 'addl_author', 'title', 'title_medium', 'edition', 'series', 'callnum', 'pub_info', 'pub_year', 'stdnum', 'lccn', 'descr', 'notes', 'bnum');
  $placeholders = array_merge($valid_vals, array('subjects_ser'));

  $types = array('date', 'date', 'date', 'integer', 'text', 'text', 'text', 'text', 'text', 'text', 'text', 'text', 'text', 'text', 'text', 'integer', 'text', 'text', 'text', 'text', 'text', 'integer');

  $setlist =
    "bib_created = :bib_created, " .
    "bib_lastupdate = :bib_lastupdate, " .
    "bib_prevupdate = :bib_prevupdate, " .
    "bib_revs = :bib_revs, " .
    "lang = :lang, " .
    "loc_code = :loc_code, " .
    "mat_code = :mat_code, " .
    "author = :author, " .
    "addl_author = :addl_author, " .
    "title = :title, " .
    "title_medium = :title_medium, " .
    "edition = :edition, " .
    "series = :series, " .
    "callnum = :callnum, " .
    "pub_info = :pub_info, " .
    "pub_year = :pub_year, " .
    "stdnum = :stdnum, " .
    "lccn = :lccn, " .
    "descr = :descr, " .
    "notes = :notes, " .
    "subjects = :subjects_ser, " .
    "modified = NOW()";

  // Prepare each statement once and keep its handle so it can be freed later.
  $statements = array(
    'retire' => $db->prepare('UPDATE bib_items SET active = ? WHERE bnum = ?', array('text', 'integer'), MDB2_PREPARE_MANIP),
    'subj_delete' => $db->prepare('DELETE FROM bib_items_subject WHERE bnum = ?', array('integer'), MDB2_PREPARE_MANIP),
    'bib_update' => $db->prepare('UPDATE bib_items SET ' . $setlist . ' WHERE bnum = :bnum', $types, MDB2_PREPARE_MANIP),
    'subj_insert' => $db->prepare('INSERT INTO bib_items_subject VALUES (?, ?)', array('integer', 'text'), MDB2_PREPARE_MANIP),
  );
  foreach ($statements as $statement) {
    if (PEAR::isError($statement)) {
      parent::putlog('Unable to prepare the bib update statements: ' . $statement->getMessage(), 5);
      $db->disconnect();
      return array('retired' => $retired, 'updated' => $updated);
    }
  }

  foreach ($bib_arr as $bnum => $init_bib_date) {

    $bib = $this->locum_cntl->scrape_bib($bnum, TRUE);

    if ($bib == FALSE) {
      // TODO add a verification of weed in here somehow
      $statements['retire']->execute(array('0', $bnum));
      $statements['subj_delete']->execute(array($bnum));
      $retired++;
      continue;
    }

    // Only touch records that carry a bnum and whose last-update date moved.
    if (empty($bib['bnum'])) { continue; }
    $scraped_date = isset($bib['bib_lastupdate']) ? $bib['bib_lastupdate'] : NULL;
    if ($scraped_date == $init_bib_date) { continue; }

    // NOTE(review): assumes the connector puts the subject list last in the bib array - confirm.
    $subj = array_pop($bib);

    // Start from a clean slate for every record so that a field missing from
    // this bib can never inherit the value of the previously processed bib.
    $bib_values = array_fill_keys($placeholders, NULL);
    foreach ($bib as $bkey => $bval) {
      if (in_array($bkey, $valid_vals, TRUE)) { $bib_values[$bkey] = $bval; }
    }
    $bib_values['subjects_ser'] = serialize($subj);

    $res = $statements['bib_update']->execute($bib_values);
    if (PEAR::isError($res)) {
      parent::putlog("Unable to update record # $bnum: " . $res->getMessage(), 5);
      continue;
    }

    // Replace the subject rows wholesale.
    $statements['subj_delete']->execute(array($bnum));
    if (is_array($subj) && count($subj)) {
      foreach ($subj as $subj_heading) {
        $statements['subj_insert']->execute(array($bnum, $subj_heading));
      }
    }
    parent::putlog("Updated record # $bnum", 2, TRUE);
    $updated++;
  }

  foreach ($statements as $statement) {
    $statement->free();
  }
  $db->disconnect();
  return array('retired' => $retired, 'updated' => $updated);
}
| 265 | |||
/**
 * Scans for newly cataloged bib records.
 * Uses the ini "harvest_reach" param to determine how far forward to seek:
 * the harvest starts at the highest bnum in bib_items plus one and ends
 * harvest_reach records after that.
 */
public function new_bib_scan() {
  $db = MDB2::connect($this->dsn);
  if (PEAR::isError($db)) {
    parent::putlog('Unable to connect to the database: ' . $db->getMessage(), 5);
    return;
  }

  $max_bib_result = $db->query('SELECT MAX(bnum) FROM bib_items');
  $max_bib = PEAR::isError($max_bib_result) ? $max_bib_result : $max_bib_result->fetchOne();
  $db->disconnect();
  if (PEAR::isError($max_bib)) {
    parent::putlog('Unable to determine the highest bib number: ' . $max_bib->getMessage(), 5);
    return;
  }

  // MAX() yields NULL on an empty table, which makes the scan start at 1.
  $next_bib = (int) $max_bib + 1;
  // Array keys must be quoted strings: bare words are undefined constants.
  $last_bib = $next_bib + (int) $this->locum_config['harvest_config']['harvest_reach'];

  self::harvest_bibs($next_bib, $last_bib);
}
||
| 280 | |||
/**
 * Grabs the cover image URL for caching (much faster on the front-end to do it this way)
 * Will try amazon and/or syndetics depending on the api_config ini section.
 * When both are enabled, the one with the higher priority value is tried
 * first (amazon wins a tie) and the other serves as the fallback.
 *
 * @param string $stdnum_raw - stdnum/ISBN from the bib record
 * @return string Image URL, or an empty value (''/NULL) when none was found
 */
public function get_cover_img($stdnum_raw) {

  // Format stdnum as best we can: keep only the first whitespace-delimited
  // token. Trimming first keeps a leading blank from producing an empty token.
  $stdnum_arr = preg_split('/\s+/', trim($stdnum_raw));
  $stdnum = $stdnum_arr[0];

  // Without a number there is nothing to look up remotely.
  if ($stdnum == '') { return ''; }

  // Array keys must be quoted strings: bare words are undefined constants.
  $api_cfg = $this->locum_config['api_config'];
  $use_amazon = !empty($api_cfg['use_amazon_images']);
  $use_syndetic = !empty($api_cfg['use_syndetic_images']);
  $amazon_key = isset($api_cfg['amazon_access_key']) ? $api_cfg['amazon_access_key'] : NULL;
  $syndetic_id = isset($api_cfg['syndetic_custid']) ? $api_cfg['syndetic_custid'] : NULL;

  $image_url = '';
  if ($use_amazon && $use_syndetic) {
    $amazon_prio = isset($api_cfg['amazon_img_prio']) ? $api_cfg['amazon_img_prio'] : NULL;
    $syndetic_prio = isset($api_cfg['syndetic_img_prio']) ? $api_cfg['syndetic_img_prio'] : NULL;
    if ($amazon_prio >= $syndetic_prio) {
      $image_url = self::get_amazon_image($stdnum, $amazon_key);
      if (!$image_url) { $image_url = self::get_syndetic_image($stdnum, $syndetic_id); }
    } else {
      $image_url = self::get_syndetic_image($stdnum, $syndetic_id);
      if (!$image_url) { $image_url = self::get_amazon_image($stdnum, $amazon_key); }
    }
  } else if ($use_amazon) {
    $image_url = self::get_amazon_image($stdnum, $amazon_key);
  } else if ($use_syndetic) {
    $image_url = self::get_syndetic_image($stdnum, $syndetic_id);
  }
  return $image_url;
}
||
| 315 | |||
/**
 * Used by get_cover_img to get the Amazon cover image URL
 * You'll need to put in your own Amazon API key into the ini
 *
 * Performs an ItemLookup request and returns the MediumImage URL of the
 * first item in the response.
 * NOTE(review): the request is sent unsigned - confirm the service still accepts that.
 *
 * @param string $stdnum Stdnum/ISBN
 * @param string $api_key Amazon API key - they're free. Go git one.
 * @return string Cover image URL, or NULL when no image could be determined
 */
public function get_amazon_image($stdnum, $api_key) {
  // Defined up front so that every exit path returns a real value.
  $image_url = NULL;

  $url = 'http://webservices.amazon.com/onca/xml?Service=AWSECommerceService';
  $url.= '&AWSAccessKeyId=' . urlencode($api_key);
  $url.= '&Operation=ItemLookup&IdType=ASIN&ItemId=' . urlencode($stdnum);
  $url.= '&ResponseGroup=Medium,OfferFull';

  $az_dl = @file_get_contents($url);
  // $http_response_header is only populated when an HTTP response arrived.
  if ($az_dl === FALSE || empty($http_response_header)) {
    return $image_url;
  }

  // Status line looks like "HTTP/1.1 200 OK"; the reason phrase is optional.
  $status_parts = explode(' ', $http_response_header[0], 3);
  $status_code = isset($status_parts[1]) ? $status_parts[1] : '';
  if ($status_code != '200') {
    return $image_url;
  }

  $az = simplexml_load_string($az_dl);
  if ($az === FALSE) {
    return $image_url;
  }

  // isset() walks the element chain without touching missing nodes.
  if (isset($az->Items->Item->MediumImage->URL)) {
    $found_url = trim((string) $az->Items->Item->MediumImage->URL);
    if ($found_url != '') { $image_url = $found_url; }
  }
  return $image_url;
}
||
| 342 | |||
/**
 * Used by get_cover_img to get the Syndetics cover image URL
 * You'll need to put in your own customer ID into the ini
 *
 * Fetches the Syndetics index XML for the number and, when it lists SC.GIF,
 * returns the SC.GIF image URL - unless that image turns out to be 1 pixel wide.
 *
 * @param string $stdnum Stdnum/ISBN
 * @param string $cust_id Your syndetics ID - it's overpriced. Go git one.
 * @return string Cover image URL, or '' when no image could be determined
 */
public function get_syndetic_image($stdnum, $cust_id) {
  $image_url = '';
  $isbn = urlencode($stdnum);
  $client = urlencode($cust_id);

  $url = 'http://syndetics.com/index.aspx?isbn=' . $isbn . '/index.xml&client=' . $client . '&type=xw10';
  $syn_dl = @file_get_contents($url);
  // $http_response_header is only populated when an HTTP response arrived.
  if ($syn_dl === FALSE || empty($http_response_header)) {
    return $image_url;
  }

  // Status line looks like "HTTP/1.1 200 OK"; the reason phrase is optional.
  $status_parts = explode(' ', $http_response_header[0], 3);
  $status_code = isset($status_parts[1]) ? $status_parts[1] : '';
  if ($status_code != '200' || strpos($syn_dl, 'xml') === FALSE) {
    return $image_url;
  }

  $syn = simplexml_load_string($syn_dl);
  if ($syn === FALSE || (string) $syn->SC != 'SC.GIF') {
    return $image_url;
  }

  $image_url = 'http://syndetics.com/hw7.pl?isbn=' . $isbn . '/SC.GIF&client=' . $client;
  // A 1 pixel wide image is rejected; if the size cannot be read the URL is kept.
  $img_size = @getimagesize($image_url);
  if (is_array($img_size) && $img_size[0] == 1) { $image_url = ''; }

  return $image_url;
}
||
| 366 | |||
| 367 | } |
||
| 368 | |||
| 369 | |||
| 370 | |||
| 371 | ?> |