Rev 19 |
Rev 24 |
Go to most recent revision |
Blame |
Compare with Previous |
Last modification |
View Log
| RSS feed
<?php
/**
* Locum is a software library that abstracts ILS functionality into a
* catalog discovery layer for use with such things as bolt-on OPACs like
* SOPAC.
* @package Locum
* @author John Blyberg
*/
require_once('locum.php');
/**
* This class is the server component of Locum. It is separated from the client piece because the functionality
* in this class should never need to be used in any front-end pieces. This class does all the harvesting and
* data preparation.
*/
class locum_server
extends locum
{
/**
 * Initiates the harvest of bib records from the catalog.
 *
 * The inclusive range $start..$end is imported through import_bibs(). When the
 * pcntl extension is loaded, "harvest_with_children" is enabled in the
 * "harvest_config" section and the range spans at least twice "max_children",
 * the range is cut into contiguous chunks and each chunk is imported by a
 * forked child process. In every other case the whole range is imported in
 * the current process.
 *
 * @param int $start Bib number to start with
 * @param int $end Bib number to end with (inclusive)
 * @param boolean $quiet quietly harvest or not. Default: TRUE (not consulted by this method)
 * @return int|null 0 when $start is greater than $end, otherwise nothing
 */
public function harvest_bibs($start, $end, $quiet = TRUE) {
  if ($start > $end) { return 0; }

  $harvest_cfg = isset($this->locum_config['harvest_config']) ? $this->locum_config['harvest_config'] : array();
  $num_children = isset($harvest_cfg['max_children']) ? (int) $harvest_cfg['max_children'] : 0;
  $use_children = !empty($harvest_cfg['harvest_with_children']);
  $num_to_process = $end - $start;

  // $num_children > 0 is checked first so the chunk size below can never divide by zero.
  $can_fork = extension_loaded('pcntl') && $use_children && $num_children > 0
    && ($num_to_process >= (2 * $num_children));

  if (!$can_fork) {
    self::import_bibs($start, $end);
    return;
  }

  // Both ends of the range are inclusive, hence the + 1 when sizing the chunks.
  $increment = (int) ceil(($num_to_process + 1) / $num_children);
  $spawned = 0;
  $chunk_start = $start;

  for ($i = 0; $i < $num_children && $chunk_start <= $end; ++$i) {
    // Never let a chunk run past the bib number the caller asked for.
    $chunk_end = min($chunk_start + $increment - 1, $end);
    $pid = pcntl_fork();

    if ($pid == -1) {
      parent::putlog("Unable to spawn harvester: ($i)", 5);
    } else if ($pid) {
      // Parent: remember how many children actually exist so we wait for exactly that many.
      ++$spawned;
      parent::putlog("Spawning child harvester to scan records $chunk_start - $chunk_end. PID is $pid ..");
    } else {
      // Child: import our chunk, report, and exit with our 1-based child index.
      sleep(1);
      $result = self::import_bibs($chunk_start, $chunk_end);
      parent::putlog("Child process complete. Scanned records $chunk_start - $chunk_end. Imported $result[imported] reords and skipped $result[skipped] ..", 2);
      exit($i + 1);
    }

    $chunk_start = $chunk_end + 1;
  }

  // Only the parent reaches this point; reap every child that was started.
  for ($reaped = 0; $reaped < $spawned; ++$reaped) {
    pcntl_waitpid(-1, $status);
  }
  parent::putlog("Harvest complete!", 3);
}
/**
 * Does the actual import of bib records. Called by the harvester.
 * It uses start and end parameters because this function can potentially be called by a
 * child process.
 *
 * Every bib number in the inclusive range is scraped through
 * $this->locum_cntl->scrape_bib(). A falsy (or non-array) scrape result is counted as
 * skipped; anything else is inserted into bib_items, its subject headings are inserted
 * into bib_items_subject, and it is counted as imported.
 *
 * @param int $start Bib number to start with
 * @param int $end Bib number to end with
 * @return array Array of information about the bibs imported: 'skipped' and 'imported' counts
 */
public function import_bibs($start, $end) {
  $process_report = array('skipped' => 0, 'imported' => 0);

  $db = MDB2::connect($this->dsn);
  if (MDB2::isError($db)) {
    // Without a connection nothing can be imported; report it instead of dying on a method call.
    parent::putlog('Unable to connect to the database: ' . $db->getMessage(), 5);
    return $process_report;
  }

  // Fields of a scraped bib that map onto named placeholders of the insert below.
  $valid_vals = array('bnum', 'author', 'addl_author', 'title', 'title_medium', 'edition', 'series', 'callnum', 'pub_info', 'pub_year', 'stdnum', 'lccn', 'descr', 'notes', 'subjects_ser', 'lang', 'loc_code', 'mat_code', 'cover_img', 'bib_created', 'bib_lastupdate', 'bib_prevupdate', 'bib_revs');
  $valid_keys = array_flip($valid_vals);
  // Template row: guarantees every placeholder is bound even if the scrape omitted a field.
  $blank_row = array_fill_keys($valid_vals, NULL);

  // The statements do not depend on the record, so prepare them once for the whole run.
  $bib_stmt = $db->prepare('INSERT INTO bib_items VALUES (:bnum, :author, :addl_author, :title, :title_medium, :edition, :series, :callnum, :pub_info, :pub_year, :stdnum, :lccn, :descr, :notes, :subjects_ser, :lang, :loc_code, :mat_code, :cover_img, NOW(), :bib_created, :bib_lastupdate, :bib_prevupdate, :bib_revs, \'1\')');
  $subj_stmt = $db->prepare('INSERT INTO bib_items_subject VALUES (?, ?)', array('integer', 'text'), MDB2_PREPARE_MANIP);

  for ($i = $start; $i <= $end; $i++) {
    $bib = $this->locum_cntl->scrape_bib($i);
    if (!is_array($bib) || count($bib) == 0) {
      $process_report['skipped']++;
      continue;
    }

    // The last element of the scraped record is taken as the list of subject headings.
    $subj = array_pop($bib);

    // Start from a clean row for every record so values never leak from the previous bib.
    $bib_values = array_merge($blank_row, array_intersect_key($bib, $valid_keys));
    $bib_values['subjects_ser'] = serialize($subj);
    $bib_stmt->execute($bib_values);

    if (is_array($subj) && count($subj)) {
      foreach ($subj as $subj_heading) {
        $subj_stmt->execute(array($bib['bnum'], $subj_heading));
      }
    }

    $process_report['imported']++;
  }

  $bib_stmt->free();
  $subj_stmt->free();
  $db->disconnect();
  return $process_report;
}
/**
 * Scans existing imported bibs for changes or weeds and makes the appropriate changes.
 *
 * The bnum => bib_lastupdate pairs are read from locum_facet_heap and handed to
 * update_bib(). When the pcntl extension is loaded, "harvest_with_children" is enabled
 * in the "harvest_config" section and there are at least twice "max_children" records,
 * the set is sliced and each slice is checked by a forked child. Otherwise the whole
 * set is checked in the current process.
 *
 * @param boolean $quiet Run this function silently. Default: TRUE (not consulted by this method)
 */
public function verify_bibs($quiet = TRUE) {
  parent::putlog("Collecting current data keys ..");
  $db = MDB2::connect($this->dsn);
  $init_result = $db->query('SELECT bnum, bib_lastupdate FROM locum_facet_heap');
  $init_bib_arr = $init_result->fetchAll(MDB2_FETCHMODE_ASSOC);
  $db->disconnect();

  // Re-key the rows as bnum => last update date; stays an empty array when there are no rows.
  $bib_arr = array();
  foreach ($init_bib_arr as $init_bib_arr_vals) {
    $bib_arr[$init_bib_arr_vals['bnum']] = $init_bib_arr_vals['bib_lastupdate'];
  }
  $num_to_process = count($bib_arr);
  parent::putlog("Finished collecting data keys.");

  $harvest_cfg = isset($this->locum_config['harvest_config']) ? $this->locum_config['harvest_config'] : array();
  $num_children = isset($harvest_cfg['max_children']) ? (int) $harvest_cfg['max_children'] : 0;
  $use_children = !empty($harvest_cfg['harvest_with_children']);

  // $num_children > 0 is checked first so the slice size below can never divide by zero.
  $can_fork = extension_loaded('pcntl') && $use_children && $num_children > 0
    && ($num_to_process >= (2 * $num_children));

  if ($can_fork) {
    $increment = (int) ceil($num_to_process / $num_children);
    $spawned = 0;
    $split_offset = 0;

    for ($i = 0; $i < $num_children && $split_offset < $num_to_process; ++$i) {
      $pid = pcntl_fork();

      if ($pid == -1) {
        parent::putlog("Unable to spawn harvester: ($i)", 5);
      } else if ($pid) {
        // Parent: remember how many children actually exist so we wait for exactly that many.
        ++$spawned;
        parent::putlog("Spawning child harvester to verify records. PID is $pid ..");
      } else {
        // Child: verify our slice (keys preserved, they are the bib numbers), report and exit
        // with our 1-based child index.
        sleep(1);
        $bib_arr_sliced = array_slice($bib_arr, $split_offset, $increment, TRUE);
        $num_bibs = count($bib_arr_sliced);
        $tmp = self::update_bib($bib_arr_sliced);
        $updated = $tmp['updated'];
        $retired = $tmp['retired'];
        parent::putlog("Child process complete. Checked $num_bibs records, updated $updated records, retired $retired records.", 2);
        exit($i + 1);
      }

      $split_offset += $increment;
    }

    // Only the parent reaches this point; reap every child that was started.
    for ($reaped = 0; $reaped < $spawned; ++$reaped) {
      pcntl_waitpid(-1, $status);
    }
    parent::putlog("Verification complete!", 3);
  } else {
    // No forking possible (or not worthwhile): verify everything in this process.
    if ($num_to_process > 0) {
      self::update_bib($bib_arr);
    }
    parent::putlog("Verification complete!", 3);
  }
}
/**
 * Does the actual update of the bib record if something has changed.
 * This function is called by verify_bibs()
 *
 * Each bib is re-scraped through $this->locum_cntl->scrape_bib($bnum, TRUE):
 *  - a falsy result marks the bib inactive in bib_items and removes its subject rows;
 *  - a result whose bib_lastupdate differs from the stored date rewrites the bib_items
 *    row and replaces its subject rows;
 *  - anything else is left alone.
 *
 * @param array $bib_arr Array of bibs like: key => val is bnum => last update date
 * @return array Array of # updated and # retired, keyed 'retired' and 'updated'
 */
public function update_bib($bib_arr) {
  $updated = 0;
  $retired = 0;

  $db = MDB2::connect($this->dsn);
  if (MDB2::isError($db)) {
    // Without a connection nothing can be verified; report it instead of dying on a method call.
    parent::putlog('Unable to connect to the database: ' . $db->getMessage(), 5);
    return array('retired' => $retired, 'updated' => $updated);
  }

  // Fields of a scraped bib that map onto named placeholders of the update below.
  $valid_vals = array('bib_created', 'bib_lastupdate', 'bib_prevupdate', 'bib_revs', 'lang', 'loc_code', 'mat_code', 'author', 'addl_author', 'title', 'title_medium', 'edition', 'series', 'callnum', 'pub_info', 'pub_year', 'stdnum', 'lccn', 'descr', 'notes', 'bnum');
  $valid_keys = array_flip($valid_vals);
  // Template row: guarantees every placeholder is bound even if the scrape omitted a field.
  $blank_row = array_fill_keys($valid_vals, NULL);

  // One type per placeholder, in the order the placeholders appear in the UPDATE statement.
  $update_types = array('date', 'date', 'date', 'integer', 'text', 'text', 'text', 'text', 'text', 'text', 'text', 'text', 'text', 'text', 'text', 'integer', 'text', 'text', 'text', 'text', 'text', 'integer');
  $setlist =
    "bib_created = :bib_created, " .
    "bib_lastupdate = :bib_lastupdate, " .
    "bib_prevupdate = :bib_prevupdate, " .
    "bib_revs = :bib_revs, " .
    "lang = :lang, " .
    "loc_code = :loc_code, " .
    "mat_code = :mat_code, " .
    "author = :author, " .
    "addl_author = :addl_author, " .
    "title = :title, " .
    "title_medium = :title_medium, " .
    "edition = :edition, " .
    "series = :series, " .
    "callnum = :callnum, " .
    "pub_info = :pub_info, " .
    "pub_year = :pub_year, " .
    "stdnum = :stdnum, " .
    "lccn = :lccn, " .
    "descr = :descr, " .
    "notes = :notes, " .
    "subjects = :subjects_ser, " .
    "modified = NOW()";

  // None of the statements depend on the record, so prepare them once and free them once.
  $retire_stmt = $db->prepare('UPDATE bib_items SET active = ? WHERE bnum = ?', array('text', 'integer'));
  $drop_subj_stmt = $db->prepare('DELETE FROM bib_items_subject WHERE bnum = ?', array('integer'));
  $update_stmt = $db->prepare('UPDATE bib_items SET ' . $setlist . ' WHERE bnum = :bnum', $update_types, MDB2_PREPARE_MANIP);
  $add_subj_stmt = $db->prepare('INSERT INTO bib_items_subject VALUES (?, ?)', array('integer', 'text'), MDB2_PREPARE_MANIP);

  foreach ($bib_arr as $bnum => $init_bib_date) {
    $bib = $this->locum_cntl->scrape_bib($bnum, TRUE);

    if ($bib == FALSE) {
      // TODO add a verification of weed in here somehow
      $retire_stmt->execute(array('0', $bnum));
      $drop_subj_stmt->execute(array($bnum));
      $retired++;
    } else if (!empty($bib['bnum']) && $bib['bib_lastupdate'] != $init_bib_date) {
      // The last element of the scraped record is taken as the list of subject headings.
      $subj = array_pop($bib);

      // Start from a clean row for every record so values never leak from the previous bib.
      $bib_values = array_merge($blank_row, array_intersect_key($bib, $valid_keys));
      $bib_values['subjects_ser'] = serialize($subj);
      $update_stmt->execute($bib_values);

      // Replace the subject rows wholesale: drop the old ones, then add the current ones.
      $drop_subj_stmt->execute(array($bnum));
      if (is_array($subj) && count($subj)) {
        foreach ($subj as $subj_heading) {
          $add_subj_stmt->execute(array($bnum, $subj_heading));
        }
      }

      parent::putlog("Updated record # $bnum", 2, TRUE);
      $updated++;
    }
  }

  $retire_stmt->free();
  $drop_subj_stmt->free();
  $update_stmt->free();
  $add_subj_stmt->free();
  $db->disconnect();
  return array('retired' => $retired, 'updated' => $updated);
}
/**
 * Scans for newly cataloged bib records.
 * Uses the ini "harvest_reach" param to determine how far forward to seek.
 *
 * The scan starts one past the highest bnum currently in bib_items and ends
 * "harvest_reach" bib numbers later; the range is handed to harvest_bibs().
 */
public function new_bib_scan() {
  $db = MDB2::connect($this->dsn);
  $max_bib_result = $db->query('SELECT MAX(bnum) FROM bib_items');
  $max_bib = $max_bib_result->fetchOne();
  // The connection is only needed for the lookup above; release it before the long harvest.
  $db->disconnect();

  // MAX() yields NULL on an empty table; the cast turns that into 0 so the scan starts at 1.
  $next_bib = (int) $max_bib + 1;
  $last_bib = $next_bib + $this->locum_config['harvest_config']['harvest_reach'];
  self::harvest_bibs($next_bib, $last_bib);
}
/**
 * Grabs the cover image URL for caching (much faster on the front-end to do it this way)
 * Will try amazon and/or syndetics, depending on what the ini "api_config" section enables.
 * When both are enabled, the source with the higher priority value is tried first
 * (amazon wins a tie) and the other one is used as a fallback.
 *
 * @param string $stdnum_raw - stdnum/ISBN from the bib record
 * @return string Image URL, or an empty value when no image was found
 */
public function get_cover_img($stdnum_raw) {
  // Format stdnum as best we can: trim first, then keep only the part before the first space.
  // Trimming first matters, otherwise a leading space would leave us with an empty identifier.
  $stdnum = trim($stdnum_raw);
  $space_pos = strpos($stdnum, ' ');
  if ($space_pos !== FALSE) {
    $stdnum = substr($stdnum, 0, $space_pos);
  }
  if ($stdnum == '') {
    // Nothing to look up; don't bother the remote services.
    return '';
  }

  $api_cfg = isset($this->locum_config['api_config']) ? $this->locum_config['api_config'] : array();
  $use_amazon = !empty($api_cfg['use_amazon_images']);
  $use_syndetic = !empty($api_cfg['use_syndetic_images']);

  // Build the list of sources to try, in order.
  $sources = array();
  if ($use_amazon) { $sources[] = 'amazon'; }
  if ($use_syndetic) { $sources[] = 'syndetic'; }
  if ($use_amazon && $use_syndetic) {
    $amazon_prio = isset($api_cfg['amazon_img_prio']) ? $api_cfg['amazon_img_prio'] : 0;
    $syndetic_prio = isset($api_cfg['syndetic_img_prio']) ? $api_cfg['syndetic_img_prio'] : 0;
    if ($amazon_prio < $syndetic_prio) {
      $sources = array_reverse($sources);
    }
  }

  $image_url = '';
  foreach ($sources as $source) {
    if ($source == 'amazon') {
      $api_key = isset($api_cfg['amazon_access_key']) ? $api_cfg['amazon_access_key'] : '';
      $image_url = self::get_amazon_image($stdnum, $api_key);
    } else {
      $cust_id = isset($api_cfg['syndetic_custid']) ? $api_cfg['syndetic_custid'] : '';
      $image_url = self::get_syndetic_image($stdnum, $cust_id);
    }
    // The first source that produces a URL wins.
    if ($image_url) { break; }
  }
  return $image_url;
}
/**
 * Used by get_cover_img to get the Amazon cover image URL
 * You'll need to put in your own Amazon API key into the ini
 *
 * Performs an ItemLookup request and returns the MediumImage URL of the first item
 * in the response.
 * NOTE(review): the request is sent unsigned - confirm the endpoint still accepts that.
 *
 * @param string $stdnum Stdnum/ISBN
 * @param string $api_key Amazon API key - they're free. Go git one.
 * @return string Cover image URL, or an empty string when none could be determined
 */
public function get_amazon_image($stdnum, $api_key) {
  $image_url = '';

  $url = 'http://webservices.amazon.com/onca/xml?Service=AWSECommerceService';
  $url .= '&AWSAccessKeyId=' . urlencode($api_key);
  $url .= '&Operation=ItemLookup&IdType=ASIN&ItemId=' . urlencode($stdnum);
  $url .= '&ResponseGroup=Medium,OfferFull';

  $az_dl = @file_get_contents($url);
  // $http_response_header is only populated when a response was actually received.
  if ($az_dl === FALSE || empty($http_response_header)) {
    return $image_url;
  }

  // Status line looks like "HTTP/1.1 200 OK"; the second token is the status code.
  $status_parts = explode(' ', $http_response_header[0], 3);
  $status_code = isset($status_parts[1]) ? $status_parts[1] : '';
  if ($status_code != '200') {
    return $image_url;
  }

  $az = simplexml_load_string($az_dl);
  if ($az !== FALSE && isset($az->Items->Item->MediumImage->URL)) {
    $image_url = trim((string) $az->Items->Item->MediumImage->URL);
  }
  return $image_url;
}
/**
 * Used by get_cover_img to get the Syndetics cover image URL
 * You'll need to put in your own customer ID into the ini
 *
 * The Syndetics index document for the identifier is fetched first; only when it
 * lists 'SC.GIF' is the image URL built, and that URL is discarded again when the
 * image it points to turns out to be one pixel wide.
 *
 * @param string $stdnum Stdnum/ISBN
 * @param string $cust_id Your syndetics ID - it's overpriced. Go git one.
 * @return string Cover image URL, or an empty string when none could be determined
 */
public function get_syndetic_image($stdnum, $cust_id) {
  $image_url = '';
  $isbn_param = urlencode($stdnum);
  $client_param = urlencode($cust_id);

  $url = 'http://syndetics.com/index.aspx?isbn=' . $isbn_param . '/index.xml&client=' . $client_param . '&type=xw10';
  $syn_dl = @file_get_contents($url);
  // $http_response_header is only populated when a response was actually received.
  if ($syn_dl === FALSE || empty($http_response_header)) {
    return $image_url;
  }

  // Status line looks like "HTTP/1.1 200 OK"; the second token is the status code.
  $status_parts = explode(' ', $http_response_header[0], 3);
  $status_code = isset($status_parts[1]) ? $status_parts[1] : '';
  if ($status_code != '200' || strpos($syn_dl, 'xml') === FALSE) {
    return $image_url;
  }

  $syn = simplexml_load_string($syn_dl);
  if ($syn !== FALSE && (string) $syn->SC == 'SC.GIF') {
    $image_url = 'http://syndetics.com/hw7.pl?isbn=' . $isbn_param . '/SC.GIF&client=' . $client_param;
    // A one-pixel-wide image is treated as "no cover" (presumably a placeholder - confirm).
    // When the size cannot be read at all, the URL is kept.
    $img_size = @getimagesize($image_url);
    if (is_array($img_size) && $img_size[0] == 1) {
      $image_url = '';
    }
  }
  return $image_url;
}
}
?>