'');
// Parser state shared (through `global`) by the XML handler callbacks below.
// $currentPost holds the item being assembled; it is null until startElement()
// sees an 'item' element and is reset to null once endElement() has handed the
// finished item to processPost().
$currentPost = null;
// Character data collected for the element currently being read; appended to by
// characterData() and cleared at every element start and end.
$currentText = '';
/**
 * Converts an ISO 8601 timestamp to the "YYYY-MM-DD HH:MM:SS" text form that
 * SQL (MySQL, at least) accepts.
 *
 * The conversion is purely positional: the first ten characters are taken as
 * the date and the eight characters after the separator as the time. Any
 * trailing time-zone designator is dropped without adjusting the time.
 *
 * @param string $input ISO 8601 date-time, e.g. "2004-05-06T07:08:09Z".
 * @return string Date, a single space, then the time portion.
 */
function parseDateISO8601($input) {
    $datePart = substr($input, 0, 10);
    $timePart = substr($input, 11, 8);
    return "$datePart $timePart";
}
/**
 * Converts an RFC 822 date (the format of RSS <pubDate>) to the
 * "YYYY-MM-DD HH:MM:SS" text form that SQL (MySQL, at least) accepts.
 *
 * The result is expressed in the server's default time zone.
 *
 * @param string $input RFC 822 date, e.g. "Tue, 03 Jun 2003 09:39:21 GMT".
 * @return string Formatted timestamp.
 */
function parseDateRFC822($input) {
    // strtotime() returns false for text it cannot parse; the cast keeps the
    // previous fallback of formatting the Unix epoch in that case.
    $timestamp = (int) strtotime($input);
    // 'i' is minutes. The earlier strftime() format used %I (hour on a 12-hour
    // clock) in the minutes position, which corrupted every imported timestamp.
    return date('Y-m-d H:i:s', $timestamp);
}
/**
 * XML parser callback invoked at every opening tag.
 *
 * An 'item' element begins a fresh post with an empty category list; a 'guid'
 * element has its attributes remembered for endElement(). Whatever the
 * element, the accumulated character data is cleared.
 *
 * @param mixed  $parser XML parser handle (unused).
 * @param string $name   Element name as reported by the parser.
 * @param array  $attrs  Attributes of the element.
 */
function startElement($parser, $name, $attrs) {
    global $currentPost, $currentText, $currentGuidAttrs;

    switch ($name) {
        case 'item':
            $currentPost = new post();
            $currentPost->categories = array();
            break;

        case 'guid':
            $currentGuidAttrs = $attrs;
            break;
    }

    // Text belongs to the innermost element, so start collecting afresh.
    $currentText = '';
}
/**
 * XML parser callback invoked at every closing tag.
 *
 * Copies the text collected for the element into the matching field of the
 * post being assembled and, when the item itself closes, passes the post to
 * processPost(). Element names are matched both in their plain/prefixed form
 * and in the "namespace-URI localname" form produced by a namespace-aware
 * parser.
 *
 * @param mixed  $parser XML parser handle (unused).
 * @param string $name   Element name as reported by the parser.
 */
function endElement($parser, $name) {
    // $currentGuidAttrs must be imported too: without it the isPermaLink test
    // below reads an empty local variable and no permalink is ever recorded.
    global $currentPost, $currentText, $currentGuidAttrs;

    // Elements that close outside an item (for instance the channel's own
    // title) have no post to write to. Skip them instead of assigning
    // properties on null, which is a fatal error on PHP 8.
    if (!is_object($currentPost)) {
        $currentText = '';
        return;
    }

    switch ($name) {
        case 'title': case 'http://www.w3.org/1999/02/22-rdf-syntax-ns# title':
            $currentPost->title = $currentText;
            break;

        case 'content:encoded': case 'http://purl.org/rss/1.0/modules/content/ encoded':
            $currentPost->content = $currentText;
            break;

        case 'description': case 'http://www.w3.org/1999/02/22-rdf-syntax-ns# description':
            // content:encoded trumps description, so only save the description
            // if there's no content already
            if (!isset($currentPost->content) || !strlen($currentPost->content)) {
                $currentPost->content = $currentText;
            }
            break;

        case 'pubDate':
            $currentPost->createDate = parseDateRFC822($currentText);
            break;

        case 'dc:date': case 'http://purl.org/dc/elements/1.1/ date':
            $currentPost->createDate = parseDateISO8601($currentText);
            break;

        case 'dcterms:modified': case 'http://purl.org/dc/terms/ modified':
            $currentPost->modDate = parseDateISO8601($currentText);
            break;

        case 'category': case 'dc:subject': case 'http://purl.org/dc/elements/1.1/ subject':
            $currentPost->categories[] = $currentText;
            break;

        case 'guid':
            // Only a guid explicitly flagged isPermaLink="true" is used as the permalink.
            if (isset($currentGuidAttrs['isPermaLink']) && $currentGuidAttrs['isPermaLink'] == 'true') {
                $currentPost->permalink = $currentText;
            }
            $currentPost->guid = $currentText;
            break;

        case 'item': case 'http://www.w3.org/1999/02/22-rdf-syntax-ns# item':
            processPost($currentPost);
            $currentPost = null;
            break;
    }

    $currentText = '';
}
/**
 * XML parser callback for character data.
 *
 * The parser may deliver the text of one element in several pieces, so each
 * piece is appended to the running buffer rather than replacing it.
 *
 * @param mixed  $parser XML parser handle (unused).
 * @param string $data   Next fragment of text.
 */
function characterData($parser, $data) {
    global $currentText;
    $currentText = $currentText . $data;
}
// WordPress-specific code
// Login name of the WordPress user the imported posts are attributed to;
// processPost() looks up this user's ID by user_login.
$post_author = 'admin';
// Load the WordPress configuration and the core include files. The relative
// path means this script is expected to run from a directory one level below
// the WordPress root.
require_once('../wp-config.php');
require_once(ABSPATH.WPINC.'/template-functions.php');
require_once(ABSPATH.WPINC.'/functions.php');
require_once(ABSPATH.WPINC.'/vars.php');
// Optionally make sure the posts table has a post_modified datetime column.
// Skipped when $kTakeNoAction is set, so that mode never alters the schema.
if ($kCreateModDateField && !$kTakeNoAction) {
// maybe_add_column() and check_column() are presumably supplied by this
// include -- NOTE(review): confirm against the WordPress version in use.
require_once(ABSPATH.'/wp-admin/install-helper.php');
$res = '';
$tablename = $tableposts;
$ddl = "ALTER TABLE $tableposts ADD COLUMN post_modified datetime";
maybe_add_column($tablename, 'post_modified', $ddl);
// Report whether the column is now present with the expected type.
if (check_column($tablename, 'post_modified', 'datetime')) {
$res .= $tablename . ' - ok
'."\n";
} else {
$res .= 'There was a problem with ' . $tablename . '
'."\n";
//++$error_count;
}
echo $res;
}
/**
 * Stores one parsed feed item as a WordPress post and links it to its categories.
 *
 * Steps:
 *  1. Skip the item when it has categories and every one of them is excluded.
 *  2. Resolve (creating when missing) the category rows for the remaining
 *     categories.
 *  3. Look for an existing post with the same date and title. Depending on
 *     $kUpdatePostsAlways / $kUpdatePostsIfNewer the match is updated or
 *     skipped; with no match a new post is inserted.
 *
 * When $kTakeNoAction is set the function only reports what it would do and
 * issues no INSERT/UPDATE/DELETE statements.
 *
 * @param object $post Item assembled by the XML handlers. The properties
 *                     categories, content, title, createDate, modDate and
 *                     permalink are read; each may be absent.
 * @return void
 */
function processPost(&$post) {
    global $kSetModDateField, $kUpdatePostsAlways, $kUpdatePostsIfNewer, $kTakeNoAction;
    // The exclusion list was read as $kExcludedCategories but only the
    // misspelt $kExcludeCategories was ever imported, so the filter could
    // never match. The configuration that defines the list is not part of this
    // section, so both spellings are accepted.
    global $kExcludedCategories, $kExcludeCategories;
    global $post_author;
    global $wpdb;
    global $tableusers, $tableposts, $tablepost2cat, $tablecategories;

    if (is_array($kExcludedCategories)) {
        $excludedCategories = $kExcludedCategories;
    } elseif (is_array($kExcludeCategories)) {
        $excludedCategories = $kExcludeCategories;
    } else {
        $excludedCategories = array();
    }

    $postCategories = (isset($post->categories) && is_array($post->categories))
        ? $post->categories
        : array();

    // Filter out (ignore) posts having categories that are all listed as "excluded".
    // If a post has no categories, or at least one non-excluded category, it is
    // still included.
    if (sizeof($postCategories)) {
        $gotIncludedCategory = false;
        foreach ($postCategories as $categoryName) {
            if (!isset($excludedCategories[$categoryName])) {
                $gotIncludedCategory = true;
                break;
            }
        }
        if (!$gotIncludedCategory) {
            return;
        }
    }

    // NOTE(review): values are escaped with addslashes() to stay compatible
    // with the early $wpdb API this script targets; on a WordPress version that
    // offers $wpdb->prepare(), switch these queries to prepared statements.
    $post_author_ID = $wpdb->get_var(
        "SELECT ID FROM $tableusers WHERE user_login = '" . addslashes($post_author) . "'"
    );

    $post_content = isset($post->content) ? $post->content : '';
    // XHTMLify <br> tags. The search/replace pair had degraded into a
    // newline-for-newline no-op; '<br>' => '<br />' is reconstructed from the
    // accompanying comment.
    $post_content = str_replace('<br>', '<br />', $post_content);
    // An earlier revision also un-word-wrapped the content here (replacing \n
    // and \r with spaces outside preformatted sections, presumably <pre>),
    // because line-break tags are added at display time and feeds are often
    // already soft-wrapped. That pass is disabled, so line breaks are stored
    // as received.
    $post_content = addslashes($post_content);

    $post_date = addslashes(isset($post->createDate) ? $post->createDate : '');
    $post_title = addslashes(isset($post->title) ? $post->title : '');
    $post_modified = ($kSetModDateField && isset($post->modDate)) ? addslashes($post->modDate) : '';

    // Derive the post slug from a /YYYY/MM/DD/slug/ style permalink, if any.
    $post_name = '';
    if (isset($post->permalink) && strlen($post->permalink)) {
        $matches = array();
        if (preg_match('|/[0-9]{4}/[0-9]{2}/[0-9]{2}/([A-Za-z0-9_-]*)/?|', $post->permalink, $matches)) {
            $post_name = addslashes($matches[1]);
        }
    }

    // Resolve every non-excluded category to its ID, creating rows as needed.
    $categoryIDList = array();
    foreach ($postCategories as $categoryName) {
        if (isset($excludedCategories[$categoryName])) {
            continue;
        }
        $escapedCategoryName = addslashes($categoryName);
        $categoryID = $wpdb->get_var(
            "SELECT cat_ID FROM $tablecategories WHERE cat_name = '" . $escapedCategoryName . "'"
        );
        if (!$categoryID) {
            if ($kTakeNoAction) {
                echo "Would have inserted new category '$categoryName'.";
                $categoryID = 0;
            } else {
                $categoryNiceName = sanitize_title($categoryName);
                $wpdb->query("INSERT INTO $tablecategories
                    (cat_name, category_nicename)
                    VALUES
                    ('" . $escapedCategoryName . "','" . addslashes($categoryNiceName) . "')");
                $categoryID = $wpdb->get_var("SELECT LAST_INSERT_ID()");
            }
        }
        // An existing category is used as-is; its nicename is not refreshed.
        $categoryIDList[] = $categoryID;
    }

    // Quick-n-dirty check for dups: same date and same title.
    // post_modified is only selected when it is needed for the "newer" test.
    $dupColumns = $kUpdatePostsIfNewer
        ? 'ID,post_date,post_title,post_modified'
        : 'ID,post_date,post_title';
    $dupcheck = $wpdb->get_results(
        "SELECT $dupColumns FROM $tableposts WHERE post_date='$post_date' AND post_title='$post_title' LIMIT 1",
        ARRAY_A
    );
    // Guard the lookup: with no match there is no row 0 to index into.
    $existingPost = (is_array($dupcheck) && !empty($dupcheck[0]['ID'])) ? $dupcheck[0] : null;

    if ($existingPost !== null) {
        // post already exists
        $shouldUpdate = $kUpdatePostsAlways
            || ($kUpdatePostsIfNewer && $kSetModDateField && $existingPost['post_modified'] < $post_modified);

        if ($shouldUpdate) {
            print "\n\n\nUpdating post, ID = '" . $existingPost['ID'] . "'\n\n";
            print "Timestamp: " . $post_date . "\n\n";
            print "Post Title: '" . stripslashes($post_title) . "'\n\n";

            if (!$kTakeNoAction) {
                $postID = $existingPost['ID'];
                $modifiedClause = $kSetModDateField ? "post_modified = '$post_modified', " : "";
                $wpdb->query("
                    UPDATE $tableposts
                    SET post_author = '$post_author_ID', post_date = '$post_date',
                    $modifiedClause
                    post_content='$post_content',
                    post_title = '$post_title', post_name = '$post_name' WHERE ID = " . intval($postID));

                // Replace the category links wholesale.
                $wpdb->query("DELETE FROM $tablepost2cat WHERE post_id = " . intval($postID));
                foreach ($categoryIDList as $categoryID) {
                    $wpdb->query("
                        INSERT INTO $tablepost2cat
                        (post_id, category_id)
                        VALUES
                        (" . intval($postID) . "," . intval($categoryID) . ")
                    ");
                }
            }
        } else {
            print "\n\n\nSkipping duplicate post, ID = '" . $existingPost['ID'] . "'\n\n";
            print "Timestamp: " . $post_date . "\n\n";
            print "Post Title: '" . stripslashes($post_title) . "'\n\n";
        }
        return;
    }

    print "\n\nInserting new post.\n\n";
    print "Timestamp: " . $post_date . "\n\n";
    print "Post Title: '" . stripslashes($post_title) . "'\n\n";

    if ($kTakeNoAction) {
        return;
    }

    // post_modified is only written when a modification date is available.
    $modifiedColumn = $post_modified ? ",post_modified" : "";
    $modifiedValue = $post_modified ? ",'$post_modified'" : "";
    $wpdb->query("
        INSERT INTO $tableposts
        (post_author,post_date,post_content,post_title,post_name,post_category" . $modifiedColumn . ")
        VALUES
        ('$post_author_ID','$post_date','$post_content','$post_title','$post_name','1'" . $modifiedValue . ")
    ");
    $postID = $wpdb->get_var("SELECT LAST_INSERT_ID();");
    if ($postID) {
        foreach ($categoryIDList as $categoryID) {
            $wpdb->query("
                INSERT INTO $tablepost2cat
                (post_id, category_id)
                VALUES
                (" . intval($postID) . "," . intval($categoryID) . ")
            ");
        }
    }
}
// XML parsing code
/**
 * Parses one RSS/RDF file, dispatching its elements to startElement(),
 * endElement() and characterData().
 *
 * The file is fed to the parser in 4096-byte chunks. The script terminates
 * with a message when the file cannot be opened or is not well-formed. An
 * empty file is silently ignored.
 *
 * @param string $filePath Path of the XML file to import.
 * @return void
 */
function importRSSFile($filePath) {
    if (function_exists('xml_parser_create_ns')) {
        $xml_parser = xml_parser_create_ns('iso-8859-1', ' '); // space sep for namespace URI
    } else {
        $xml_parser = xml_parser_create();
    }
    // make sure to turn off case-folding; XML 1.0 is case-sensitive
    xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, false);
    xml_set_element_handler($xml_parser, "startElement", "endElement");
    xml_set_character_data_handler($xml_parser, "characterData");

    $fp = fopen($filePath, "r");
    if (!$fp) {
        die("could not open XML input");
    }

    // Loop on feof() rather than on the truthiness of the chunk. When the file
    // size is an exact multiple of the chunk size, end-of-file is only detected
    // by the following (empty) read, and the parser must still receive a call
    // with the final flag set. A chunk consisting of just "0" is also falsy
    // and must not end the loop.
    $parserFed = false;
    while (!feof($fp)) {
        $data = fread($fp, 4096);
        if ($data === false) {
            break; // read error: stop quietly, as before
        }
        if ($data === '' && !$parserFed) {
            break; // empty file: nothing to import
        }
        $parserFed = true;
        if (!xml_parse($xml_parser, $data, feof($fp))) {
            die(sprintf("XML error: %s at line %d",
                xml_error_string(xml_get_error_code($xml_parser)),
                xml_get_current_line_number($xml_parser)));
        }
    }

    xml_parser_free($xml_parser);
    fclose($fp);
}
/**
 * Imports every monthly feed file found in an archive directory laid out as
 * <dirPath>/<YYYY>/<MM>.xml, in chronological order.
 *
 * @param string   $dirPath   Root directory of the archive.
 * @param int      $startYear First year to probe (default 1980).
 * @param int|null $endYear   Last year to probe; null means the current year.
 * @return void
 */
function importBlogArchive($dirPath, $startYear = 1980, $endYear = null) {
    if ($endYear === null) {
        // date() replaces strftime(), which is deprecated as of PHP 8.1.
        $endYear = intval(date('Y'));
    }

    for ($year = $startYear; $year <= $endYear; $year++) {
        for ($month = 1; $month <= 12; $month++) {
            // Month files are named with two digits: 01.xml .. 12.xml
            $rssFilePath = sprintf('%s/%d/%02d.xml', $dirPath, $year, $month);
            if (is_file($rssFilePath)) {
                importRSSFile($rssFilePath);
            }
        }
    }
}
// Entry point: $path (set outside this section) names either an archive
// directory, which is walked year by year, or a single feed file.
if (is_dir($path)) {
importBlogArchive($path);
}
else {
importRSSFile($path);
}
/*echo '';
print_r($EZSQL_ERROR);
echo '
';
*/
?>