view dbreindex.php @ 6:bc6045ed0b2e

Added script to fix and re-index database fields and references
author Ivo Smits <Ivo@UCIS.nl>
date Tue, 12 Apr 2011 11:41:35 +0200
parents
children 01dc7eeaf5df
line wrap: on
line source

#!/usr/bin/php
<?php
// Maintenance pass over the `messages` table. For every stored article it:
//   - re-parses the raw header block, dropping duplicate core headers and
//     headers that must not be stored, and rewrites `header` if it changed;
//   - reports dot-only lines found in the header or body;
//   - makes the `messageid` column agree with the Message-ID header;
//   - makes sure a `groupmessages` row links the article to every known
//     group named in its Newsgroups header.
// $db is not defined in this file; presumably it is set up by common.php.
chdir(__DIR__);
require_once './common.php';

// Paging cursor: highest message id handled so far (NULL before the first batch).
$lastposted = NULL;
while (TRUE) {
	// ORDER BY `id` is required for the "id > cursor" paging to be reliable:
	// without it LIMIT may return arbitrary rows and articles could be skipped.
	if ($lastposted === NULL) {
		$articles = $db->evalAllAssoc('SELECT * FROM `messages` ORDER BY `id` LIMIT 10');
	} else {
		$articles = $db->evalAllAssoc('SELECT * FROM `messages` WHERE `id` > ? ORDER BY `id` LIMIT 10', $lastposted);
	}
	// Stop on an empty batch (or on a falsy result, so a failed query cannot spin forever).
	if (!$articles || !count($articles)) break;
	foreach ($articles as $article) {
		// Advance the cursor first. The validation steps below bail out with
		// `continue`; if the cursor were only advanced at the end, a batch made
		// up entirely of rejected articles would be fetched again and again.
		if ($lastposted === NULL || $article['id'] > $lastposted) $lastposted = $article['id'];

		$headers = array();       // upper-cased name => value, for the core headers only
		$header = array();        // header lines that will be kept, in original order
		$headerchanged = FALSE;   // TRUE once at least one header line has been dropped
		$dropping = FALSE;        // TRUE while the most recent header line was dropped
		foreach (explode("\r\n", $article['header']) as $line) {
			if (!strlen($line) || $line == '.') {
				print("Article $article[id] Contains empty or terminating header line\n");
				continue;
			}
			// A line starting with whitespace is a folded continuation of the
			// previous header; it must share that header's fate, otherwise the
			// leftover text would become part of the preceding kept header.
			if ($line[0] == ' ' || $line[0] == "\t") {
				if (!$dropping) $header[] = $line;
				continue;
			}
			$dropping = FALSE;
			$parts = explode(': ', $line, 2);
			$headername = strtoupper($parts[0]);
			switch ($headername) {
				// Core headers: keep only the first occurrence and remember its value.
				case 'PATH': case 'FROM': case 'NEWSGROUPS': case 'SUBJECT': case 'DATE': case 'MESSAGE-ID': case 'SENDER':
					if (isset($headers[$headername])) {
						print("Article $article[id] Contains duplicate header $headername, removing.\n");
						$headerchanged = TRUE;
						$dropping = TRUE;
						break;
					}
					$header[] = $line;
					// $parts[1] is absent when the line carries no ": " separator.
					if (isset($parts[1])) $headers[$headername] = $parts[1];
					break;
				// Well-known optional headers: kept verbatim.
				case 'ORGANIZATION': case 'LINES':
				case 'MIME-VERSION': case 'CONTENT-TYPE': case 'CONTENT-TRANSFER-ENCODING': case 'USER-AGENT':
				case 'REFERENCES': case 'REPLY-TO': case 'FOLLOWUP-TO': case 'IN-REPLY-TO':
				case 'EXPIRES': case 'CONTROL': case 'DISTRIBUTION': case 'KEYWORDS': case 'SUMMARY':
					$header[] = $line;
					break;
				// Headers that are stripped from the stored article.
				case 'NNTP-POSTING-HOST': case 'X-TRACE': case 'XREF': case 'X-COMPLAINTS-TO':
				case 'NNTP-POSTING-DATE':
					print("Article $article[id] Contains unacceptable header $headername\n");
					$headerchanged = TRUE;
					$dropping = TRUE;
					break;
				// Anything else is kept untouched.
				default:
					$header[] = $line;
					break;
			}
		}

		// Report (but do not repair) dot-only lines inside the body.
		foreach (explode("\r\n", $article['body']) as $line) {
			if ($line == '.') print("Article $article[id] Contains terminating body line\n");
		}

		if (!isset($headers['NEWSGROUPS'])) {
			print("Article $article[id] Missing required Newsgroups header\n");
			continue;
		}
		// Resolve the comma-separated group names to ids; unknown names are ignored.
		$newsgroups = array();
		foreach (explode(',', $headers['NEWSGROUPS']) as $groupname) {
			// Tolerate whitespace around the separators ("a, b").
			$groupname = trim($groupname);
			if (!strlen($groupname)) continue;
			$group = $db->evalRowAssoc('SELECT * FROM `groups` WHERE `name` = ?', $groupname);
			if ($group === FALSE) continue;
			$newsgroups[] = $group['id'];
		}
		if (!count($newsgroups)) {
			print("Article $article[id] No known newsgroups listed\n");
			continue;
		}

		if (!isset($headers['MESSAGE-ID'])) {
			print("Article $article[id] Missing required Message-ID header\n");
			continue;
		}
		$msgid = $headers['MESSAGE-ID'];
		if (strlen($msgid) <= 2 || $msgid[0] != '<' || $msgid[strlen($msgid)-1] != '>') {
			// Only reported; the remaining repairs for this article still run.
			print("Article $article[id] Malformed Message-ID\n");
		} else {
			// The column is compared against the id without its angle brackets.
			$msgid = substr($msgid, 1, -1);
			if ($msgid != $article['messageid']) {
				print("Article $article[id] Message-ID header does not match database, fixing.\n");
				$db->update('UPDATE `messages` SET `messageid` = ? WHERE `id` = ?', array($msgid, $article['id']));
			}
		}

		if ($headerchanged) {
			print("Article $article[id] Updating headers.\n");
			$db->update('UPDATE `messages` SET `header` = ? WHERE `id` = ?', array(implode("\r\n", $header), $article['id']));
		}

		// Create any missing article <-> group links.
		foreach ($newsgroups as $groupid) {
			if (FALSE === $db->evalRow('SELECT * FROM `groupmessages` WHERE `group` = ? AND `message` = ?', array($groupid, $article['id']))) {
				print("Article $article[id] Missing link in group $groupid, fixing.\n");
				$db->insert('INSERT INTO `groupmessages` (`group`, `message`) VALUES (?, ?)', array($groupid, $article['id']));
			}
		}
	}
}