# HG changeset patch
# User Ivo Smits
# Date 1302601295 -7200
# Node ID bc6045ed0b2ef637ec26f2a167ae5ec8042f1655
# Parent  5d62af5270ddc08d6e3230f59d3af0e2ec96105a
Added script to fix and re-index database fields and references

diff -r 5d62af5270dd -r bc6045ed0b2e dbreindex.php
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dbreindex.php	Tue Apr 12 11:41:35 2011 +0200
@@ -0,0 +1,117 @@
+#!/usr/bin/php
+// NOTE(review): the lines that belong here (PHP open tag, database setup, the start of the paging
+// loop and the "if" whose first branch ends with the call below) are missing from this copy of
+// the patch; only the tail of the first query survives. Restore them before applying.
+evalAllAssoc('SELECT * FROM `messages` ORDER BY `id` LIMIT 10');
+	} else {
+		// Fetch the next page after the cursor. ORDER BY is required: without it the database
+		// may return any ten matching rows and the rows with lower ids would be skipped.
+		$articles = $db->evalAllAssoc('SELECT * FROM `messages` WHERE `id` > ? ORDER BY `id` LIMIT 10', $lastposted);
+	}
+	if (!count($articles)) break;
+	foreach ($articles as $article) {
+		// Advance the paging cursor first, so that the "continue" statements below can never
+		// leave it behind and make the loop fetch the same page again.
+		if ($article['id'] > $lastposted) $lastposted = $article['id'];
+		// $header collects the header lines to keep; $headers maps the upper-cased names of
+		// the headers that may appear only once to their values.
+		$headers = array();
+		$header = array();
+		$headerchanged = FALSE;
+		// Whether the most recent header was kept; folded continuation lines share its fate.
+		$keptprevious = TRUE;
+		foreach (explode("\r\n", $article['header']) as $line) {
+			if (!strlen($line) || $line === '.') {
+				// Only reported: the line is left out of $header, but the stored header is
+				// rewritten only when some other check sets $headerchanged.
+				print("Article $article[id] Contains empty or terminating header line\n");
+				continue;
+			}
+			if ($line[0] === ' ' || $line[0] === "\t") {
+				// Folded continuation of the previous header: keep or drop it together with
+				// that header instead of letting it attach to an unrelated one.
+				if ($keptprevious) $header[] = $line;
+				continue;
+			}
+			$parts = explode(': ', $line, 2);
+			$headername = strtoupper($parts[0]);
+			$keptprevious = TRUE;
+			switch ($headername) {
+				// Headers that may appear only once; the first occurrence wins.
+				case 'PATH': case 'FROM': case 'NEWSGROUPS': case 'SUBJECT': case 'DATE': case 'MESSAGE-ID': case 'SENDER':
+					if (isset($headers[$headername])) {
+						print("Article $article[id] Contains duplicate header $headername, removing.\n");
+						$headerchanged = TRUE;
+						$keptprevious = FALSE;
+						break;
+					}
+					$header[] = $line;
+					// A line without ": " has no value part; record an empty string instead.
+					$headers[$headername] = isset($parts[1]) ? $parts[1] : '';
+					break;
+				// Known optional headers, kept unchanged.
+				case 'ORGANIZATION': case 'LINES':
+				case 'MIME-VERSION': case 'CONTENT-TYPE': case 'CONTENT-TRANSFER-ENCODING': case 'USER-AGENT':
+				case 'REFERENCES': case 'REPLY-TO': case 'FOLLOWUP-TO': case 'IN-REPLY-TO':
+				case 'EXPIRES': case 'CONTROL': case 'DISTRIBUTION': case 'KEYWORDS': case 'SUMMARY':
+					$header[] = $line;
+					break;
+				// Headers reported as unacceptable and left out of the rewritten header.
+				case 'NNTP-POSTING-HOST': case 'X-TRACE': case 'XREF': case 'X-COMPLAINTS-TO':
+				case 'NNTP-POSTING-DATE':
+					print("Article $article[id] Contains unacceptable header $headername\n");
+					$headerchanged = TRUE;
+					$keptprevious = FALSE;
+					break;
+				default:
+					$header[] = $line;
+					break;
+			}
+		}
+		// A lone "." line in the body is only reported, never changed.
+		foreach (explode("\r\n", $article['body']) as $line) {
+			if ($line === '.') print("Article $article[id] Contains terminating body line\n");
+		}
+		if (!isset($headers['NEWSGROUPS'])) {
+			print("Article $article[id] Missing required Newsgroups header\n");
+			continue;
+		}
+		// Resolve the listed group names to ids; names not found in `groups` are ignored.
+		$newsgroups = array();
+		foreach (explode(',', $headers['NEWSGROUPS']) as $groupname) {
+			$group = $db->evalRowAssoc('SELECT * FROM `groups` WHERE `name` = ?', trim($groupname));
+			if ($group === FALSE) continue;
+			$newsgroups[] = $group['id'];
+		}
+		if (!count($newsgroups)) {
+			print("Article $article[id] No known newsgroups listed\n");
+			continue;
+		}
+		if (!isset($headers['MESSAGE-ID'])) {
+			print("Article $article[id] Missing required Message-ID header\n");
+			continue;
+		}
+		$msgid = $headers['MESSAGE-ID'];
+		if (strlen($msgid) <= 2 || $msgid[0] != '<' || $msgid[strlen($msgid)-1] != '>') {
+			print("Article $article[id] Malformed Message-ID\n");
+		} else {
+			// The `messageid` column is compared against the id without its angle brackets.
+			$msgid = substr($msgid, 1, -1);
+			if ($msgid !== $article['messageid']) {
+				print("Article $article[id] Message-ID header does not match database, fixing.\n");
+				$db->update('UPDATE `messages` SET `messageid` = ? WHERE `id` = ?', array($msgid, $article['id']));
+			}
+		}
+		if ($headerchanged) {
+			print("Article $article[id] Updating headers.\n");
+			$db->update('UPDATE `messages` SET `header` = ? WHERE `id` = ?', array(implode("\r\n", $header), $article['id']));
+		}
+		// Make sure the article is linked to every known group named in its Newsgroups header.
+		foreach ($newsgroups as $groupid) {
+			if (FALSE === $db->evalRow('SELECT * FROM `groupmessages` WHERE `group` = ? AND `message` = ?', array($groupid, $article['id']))) {
+				print("Article $article[id] Missing link in group $groupid, fixing.\n");
+				$db->insert('INSERT INTO `groupmessages` (`group`, `message`) VALUES (?, ?)', array($groupid, $article['id']));
+			}
+		}
+	}
+}
diff -r 5d62af5270dd -r bc6045ed0b2e todo.txt
--- a/todo.txt	Tue Apr 12 02:23:22 2011 +0200
+++ b/todo.txt	Tue Apr 12 11:41:35 2011 +0200
@@ -1,4 +1,4 @@
 - Make sure that group article numbers are never reused, not even if the last one is deleted (groupmessages table)
 - Support IHAVE command to speed up synchronization
-- Handle received cross-posted messages (according to Newsgroups header)
-- Add script for re-indexing articles (based on Newsgroups header)
+- Use STAT before POSTing articles
+- Allow to store (part of) article data in file