annotate dbreindex.php @ 6:bc6045ed0b2e

Added script to fix and re-index database fields and references
author Ivo Smits <Ivo@UCIS.nl>
date Tue, 12 Apr 2011 11:41:35 +0200
parents
children 01dc7eeaf5df
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
bc6045ed0b2e Added script to fix and re-index database fields and references
Ivo Smits <Ivo@UCIS.nl>
parents:
diff changeset
#!/usr/bin/php
<?php
/*
 * Database re-index / repair script.
 *
 * Walks the `messages` table in batches of 10 rows and, for every article:
 *   - reports empty or terminating ('.') lines in the stored header and body;
 *   - drops duplicate occurrences of the headers that may appear only once
 *     (Path, From, Newsgroups, Subject, Date, Message-ID, Sender);
 *   - drops the headers this script treats as unacceptable
 *     (NNTP-Posting-Host, X-Trace, Xref, X-Complaints-To, NNTP-Posting-Date);
 *   - rewrites the stored `header` column when anything was dropped;
 *   - synchronises the `messageid` column with the Message-ID header;
 *   - inserts missing `groupmessages` rows for every known group listed in
 *     the Newsgroups header.
 *
 * Relies on $db being set up by common.php.
 */
chdir(__DIR__);
require_once './common.php';

// Highest article id handled so far; NULL until the first batch was fetched.
$lastposted = NULL;
while (TRUE) {
	// ORDER BY `id` makes the paging deterministic: without it the database is
	// free to return any 10 matching rows, and moving the cursor to the largest
	// id seen could silently skip articles with smaller ids.
	if ($lastposted === NULL) {
		$articles = $db->evalAllAssoc('SELECT * FROM `messages` ORDER BY `id` LIMIT 10');
	} else {
		$articles = $db->evalAllAssoc('SELECT * FROM `messages` WHERE `id` > ? ORDER BY `id` LIMIT 10', $lastposted);
	}
	if (!count($articles)) break;
	foreach ($articles as $article) {
		// Advance the cursor before any of the `continue` statements below can
		// skip this article. Otherwise a batch consisting only of skipped
		// articles would be fetched again and again, forever.
		if ($lastposted === NULL || $article['id'] > $lastposted) $lastposted = $article['id'];

		$headers = array();       // upper-cased name => value, for the unique headers only
		$header = array();        // header lines that are kept, in original order
		$headerchanged = FALSE;   // TRUE once at least one line was dropped on purpose
		foreach (explode("\r\n", $article['header']) as $line) {
			if (!strlen($line) || $line == '.') {
				// Only reported: the line is left out of $header, but on its own
				// it does not trigger a rewrite of the stored header.
				print("Article $article[id] Contains empty or terminating header line\n");
				continue;
			}
			$parts = explode(': ', $line, 2);
			$headername = strtoupper($parts[0]);
			switch ($headername) {
				// Headers that may appear only once: keep the first, drop the rest.
				case 'PATH': case 'FROM': case 'NEWSGROUPS': case 'SUBJECT': case 'DATE': case 'MESSAGE-ID': case 'SENDER':
					if (isset($headers[$headername])) {
						print("Article $article[id] Contains duplicate header $headername, removing.\n");
						$headerchanged = TRUE;
						break;
					}
					$header[] = $line;
					// A line without the ': ' separator has no value part. Leave
					// the header unregistered so it is treated as missing below,
					// instead of reading an undefined array index.
					if (isset($parts[1])) $headers[$headername] = $parts[1];
					break;
				// Known optional headers: kept as they are.
				case 'ORGANIZATION': case 'LINES':
				case 'MIME-VERSION': case 'CONTENT-TYPE': case 'CONTENT-TRANSFER-ENCODING': case 'USER-AGENT':
				case 'REFERENCES': case 'REPLY-TO': case 'FOLLOWUP-TO': case 'IN-REPLY-TO':
				case 'EXPIRES': case 'CONTROL': case 'DISTRIBUTION': case 'KEYWORDS': case 'SUMMARY':
					$header[] = $line;
					break;
				// Headers this script refuses to keep: dropped from the stored header.
				case 'NNTP-POSTING-HOST': case 'X-TRACE': case 'XREF': case 'X-COMPLAINTS-TO':
				case 'NNTP-POSTING-DATE':
					print("Article $article[id] Contains unacceptable header $headername\n");
					$headerchanged = TRUE;
					break;
				// Anything else is kept untouched.
				default:
					$header[] = $line;
					break;
			}
		}

		// The body is only inspected; a terminating line is reported, never fixed.
		foreach (explode("\r\n", $article['body']) as $line) {
			if ($line == '.') print("Article $article[id] Contains terminating body line\n");
		}

		if (!isset($headers['NEWSGROUPS'])) {
			print("Article $article[id] Missing required Newsgroups header\n");
			continue;
		}

		// Resolve the listed group names to group ids; unknown names are ignored.
		$newsgroups = array();
		foreach (explode(',', $headers['NEWSGROUPS']) as $groupname) {
			$group = $db->evalRowAssoc('SELECT * FROM `groups` WHERE `name` = ?', $groupname);
			if ($group === FALSE) continue;
			$newsgroups[] = $group['id'];
		}
		if (!count($newsgroups)) {
			print("Article $article[id] No known newsgroups listed\n");
			continue;
		}

		if (!isset($headers['MESSAGE-ID'])) {
			print("Article $article[id] Missing required Message-ID header\n");
			continue;
		}
		$msgid = $headers['MESSAGE-ID'];
		if (strlen($msgid) <= 2 || $msgid[0] != '<' || $msgid[strlen($msgid)-1] != '>') {
			// Reported only; the remaining repairs for this article still run.
			print("Article $article[id] Malformed Message-ID\n");
		} else {
			// The `messageid` column is compared against the id without its angle brackets.
			$msgid = substr($msgid, 1, -1);
			if ($msgid != $article['messageid']) {
				print("Article $article[id] Message-ID header does not match database, fixing.\n");
				$db->update('UPDATE `messages` SET `messageid` = ? WHERE `id` = ?', array($msgid, $article['id']));
			}
		}

		if ($headerchanged) {
			print("Article $article[id] Updating headers.\n");
			$db->update('UPDATE `messages` SET `header` = ? WHERE `id` = ?', array(implode("\r\n", $header), $article['id']));
		}

		// Make sure the article is linked to every known group it was posted to.
		foreach ($newsgroups as $groupid) {
			if (FALSE === $db->evalRow('SELECT * FROM `groupmessages` WHERE `group` = ? AND `message` = ?', array($groupid, $article['id']))) {
				print("Article $article[id] Missing link in group $groupid, fixing.\n");
				$db->insert('INSERT INTO `groupmessages` (`group`, `message`) VALUES (?, ?)', array($groupid, $article['id']));
			}
		}
	}
}