annotate dbreindex.php @ 7:01dc7eeaf5df

Added some more checks to article data, added some commands, added 'dot stuffing' for block transfers
author Ivo Smits <Ivo@UCIS.nl>
date Tue, 12 Apr 2011 14:10:41 +0200
parents bc6045ed0b2e
children 005339a1b2ce
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
bc6045ed0b2e Added script to fix and re-index database fields and references
Ivo Smits <Ivo@UCIS.nl>
parents:
diff changeset
1 #!/usr/bin/php
bc6045ed0b2e Added script to fix and re-index database fields and references
Ivo Smits <Ivo@UCIS.nl>
parents:
diff changeset
2 <?php
bc6045ed0b2e Added script to fix and re-index database fields and references
Ivo Smits <Ivo@UCIS.nl>
parents:
diff changeset
chdir(__DIR__);
require_once './common.php';

/*
 * Database maintenance script.
 *
 * Walks the `messages` table in batches of 10, ordered by `id`, and for each
 * article:
 *   - sanitises the stored header block (strips stray CR/LF/NUL bytes, drops
 *     empty or "." lines, drops duplicates of headers that may occur only once,
 *     drops headers that are not acceptable in stored articles) and writes the
 *     cleaned header back when anything changed;
 *   - reports (but does not modify) suspicious body lines;
 *   - makes sure the `messageid` column matches the Message-ID header;
 *   - makes sure a `groupmessages` row exists for every known group listed in
 *     the Newsgroups header.
 *
 * Relies on $db being provided by common.php.
 */

$lastposted = NULL;
while (TRUE) {
    // Page through the table in ascending id order. The explicit ORDER BY is
    // required for keyset pagination: without it the database may return the
    // rows in any order and rows could be skipped between batches.
    if ($lastposted === NULL) {
        $articles = $db->evalAllAssoc('SELECT * FROM `messages` ORDER BY `id` LIMIT 10');
    } else {
        $articles = $db->evalAllAssoc('SELECT * FROM `messages` WHERE `id` > ? ORDER BY `id` LIMIT 10', $lastposted);
    }
    // Stop on an empty batch (a falsy non-array result also ends the loop
    // instead of spinning forever).
    if (!$articles) break;

    foreach ($articles as $article) {
        // Advance the cursor before any validation: the checks below bail out
        // with `continue`, and a batch consisting only of rejected articles
        // must not cause the same batch to be fetched again forever.
        if ($lastposted === NULL || $article['id'] > $lastposted) $lastposted = $article['id'];

        $headers = array();      // upper-cased name => value, for headers that may occur only once
        $header = array();       // cleaned header lines, in original order
        $headerchanged = FALSE;  // TRUE when $header differs from the stored header
        $keepfolded = TRUE;      // whether continuation lines of the current header are kept

        foreach (explode("\r\n", $article['header']) as $line) {
            // Sanitise first, so that a line consisting only of stray control
            // characters is caught by the empty-line check below.
            if (strpos($line, "\r") !== FALSE || strpos($line, "\n") !== FALSE || strpos($line, "\0") !== FALSE) {
                print("Article $article[id] Contains invalid newline or NUL character in header, fixing.\n");
                $line = str_replace(array("\r","\n","\0"), '', $line);
                $headerchanged = TRUE;
            }
            if (!strlen($line) || $line === '.') {
                print("Article $article[id] Contains empty or terminating header line, fixing.\n");
                $headerchanged = TRUE;
                continue;
            }
            // A line starting with whitespace continues the previous (folded)
            // header; it shares the fate of the header it belongs to.
            if ($line[0] === ' ' || $line[0] === "\t") {
                if ($keepfolded) {
                    $header[] = $line;
                } else {
                    $headerchanged = TRUE;
                }
                continue;
            }

            $parts = explode(': ', $line, 2);
            $headername = strtoupper($parts[0]);
            // A header line without a ": " separator has no value part.
            $headervalue = isset($parts[1]) ? $parts[1] : '';
            $keepfolded = TRUE;

            switch ($headername) {
                // Headers that may appear only once; the first occurrence wins.
                case 'PATH': case 'FROM': case 'NEWSGROUPS': case 'SUBJECT': case 'DATE': case 'MESSAGE-ID': case 'SENDER':
                    if (isset($headers[$headername])) {
                        print("Article $article[id] Contains duplicate header $headername, removing.\n");
                        $headerchanged = TRUE;
                        $keepfolded = FALSE;
                        break;
                    }
                    $header[] = $line;
                    $headers[$headername] = $headervalue;
                    break;
                // Known optional headers, kept as-is.
                case 'ORGANIZATION': case 'LINES':
                case 'MIME-VERSION': case 'CONTENT-TYPE': case 'CONTENT-TRANSFER-ENCODING': case 'USER-AGENT':
                case 'REFERENCES': case 'REPLY-TO': case 'FOLLOWUP-TO': case 'IN-REPLY-TO':
                case 'EXPIRES': case 'CONTROL': case 'DISTRIBUTION': case 'KEYWORDS': case 'SUMMARY':
                    $header[] = $line;
                    break;
                // Headers that are removed from stored articles.
                case 'NNTP-POSTING-HOST': case 'X-TRACE': case 'XREF': case 'X-COMPLAINTS-TO':
                case 'NNTP-POSTING-DATE':
                    print("Article $article[id] Contains unacceptable header $headername\n");
                    $headerchanged = TRUE;
                    $keepfolded = FALSE;
                    break;
                // Anything else is kept untouched.
                default:
                    $header[] = $line;
                    break;
            }
        }

        // Body problems are only reported; the stored body is not modified.
        foreach (explode("\r\n", $article['body']) as $line) {
            if ($line === '.') {
                print("Article $article[id] Contains terminating body line\n");
            } else if (strpos($line, "\r") !== FALSE || strpos($line, "\n") !== FALSE || strpos($line, "\0") !== FALSE) {
                print("Article $article[id] Contains invalid newline or NUL character in body\n");
            }
        }

        if (!isset($headers['NEWSGROUPS'])) {
            print("Article $article[id] Missing required Newsgroups header\n");
            continue;
        }
        // Resolve the listed group names to group ids; unknown groups are ignored.
        $newsgroups = array();
        foreach (explode(',', $headers['NEWSGROUPS']) as $groupname) {
            // Tolerate whitespace around the commas ("a, b").
            $groupname = trim($groupname);
            if ($groupname === '') continue;
            $group = $db->evalRowAssoc('SELECT * FROM `groups` WHERE `name` = ?', $groupname);
            if ($group === FALSE) continue;
            $newsgroups[] = $group['id'];
        }
        if (!count($newsgroups)) {
            print("Article $article[id] No known newsgroups listed\n");
            continue;
        }

        if (!isset($headers['MESSAGE-ID'])) {
            print("Article $article[id] Missing required Message-ID header\n");
            continue;
        }
        $msgid = $headers['MESSAGE-ID'];
        if (strlen($msgid) <= 2 || $msgid[0] != '<' || $msgid[strlen($msgid)-1] != '>') {
            // Reported only; the article is still re-indexed below.
            print("Article $article[id] Malformed Message-ID\n");
        } else {
            // The database column stores the id without the angle brackets.
            $msgid = substr($msgid, 1, -1);
            if ($msgid != $article['messageid']) {
                print("Article $article[id] Message-ID header does not match database, fixing.\n");
                $db->update('UPDATE `messages` SET `messageid` = ? WHERE `id` = ?', array($msgid, $article['id']));
            }
        }

        if ($headerchanged) {
            print("Article $article[id] Updating headers.\n");
            $db->update('UPDATE `messages` SET `header` = ? WHERE `id` = ?', array(implode("\r\n", $header), $article['id']));
        }

        // Re-create any missing article <-> group links.
        foreach ($newsgroups as $groupid) {
            if (FALSE === $db->evalRow('SELECT * FROM `groupmessages` WHERE `group` = ? AND `message` = ?', array($groupid, $article['id']))) {
                print("Article $article[id] Missing link in group $groupid, fixing.\n");
                $db->insert('INSERT INTO `groupmessages` (`group`, `message`) VALUES (?, ?)', array($groupid, $article['id']));
            }
        }
    }
}