Mercurial > hg > pnewss
annotate dbreindex.php @ 7:01dc7eeaf5df
Added some more checks to article data, added some commands, added 'dot stuffing' for block transfers
author | Ivo Smits <Ivo@UCIS.nl> |
---|---|
date | Tue, 12 Apr 2011 14:10:41 +0200 |
parents | bc6045ed0b2e |
children | 005339a1b2ce |
rev | line source |
---|---|
#!/usr/bin/php
<?php
/*
 * Database repair / re-index script.
 *
 * Walks the `messages` table in batches of 10 rows (ordered by `id`) and, for
 * each article:
 *   - drops empty / terminating ('.') header lines and strips stray CR, LF and
 *     NUL characters from header lines;
 *   - drops repeated occurrences of headers that may appear only once and
 *     headers listed as unacceptable;
 *   - reports (but does not modify) body lines that are a lone '.' or contain
 *     stray CR, LF or NUL characters;
 *   - synchronises the `messageid` column with the Message-ID header;
 *   - writes the cleaned header back when anything was changed;
 *   - inserts missing `groupmessages` rows for every known group named in the
 *     Newsgroups header.
 *
 * Articles without a Newsgroups header, without any known newsgroup, or
 * without a Message-ID header are reported and left untouched.
 *
 * NOTE(review): $db is not defined in this file; it is presumably set up by
 * common.php - confirm.
 */
chdir(__DIR__);
require_once './common.php';

// Highest message id processed so far; NULL until the first article is seen.
$lastposted = NULL;
while (TRUE) {
	// ORDER BY `id` makes the "id > last seen" paging deterministic; without
	// it the row order is unspecified and rows could be skipped.
	if ($lastposted === NULL) {
		$articles = $db->evalAllAssoc('SELECT * FROM `messages` ORDER BY `id` LIMIT 10');
	} else {
		$articles = $db->evalAllAssoc('SELECT * FROM `messages` WHERE `id` > ? ORDER BY `id` LIMIT 10', $lastposted);
	}
	if (!count($articles)) break;
	foreach ($articles as $article) {
		// Advance the paging cursor before any validation. The checks below
		// bail out with `continue`; if the cursor were only advanced at the
		// end, a batch consisting solely of rejected articles would be
		// fetched again and again.
		if ($lastposted === NULL || $article['id'] > $lastposted) $lastposted = $article['id'];

		$headers = array();      // upper-cased name => value, single-occurrence headers only
		$header = array();       // header lines that are kept, in original order
		$headerchanged = FALSE;  // TRUE when $header differs from the stored header
		foreach (explode("\r\n", $article['header']) as $line) {
			// Strip stray characters first so that a line consisting only of
			// them is caught by the empty-line check below instead of being
			// written back as an empty header line.
			// strpos() returns 0 for a match at the start of the string, so
			// every test must compare against FALSE explicitly.
			if (strpos($line, "\r") !== FALSE || strpos($line, "\n") !== FALSE || strpos($line, "\0") !== FALSE) {
				print("Article $article[id] Contains invalid newline or NUL character in header, fixing.\n");
				$line = str_replace(array("\r","\n","\0"), '', $line);
				$headerchanged = TRUE;
			}
			if (!strlen($line) || $line === '.') {
				print("Article $article[id] Contains empty or terminating header line, fixing.\n");
				$headerchanged = TRUE;
				continue;
			}
			$parts = explode(': ', $line, 2);
			$headername = strtoupper($parts[0]);
			// A line without a ': ' separator has no value part.
			$headervalue = isset($parts[1]) ? $parts[1] : '';
			switch ($headername) {
				// Headers that may occur only once: keep the first, drop repeats.
				case 'PATH': case 'FROM': case 'NEWSGROUPS': case 'SUBJECT': case 'DATE': case 'MESSAGE-ID': case 'SENDER':
					if (isset($headers[$headername])) {
						print("Article $article[id] Contains duplicate header $headername, removing.\n");
						$headerchanged = TRUE;
						break;
					}
					$header[] = $line;
					$headers[$headername] = $headervalue;
					break;
				// Known headers that are kept as-is.
				case 'ORGANIZATION': case 'LINES':
				case 'MIME-VERSION': case 'CONTENT-TYPE': case 'CONTENT-TRANSFER-ENCODING': case 'USER-AGENT':
				case 'REFERENCES': case 'REPLY-TO': case 'FOLLOWUP-TO': case 'IN-REPLY-TO':
				case 'EXPIRES': case 'CONTROL': case 'DISTRIBUTION': case 'KEYWORDS': case 'SUMMARY':
					$header[] = $line;
					break;
				// Headers that are removed from the stored article.
				case 'NNTP-POSTING-HOST': case 'X-TRACE': case 'XREF': case 'X-COMPLAINTS-TO':
				case 'NNTP-POSTING-DATE':
					print("Article $article[id] Contains unacceptable header $headername\n");
					$headerchanged = TRUE;
					break;
				// Anything else is kept as-is.
				default:
					$header[] = $line;
					break;
			}
		}

		// The body is only checked and reported on, never rewritten.
		foreach (explode("\r\n", $article['body']) as $line) {
			if ($line === '.') {
				print("Article $article[id] Contains terminating body line\n");
			} else if (strpos($line, "\r") !== FALSE || strpos($line, "\n") !== FALSE || strpos($line, "\0") !== FALSE) {
				print("Article $article[id] Contains invalid newline or NUL character in body\n");
			}
		}

		if (!isset($headers['NEWSGROUPS'])) {
			print("Article $article[id] Missing required Newsgroups header\n");
			continue;
		}
		// Resolve the listed group names to group ids; unknown names are ignored.
		$newsgroups = array();
		foreach (explode(',', $headers['NEWSGROUPS']) as $groupname) {
			$group = $db->evalRowAssoc('SELECT * FROM `groups` WHERE `name` = ?', $groupname);
			if ($group === FALSE) continue;
			$newsgroups[] = $group['id'];
		}
		if (!count($newsgroups)) {
			print("Article $article[id] No known newsgroups listed\n");
			continue;
		}

		if (!isset($headers['MESSAGE-ID'])) {
			print("Article $article[id] Missing required Message-ID header\n");
			continue;
		}
		$msgid = $headers['MESSAGE-ID'];
		if (strlen($msgid) <= 2 || $msgid[0] != '<' || $msgid[strlen($msgid)-1] != '>') {
			print("Article $article[id] Malformed Message-ID\n");
		} else {
			// The database column holds the id without the surrounding <>.
			$msgid = substr($msgid, 1, -1);
			// Strict string comparison, so numeric-looking ids are not
			// considered equal by PHP's loose comparison rules.
			if ($msgid !== (string)$article['messageid']) {
				print("Article $article[id] Message-ID header does not match database, fixing.\n");
				$db->update('UPDATE `messages` SET `messageid` = ? WHERE `id` = ?', array($msgid, $article['id']));
			}
		}

		if ($headerchanged) {
			print("Article $article[id] Updating headers.\n");
			$db->update('UPDATE `messages` SET `header` = ? WHERE `id` = ?', array(implode("\r\n", $header), $article['id']));
		}

		// Make sure the article is linked to every known group it was posted to.
		foreach ($newsgroups as $groupid) {
			if (FALSE === $db->evalRow('SELECT * FROM `groupmessages` WHERE `group` = ? AND `message` = ?', array($groupid, $article['id']))) {
				print("Article $article[id] Missing link in group $groupid, fixing.\n");
				$db->insert('INSERT INTO `groupmessages` (`group`, `message`) VALUES (?, ?)', array($groupid, $article['id']));
			}
		}
	}
}