Mercurial > hg > pnewss
annotate dbreindex.php @ 11:e0807e0b1a67
Added common.php, updated server synchronization to continue after a connection failure
author | Ivo Smits <Ivo@UCIS.nl> |
---|---|
date | Sat, 18 Jun 2011 15:59:11 +0200 |
parents | 005339a1b2ce |
children |
rev | line source |
---|---|
6
bc6045ed0b2e
Added script to fix and re-index database fields and references
Ivo Smits <Ivo@UCIS.nl>
parents:
diff
changeset
|
1 #!/usr/bin/php |
bc6045ed0b2e
Added script to fix and re-index database fields and references
Ivo Smits <Ivo@UCIS.nl>
parents:
diff
changeset
|
2 <?php |
8 | 3 /* Copyright 2010 Ivo Smits <Ivo@UCIS.nl>. All rights reserved. |
4 Redistribution and use in source and binary forms, with or without modification, are | |
5 permitted provided that the following conditions are met: | |
6 | |
7 1. Redistributions of source code must retain the above copyright notice, this list of | |
8 conditions and the following disclaimer. | |
9 | |
10 2. Redistributions in binary form must reproduce the above copyright notice, this list | |
11 of conditions and the following disclaimer in the documentation and/or other materials | |
12 provided with the distribution. | |
13 | |
14 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | |
15 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | |
16 FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR | |
17 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
18 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
19 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | |
20 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
21 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF | |
22 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
23 | |
24 The views and conclusions contained in the software and documentation are those of the | |
25 authors and should not be interpreted as representing official policies, either expressed | |
26 or implied, of Ivo Smits.*/ | |
27 | |
6
bc6045ed0b2e
Added script to fix and re-index database fields and references
Ivo Smits <Ivo@UCIS.nl>
parents:
diff
changeset
|
chdir(__DIR__);
require_once './common.php';

/* Database maintenance script.
 *
 * Walks over every row of `messages` in batches of 10 (keyset pagination on `id`) and, per article:
 *  - removes empty / terminating ('.') header lines and strips CR, LF and NUL characters from header lines;
 *  - removes duplicates of the single-instance headers and drops server-generated headers;
 *  - reports (but does not modify) suspicious body lines;
 *  - synchronises the `messageid` column with the Message-ID header;
 *  - writes the cleaned header back if anything changed;
 *  - inserts missing `groupmessages` links for every known group named in the Newsgroups header.
 *
 * NOTE(review): $db is not defined in this file; presumably it is created by common.php - confirm.
 */

$lastposted = NULL;
while (TRUE) {
    // ORDER BY is required here: without it the server may return any 10 matching rows,
    // and advancing the cursor to the highest id seen would then skip the rows in between.
    if ($lastposted === NULL) {
        $articles = $db->evalAllAssoc('SELECT * FROM `messages` ORDER BY `id` LIMIT 10');
    } else {
        $articles = $db->evalAllAssoc('SELECT * FROM `messages` WHERE `id` > ? ORDER BY `id` LIMIT 10', $lastposted);
    }
    if (!count($articles)) break;

    foreach ($articles as $article) {
        // Advance the cursor before any validation. The checks below bail out with `continue`,
        // and a batch consisting only of such articles would otherwise be fetched forever.
        if ($lastposted === NULL || $article['id'] > $lastposted) $lastposted = $article['id'];

        $headers = array();       // upper-cased name => value, for the single-instance headers only
        $header = array();        // header lines that are kept, in their original order
        $headerchanged = FALSE;   // TRUE once the stored header needs to be rewritten

        foreach (explode("\r\n", $article['header']) as $line) {
            // Strip stray control characters first, so that a line consisting only of such
            // characters is caught by the empty-line check below instead of being written back.
            if (strpos($line, "\r") !== FALSE || strpos($line, "\n") !== FALSE || strpos($line, "\0") !== FALSE) {
                print("Article $article[id] Contains invalid newline or NUL character in header, fixing.\n");
                $line = str_replace(array("\r", "\n", "\0"), '', $line);
                $headerchanged = TRUE;
            }
            if (!strlen($line) || $line === '.') {
                print("Article $article[id] Contains empty or terminating header line, fixing.\n");
                $headerchanged = TRUE;
                continue;
            }

            $parts = explode(': ', $line, 2);
            $headername = strtoupper($parts[0]);
            // A line without ': ' has no value part; use an empty string instead of an undefined index.
            $headervalue = isset($parts[1]) ? $parts[1] : '';

            switch ($headername) {
                // Headers that may appear only once: keep the first occurrence, drop the rest.
                case 'PATH': case 'FROM': case 'NEWSGROUPS': case 'SUBJECT': case 'DATE': case 'MESSAGE-ID': case 'SENDER':
                    if (isset($headers[$headername])) {
                        print("Article $article[id] Contains duplicate header $headername, removing.\n");
                        $headerchanged = TRUE;
                        break;
                    }
                    $header[] = $line;
                    $headers[$headername] = $headervalue;
                    break;
                // Known headers that are passed through untouched.
                case 'ORGANIZATION': case 'LINES':
                case 'MIME-VERSION': case 'CONTENT-TYPE': case 'CONTENT-TRANSFER-ENCODING': case 'USER-AGENT':
                case 'REFERENCES': case 'REPLY-TO': case 'FOLLOWUP-TO': case 'IN-REPLY-TO':
                case 'EXPIRES': case 'CONTROL': case 'DISTRIBUTION': case 'KEYWORDS': case 'SUMMARY':
                    $header[] = $line;
                    break;
                // Headers that are not accepted in stored articles: dropped from the rewritten header.
                case 'NNTP-POSTING-HOST': case 'X-TRACE': case 'XREF': case 'X-COMPLAINTS-TO':
                case 'NNTP-POSTING-DATE':
                    print("Article $article[id] Contains unacceptable header $headername\n");
                    $headerchanged = TRUE;
                    break;
                // Anything else (including lines without a recognisable name) is kept as is.
                default:
                    $header[] = $line;
                    break;
            }
        }

        // The body is only inspected; problems are reported but nothing is rewritten.
        foreach (explode("\r\n", $article['body']) as $line) {
            if ($line === '.') {
                print("Article $article[id] Contains terminating body line\n");
            } else if (strpos($line, "\r") !== FALSE || strpos($line, "\n") !== FALSE || strpos($line, "\0") !== FALSE) {
                print("Article $article[id] Contains invalid newline or NUL character in body\n");
            }
        }

        if (!isset($headers['NEWSGROUPS'])) {
            print("Article $article[id] Missing required Newsgroups header\n");
            continue;
        }

        // Resolve the listed group names to group ids; names unknown to this server are ignored.
        $newsgroups = array();
        foreach (explode(',', $headers['NEWSGROUPS']) as $groupname) {
            // Whitespace around the commas is not part of the group name.
            $groupname = trim($groupname);
            if (!strlen($groupname)) continue;
            $group = $db->evalRowAssoc('SELECT * FROM `groups` WHERE `name` = ?', $groupname);
            if ($group === FALSE) continue;
            $newsgroups[] = $group['id'];
        }
        if (!count($newsgroups)) {
            print("Article $article[id] No known newsgroups listed\n");
            continue;
        }

        if (!isset($headers['MESSAGE-ID'])) {
            print("Article $article[id] Missing required Message-ID header\n");
            continue;
        }
        $msgid = $headers['MESSAGE-ID'];
        if (strlen($msgid) <= 2 || $msgid[0] != '<' || $msgid[strlen($msgid) - 1] != '>') {
            print("Article $article[id] Malformed Message-ID\n");
        } else {
            // The `messageid` column is compared against the id without the surrounding angle brackets.
            $msgid = substr($msgid, 1, -1);
            if ($msgid != $article['messageid']) {
                print("Article $article[id] Message-ID header does not match database, fixing.\n");
                $db->update('UPDATE `messages` SET `messageid` = ? WHERE `id` = ?', array($msgid, $article['id']));
            }
        }

        if ($headerchanged) {
            print("Article $article[id] Updating headers.\n");
            $db->update('UPDATE `messages` SET `header` = ? WHERE `id` = ?', array(implode("\r\n", $header), $article['id']));
        }

        // Make sure the article is linked to every known group it was posted to.
        foreach ($newsgroups as $groupid) {
            if (FALSE === $db->evalRow('SELECT * FROM `groupmessages` WHERE `group` = ? AND `message` = ?', array($groupid, $article['id']))) {
                print("Article $article[id] Missing link in group $groupid, fixing.\n");
                $db->insert('INSERT INTO `groupmessages` (`group`, `message`) VALUES (?, ?)', array($groupid, $article['id']));
            }
        }
    }
}