Mercurial > hg > pnewss
annotate dbreindex.php @ 6:bc6045ed0b2e
Added script to fix and re-index database fields and references
author | Ivo Smits <Ivo@UCIS.nl> |
---|---|
date | Tue, 12 Apr 2011 11:41:35 +0200 |
parents | |
children | 01dc7eeaf5df |
rev | line source |
---|---|
6
bc6045ed0b2e
Added script to fix and re-index database fields and references
Ivo Smits <Ivo@UCIS.nl>
parents:
diff
changeset
|
#!/usr/bin/php
<?php
/*
 * Database consistency check / re-index script.
 *
 * Walks the `messages` table in batches of 10 (keyset pagination on `id`) and,
 * for every article:
 *   - reports empty or terminating (".") lines in the stored header and body;
 *   - removes duplicate occurrences of headers that may appear only once and
 *     removes server-generated headers (NNTP-Posting-Host, Xref, ...), then
 *     rewrites `messages`.`header` if anything was removed;
 *   - checks the Newsgroups and Message-ID headers, and fixes
 *     `messages`.`messageid` when it disagrees with the Message-ID header;
 *   - inserts missing `groupmessages` rows linking the article to every
 *     known group named in its Newsgroups header.
 *
 * $db is expected to be set up by common.php (not visible here); its
 * evalAllAssoc/evalRowAssoc/evalRow/update/insert methods are used exactly as
 * the original script used them.
 */
chdir(__DIR__);
require_once './common.php';

$lastposted = NULL; // highest article id handled so far (pagination cursor)
while (TRUE) {
	// Explicit ORDER BY: without it the database may return rows in any order,
	// and paging on "id > last seen id" could silently skip articles.
	if ($lastposted === NULL) {
		$articles = $db->evalAllAssoc('SELECT * FROM `messages` ORDER BY `id` LIMIT 10');
	} else {
		$articles = $db->evalAllAssoc('SELECT * FROM `messages` WHERE `id` > ? ORDER BY `id` LIMIT 10', $lastposted);
	}
	if (!count($articles)) break;
	foreach ($articles as $article) {
		// Advance the cursor before any validation: the checks below bail out
		// with "continue", and a batch consisting only of rejected articles
		// must not be fetched again (that would loop forever).
		if ($lastposted === NULL || $article['id'] > $lastposted) $lastposted = $article['id'];

		$headers = array();      // upper-cased name => value, for headers that may occur only once
		$header = array();       // header lines that are kept, in original order
		$headerchanged = FALSE;  // TRUE once any header line has been dropped
		$keepfolded = TRUE;      // whether continuation lines of the current header are kept
		$foldedname = NULL;      // key in $headers that continuation lines extend, if any
		foreach (explode("\r\n", $article['header']) as $line) {
			if (!strlen($line) || $line === '.') {
				print("Article $article[id] Contains empty or terminating header line\n");
				continue;
			}
			// A line starting with whitespace is a folded continuation of the
			// previous header: it shares that header's fate and value.
			if ($line[0] === ' ' || $line[0] === "\t") {
				if ($keepfolded) {
					$header[] = $line;
					if ($foldedname !== NULL) $headers[$foldedname] .= $line;
				}
				continue;
			}
			$parts = explode(': ', $line, 2);
			$headername = strtoupper($parts[0]);
			$keepfolded = TRUE;
			$foldedname = NULL;
			switch ($headername) {
				// Headers that may appear only once; the first occurrence wins.
				case 'PATH': case 'FROM': case 'NEWSGROUPS': case 'SUBJECT': case 'DATE': case 'MESSAGE-ID': case 'SENDER':
					if (isset($headers[$headername])) {
						print("Article $article[id] Contains duplicate header $headername, removing.\n");
						$headerchanged = TRUE;
						$keepfolded = FALSE;
						break;
					}
					$header[] = $line;
					// A line without ": " has no value part; store an empty string
					// instead of reading an undefined array index.
					$headers[$headername] = isset($parts[1]) ? $parts[1] : '';
					$foldedname = $headername;
					break;
				// Known optional headers: kept unchanged.
				case 'ORGANIZATION': case 'LINES':
				case 'MIME-VERSION': case 'CONTENT-TYPE': case 'CONTENT-TRANSFER-ENCODING': case 'USER-AGENT':
				case 'REFERENCES': case 'REPLY-TO': case 'FOLLOWUP-TO': case 'IN-REPLY-TO':
				case 'EXPIRES': case 'CONTROL': case 'DISTRIBUTION': case 'KEYWORDS': case 'SUMMARY':
					$header[] = $line;
					break;
				// Server-generated headers: dropped from the stored article.
				case 'NNTP-POSTING-HOST': case 'X-TRACE': case 'XREF': case 'X-COMPLAINTS-TO':
				case 'NNTP-POSTING-DATE':
					print("Article $article[id] Contains unacceptable header $headername\n");
					$headerchanged = TRUE;
					$keepfolded = FALSE;
					break;
				// Anything else is kept unchanged.
				default:
					$header[] = $line;
					break;
			}
		}

		// Diagnostic only: a lone "." inside the body is reported, not fixed.
		foreach (explode("\r\n", $article['body']) as $line) {
			if ($line === '.') print("Article $article[id] Contains terminating body line\n");
		}

		if (!isset($headers['NEWSGROUPS'])) {
			print("Article $article[id] Missing required Newsgroups header\n");
			continue;
		}
		// Resolve the listed group names to ids; unknown groups are ignored.
		$newsgroups = array();
		foreach (explode(',', $headers['NEWSGROUPS']) as $groupname) {
			// Tolerate whitespace around the commas ("a, b") and empty entries.
			$groupname = trim($groupname);
			if (!strlen($groupname)) continue;
			$group = $db->evalRowAssoc('SELECT * FROM `groups` WHERE `name` = ?', $groupname);
			if ($group === FALSE) continue;
			$newsgroups[] = $group['id'];
		}
		if (!count($newsgroups)) {
			print("Article $article[id] No known newsgroups listed\n");
			continue;
		}

		if (!isset($headers['MESSAGE-ID'])) {
			print("Article $article[id] Missing required Message-ID header\n");
			continue;
		}
		$msgid = trim($headers['MESSAGE-ID']);
		if (strlen($msgid) <= 2 || $msgid[0] != '<' || $msgid[strlen($msgid)-1] != '>') {
			// Reported only; the remaining fixes are still applied.
			print("Article $article[id] Malformed Message-ID\n");
		} else {
			// The database stores the id without the surrounding angle brackets.
			$msgid = substr($msgid, 1, -1);
			// Strict string comparison, so numeric-looking ids are not treated as equal numbers.
			if ($msgid !== (string)$article['messageid']) {
				print("Article $article[id] Message-ID header does not match database, fixing.\n");
				$db->update('UPDATE `messages` SET `messageid` = ? WHERE `id` = ?', array($msgid, $article['id']));
			}
		}

		if ($headerchanged) {
			print("Article $article[id] Updating headers.\n");
			$db->update('UPDATE `messages` SET `header` = ? WHERE `id` = ?', array(implode("\r\n", $header), $article['id']));
		}

		// Make sure the article is linked to every known group it was posted to.
		foreach ($newsgroups as $groupid) {
			if (FALSE === $db->evalRow('SELECT * FROM `groupmessages` WHERE `group` = ? AND `message` = ?', array($groupid, $article['id']))) {
				print("Article $article[id] Missing link in group $groupid, fixing.\n");
				$db->insert('INSERT INTO `groupmessages` (`group`, `message`) VALUES (?, ?)', array($groupid, $article['id']));
			}
		}
	}
}