| 1 | #!/usr/bin/env python |
|---|
| 2 | ## Contributors for this file: |
|---|
| 3 | ## - Yann Le Boulanger <asterix@lagaule.org> |
|---|
| 4 | ## - Nikos Kouremenos <kourem@gmail.com> |
|---|
| 5 | ## |
|---|
| 6 | ## Copyright (C) 2003-2004 Yann Le Boulanger <asterix@lagaule.org> |
|---|
| 7 | ## Vincent Hanquez <tab@snarc.org> |
|---|
| 8 | ## Copyright (C) 2005 Yann Le Boulanger <asterix@lagaule.org> |
|---|
| 9 | ## Vincent Hanquez <tab@snarc.org> |
|---|
| 10 | ## Nikos Kouremenos <nkour@jabber.org> |
|---|
| 11 | ## Dimitur Kirov <dkirov@gmail.com> |
|---|
| 12 | ## Travis Shirk <travis@pobox.com> |
|---|
| 13 | ## Norman Rasmussen <norman@rasmussen.co.za> |
|---|
| 14 | ## |
|---|
| 15 | ## This program is free software; you can redistribute it and/or modify |
|---|
| 16 | ## it under the terms of the GNU General Public License as published |
|---|
| 17 | ## by the Free Software Foundation; version 2 only. |
|---|
| 18 | ## |
|---|
| 19 | ## This program is distributed in the hope that it will be useful, |
|---|
| 20 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 21 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 22 | ## GNU General Public License for more details. |
|---|
| 23 | ## |
|---|
| 24 | |
|---|
| 25 | import os |
|---|
| 26 | import sre |
|---|
| 27 | import sys |
|---|
| 28 | import time |
|---|
| 29 | import signal |
|---|
| 30 | import logger |
|---|
| 31 | import i18n |
|---|
| 32 | _ = i18n._ |
|---|
| 33 | from helpers import from_one_line, decode_string |
|---|
| 34 | |
|---|
# Install the default action (SIG_DFL) for SIGINT so that ^C exits the
# application.
signal.signal(signal.SIGINT, signal.SIG_DFL) # ^C exits the application
|---|
| 36 | |
|---|
| 37 | from pysqlite2 import dbapi2 as sqlite |
|---|
| 38 | |
|---|
# Work out where the old plain-text logs live (PATH_TO_LOGS_BASE_DIR) and
# where the sqlite database that replaces them is created (PATH_TO_DB).
if os.name != 'nt':
    # everything is kept below ~/.gajim
    PATH_TO_DB = os.path.expanduser('~/.gajim/logs.db') # database is called logs.db
    PATH_TO_LOGS_BASE_DIR = os.path.expanduser('~/.gajim/logs')
else:
    try:
        # both paths are below the Gajim folder of %appdata%
        PATH_TO_DB = os.path.join(os.environ['appdata'], 'Gajim', 'logs.db') # database is called logs.db
        PATH_TO_LOGS_BASE_DIR = os.path.join(os.environ['appdata'], 'Gajim', 'Logs')
    except KeyError:
        # no 'appdata' in the environment (win9x): use paths relative to
        # the current directory instead
        PATH_TO_DB = '../src/logs.db'
        PATH_TO_LOGS_BASE_DIR = '../src/Logs'
|---|
| 50 | |
|---|
class Migration:
    '''Import the old plain-text logs into the sqlite database PATH_TO_DB.

    Create an instance, then call migrate(). Progress messages are put on
    the queue given to migrate(), or printed when no queue was given.
    PROCESSING becomes True once the tables exist and the directory walk
    starts; DONE becomes True when migrate() has finished.
    '''

    def __init__(self):
        '''Prepare the migration; exit the program if the database exists.'''
        self.constants = logger.Constants()
        self.DONE = False
        self.PROCESSING = False
        # migrate() sets the real value; define it here so that visit()
        # can always test it
        self.queue = None

        if os.path.exists(PATH_TO_DB):
            print('%s already exists. Exiting..' % PATH_TO_DB)
            sys.exit()

        self.jids_already_in = [] # jid we already put in DB

    def get_jid(self, dirname, filename):
        '''Return the lowercased jid a log file belongs to.

        jids.jid text column will be JID if TC-related, room_jid if
        GC-related, ROOM_JID/nick if pm-related. The names are taken from
        the file names:
        - a file directly in the logs base dir is named after the JID
        - room/room is the log of the room itself
        - room/nick is a pm with nick in that room

        dirname is the directory holding the file, filename its name.
        '''
        if dirname.endswith('logs') or dirname.endswith('Logs'):
            # we have file (not dir) in logs base dir, so it's TC
            return filename.lower() # file is JID

        # we are in a room folder (so it can be either pm or message in room)
        room = dirname
        base = PATH_TO_LOGS_BASE_DIR
        # Keep only the part below the logs base dir. Testing for the base
        # dir itself (and not for a leading '/') also covers paths such as
        # C:\...\Gajim\Logs\room on Windows.
        if room.startswith(base) and room[len(base):len(base) + 1] in (os.sep, '/'):
            room = room[len(base) + 1:]

        if filename == os.path.basename(dirname): # room/room
            jid = room # filename is ROOM_JID
        else: # room/nick it's pm
            jid = room + '/' + filename
        return jid.lower()

    def decode_jid(self, string):
        '''try to decode (to make it Unicode instance) given jid

        Return None when decode_string() still hands back a str instance,
        which is taken as a failed decode.'''
        string = decode_string(string)
        if isinstance(string, str):
            return None # decode failed
        return string

    def _report(self, text):
        '''Hand a progress message to the queue, or print it without one.'''
        if self.queue:
            self.queue.put(text)
        else:
            print(text)

    def _parse_line(self, line):
        '''Turn one log line into the values of a row of the logs table.

        line must already have gone through from_one_line(). Return the
        tuple (contact_name, tim, kind, show, message), or None when the
        line is malformed or carries no message and has to be dropped.

        type in logs is one of 'gc', 'gcstatus', 'recv', 'sent' and if
        nothing of those it is status. The conversion is:
        gc ==> gc_msg, gcstatus ==> gcstatus, recv ==> chat_msg_recv,
        sent ==> chat_msg_sent, status ==> status
        '''
        splitted_line = line.split(':')
        if len(splitted_line) <= 2:
            return None

        # line[0] is date; some lines can be broken, just drop them
        try:
            tim = int(float(splitted_line[0]))
        except (ValueError, OverflowError):
            return None

        typ = splitted_line[1] # line[1] has type of logged message
        message_data = splitted_line[2:] # line[2:] has message data

        contact_name = None
        show = None
        if typ == 'gc':
            contact_name = message_data[0]
            message = ':'.join(message_data[1:])
            kind = self.constants.KIND_GC_MSG
        elif typ == 'gcstatus':
            if len(message_data) < 2:
                # no show field: broken line, drop it instead of crashing
                return None
            contact_name = message_data[0]
            show = message_data[1]
            message = ':'.join(message_data[2:]) # status msg
            kind = self.constants.KIND_GCSTATUS
        elif typ == 'recv':
            message = ':'.join(message_data)
            kind = self.constants.KIND_CHAT_MSG_RECV
        elif typ == 'sent':
            message = ':'.join(message_data)
            kind = self.constants.KIND_CHAT_MSG_SENT
        else: # status
            kind = self.constants.KIND_STATUS
            show = message_data[0]
            message = ':'.join(message_data[1:]) # status msg

        # remove last \n; the final line of a file may not have one, and
        # then no character of the message must be cut off
        if message.endswith('\n'):
            message = message[:-1]
        if not message:
            return None
        return (contact_name, tim, kind, show, message)

    def _get_jid_id(self, jid, jid_type):
        '''Return the jid_id of jid, adding a row to jids when it is new.'''
        if jid in self.jids_already_in:
            # jid is already in the DB, don't create a new row, just get
            # his jid_id. The jid is bound as a parameter so that quotes
            # in it cannot break the statement.
            self.cur.execute('SELECT jid_id FROM jids WHERE jid = ?', (jid,))
            return self.cur.fetchone()[0]

        self.jids_already_in.append(jid)
        self.cur.execute('INSERT INTO jids (jid, type) VALUES (?, ?)',
            (jid, jid_type))
        self.con.commit()
        return self.cur.lastrowid

    def _import_file(self, path_to_text_file, jid, jid_type):
        '''Insert every usable line of one log file into the logs table.'''
        sql = 'INSERT INTO logs (jid_id, contact_name, time, kind, show, message) '\
            'VALUES (?, ?, ?, ?, ?, ?)'
        # looked up on the first usable line only, so a file without any
        # usable line does not add a row to jids
        jid_id = None
        f = open(path_to_text_file, 'r')
        try:
            for line in f:
                parsed = self._parse_line(from_one_line(line))
                if parsed is None:
                    continue
                if jid_id is None:
                    jid_id = self._get_jid_id(jid, jid_type)
                self.cur.execute(sql, (jid_id,) + parsed)
        finally:
            f.close()
        # one commit per file instead of one per inserted row
        self.con.commit()

    def visit(self, arg, dirname, filenames):
        '''Import all log files of one directory.

        arg is not used. dirname is the directory being visited and
        filenames the names of its entries; directories among them are
        skipped.
        '''
        self._report(_('Visiting %s') % dirname)
        for filename in filenames:
            # Don't take this file into account, this is dup info
            # notifications are also in contact log file
            if filename in ('notify.log', 'README'):
                continue
            path_to_text_file = os.path.join(dirname, filename)
            if os.path.isdir(path_to_text_file):
                continue

            jid = self.decode_jid(self.get_jid(dirname, filename))
            if not jid:
                continue

            if filename == os.path.basename(dirname):
                # gajim@conf/gajim@conf then gajim@conf is type room
                jid_type = self.constants.JID_ROOM_TYPE
                #Type of log
                typ = 'room'
            else:
                jid_type = self.constants.JID_NORMAL_TYPE
                #Type of log
                typ = _('normal')
            self._report(_('Processing %s of type %s') %
                (jid.encode('utf-8'), typ))

            self._import_file(path_to_text_file, jid, jid_type)

    def migrate(self, queue = None):
        '''Create the database and fill it from the plain-text logs.

        queue, when given, receives the progress messages (through its
        put() method) and the closing notice; without it progress is
        printed. A README holding that notice is left in the logs base
        dir when that directory exists.
        '''
        import stat

        self.queue = queue
        self.con = sqlite.connect(PATH_TO_DB)
        # rw only for us (mode 0600)
        os.chmod(PATH_TO_DB, stat.S_IRUSR | stat.S_IWUSR)
        self.cur = self.con.cursor()
        # create the tables
        # kind can be
        # status, gcstatus, gc_msg, (we only recv for those 3),
        # single_msg_recv, chat_msg_recv, chat_msg_sent, single_msg_sent
        # to meet all our needs
        # logs.jid_id --> jids.jid_id but Sqlite doesn't do FK etc so it's
        # done in python code
        self.cur.executescript(
            '''
            CREATE TABLE jids(
                jid_id INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE,
                jid TEXT UNIQUE,
                type INTEGER
            );

            CREATE TABLE logs(
                log_line_id INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE,
                jid_id INTEGER,
                contact_name TEXT,
                time INTEGER,
                kind INTEGER,
                show INTEGER,
                message TEXT,
                subject TEXT
            );
            '''
            )

        self.con.commit()

        self.PROCESSING = True
        # visit() skips directories itself, so the plain file names that
        # os.walk() reports for each directory are all it needs
        for dirname, dirnames, filenames in os.walk(PATH_TO_LOGS_BASE_DIR):
            self.visit(None, dirname, filenames)
        s = '''

We do not use plain-text files anymore, because they do not meet our needs.
Those files here are logs for Gajim up until 0.8.2
We now use an sqlite database called logs.db found in %s
You can now safely remove your %s folder
Thank you''' % (os.path.dirname(PATH_TO_LOGS_BASE_DIR), PATH_TO_LOGS_BASE_DIR)
        # without old logs there is no directory to put the README in
        if os.path.isdir(PATH_TO_LOGS_BASE_DIR):
            f = open(os.path.join(PATH_TO_LOGS_BASE_DIR, 'README'), 'w')
            try:
                f.write(s)
            finally:
                f.close()
        if queue:
            queue.put(s)
        self.DONE = True
|---|
| 244 | |
|---|
if __name__ == '__main__':
    # Run as a script: point the user to the instructions, leave time to
    # abort with Ctrl+C, then run the migration and print its progress.
    print('IMPORTANT: PLEASE READ http://trac.gajim.org/wiki/MigrateLogToDot9DB')
    print('Migration will start in 40 seconds unless you press Ctrl+C')
    time.sleep(40) # give the user time to act
    print('')
    print('Starting Logs Migration')
    print('=======================')
    print('Please do NOT run Gajim until this script is over')
    m = Migration()
    m.migrate()
|---|