| 1 | #!/usr/bin/env python |
|---|
| 2 | import os |
|---|
| 3 | import sre |
|---|
| 4 | import sys |
|---|
| 5 | import time |
|---|
| 6 | import signal |
|---|
| 7 | import dircache |
|---|
| 8 | |
|---|
| 9 | signal.signal(signal.SIGINT, signal.SIG_DFL) # ^C exits the application |
|---|
| 10 | |
|---|
| 11 | from pysqlite2 import dbapi2 as sqlite |
|---|
| 12 | |
|---|
| 13 | |
|---|
| 14 | class Constants: |
|---|
| 15 | def __init__(self): |
|---|
| 16 | ( |
|---|
| 17 | self.JID_NORMAL_TYPE, |
|---|
| 18 | self.JID_ROOM_TYPE # image to show state (online, new message etc) |
|---|
| 19 | ) = range(2) |
|---|
| 20 | |
|---|
| 21 | ( |
|---|
| 22 | self.KIND_STATUS, |
|---|
| 23 | self.KIND_GCSTATUS, |
|---|
| 24 | self.KIND_GC_MSG, |
|---|
| 25 | self.KIND_SINGLE_MSG_RECV, |
|---|
| 26 | self.KIND_CHAT_MSG_RECV, |
|---|
| 27 | self.KIND_SINGLE_MSG_SENT, |
|---|
| 28 | self.KIND_CHAT_MSG_SENT |
|---|
| 29 | ) = range(7) |
|---|
| 30 | |
|---|
| 31 | ( |
|---|
| 32 | self.SHOW_ONLINE, |
|---|
| 33 | self.SHOW_CHAT, |
|---|
| 34 | self.SHOW_AWAY, |
|---|
| 35 | self.SHOW_XA, |
|---|
| 36 | self.SHOW_DND, |
|---|
| 37 | self.SHOW_OFFLINE |
|---|
| 38 | ) = range(6) |
|---|
| 39 | |
|---|
| 40 | constants = Constants() |
|---|
| 41 | |
|---|
| 42 | if os.name == 'nt': |
|---|
| 43 | try: |
|---|
| 44 | PATH_TO_LOGS_BASE_DIR = os.path.join(os.environ['appdata'], 'PsiData', 'profiles') |
|---|
| 45 | PATH_TO_DB = os.path.join(os.environ['appdata'], 'Gajim', 'logs.db') # database is called logs.db |
|---|
| 46 | except KeyError: |
|---|
| 47 | # win9x |
|---|
| 48 | try: |
|---|
| 49 | PATH_TO_LOGS_BASE_DIR = os.path.join('C:', 'Program Files', 'Psi', 'PsiData', 'profiles') |
|---|
| 50 | PATH_TO_DB = '../src/logs.db' |
|---|
| 51 | except: |
|---|
| 52 | print >> sys.sterr, 'Psi logs can\'t be found' |
|---|
| 53 | sys.exit() |
|---|
| 54 | else: |
|---|
| 55 | PATH_TO_LOGS_BASE_DIR = os.path.expanduser('~/.psi/profiles/') |
|---|
| 56 | PATH_TO_DB = os.path.expanduser('~/.gajim/logs.db') # database is called logs.db |
|---|
| 57 | |
|---|
| 58 | jids_already_in = [] # jid we already put in DB |
|---|
| 59 | |
|---|
| 60 | if not os.path.exists(PATH_TO_DB): |
|---|
| 61 | con = sqlite.connect(PATH_TO_DB) |
|---|
| 62 | cur = con.cursor() |
|---|
| 63 | # create the tables |
|---|
| 64 | # kind can be |
|---|
| 65 | # status, gcstatus, gc_msg, (we only recv for those 3), |
|---|
| 66 | # single_msg_recv, chat_msg_recv, chat_msg_sent, single_msg_sent |
|---|
| 67 | # to meet all our needs |
|---|
| 68 | # logs.jid_id --> jids.jid_id but Sqlite doesn't do FK etc so it's done in python code |
|---|
| 69 | cur.executescript( |
|---|
| 70 | ''' |
|---|
| 71 | CREATE TABLE jids( |
|---|
| 72 | jid_id INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE, |
|---|
| 73 | jid TEXT UNIQUE, |
|---|
| 74 | type INTEGER |
|---|
| 75 | ); |
|---|
| 76 | |
|---|
| 77 | CREATE TABLE logs( |
|---|
| 78 | log_line_id INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE, |
|---|
| 79 | jid_id INTEGER, |
|---|
| 80 | contact_name TEXT, |
|---|
| 81 | time INTEGER, |
|---|
| 82 | kind INTEGER, |
|---|
| 83 | show INTEGER, |
|---|
| 84 | message TEXT, |
|---|
| 85 | subject TEXT |
|---|
| 86 | ); |
|---|
| 87 | ''' |
|---|
| 88 | ) |
|---|
| 89 | |
|---|
| 90 | con.commit() |
|---|
| 91 | else: |
|---|
| 92 | con = sqlite.connect(PATH_TO_DB) |
|---|
| 93 | cur = con.cursor() |
|---|
| 94 | cur.execute('SELECT jid FROM jids') |
|---|
| 95 | jids = cur.fetchall() |
|---|
| 96 | for jid in jids: |
|---|
| 97 | jids_already_in.append(jid[0]) |
|---|
| 98 | |
|---|
| 99 | def decode_jid(string): |
|---|
| 100 | '''try to decode (to make it Unicode instance) given string''' |
|---|
| 101 | # by the time we go to iso15 it better be the one else we show bad characters |
|---|
| 102 | encodings = (sys.getfilesystemencoding(), 'utf-8', 'iso-8859-15') |
|---|
| 103 | for encoding in encodings: |
|---|
| 104 | try: |
|---|
| 105 | string = string.decode(encoding) |
|---|
| 106 | except UnicodeError: |
|---|
| 107 | continue |
|---|
| 108 | return string |
|---|
| 109 | |
|---|
| 110 | return None |
|---|
| 111 | |
|---|
| 112 | def decode_message(string): |
|---|
| 113 | '''try to decode (to make it Unicode instance) given string''' |
|---|
| 114 | # by the time we go to iso15 it better be the one else we show bad characters |
|---|
| 115 | encodings = (sys.getfilesystemencoding(), 'utf-8', 'iso-8859-15') |
|---|
| 116 | for encoding in encodings: |
|---|
| 117 | try: |
|---|
| 118 | string = string.decode(encoding).encode(encoding) |
|---|
| 119 | except UnicodeError: |
|---|
| 120 | continue |
|---|
| 121 | return string |
|---|
| 122 | |
|---|
| 123 | return None |
|---|
| 124 | |
|---|
| 125 | def get_jid(dirname, filename): |
|---|
| 126 | # jids.jid text column will be JID if TC-related, room_jid if GC-related, |
|---|
| 127 | jid = filename # JID is in filename |
|---|
| 128 | jid = jid[:-8] #-8 remove .history at end of filename |
|---|
| 129 | jid = jid.lower() |
|---|
| 130 | if jid.count('_at_') > 1: |
|---|
| 131 | print jid + 'contain more than one "_at_". We cannot guess which is the JID @' |
|---|
| 132 | return None |
|---|
| 133 | jid = jid.replace('_at_', '@', 1) |
|---|
| 134 | |
|---|
| 135 | #Special chars in msn users jid are stored with the ascii code in psi, not in gajim |
|---|
| 136 | jid = jid.replace('%25', '%') |
|---|
| 137 | jid = jid.replace('%2d', '-') |
|---|
| 138 | jid = jid.replace('%2e', '.') |
|---|
| 139 | jid = jid.replace('%5f', '_') |
|---|
| 140 | |
|---|
| 141 | jid = decode_jid(jid) |
|---|
| 142 | return jid |
|---|
| 143 | |
|---|
| 144 | def visit(arg, dirname, filenames): |
|---|
| 145 | print 'Visiting', dirname |
|---|
| 146 | for filename in filenames: |
|---|
| 147 | # Don't take into account jid that contains conference or chat |
|---|
| 148 | if filename.find('conference') > 0: |
|---|
| 149 | continue |
|---|
| 150 | if filename.find('chat') > 0: |
|---|
| 151 | continue |
|---|
| 152 | path_to_text_file = os.path.join(dirname, filename) |
|---|
| 153 | if os.path.isdir(path_to_text_file): |
|---|
| 154 | continue |
|---|
| 155 | |
|---|
| 156 | jid = get_jid(dirname, filename) |
|---|
| 157 | if not jid: |
|---|
| 158 | continue |
|---|
| 159 | |
|---|
| 160 | type = constants.JID_NORMAL_TYPE |
|---|
| 161 | print 'Processing', jid, 'of type normal' |
|---|
| 162 | |
|---|
| 163 | |
|---|
| 164 | JID_ID = None |
|---|
| 165 | f = open(path_to_text_file, 'r') |
|---|
| 166 | lines = f.readlines() |
|---|
| 167 | for line in lines: |
|---|
| 168 | splitted_line = line.split('|') |
|---|
| 169 | if len(splitted_line) > 5: |
|---|
| 170 | # new db has: |
|---|
| 171 | # status, gcstatus, gc_msg, (we only recv those 3), |
|---|
| 172 | # single_msg_recv, chat_msg_recv, chat_msg_sent, single_msg_sent |
|---|
| 173 | # to meet all our needs |
|---|
| 174 | date = splitted_line[1] |
|---|
| 175 | |
|---|
| 176 | #Let's convert the date to unix timestamp |
|---|
| 177 | try: |
|---|
| 178 | year = int(date[:4]) |
|---|
| 179 | month = int(date[5:7]) |
|---|
| 180 | day = int(date[8:10]) |
|---|
| 181 | hour = int(date[11:13]) |
|---|
| 182 | min = int(date[14:16]) |
|---|
| 183 | sec = int(date[17:19]) |
|---|
| 184 | except ValueError: |
|---|
| 185 | print 'The date of the following line cannot be parsed correctly. Line skipped' |
|---|
| 186 | print str(line) |
|---|
| 187 | continue |
|---|
| 188 | utc_time = [year, month, day, hour, min, sec, 0, 1, -1] |
|---|
| 189 | unixtime = time.mktime(utc_time) |
|---|
| 190 | |
|---|
| 191 | type = splitted_line[2] # line[2] has type of logged message |
|---|
| 192 | direction = splitted_line[3] #from or to ? |
|---|
| 193 | flags = splitted_line[4] # flags = Nxyz |
|---|
| 194 | if flags[0] != 'N': |
|---|
| 195 | continue |
|---|
| 196 | if flags[2:4] != '--': |
|---|
| 197 | continue |
|---|
| 198 | flag1 = flags[1] |
|---|
| 199 | # x can be 1 (subject), 2 (url + url_description), 3 (both) |
|---|
| 200 | # y and z are '-' |
|---|
| 201 | data = splitted_line[5:] # line[2:] has message data |
|---|
| 202 | |
|---|
| 203 | # get subject and message |
|---|
| 204 | subject = '' |
|---|
| 205 | if flag1 == '-': |
|---|
| 206 | message = data[0] |
|---|
| 207 | elif flag1 == '1': |
|---|
| 208 | subject = data[0] |
|---|
| 209 | message = data[1] |
|---|
| 210 | elif flag1 == '2': |
|---|
| 211 | message = data[2] # url -> trash |
|---|
| 212 | elif flag1 == '3': |
|---|
| 213 | subject = data[0] |
|---|
| 214 | message = data[3] |
|---|
| 215 | message = decode_message(message) |
|---|
| 216 | if not message: |
|---|
| 217 | continue |
|---|
| 218 | if subject: |
|---|
| 219 | subject = decode_message(subject) |
|---|
| 220 | if not subject: |
|---|
| 221 | continue |
|---|
| 222 | |
|---|
| 223 | # type:0->message;1->chat; |
|---|
| 224 | # 3->subscribe;6->subscribed;7->unsubscribe;8->unsubscribed |
|---|
| 225 | # 4->error;5->headline;2->old system message(useless) |
|---|
| 226 | if type in ('0', '1'): |
|---|
| 227 | if direction == 'from': |
|---|
| 228 | kind = constants.KIND_CHAT_MSG_RECV |
|---|
| 229 | elif direction == 'to': |
|---|
| 230 | kind = constants.KIND_CHAT_MSG_SENT |
|---|
| 231 | elif type in ('2', '3', '4', '5', '6', '7', '8'): |
|---|
| 232 | #Not an interessant line, just forget it |
|---|
| 233 | continue |
|---|
| 234 | else: |
|---|
| 235 | print 'Unknown message type in the following line. Line skipped' |
|---|
| 236 | print str(line) |
|---|
| 237 | continue |
|---|
| 238 | |
|---|
| 239 | # jid is already in the DB, don't create a new row, just get his jid_id |
|---|
| 240 | if not JID_ID: |
|---|
| 241 | if jid in jids_already_in: |
|---|
| 242 | cur.execute('SELECT jid_id FROM jids WHERE jid = "%s"' % jid) |
|---|
| 243 | JID_ID = cur.fetchone()[0] |
|---|
| 244 | else: |
|---|
| 245 | jids_already_in.append(jid) |
|---|
| 246 | cur.execute('INSERT INTO jids (jid, type) VALUES (?, ?)', (jid, 0)) |
|---|
| 247 | con.commit() |
|---|
| 248 | JID_ID = cur.lastrowid |
|---|
| 249 | |
|---|
| 250 | contact_name = None |
|---|
| 251 | show = None |
|---|
| 252 | |
|---|
| 253 | # Replace escaped newlines by 'real ones' |
|---|
| 254 | message = message.replace('\\n', '\n') |
|---|
| 255 | |
|---|
| 256 | # Delete the last char if it's a newline |
|---|
| 257 | if message[len(message)-1] == '\n': |
|---|
| 258 | message = message[0:len(message)-1] |
|---|
| 259 | |
|---|
| 260 | sql = 'INSERT INTO logs (jid_id, contact_name, time, kind, show, message, subject) VALUES (?, ?, ?, ?, ?, ?, ?)' |
|---|
| 261 | values = (JID_ID, contact_name, unixtime, kind, show, message, subject) |
|---|
| 262 | cur.execute(sql, values) |
|---|
| 263 | else: |
|---|
| 264 | print 'The following line cannot be parsed correctly. Line skipped: \'', str(line), '\'' |
|---|
| 265 | con.commit() |
|---|
| 266 | |
|---|
| 267 | if __name__ == '__main__': |
|---|
| 268 | print 'Starting Psi Logs Migration' |
|---|
| 269 | print '=======================' |
|---|
| 270 | print 'Please do NOT run Gajim until this script is over' |
|---|
| 271 | profiles = dircache.listdir(PATH_TO_LOGS_BASE_DIR) |
|---|
| 272 | for profile in profiles: |
|---|
| 273 | print 'Converting history for profile', profile |
|---|
| 274 | logsdir = os.path.join(PATH_TO_LOGS_BASE_DIR, profile, 'history') |
|---|
| 275 | os.path.walk(logsdir, visit, None) |
|---|