| 1 | # -*- coding: utf-8 -*- |
|---|
| 2 | ''' Atom (rfc 4287) feed parser, used to read data from atom-over-pubsub transports |
|---|
| 3 | and services. Very simple. Actually implements only atom:entry. Implement more features |
|---|
| 4 | if you need. ''' |
|---|
| 5 | |
|---|
| 6 | # suggestion: rewrite functions that return dates to return standard python time tuples, |
|---|
| 7 | # extended to contain timezone |
|---|
| 8 | |
|---|
| 9 | import xmpp |
|---|
| 10 | import time |
|---|
| 11 | |
|---|
class PersonConstruct(xmpp.Node, object):
    ''' Wrapper around an Atom person construct (an element carrying <name>,
    <uri> and <email> children).

    Not used for now, as we don't need authors/contributors in pubsub.com
    feeds. They rarely exist there. '''

    def __init__(self, node):
        ''' Create person construct from node. '''
        xmpp.Node.__init__(self, node=node)

    def get_name(self):
        ''' Return the character data of the <name> child element. '''
        return self.getTagData('name')

    def get_uri(self):
        ''' Return the character data of the <uri> child element. '''
        return self.getTagData('uri')

    def get_email(self):
        ''' Return the character data of the <email> child element. '''
        return self.getTagData('email')

    # Read-only views over the getters above; no setters or deleters.
    name = property(
        fget=get_name,
        doc='''Conveys a human-readable name for the person. Should not be None,
        although some badly generated atom feeds don't put anything here
        (this is non-standard behavior, still pubsub.com sometimes does that.)''')

    uri = property(
        fget=get_uri,
        doc='''Conveys an IRI associated with the person. Might be None when not set.''')

    email = property(
        fget=get_email,
        doc='''Conveys an e-mail address associated with the person. Might be None when
        not set.''')
|---|
| 39 | |
|---|
class Entry(xmpp.Node, object):
    ''' Atom (rfc 4287) entry, represented as an xmpp node named 'entry'. '''

    def __init__(self, node=None):
        ''' Create new atom entry object.

        node is handed to xmpp.Node.__init__ unchanged; when given, the entry
        is presumably built from that already parsed node (verify against
        xmpp.Node). '''
        xmpp.Node.__init__(self, 'entry', node=node)

    def __repr__(self):
        ''' Return a short debugging representation containing the entry id. '''
        # This class does not define `id` itself. Use the attribute when the
        # base class (not visible here) supplies one, otherwise fall back to
        # the <id> child element, so that repr() never raises AttributeError.
        entry_id = getattr(self, 'id', None)
        if entry_id is None:
            entry_id = self.getTagData('id')
        # One-element tuple: a tuple-valued id must not be unpacked by '%'.
        return '<Atom:Entry object of id="%r">' % (entry_id,)
|---|
| 47 | |
|---|
class OldEntry(xmpp.Node, object):
    ''' Parser for feeds from pubsub.com. They use old Atom 0.3 format with
    their extensions. '''

    def __init__(self, node=None):
        ''' Create new Atom 0.3 entry object.

        node is handed to xmpp.Node.__init__ unchanged. '''
        xmpp.Node.__init__(self, 'entry', node=node)

    def __repr__(self):
        ''' Return a short debugging representation containing the entry id. '''
        # This class does not define `id` itself. Use the attribute when the
        # base class (not visible here) supplies one, otherwise fall back to
        # the <id> child element, so that repr() never raises AttributeError.
        entry_id = getattr(self, 'id', None)
        if entry_id is None:
            entry_id = self.getTagData('id')
        # One-element tuple: a tuple-valued id must not be unpacked by '%'.
        return '<Atom0.3:Entry object of id="%r">' % (entry_id,)

    def get_feed_title(self):
        ''' Returns title of feed, where the entry was created. The result is
        the feed name concatenated with source-feed title.

        Returns u'<parent title>: <source-feed title>' when both are known,
        whichever single title is known otherwise, and u'' when neither is. '''
        main_feed = None
        if self.parent is not None:
            main_feed = self.parent.getTagData('title')

        # Look the <source-feed> element up once and reuse it.
        source_feed = None
        source_node = self.getTag('source-feed')
        if source_node is not None:
            source_feed = source_node.getTagData('title')

        if main_feed is not None and source_feed is not None:
            return u'%s: %s' % (main_feed, source_feed)
        if main_feed is not None:
            return main_feed
        if source_feed is not None:
            return source_feed
        return u''

    feed_title = property(get_feed_title, None, None,
        ''' Title of feed. It is built from entry's original feed title and
        title of feed which delivered this entry. ''')

    def get_feed_link(self):
        ''' Get a link to main page of feed (in pubsub.com: second link of
        rel='alternate', first contains raw xml data).

        Returns None when there is no <source-feed> element or it has fewer
        than two such links. '''
        try:
            source_node = self.getTag('source-feed')
            return source_node.getTags('link', {'rel': 'alternate'})[1].getData()
        except (AttributeError, IndexError):
            # AttributeError: getTag() gave None (no <source-feed>).
            # IndexError: fewer than two rel='alternate' links.
            # Anything else (e.g. KeyboardInterrupt) is no longer swallowed.
            return None

    feed_link = property(get_feed_link, None, None,
        ''' Link to main webpage of the feed. ''')

    def get_title(self):
        ''' Get an entry's title. '''
        return self.getTagData('title')

    title = property(get_title, None, None,
        ''' Entry's title. ''')

    def get_uri(self):
        ''' Get the uri the entry points to (entry's first link element with
        rel='alternate' or without rel attribute).

        Links that carry no href attribute are skipped. Returns None when no
        suitable link is found. '''
        for element in self.getTags('link'):
            if 'rel' in element.attrs and element.attrs['rel'] != 'alternate':
                continue
            try:
                return element.attrs['href']
            except (KeyError, AttributeError):
                # A missing mapping key raises KeyError; the original caught
                # only AttributeError, so a link without href crashed the
                # lookup. AttributeError is kept for backward compatibility.
                continue
        return None

    uri = property(get_uri, None, None,
        ''' URI that is pointed by the entry. ''')

    def get_updated(self):
        ''' Get the time the entry was updated last time. This should be
        standardized, but pubsub.com sends it in human-readable format. We
        won't try to parse it. (Atom 0.3 uses the word "modified" for that).

        If there's no time given in the entry, we try with <published>
        and <issued> elements. When none of them is present, the current
        local time as formatted by time.asctime() is returned instead. '''
        for name in ('updated', 'modified', 'published', 'issued'):
            date = self.getTagData(name)
            if date is not None:
                # Returned verbatim, in whatever format the feed used.
                return date

        # No timestamp element at all: substitute "now". Note that
        # time.asctime() output is not in the Atom date format either.
        return time.asctime()

    updated = property(get_updated, None, None,
        ''' Last significant modification time. ''')

    # Default tagline; nothing in this class fills it in.
    # NOTE(review): treated as a class attribute of OldEntry - the original
    # indentation was lost, confirm it is not meant to be module-level.
    feed_tagline = u''
|---|