Unescape scraped version names

This commit is contained in:
Ciaran Gultnieks 2012-01-02 16:50:49 +00:00
parent bed72d335b
commit 1f0f1a65ee

View file

@@ -24,7 +24,7 @@ import re
import urllib import urllib
import time import time
from optparse import OptionParser from optparse import OptionParser
import HTMLParser
import common import common
#Read configuration... #Read configuration...
@@ -40,6 +40,8 @@ parser.add_option("-v", "--verbose", action="store_true", default=False,
# Get all apps... # Get all apps...
apps = common.read_metadata(options.verbose) apps = common.read_metadata(options.verbose)
html_parser = HTMLParser.HTMLParser()
for app in apps: for app in apps:
if app['disabled']: if app['disabled']:
@@ -55,15 +57,15 @@ for app in apps:
m = re.search('<dd itemprop="softwareVersion">([^>]+)</dd>', page) m = re.search('<dd itemprop="softwareVersion">([^>]+)</dd>', page)
if m: if m:
version = m.group(1) version = html_parser.unescape(m.group(1))
m = re.search('data-paramValue="(\d+)"><div class="goog-menuitem-content">Latest Version<', page) m = re.search('data-paramValue="(\d+)"><div class="goog-menuitem-content">Latest Version<', page)
if m: if m:
vercode = m.group(1) vercode = m.group(1)
if vercode is None: if not vercode:
print "...couldn't find version code" print "...couldn't find version code"
elif version is None: elif not version:
print "...couldn't find version" print "...couldn't find version"
elif vercode == app['marketvercode'] and version == app['marketversion']: elif vercode == app['marketvercode'] and version == app['marketversion']:
print "...up to date" print "...up to date"