Python - Error with calais.py
I tried to use the calais.py library and ran the following code:
# Minimal reproduction; assumes the `calais` class from calais.py is in scope.
api_key = 'token'
# Bind the client to its own name so the instance does not shadow the class.
client = calais(api_key=api_key, submitter="my app")
# Single-argument parenthesised print behaves the same as the print statement.
print(client.analyze_url('https://www.python.org/download/releases/2.5.1/'))
I get the following error:
*ValueError: invalid request format - request has missing or invalid parameters*
The calais.py source is here:
""" python-calais v.1.4 -- python interface opencalais api author: jordan dimov (jdimov@mlke.net) last-update: 01/12/2009 """ import httplib, urllib, re import simplejson json stringio import stringio params_xml = """ <c:params xmlns:c="http://s.opencalais.com/1/pred/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <c:processingdirectives %s> </c:processingdirectives> <c:userdirectives %s> </c:userdirectives> <c:externalmetadata %s> </c:externalmetadata> </c:params> """ strip_re = re.compile('<script.*?</script>|<noscript.*?</noscript>|<style.*?</style>', re.ignorecase) __version__ = "1.4" class appurlopener(urllib.fancyurlopener): version = "mozilla/5.0 (x11; u; linux x86_64; en-us; rv:1.9.0.5) gecko/2008121623 ubuntu/8.10 (intrepid)firefox/3.0.5" # lie shamelessly wikipedia. urllib._urlopener = appurlopener() class calais(): """ python class knows how talk opencalais api. use analyze() , analyze_url() methods, return calaisresponse objects. """ api_key = none processing_directives = {"contenttype":"text/raw", "outputformat":"application/json", "reltagbaseurl":none, "calculaterelevancescore":"true", "enablemetadatatype":none, "discardmetadata":none, "omitoutputtingoriginaltext":"true"} user_directives = {"allowdistribution":"false", "allowsearch":"false", "externalid":none} external_metadata = {} def __init__(self, api_key, submitter="python-calais client v.%s" % __version__): self.api_key = api_key self.user_directives["submitter"]=submitter def _get_params_xml(self): return params_xml % (" ".join('c:%s="%s"' % (k,v) (k,v) in self.processing_directives.items() if v), " ".join('c:%s="%s"' % (k,v) (k,v) in self.user_directives.items() if v), " ".join('c:%s="%s"' % (k,v) (k,v) in self.external_metadata.items() if v)) def rest_post(self, content): params = urllib.urlencode({'licenseid':self.api_key, 'content':content, 'paramsxml':self._get_params_xml()}) headers = {"content-type":"application/x-www-form-urlencoded"} conn = 
httplib.httpconnection("api.opencalais.com:80") conn.request("post", "/enlighten/rest/", params, headers) response = conn.getresponse() data = response.read() conn.close() return (data) def get_random_id(self): """ creates random 10-character id submission. """ import string random import choice chars = string.letters + string.digits np = "" in range(10): np = np + choice(chars) return np def get_content_id(self, text): """ creates sha1 hash of text of submission. """ import hashlib h = hashlib.sha1() h.update(text) return h.hexdigest() def preprocess_html(self, html): html = html.replace('\n', '') html = strip_re.sub('', html) return html def analyze(self, content, content_type="text/raw", external_id=none): if not (content , len(content.strip())): return none self.processing_directives["contenttype"]=content_type if external_id: self.user_directives["externalid"] = external_id return calaisresponse(self.rest_post(content)) def analyze_url(self, url): f = urllib.urlopen(url) html = self.preprocess_html(f.read()) return self.analyze(html, content_type="text/html", external_id=url) def analyze_file(self, fn): import mimetypes try: filetype = mimetypes.guess_type(fn)[0] except: raise valueerror("can not determine file type '%s'" % fn) if filetype == "text/plain": content_type="text/raw" f = open(fn) content = f.read() f.close() elif filetype == "text/html": content_type = "text/html" f = open(fn) content = self.preprocess_html(f.read()) f.close() else: raise valueerror("only plaintext , html files supported. ") return self.analyze(content, content_type=content_type, external_id=fn) class calaisresponse(): """ encapsulates parsed calais response , provides easy pythonic access data. 
""" raw_response = none simplified_response = none def __init__(self, raw_result): try: self.raw_response = json.load(stringio(raw_result)) except: raise valueerror(raw_result) self.simplified_response = self._simplify_json(self.raw_response) self.__dict__['doc'] = self.raw_response['doc'] k,v in self.simplified_response.items(): self.__dict__[k] = v def _simplify_json(self, json): result = {} # first, resolve references element in json.values(): k,v in element.items(): if isinstance(v, unicode) , v.startswith("http://") , json.has_key(v): element[k] = json[v] k, v in json.items(): if v.has_key("_typegroup"): group = v["_typegroup"] if not result.has_key(group): result[group]=[] del v["_typegroup"] v["__reference"] = k result[group].append(v) return result def print_summary(self): if not hasattr(self, "doc"): return none info = self.doc['info'] print "calais request id: %s" % info['calaisrequestid'] if info.has_key('externalid'): print "external id: %s" % info['externalid'] if info.has_key('doctitle'): print "title: %s " % info['doctitle'] print "language: %s" % self.doc['meta']['language'] print "extractions: " k,v in self.simplified_response.items(): print "\t%d %s" % (len(v), k) def print_entities(self): if not hasattr(self, "entities"): return none item in self.entities: print "%s: %s (%.2f)" % (item['_type'], item['name'], item['relevance']) def print_topics(self): if not hasattr(self, "topics"): return none topic in self.topics: print topic['categoryname'] def print_relations(self): if not hasattr(self, "relations"): return none relation in self.relations: print relation['_type'] k,v in relation.items(): if not k.startswith("_"): if isinstance(v, unicode): print "\t%s:%s" % (k,v) elif isinstance(v, dict) , v.has_key('name'): print "\t%s:%s" % (k, v['name'])
The problem is solved. It was a little complicated because I was using an old version. Thank you.
Comments
Post a Comment