python - Error with calais.py -


I tried to use the calais.py library and ran the following code:

# Minimal reproduction: build a client and analyze one web page.
# Assumes the calais.py listing below is importable / already in scope.
api_key = 'token'
calais = calais(api_key=api_key, submitter="my app")
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(calais.analyze_url('https://www.python.org/download/releases/2.5.1/'))

I get the following error:

*ValueError: invalid request format - request has missing or invalid parameters*

The source of calais.py is here:

""" python-calais v.1.4 -- python interface opencalais api author: jordan dimov (jdimov@mlke.net) last-update: 01/12/2009 """  import httplib, urllib, re import simplejson json stringio import stringio  params_xml = """ <c:params xmlns:c="http://s.opencalais.com/1/pred/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <c:processingdirectives %s> </c:processingdirectives> <c:userdirectives %s> </c:userdirectives> <c:externalmetadata %s> </c:externalmetadata> </c:params> """  strip_re = re.compile('<script.*?</script>|<noscript.*?</noscript>|<style.*?</style>', re.ignorecase)  __version__ = "1.4"  class appurlopener(urllib.fancyurlopener):     version = "mozilla/5.0 (x11; u; linux x86_64; en-us; rv:1.9.0.5) gecko/2008121623 ubuntu/8.10 (intrepid)firefox/3.0.5" # lie shamelessly wikipedia. urllib._urlopener = appurlopener()  class calais():     """     python class knows how talk opencalais api.  use analyze() , analyze_url() methods, return calaisresponse objects.       """     api_key = none     processing_directives = {"contenttype":"text/raw", "outputformat":"application/json", "reltagbaseurl":none, "calculaterelevancescore":"true", "enablemetadatatype":none, "discardmetadata":none, "omitoutputtingoriginaltext":"true"}     user_directives = {"allowdistribution":"false", "allowsearch":"false", "externalid":none}     external_metadata = {}      def __init__(self, api_key, submitter="python-calais client v.%s" % __version__):         self.api_key = api_key         self.user_directives["submitter"]=submitter      def _get_params_xml(self):         return params_xml % (" ".join('c:%s="%s"' % (k,v) (k,v) in self.processing_directives.items() if v), " ".join('c:%s="%s"' % (k,v) (k,v) in self.user_directives.items() if v), " ".join('c:%s="%s"' % (k,v) (k,v) in self.external_metadata.items() if v))      def rest_post(self, content):         params = urllib.urlencode({'licenseid':self.api_key, 'content':content, 'paramsxml':self._get_params_xml()})         headers 
= {"content-type":"application/x-www-form-urlencoded"}         conn = httplib.httpconnection("api.opencalais.com:80")         conn.request("post", "/enlighten/rest/", params, headers)         response = conn.getresponse()         data = response.read()         conn.close()         return (data)      def get_random_id(self):         """         creates random 10-character id submission.           """         import string         random import choice         chars = string.letters + string.digits         np = ""         in range(10):             np = np + choice(chars)         return np      def get_content_id(self, text):         """         creates sha1 hash of text of submission.           """         import hashlib         h = hashlib.sha1()         h.update(text)         return h.hexdigest()      def preprocess_html(self, html):         html = html.replace('\n', '')         html = strip_re.sub('', html)         return html      def analyze(self, content, content_type="text/raw", external_id=none):         if not (content ,  len(content.strip())):             return none         self.processing_directives["contenttype"]=content_type         if external_id:             self.user_directives["externalid"] = external_id         return calaisresponse(self.rest_post(content))      def analyze_url(self, url):         f = urllib.urlopen(url)         html = self.preprocess_html(f.read())         return self.analyze(html, content_type="text/html", external_id=url)      def analyze_file(self, fn):         import mimetypes         try:             filetype = mimetypes.guess_type(fn)[0]         except:             raise valueerror("can not determine file type '%s'" % fn)         if filetype == "text/plain":             content_type="text/raw"             f = open(fn)             content = f.read()             f.close()         elif filetype == "text/html":             content_type = "text/html"             f = open(fn)             content = self.preprocess_html(f.read())     
        f.close()         else:             raise valueerror("only plaintext , html files supported.  ")         return self.analyze(content, content_type=content_type, external_id=fn)  class calaisresponse():     """     encapsulates parsed calais response , provides easy pythonic access data.     """     raw_response = none     simplified_response = none      def __init__(self, raw_result):         try:             self.raw_response = json.load(stringio(raw_result))         except:             raise valueerror(raw_result)         self.simplified_response = self._simplify_json(self.raw_response)         self.__dict__['doc'] = self.raw_response['doc']         k,v in self.simplified_response.items():             self.__dict__[k] = v      def _simplify_json(self, json):         result = {}         # first, resolve references         element in json.values():             k,v in element.items():                 if isinstance(v, unicode) , v.startswith("http://") , json.has_key(v):                     element[k] = json[v]         k, v in json.items():             if v.has_key("_typegroup"):                 group = v["_typegroup"]                 if not result.has_key(group):                     result[group]=[]                 del v["_typegroup"]                 v["__reference"] = k                 result[group].append(v)         return result      def print_summary(self):         if not hasattr(self, "doc"):             return none         info = self.doc['info']         print "calais request id: %s" % info['calaisrequestid']         if info.has_key('externalid'):              print "external id: %s" % info['externalid']         if info.has_key('doctitle'):             print "title: %s " % info['doctitle']         print "language: %s" % self.doc['meta']['language']         print "extractions: "         k,v in self.simplified_response.items():             print "\t%d %s" % (len(v), k)      def print_entities(self):         if not hasattr(self, "entities"):             
return none         item in self.entities:             print "%s: %s (%.2f)" % (item['_type'], item['name'], item['relevance'])      def print_topics(self):         if not hasattr(self, "topics"):             return none         topic in self.topics:             print topic['categoryname']      def print_relations(self):         if not hasattr(self, "relations"):             return none         relation in self.relations:             print relation['_type']             k,v in relation.items():                 if not k.startswith("_"):                     if isinstance(v, unicode):                         print "\t%s:%s" % (k,v)                     elif isinstance(v, dict) , v.has_key('name'):                         print "\t%s:%s" % (k, v['name']) 

The problem is solved. It was a little complicated because I was using an old version. Thank you.


Comments

Popular posts from this blog

Magento/PHP - Get phones on all members in a customer group -

php - Bypass Geo Redirect for specific directories -

php - .htaccess mod_rewrite for dynamic url which has domain names -