#!/usr/local/bin/python
"""Helpers for converting an XRI to IRI form and an IRI to URI form.

The conversion is done in two steps:

* ``convertXRItoIRI`` escapes ``%`` everywhere and ``#``, ``?`` and ``/``
  inside parenthesised cross-references.
* ``convertIRItoURI`` percent-encodes the UTF-8 form of the IRI and can
  optionally IDNA-encode a leading DNS-style authority.

The code runs unchanged on Python 2 and Python 3.
"""

import string
import urllib

try:
    from urllib import quote as _quote  # Python 2
except ImportError:
    from urllib.parse import quote as _quote  # Python 3

# Length of the "xri://" prefix; the authority starts at this index.
_AUTHORITY_START = 6

# An authority starting with one of these characters is not a DNS name.
_NON_DNS_LEADERS = "(@=+$!"

# Characters escaped inside a cross-reference, with their replacements.
_XREF_ESCAPES = (("#", "%23"), ("?", "%3F"), ("/", "%2F"))

# Characters that convertIRItoURI leaves unquoted.
_URI_SAFE = ":/?#@=+$()%*!"


def _escape_xref(xref):
    """Return *xref* with '#', '?' and '/' percent-encoded."""
    for char, replacement in _XREF_ESCAPES:
        xref = xref.replace(char, replacement)
    return xref


def _split_dns_authority(iri):
    """Split *iri* into (domain, remainder) if its authority is a DNS name.

    Returns None when the string has no character at the authority
    position or when that character marks a non-DNS authority.  When no
    "/" follows the authority, the domain runs to the end of the string
    and the remainder is empty.
    """
    if len(iri) <= _AUTHORITY_START:
        return None
    if iri[_AUTHORITY_START] in _NON_DNS_LEADERS:
        return None
    end = iri.find("/", _AUTHORITY_START)
    if end == -1:
        # No path component: everything after the prefix is the domain.
        end = len(iri)
    return iri[_AUTHORITY_START:end], iri[end:]


def _encode_domain(domain):
    """Return the IDNA (ASCII) form of *domain* as a native string."""
    encoded = domain.encode("idna")
    if not isinstance(encoded, str):
        # Python 3 gives bytes; the IDNA form is always plain ASCII.
        encoded = encoded.decode("ascii")
    return encoded


def convertXRItoIRI(xri):
    """Convert an XRI string to its IRI form.

    Every "%" becomes "%25".  Then, inside each outermost parenthesised
    cross-reference (nested ones included), "#", "?" and "/" become
    "%23", "%3F" and "%2F".  Text outside cross-references is otherwise
    left alone.

    Unbalanced parentheses are not reported as errors; a "(" that is
    never closed leaves the text after it unescaped.

    Args:
        xri: the XRI as a text string.

    Returns:
        The converted string.
    """
    iri = xri.replace("%", "%25")

    # Record (start, end) index pairs of the outermost cross-references.
    extents = []
    depth = 0
    start = 0
    for index, char in enumerate(iri):
        if char == "(":
            if depth == 0:
                start = index
            depth += 1
        elif char == ")":
            depth -= 1
            if depth == 0:
                extents.append((start, index))

    # Rebuild the string, escaping only the recorded cross-references.
    pieces = []
    cursor = 0
    for start, end in extents:
        pieces.append(iri[cursor:start])
        pieces.append(_escape_xref(iri[start:end + 1]))
        cursor = end + 1
    pieces.append(iri[cursor:])
    return "".join(pieces)


def convertIRItoURI(iri, idna=False):
    """Convert an IRI string to a percent-encoded URI string.

    The IRI is encoded as UTF-8 and every byte outside the unreserved
    set and ":/?#@=+$()%*!" is percent-encoded.

    Args:
        iri: the IRI, expected to start with "xri://".
        idna: when true and the authority does not start with one of
            "(@=+$!", the authority up to the first "/" is IDNA-encoded
            instead of percent-encoded, and the result is given a
            literal "xri://" prefix.

    Returns:
        The URI as a native string.
    """
    prefix = ""
    if idna:
        authority = _split_dns_authority(iri)
        if authority is not None:
            domain, iri = authority
            prefix = "xri://" + _encode_domain(domain)

    if not isinstance(iri, bytes):
        iri = iri.encode("utf-8")
    return prefix + _quote(iri, safe=_URI_SAFE)


def idna(iri):
    """Return *iri* with a DNS-style authority replaced by its IDNA form.

    The authority is the text between index 6 (just past "xri://") and
    the next "/", or the end of the string.  It is assumed to contain no
    cross-reference.  If the authority starts with one of "(@=+$!" or
    the string is too short to have one, *iri* is returned unchanged.
    """
    authority = _split_dns_authority(iri)
    if authority is None:
        return iri
    domain, remainder = authority
    return iri[:_AUTHORITY_START] + _encode_domain(domain) + remainder


def _main():
    """Convert a sample XRI and print the resulting URI."""
    testxri = (
        u"xri://(http://www.news\u1765.com)/%30test*here/"
        u"(+value/(+sub/thing))/blah/(+test)/foo/(+another)"
    )
    print("Converting %r to URI" % (testxri,))
    iri = convertXRItoIRI(testxri)
    uri = convertIRItoURI(iri, idna=True)
    print(uri)


if __name__ == "__main__":
    _main()