Module lib.server.urlparser

Parse a URL and extract all of its components

Expand source code
""" Parse url and get all content """
# Distributed under Pycameresp License
# Copyright (c) 2023 Remi BERTHOLET
import collections
import tools.strings
# Example: url = b"http://user:password@192.168.1.28:8080/toto/json.htm?type=command&param=switchlight&idx=4&switchcmd=On&error=mon+error#my_anchor"

class UrlParser:
        """ Parse a url given as bytes and expose its components.

        After parsing, protocol, user, password, host, port, path and anchor
        hold bytes, params holds an OrderedDict of the decoded query
        parameters, url holds the original input and method holds the http
        method when an http request line was parsed.
        """
        def __init__(self, url, http=False):
                """ Parser constructor : reset every field then parse url.

                url  : url to parse (bytes), or an http request line when http is True
                http : True if url is a request line such as b"GET /path?query HTTP/1.1"
                """
                self.protocol = b""
                self.user     = b""
                self.password = b""
                self.host     = b""
                self.port     = b""
                self.path     = b""
                self.params   = collections.OrderedDict()
                self.anchor   = b""
                self.url      = url
                self.method   = b""
                self.parse(url, http)

        def parse(self, url, http=False):
                """ Parse the url and store each component in the instance.

                url  : url to parse (bytes), or an http request line when http is True
                http : when True, url is split on spaces; the first word is kept as
                       the method and the second one is parsed as a path on
                       http://localhost (IndexError if there is no second word)
                """
                self.protocol = b""
                self.user     = b""
                self.password = b""
                self.host     = b""
                self.port     = b""
                self.path     = b""
                self.params   = collections.OrderedDict()
                self.anchor   = b""
                self.method   = b""
                self.url      = url

                if http:
                        spl = url.split(b" ")
                        url = b"http://localhost%s"%spl[1]
                        self.method = spl[0]

                self.protocol, part_url  = UrlParser.parse_protocol(url)
                self.anchor, part_url    = UrlParser.parse_anchor(part_url)
                user_path_port, params   = UrlParser.parse_path_param(part_url)

                self.params              = UrlParser.parse_params(params)
                if self.protocol != b"" and user_path_port.find(b"/") < 0:
                        # With a protocol and no "/", everything left is the
                        # user/host/port part and the path is empty
                        user_host, self.path = user_path_port, b""
                else:
                        user_host, self.path = UrlParser.parse_host(user_path_port)
                self.path = UrlParser.unquote(self.path)
                user_password, host_port = UrlParser.parse_user_host(user_host)
                self.user, self.password = UrlParser.parse_user_password(user_password)
                self.host, self.port     = UrlParser.parse_host_port(host_port)

        @staticmethod
        def parse_protocol(part_url):
                """ Parse and get the protocol.

                Returns (protocol, remaining url). Only the first "://" is a
                separator, later occurrences stay in the remaining url.
                """
                if part_url == b"":
                        return b"", b""
                protocol = b""
                spl = part_url.split(b"://", 1)
                if len(spl) > 1:
                        protocol, part_url = spl
                return protocol, part_url

        @staticmethod
        def parse_anchor(part_url):
                """ Parse and get the anchor.

                Returns (anchor, url without anchor). The anchor starts after the
                first "#" and keeps any following "#".
                """
                if part_url == b"":
                        return b"", b""
                anchor = b""
                spl = part_url.split(b"#", 1)
                if len(spl) > 1:
                        part_url, anchor = spl
                return anchor, part_url

        @staticmethod
        def parse_host(part_url):
                """ Parse and get the host.

                Returns (host, path). The host is what precedes the first "/";
                when there is no "/" or it is the first character, the host is
                empty and the input is returned unchanged as the path.
                """
                if part_url == b"":
                        return b"", b""
                pos = part_url.find(b"/")
                if pos > 0:
                        return part_url[:pos], part_url[pos:]
                return b"", part_url

        @staticmethod
        def parse_user_host(part_url):
                """ Parse and get the user and host.

                Returns (user part, host part) split around "@"; the user part is
                empty when there is no "@".
                """
                if part_url == b"":
                        return b"", b""
                spl = part_url.split(b"@")
                if len(spl) > 1:
                        return spl[0], spl[1]
                return b"", part_url

        @staticmethod
        def parse_user_password(user_password):
                """ Parse and get the user and password.

                Returns (user, password). Only the first ":" separates them, so a
                password may contain ":"; without ":" the whole input is the user.
                """
                user     = b""
                password = b""
                if user_password != b"":
                        spl = user_password.split(b":", 1)
                        user = spl[0]
                        if len(spl) > 1:
                                password = spl[1]
                return user,password

        @staticmethod
        def parse_host_port(host_port):
                """ Parse and get the host and port.

                Returns (host, port) split around ":"; the port is empty when
                there is no ":".
                """
                host = b""
                port = b""
                if host_port != b"":
                        spl = host_port.split(b":")
                        if len(spl) > 1:
                                host = spl[0]
                                port = spl[1]
                        else:
                                host = host_port
                return host,port

        @staticmethod
        def parse_path_param(path_param):
                """ Parse and get the path and parameters.

                Returns (path, raw query string). Only the first "?" is a
                separator, later "?" stay in the query string.
                """
                path = b""
                params = b""
                if path_param != b"":
                        spl = path_param.split(b"?", 1)
                        path = spl[0]
                        if len(spl) > 1:
                                params = spl[1]
                return path, params

        @staticmethod
        def parse_params(part_url):
                """ Parse and get the parameters.

                part_url : raw query string, pairs separated by "&"
                Returns an OrderedDict of unquoted names and values. A name
                without "=" gets the value True, a repeated name collects its
                values in a list, except that b"0" followed by an empty value
                becomes b"1".
                """
                params = collections.OrderedDict()
                if part_url != b"":
                        for pair in part_url.split(b"&"):
                                param = [UrlParser.unquote(x) for x in pair.split(b"=", 1)]
                                if len(param) == 1:
                                        param.append(True)
                                name, value = param
                                previous_value = params.get(name)
                                if previous_value is None:
                                        params[name] = value
                                elif previous_value == b'0' and value == b'':
                                        params[name] = b'1'
                                else:
                                        if not isinstance(previous_value, list):
                                                params[name] = [previous_value]
                                        params[name].append(value)
                return params

        @staticmethod
        def unquote(url):
                """ Decode "+" as space and "%XX" escapes in a url part.

                A "%" that is not followed by exactly two hexadecimal digits is
                kept unchanged.
                """
                hexdigits = b"0123456789abcdefABCDEF"
                chunks = url.replace(b'+', b' ').split(b'%')
                result = chunks[0]
                for chunk in chunks[1:]:
                        # One-byte slices are used so the test is a bytes-in-bytes search
                        if len(chunk) >= 2 and chunk[0:1] in hexdigits and chunk[1:2] in hexdigits:
                                result += bytes([int(chunk[:2], 16)]) + chunk[2:]
                        else:
                                result += b'%' + chunk
                return result

        @staticmethod
        def quote(text):
                """ Encode the characters not kept as is in an url.

                Letters, digits, ".", "_" and "," are copied, a space becomes "+"
                and every other byte becomes "%XX" (upper case hexadecimal).
                """
                parts = []
                for char in text:
                        # Iterating over bytes yields integers
                        if char in (0x2E, 0x5F, 0x2C) or 0x30 <= char <= 0x39 or 0x41 <= char <= 0x5A or 0x61 <= char <= 0x7A:
                                parts.append(bytes([char]))
                        elif char == 0x20:
                                parts.append(b"+")
                        else:
                                parts.append(b"%%%02X"%char)
                return b"".join(parts)

        @staticmethod
        def adapt_value(value):
                """ Adapt value to url format.

                bool gives b"1" or b"0", int and float are formatted with %d and
                %f, bytes are returned unchanged and anything else is passed to
                tools.strings.tobytes.
                """
                # bool is tested first because it is a subclass of int
                if isinstance(value, bool):
                        return b"1" if value else b"0"
                if isinstance(value, int):
                        return b"%d"%value
                if isinstance(value, float):
                        return b"%f"%value
                if isinstance(value, bytes):
                        return value
                return tools.strings.tobytes(value)

        def get_params(self):
                """ Get the parameters formatted for an url.

                Returns the quoted "name=value" pairs joined with "&". A list
                value (repeated parameter) produces one pair per item.
                """
                pairs = []
                for key, value in self.params.items():
                        name = UrlParser.quote(UrlParser.adapt_value(key))
                        values = value if isinstance(value, list) else [value]
                        for item in values:
                                pairs.append(name + b"=" + UrlParser.quote(UrlParser.adapt_value(item)))
                return b"&".join(pairs)

        def __repr__(self):
                """ Convert the parse result into a string, one component per line """
                result  = b"url      : '%s'\n"%self.url
                result += b"protocol : '%s'\n"%self.protocol
                result += b"host     : '%s'\n"%self.host
                result += b"port     : '%s'\n"%self.port
                result += b"user     : '%s'\n"%self.user
                result += b"password : '%s'\n"%self.password
                result += b"path     : '%s'\n"%self.path
                result += b"params   : %s\n"%tools.strings.tobytes(str(tools.strings.tostrings(self.params)))
                result += b"anchor   : '%s'\n"%self.anchor
                return tools.strings.tostrings(result)

Classes

class UrlParser (url, http=False)

Parse url

Parser constructor

Expand source code
class UrlParser:
        """ Parse a url given as bytes and expose its components.

        After parsing, protocol, user, password, host, port, path and anchor
        hold bytes, params holds an OrderedDict of the decoded query
        parameters, url holds the original input and method holds the http
        method when an http request line was parsed.
        """
        def __init__(self, url, http=False):
                """ Parser constructor : reset every field then parse url.

                url  : url to parse (bytes), or an http request line when http is True
                http : True if url is a request line such as b"GET /path?query HTTP/1.1"
                """
                self.protocol = b""
                self.user     = b""
                self.password = b""
                self.host     = b""
                self.port     = b""
                self.path     = b""
                self.params   = collections.OrderedDict()
                self.anchor   = b""
                self.url      = url
                self.method   = b""
                self.parse(url, http)

        def parse(self, url, http=False):
                """ Parse the url and store each component in the instance.

                url  : url to parse (bytes), or an http request line when http is True
                http : when True, url is split on spaces; the first word is kept as
                       the method and the second one is parsed as a path on
                       http://localhost (IndexError if there is no second word)
                """
                self.protocol = b""
                self.user     = b""
                self.password = b""
                self.host     = b""
                self.port     = b""
                self.path     = b""
                self.params   = collections.OrderedDict()
                self.anchor   = b""
                self.method   = b""
                self.url      = url

                if http:
                        spl = url.split(b" ")
                        url = b"http://localhost%s"%spl[1]
                        self.method = spl[0]

                self.protocol, part_url  = UrlParser.parse_protocol(url)
                self.anchor, part_url    = UrlParser.parse_anchor(part_url)
                user_path_port, params   = UrlParser.parse_path_param(part_url)

                self.params              = UrlParser.parse_params(params)
                if self.protocol != b"" and user_path_port.find(b"/") < 0:
                        # With a protocol and no "/", everything left is the
                        # user/host/port part and the path is empty
                        user_host, self.path = user_path_port, b""
                else:
                        user_host, self.path = UrlParser.parse_host(user_path_port)
                self.path = UrlParser.unquote(self.path)
                user_password, host_port = UrlParser.parse_user_host(user_host)
                self.user, self.password = UrlParser.parse_user_password(user_password)
                self.host, self.port     = UrlParser.parse_host_port(host_port)

        @staticmethod
        def parse_protocol(part_url):
                """ Parse and get the protocol.

                Returns (protocol, remaining url). Only the first "://" is a
                separator, later occurrences stay in the remaining url.
                """
                if part_url == b"":
                        return b"", b""
                protocol = b""
                spl = part_url.split(b"://", 1)
                if len(spl) > 1:
                        protocol, part_url = spl
                return protocol, part_url

        @staticmethod
        def parse_anchor(part_url):
                """ Parse and get the anchor.

                Returns (anchor, url without anchor). The anchor starts after the
                first "#" and keeps any following "#".
                """
                if part_url == b"":
                        return b"", b""
                anchor = b""
                spl = part_url.split(b"#", 1)
                if len(spl) > 1:
                        part_url, anchor = spl
                return anchor, part_url

        @staticmethod
        def parse_host(part_url):
                """ Parse and get the host.

                Returns (host, path). The host is what precedes the first "/";
                when there is no "/" or it is the first character, the host is
                empty and the input is returned unchanged as the path.
                """
                if part_url == b"":
                        return b"", b""
                pos = part_url.find(b"/")
                if pos > 0:
                        return part_url[:pos], part_url[pos:]
                return b"", part_url

        @staticmethod
        def parse_user_host(part_url):
                """ Parse and get the user and host.

                Returns (user part, host part) split around "@"; the user part is
                empty when there is no "@".
                """
                if part_url == b"":
                        return b"", b""
                spl = part_url.split(b"@")
                if len(spl) > 1:
                        return spl[0], spl[1]
                return b"", part_url

        @staticmethod
        def parse_user_password(user_password):
                """ Parse and get the user and password.

                Returns (user, password). Only the first ":" separates them, so a
                password may contain ":"; without ":" the whole input is the user.
                """
                user     = b""
                password = b""
                if user_password != b"":
                        spl = user_password.split(b":", 1)
                        user = spl[0]
                        if len(spl) > 1:
                                password = spl[1]
                return user,password

        @staticmethod
        def parse_host_port(host_port):
                """ Parse and get the host and port.

                Returns (host, port) split around ":"; the port is empty when
                there is no ":".
                """
                host = b""
                port = b""
                if host_port != b"":
                        spl = host_port.split(b":")
                        if len(spl) > 1:
                                host = spl[0]
                                port = spl[1]
                        else:
                                host = host_port
                return host,port

        @staticmethod
        def parse_path_param(path_param):
                """ Parse and get the path and parameters.

                Returns (path, raw query string). Only the first "?" is a
                separator, later "?" stay in the query string.
                """
                path = b""
                params = b""
                if path_param != b"":
                        spl = path_param.split(b"?", 1)
                        path = spl[0]
                        if len(spl) > 1:
                                params = spl[1]
                return path, params

        @staticmethod
        def parse_params(part_url):
                """ Parse and get the parameters.

                part_url : raw query string, pairs separated by "&"
                Returns an OrderedDict of unquoted names and values. A name
                without "=" gets the value True, a repeated name collects its
                values in a list, except that b"0" followed by an empty value
                becomes b"1".
                """
                params = collections.OrderedDict()
                if part_url != b"":
                        for pair in part_url.split(b"&"):
                                param = [UrlParser.unquote(x) for x in pair.split(b"=", 1)]
                                if len(param) == 1:
                                        param.append(True)
                                name, value = param
                                previous_value = params.get(name)
                                if previous_value is None:
                                        params[name] = value
                                elif previous_value == b'0' and value == b'':
                                        params[name] = b'1'
                                else:
                                        if not isinstance(previous_value, list):
                                                params[name] = [previous_value]
                                        params[name].append(value)
                return params

        @staticmethod
        def unquote(url):
                """ Decode "+" as space and "%XX" escapes in a url part.

                A "%" that is not followed by exactly two hexadecimal digits is
                kept unchanged.
                """
                hexdigits = b"0123456789abcdefABCDEF"
                chunks = url.replace(b'+', b' ').split(b'%')
                result = chunks[0]
                for chunk in chunks[1:]:
                        # One-byte slices are used so the test is a bytes-in-bytes search
                        if len(chunk) >= 2 and chunk[0:1] in hexdigits and chunk[1:2] in hexdigits:
                                result += bytes([int(chunk[:2], 16)]) + chunk[2:]
                        else:
                                result += b'%' + chunk
                return result

        @staticmethod
        def quote(text):
                """ Encode the characters not kept as is in an url.

                Letters, digits, ".", "_" and "," are copied, a space becomes "+"
                and every other byte becomes "%XX" (upper case hexadecimal).
                """
                parts = []
                for char in text:
                        # Iterating over bytes yields integers
                        if char in (0x2E, 0x5F, 0x2C) or 0x30 <= char <= 0x39 or 0x41 <= char <= 0x5A or 0x61 <= char <= 0x7A:
                                parts.append(bytes([char]))
                        elif char == 0x20:
                                parts.append(b"+")
                        else:
                                parts.append(b"%%%02X"%char)
                return b"".join(parts)

        @staticmethod
        def adapt_value(value):
                """ Adapt value to url format.

                bool gives b"1" or b"0", int and float are formatted with %d and
                %f, bytes are returned unchanged and anything else is passed to
                tools.strings.tobytes.
                """
                # bool is tested first because it is a subclass of int
                if isinstance(value, bool):
                        return b"1" if value else b"0"
                if isinstance(value, int):
                        return b"%d"%value
                if isinstance(value, float):
                        return b"%f"%value
                if isinstance(value, bytes):
                        return value
                return tools.strings.tobytes(value)

        def get_params(self):
                """ Get the parameters formatted for an url.

                Returns the quoted "name=value" pairs joined with "&". A list
                value (repeated parameter) produces one pair per item.
                """
                pairs = []
                for key, value in self.params.items():
                        name = UrlParser.quote(UrlParser.adapt_value(key))
                        values = value if isinstance(value, list) else [value]
                        for item in values:
                                pairs.append(name + b"=" + UrlParser.quote(UrlParser.adapt_value(item)))
                return b"&".join(pairs)

        def __repr__(self):
                """ Convert the parse result into a string, one component per line """
                result  = b"url      : '%s'\n"%self.url
                result += b"protocol : '%s'\n"%self.protocol
                result += b"host     : '%s'\n"%self.host
                result += b"port     : '%s'\n"%self.port
                result += b"user     : '%s'\n"%self.user
                result += b"password : '%s'\n"%self.password
                result += b"path     : '%s'\n"%self.path
                result += b"params   : %s\n"%tools.strings.tobytes(str(tools.strings.tostrings(self.params)))
                result += b"anchor   : '%s'\n"%self.anchor
                return tools.strings.tostrings(result)

Static methods

def adapt_value(value)

Adapt value to url format

Expand source code
@staticmethod
def adapt_value(value):
        """ Convert a value to the bytes form used in an url.

        int is formatted with %d, bool gives b"1" or b"0", float is formatted
        with %f, bytes are returned unchanged and any other type is passed
        to tools.strings.tobytes.
        """
        kind = type(value)
        if kind == type(0):
                return b"%d"%value
        if kind == type(True):
                return b"1" if value else b"0"
        if kind == type(0.):
                return b"%f"%value
        if kind == type(b""):
                return value
        # str and every remaining type go through the same conversion
        return tools.strings.tobytes(value)
def parse_anchor(part_url)

Parse and get the anchor

Expand source code
@staticmethod
def parse_anchor(part_url):
        """ Parse and get the anchor.

        part_url : url part (bytes) that may end with "#anchor"
        Returns (anchor, url without anchor). The anchor starts after the
        first "#" and keeps any following "#"; it is empty when there is
        no "#".
        """
        if part_url == b"":
                return b"", b""
        anchor = b""
        # maxsplit=1 : a second "#" belongs to the anchor and must not be dropped
        spl = part_url.split(b"#", 1)
        if len(spl) > 1:
                part_url, anchor = spl
        return anchor, part_url
def parse_host(part_url)

Parse and get the host

Expand source code
@staticmethod
def parse_host(part_url):
        """ Separate the host part from the path at the first "/".

        Returns (host, path). When there is no "/" or when it is the first
        character, the host is empty and the input is returned unchanged
        as the path.
        """
        if part_url == b"":
                return b"", b""
        slash = part_url.find(b"/")
        if slash <= 0:
                return b"", part_url
        return part_url[:slash], part_url[slash:]
def parse_host_port(host_port)

Parse and get the host and port

Expand source code
@staticmethod
def parse_host_port(host_port):
        """ Separate the host from the port around ":".

        Returns (host, port); the port is empty when the input has no ":".
        With several ":" only the first two pieces are returned.
        """
        if host_port == b"":
                return b"", b""
        pieces = host_port.split(b":")
        if len(pieces) == 1:
                return host_port, b""
        return pieces[0], pieces[1]
def parse_params(part_url)

Parse and get the parameters

Expand source code
@staticmethod
def parse_params(part_url):
        """ Build the ordered dictionary of query parameters.

        part_url : raw query string, pairs separated by "&"
        Names and values are unquoted. A name without "=" gets the value
        True. A repeated name collects its values in a list, except that a
        stored b"0" followed by an empty value is replaced by b"1".
        """
        result = collections.OrderedDict()
        if part_url == b"":
                return result
        for item in part_url.split(b"&"):
                fields = [UrlParser.unquote(field) for field in item.split(b"=", 1)]
                name = fields[0]
                # No "=" in the pair : the parameter is a flag
                value = fields[1] if len(fields) > 1 else True
                current = result.get(name)
                if current is None:
                        result[name] = value
                elif current == b'0' and value == b'':
                        result[name] = b'1'
                else:
                        if not isinstance(current, list):
                                result[name] = [current]
                        result[name].append(value)
        return result
def parse_path_param(path_param)

Parse and get the path and parameters

Expand source code
@staticmethod
def parse_path_param(path_param):
        """ Parse and get the path and parameters.

        path_param : url part (bytes) made of a path optionally followed by "?query"
        Returns (path, raw query string). Only the first "?" is a separator,
        a later "?" stays in the query string; the query string is empty
        when there is no "?".
        """
        path = b""
        params = b""
        if path_param != b"":
                # maxsplit=1 : keep a "?" that appears inside the query string
                spl = path_param.split(b"?", 1)
                path = spl[0]
                if len(spl) > 1:
                        params = spl[1]
        return path, params
def parse_protocol(part_url)

Parse and get the protocol

Expand source code
@staticmethod
def parse_protocol(part_url):
        """ Parse and get the protocol.

        part_url : url (bytes) optionally starting with "protocol://"
        Returns (protocol, remaining url). Only the first "://" is a
        separator, so a "://" found later (for example in a parameter value)
        stays in the remaining url; the protocol is empty when there is no
        "://".
        """
        if part_url == b"":
                return b"", b""
        protocol = b""
        # maxsplit=1 : do not lose what follows a second "://"
        spl = part_url.split(b"://", 1)
        if len(spl) > 1:
                protocol, part_url = spl
        return protocol, part_url
def parse_user_host(part_url)

Parse and get the user and host

Expand source code
@staticmethod
def parse_user_host(part_url):
        """ Separate the user part from the host part around "@".

        Returns (user part, host part); the user part is empty when the
        input has no "@". With several "@" only the first two pieces are
        returned.
        """
        if part_url == b"":
                return b"", b""
        pieces = part_url.split(b"@")
        if len(pieces) == 1:
                return b"", part_url
        return pieces[0], pieces[1]
def parse_user_password(user_password)

Parse and get the user and password

Expand source code
@staticmethod
def parse_user_password(user_password):
        """ Parse and get the user and password.

        user_password : b"user:password" or b"user"
        Returns (user, password). Only the first ":" separates them, so the
        password may itself contain ":". Without ":" the whole input is the
        user and the password is empty.
        """
        user     = b""
        password = b""
        if user_password != b"":
                spl = user_password.split(b":", 1)
                # The user is kept even when no password follows
                user = spl[0]
                if len(spl) > 1:
                        password = spl[1]
        return user,password
def quote(text)

Percent-encode the characters of the string that are not supported in a URL

Expand source code
@staticmethod
def quote(text):
        """ Encode the characters not kept as is in an url.

        text : bytes to encode
        Letters, digits, ".", "_" and "," are copied, a space becomes "+" and
        every other byte becomes "%XX" (upper case hexadecimal).
        """
        parts = []
        for char in text:
                # Iterating over bytes yields integers, so compare with codes.
                # The ranges are inclusive to keep "9", "Z" and "z" unescaped.
                if char in (0x2E, 0x5F, 0x2C) or 0x30 <= char <= 0x39 or 0x41 <= char <= 0x5A or 0x61 <= char <= 0x7A:
                        parts.append(bytes([char]))
                elif char == 0x20:
                        parts.append(b"+")
                else:
                        parts.append(b"%%%02X"%char)
        return b"".join(parts)
def unquote(url)

Decode the URL-encoded special characters ("+" and "%XX" escapes) in a string

Expand source code
@staticmethod
def unquote(url):
        """ Decode "+" as space and "%XX" escapes in a url part.

        url : bytes to decode
        A "%" that is not followed by exactly two hexadecimal digits is kept
        unchanged instead of being decoded from a partial or padded number.
        """
        hexdigits = b"0123456789abcdefABCDEF"
        chunks = url.replace(b'+', b' ').split(b'%')
        # The first chunk precedes any "%", each following one starts after a "%"
        result = chunks[0]
        for chunk in chunks[1:]:
                # One-byte slices are used so the test is a bytes-in-bytes search
                if len(chunk) >= 2 and chunk[0:1] in hexdigits and chunk[1:2] in hexdigits:
                        result += bytes([int(chunk[:2], 16)]) + chunk[2:]
                else:
                        result += b'%' + chunk
        return result

Methods

def get_params(self)

Get the parameters formatted for a URL

Expand source code
def get_params(self):
        """ Get the parameters formatted for an url.

        Returns the quoted "name=value" pairs of self.params joined with "&",
        or empty bytes when there is no parameter. A list value, as stored
        by parse_params for a repeated name, produces one pair per item.
        """
        pairs = []
        for key, value in self.params.items():
                name = UrlParser.quote(UrlParser.adapt_value(key))
                values = value if isinstance(value, list) else [value]
                for item in values:
                        pairs.append(name + b"=" + UrlParser.quote(UrlParser.adapt_value(item)))
        return b"&".join(pairs)
def parse(self, url, http=False)

Parse the url

Expand source code
def parse(self, url, http=False):
        """ Parse the url and store each component in the instance.

        url  : url to parse (bytes), or an http request line when http is True
        http : when True, url is split on spaces; the first word is kept as
               the method and the second one is parsed as a path on
               http://localhost (IndexError if there is no second word)
        """
        self.protocol = b""
        self.user     = b""
        self.password = b""
        self.host     = b""
        self.port     = b""
        self.path     = b""
        self.params   = collections.OrderedDict()
        self.anchor   = b""
        self.method   = b""
        self.url      = url

        if http:
                spl = url.split(b" ")
                url = b"http://localhost%s"%spl[1]
                self.method = spl[0]

        self.protocol, part_url  = UrlParser.parse_protocol(url)
        self.anchor, part_url    = UrlParser.parse_anchor(part_url)
        user_path_port, params   = UrlParser.parse_path_param(part_url)

        self.params              = UrlParser.parse_params(params)
        if self.protocol != b"" and user_path_port.find(b"/") < 0:
                # With a protocol and no "/", everything left is the
                # user/host/port part and the path is empty
                user_host, self.path = user_path_port, b""
        else:
                user_host, self.path = UrlParser.parse_host(user_path_port)
        self.path = UrlParser.unquote(self.path)
        user_password, host_port = UrlParser.parse_user_host(user_host)
        self.user, self.password = UrlParser.parse_user_password(user_password)
        self.host, self.port     = UrlParser.parse_host_port(host_port)