403WebShell

403Webshell

Server IP : 104.21.25.180 / Your IP : 104.23.197.122
Web Server : Apache/2.4.37
System : Linux almalinux.duckdns.org 4.18.0-553.111.1.el8_10.x86_64 #1 SMP Sun Mar 8 20:06:07 EDT 2026 x86_64
User : ricodeal ( 1046)
PHP Version : 7.4.33
Disable Function : NONE
MySQL : OFF | cURL : ON | WGET : ON | Perl : ON | Python : ON | Sudo : ON | Pkexec : ON
Directory : /lib64/python3.6/site-packages/gnome_abrt/url/

Upload File :

[ Back ]

Current File : /lib64/python3.6/site-packages/gnome_abrt/url/urltitle.py

# coding=UTF-8

## Copyright (C) 2013 ABRT team <abrt-devel-list@redhat.com>
## Copyright (C) 2013 Red Hat, Inc.

## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA  02110-1335  USA

import sys
import threading
import logging
from queue import Queue
from urllib.request import urlopen
from html.parser import HTMLParser


def get_url_title(url):
    """Returns a title of the url

    Function expects that the url points to a HTML page with the title element
    in the head element. Tries to download the page and return a value of the
    title.

    Returns None when the page cannot be downloaded or read, when its content
    cannot be decoded with the charset declared by the server, or when the
    parser finds no title.
    """

    try:
        # The context manager closes the response even if read() fails.
        with urlopen(url) as urlio:
            # Python 3 message objects have no getparam(); the charset of the
            # Content-Type header is exposed through get_content_charset().
            encoding = urlio.info().get_content_charset()
            data = urlio.read()
    except IOError as ex:
        logging.debug("{1} ('{0}')".format(url, str(ex)))
        return None

    try:
        if encoding:
            data = data.decode(encoding)
        else:
            # HTMLParser.feed() accepts only str. Without a declared charset
            # fall back to UTF-8 and replace undecodable bytes, so that the
            # title can still be found.
            data = data.decode("utf-8", "replace")
    except (UnicodeError, LookupError) as ex:
        # LookupError is raised when the declared charset names a codec that
        # Python does not know.
        logging.debug("{1} ('{0}')".format(url, str(ex)))
        return None

    return HTMLTitleGetter.parse(data)


class HTMLTitleGetter(HTMLParser):
    """HTML parser which collects the text of the title element

    Only a title element opened inside the head element is taken into
    account. Use the static method parse() to get the title of a document.
    """

    def __init__(self):
        HTMLParser.__init__(self)

        # True while the parser is inside the head element
        self._tag_head = False
        # True while the parser is inside the title element of the head
        self._tag_title = False
        # Text collected so far; None until some title text is seen
        self._title = None

    def handle_starttag(self, tag, attrs):
        """Notices the opening of the head and title elements
        """
        if not self._tag_head and tag.lower() == "head":
            self._tag_head = True

        # The title is recognized only inside the head element
        if self._tag_head and not self._tag_title and tag.lower() == "title":
            self._tag_title = True

    def handle_endtag(self, tag):
        """Notices the closing of the head and title elements
        """
        if self._tag_head and tag.lower() == "head":
            self._tag_head = False

        if self._tag_head and self._tag_title and tag.lower() == "title":
            self._tag_title = False

    def handle_data(self, data):
        """Appends the text to the title while inside the title element
        """
        if not self._tag_title:
            return

        self._title = self._title + data if self._title else data

    def error(self, message):
        """Turns a parser error into ValueError
        """
        raise ValueError(message)

    @staticmethod
    def parse(data):
        """Feeds data to a new parser and returns the collected title

        Any exception raised while parsing is only logged; whatever was
        collected before the failure is returned (None if nothing).
        """
        getter = HTMLTitleGetter()
        try:
            getter.feed(data)
        #pylint: disable=W0703
        except Exception as ex:
            # Hopefully title is parsed correctly
            logging.debug("{1} ('{0}')".format(data, str(ex)))

        return getter._title


class GetURLTitleThread(threading.Thread):
    """Thread which gets the title of a single url

    When the thread finishes its work, a tuple (url, title) is put into the
    queue passed to the constructor.
    """

    def __init__(self, url, que):
        threading.Thread.__init__(self)

        self._url, self._que = url, que

    def run(self):
        """Gets the title and puts it into the queue together with the url
        """
        self._que.put((self._url, get_url_title(self._url)))


if __name__ == "__main__":
    # Usage: urltitle.py [--threads] [URL ...]
    ARGS = sys.argv[1:]
    USE_THREADS = bool(ARGS) and ARGS[0] == "--threads"

    # The default url is used only if no argument was passed at all
    URLS = ["http://docs.python.org/2/library/htmlparser.html"]
    if ARGS:
        URLS = ARGS[1:] if USE_THREADS else ARGS

    if not USE_THREADS:
        for u in URLS:
            print(get_url_title(u) or u)
    else:
        QUEUE = Queue()
        THREADS = [GetURLTitleThread(u, QUEUE) for u in URLS]
        for t in THREADS:
            t.start()

        # All results are in the queue once every thread has finished
        for t in THREADS:
            t.join()

        while not QUEUE.empty():
            ORIG_URL, GOT_TITLE = QUEUE.get()
            # Fall back to the url if no title was obtained
            print(GOT_TITLE or ORIG_URL)

Youez - 2016 - github.com/yon3zu
LinuXploit