parser.py 807 B

12345678910111213141516171819202122232425262728
  1. #!/usr/bin/env python
  2. try:
  3. from html.parser import HTMLParser
  4. except ImportError:
  5. from HTMLParser import HTMLParser
  6. # Don't blame me for this mess: we can't use external libs, so HTMLParser is all we have.
  7. class WTParser(HTMLParser):
  8. def __init__(self):
  9. HTMLParser.__init__(self)
  10. self.meta = {}
  11. self.scripts = []
  12. def handle_starttag(self, tag, attrs):
  13. if tag == 'meta':
  14. m = {}
  15. for name, value in attrs:
  16. m[name] = value
  17. name = m.get('name') or m.get('property')
  18. if name:
  19. self.meta[name] = m.get('content', '')
  20. elif tag == 'script':
  21. for name, value in attrs:
  22. if name == 'src':
  23. self.scripts.append(value)
  24. return