"""Extract Tweets from HTML

2017/11/18 -- code, python

I wanted tweets in this format:

    @jcfrancisco
    8:36 AM - 15 Nov 2017
    Not even Firefox Quantum can scroll thru The Outline's site without slowing down

    @RodericDay
    10:03 AM - 15 Nov 2017
    probably the worst designed site that I occasionally want to actually check out

So I wrote this code:
"""
import codecs
import os
import pickle
import re
from html.parser import HTMLParser


def request(url, cache=False, timeout=30):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    Args:
        url: Address to fetch with an HTTP GET.
        cache: When true, the pickled response is stored in the current
            working directory under a file named after the hex-encoded URL,
            and later calls for the same URL are served from that file.
        timeout: Seconds handed to ``requests.get``. Without a timeout a
            stalled server would block the script indefinitely.

    Returns:
        The response body as ``str``.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        UnicodeDecodeError: The body is not valid UTF-8.
    """
    # Imported here so the parser below stays usable on saved HTML even when
    # the third-party ``requests`` package is not installed.
    import requests

    def fetch():
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response

    if not cache:
        return fetch().content.decode()

    # Hex-encoding the URL yields a name with no path separators.
    fname = codecs.encode(url.encode(), "hex").decode()
    if not os.path.exists(fname):
        with open(fname, "wb") as fp:
            pickle.dump(fetch(), fp)
    with open(fname, "rb") as fp:
        # NOTE(security): unpickling runs arbitrary code from the file. This
        # is acceptable only because the cache is written by this function;
        # never point it at a file obtained from somewhere else.
        return pickle.load(fp).content.decode()


class MyHTMLParser(HTMLParser):
    """Print the author, timestamp and text of each tweet found in the HTML.

    For every start tag, matching is done by substring on the ``class``
    attribute:

    * ``tweet`` plus a ``data-screen-name`` attribute prints ``@<name>``;
    * ``tweet-timestamp`` prints the element's ``title`` attribute;
    * ``js-tweet-text-container`` arms the parser so that the next non-blank
      text node is printed, followed by an empty line.
    """

    # True from the moment a tweet-text container opens until its first
    # non-blank text node has been printed.
    listening = False

    def handle_starttag(self, tag, attrs):
        attrd = dict(attrs)
        # HTMLParser reports valueless attributes (``<div class>``) as None,
        # so ``.get('class', '')`` alone is not enough to guarantee a string.
        classs = attrd.get('class') or ''

        screen_name = attrd.get('data-screen-name')
        if 'tweet' in classs and screen_name is not None:
            print(f'@{screen_name}')

        if 'tweet-timestamp' in classs:
            # Skip timestamp elements without a title instead of crashing.
            title = attrd.get('title')
            if title is not None:
                print(title)

        if 'js-tweet-text-container' in classs:
            self.listening = True

    def handle_data(self, data):
        # Only the first non-blank text node after the container opens is
        # printed; text that follows a nested tag inside the tweet is not.
        if self.listening and data.strip():
            print(data)
            print()
            self.listening = False


def main():
    """Download the example tweet page and print the tweets it contains."""
    html = request("https://twitter.com/sirosenbaum/status/932789502472523777")
    parser = MyHTMLParser()
    parser.feed(html)


if __name__ == "__main__":
    main()