Twitter streaming API with IPython and Mongo DB


#1

Hello, I am new to coding and trying to develop a Twitter application to stream tweets into a Mongo DB database.
I think I'm nearly there, but when I try to find my 'collection' in the database I get a 'Name Error- 'collection' not defined'- have I created the collection properly?

Full code is below:

import numpy as np
import pandas as pd

import time
import math
import os
import sys 
from geopy import geocoders
import time
import os
import sys

import matplotlib.pyplot as plt
%matplotlib inline

import ipywidgets as wgt
from IPython.display import display
from sklearn.feature_extraction.text import CountVectorizer
import re
from datetime import datetime

import tweepy
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener

api_key = "" # <---- Add your API Key
api_secret = "" # <---- Add your API Secret
access_token = "" # <---- Add your access token
access_token_secret = "" # <---- Add your access token secret

auth = tweepy.OAuthHandler(api_key, api_secret)
auth.set_access_token(access_token, access_token_secret)


class listener(StreamListener):

    def __init__(self, start_time, time_limit=60):

        self.time = start_time
        self.limit = time_limit
        self.tweet_data = []

    def on_data(self, data):

        saveFile = io.open('raw_tweets.json', 'a', encoding='utf-8')

        while (time.time() - self.time) < self.limit:

            try:

                self.tweet_data.append(data)

                return True

            except BaseException as e:
                print ('failed ondata,', str(e))
                time.sleep(5)
                pass

        saveFile = io.open('raw_tweets.json', 'w', encoding='utf-8')
        saveFile.write(u'[\n')
        saveFile.write(','.join(self.tweet_data))
        saveFile.write(u'\n]')
        saveFile.close()
        exit()

    def on_error(self, status):

        print (status)
        
auth = tweepy.OAuthHandler(api_key, api_secret)
auth.set_access_token(access_token, access_token_secret)
start_time = time.time() #grabs the system time

        
twitterStream = Stream(auth, listener(start_time, time_limit=20)) 
API = tweepy.API(auth)

API.reverse_geocode(51.4545 , -2.5879 , 2000 , 'city' , 1)
twitterStream.filter(track=['API.reverse_geocode'], async=True)


import pymongo
from pymongo import MongoClient
import json

import pymongo

from pymongo import MongoClient

import json

​

# Connection to Mongo DB

try:

    conn=pymongo.MongoClient()

    print ("Connected successfully!!!")

​

except pymongo.errors.ConnectionFailure as e:

        print ("Could not connect to MongoDB: %s") % e 

        conn

output- Connected successfully!!!


class listener(StreamListener):
    def __init__(self, start_time, time_limit=60):
        self.time = start_time
        self.limit = time_limit
        
    def on_data(self, data):
        
        while (time.time() - self.time) < self.limit:
            
            try:
                client = MongoClient('localhost', 27017)
                db = client['happycity_db']
                collection = db['happycity_collection']
                tweet = json.loads(data)
                collection.insert(tweet)
                
                return True
            
            except BaseException as e:
                print ('failed ondata,', str(e))
                time.sleep(5)
                pass
            exit()
            
            def on_error(self, status):
                print (status)
    
    
    collection.find_one()
    print (collection)
    collection.count()

print("Tweets collected: %s" % StreamListener.counter)
print("Total tweets in collection: %s" % collection.count())

Error- NameError Traceback (most recent call last)
in ()
----> 1 collection.find_one()
2
3 print (collection)
4
5 collection.count()

NameError: name 'collection' is not defined


#2

For this line, in the MongoDB db does this have to be already defined in order to insert? Does the error give a line number. It will tell you if collection is empty of if collection.insert is the issue here.