Hello, I am new to coding and trying to develop a Twitter application to stream tweets into a Mongo DB database.
I think I’m nearly there, but when I try to find my ‘collection’ in the database I get a ‘Name Error- ‘collection’ not defined’- have I created the collection properly?
Full code is below:
import numpy as np
import pandas as pd
import time
import math
import os
import sys
from geopy import geocoders
import time
import os
import sys
import matplotlib.pyplot as plt
%matplotlib inline
import ipywidgets as wgt
from IPython.display import display
from sklearn.feature_extraction.text import CountVectorizer
import re
from datetime import datetime
import tweepy
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
api_key = "" # <---- Add your API Key
api_secret = "" # <---- Add your API Secret
access_token = "" # <---- Add your access token
access_token_secret = "" # <---- Add your access token secret
auth = tweepy.OAuthHandler(api_key, api_secret)
auth.set_access_token(access_token, access_token_secret)
class listener(StreamListener):
def __init__(self, start_time, time_limit=60):
self.time = start_time
self.limit = time_limit
self.tweet_data = []
def on_data(self, data):
saveFile = io.open('raw_tweets.json', 'a', encoding='utf-8')
while (time.time() - self.time) < self.limit:
try:
self.tweet_data.append(data)
return True
except BaseException as e:
print ('failed ondata,', str(e))
time.sleep(5)
pass
saveFile = io.open('raw_tweets.json', 'w', encoding='utf-8')
saveFile.write(u'[\n')
saveFile.write(','.join(self.tweet_data))
saveFile.write(u'\n]')
saveFile.close()
exit()
def on_error(self, status):
print (status)
auth = tweepy.OAuthHandler(api_key, api_secret)
auth.set_access_token(access_token, access_token_secret)
start_time = time.time() #grabs the system time
twitterStream = Stream(auth, listener(start_time, time_limit=20))
API = tweepy.API(auth)
API.reverse_geocode(51.4545 , -2.5879 , 2000 , 'city' , 1)
twitterStream.filter(track=['API.reverse_geocode'], async=True)
import pymongo
from pymongo import MongoClient
import json
import pymongo
from pymongo import MongoClient
import json
# Connection to Mongo DB
try:
conn=pymongo.MongoClient()
print ("Connected successfully!!!")
except pymongo.errors.ConnectionFailure as e:
print ("Could not connect to MongoDB: %s") % e
conn
output- Connected successfully!!!
class listener(StreamListener):
def __init__(self, start_time, time_limit=60):
self.time = start_time
self.limit = time_limit
def on_data(self, data):
while (time.time() - self.time) < self.limit:
try:
client = MongoClient('localhost', 27017)
db = client['happycity_db']
collection = db['happycity_collection']
tweet = json.loads(data)
collection.insert(tweet)
return True
except BaseException as e:
print ('failed ondata,', str(e))
time.sleep(5)
pass
exit()
def on_error(self, status):
print (status)
collection.find_one()
print (collection)
collection.count()
print("Tweets collected: %s" % StreamListener.counter)
print("Total tweets in collection: %s" % collection.count())
Error- NameError Traceback (most recent call last)
in ()
----> 1 collection.find_one()
2
3 print (collection)
4
5 collection.count()
NameError: name ‘collection’ is not defined