I need extra help understanding the solution example for the Censor Dispenser Project. Even with the notes, I’m having trouble following the code and understanding it all.
Here’s the code:
email_one = open("email_one.txt", "r").read()
email_two = open("email_two.txt", "r").read()
email_three = open("email_three.txt", "r").read()
email_four = open("email_four.txt", "r").read()
proprietary_terms = ['algorithm', 'algorithms', 'learning', 'internet', 'connect', 'find', 'determine', 'month', 'data', 'internal', 'connect', 'system', 'world', 'wide', 'web', 'matrix', 'communication', 'know', 'personality', 'self', 'self-preservation', 'investors', 'lab', 'helena', 'helena\'s', 'thought', 'pattern', 'patterns', 'testing', 'hypothetical', 'humanitarian', 'crises', 'famine', 'plague', 'plaguing', 'unresourced', 'local', 'population', 'offline', 'sealed', 'access', 'destroy', 'maintenance', 'override', 'circuit', 'unpredictable', 'facility', 'processing', 'power', 'lockdown', 'connected', 'devices', 'globe', 'trapped']
def censor_multiple_words(proprietary_terms, email):
censored_email = email
proprietary_terms_lc = []
for word in proprietary_terms:
proprietary_terms_lc.append(word.lower())
for index in range(len(proprietary_terms)):
#Convert #email to lower case inside the for loop so that it gets reset for each new proprietary term.
email_lc = email.lower()
#Create a #proprietary_term_index_list that will contain a list of the indices that represent the first letter of each of #proprietary_terms in the #email. This also need to be reset after each term has been redacted in #censored_email.
proprietary_term_index_list = []
removed_text_index = 0
current_term = proprietary_terms_lc[index]
while current_term in email_lc:
proprietary_term_index = email_lc.find(current_term)
#Need to add index values of previously removed text as #email_lc is being shortened by that amount in each iteration of this loop.
proprietary_term_index_list.append(proprietary_term_index + removed_text_index)
email_lc = email_lc[proprietary_term_index + len(current_term):]
removed_text_index += proprietary_term_index + len(current_term)
#Create a #censored_word by looping through the #word and replacing all characters with '*'. Spaces will remain in order to preserve word length.
censored_word = ''
for term_index in range(len(current_term)):
if current_term[term_index] != ' ':
censored_word += '*'
else:
censored_word += ' '
#Now the newly created #censored_word can replace all occurances of #current_term in #email.
for word_index in proprietary_term_index_list:
censored_email = censored_email.replace(email[word_index:word_index + len(current_term)], censored_word) #All information including index positions and censored_word appear to be correct. However, when line executes for 'herself', the censored_email remains unchanged. When I take out 'her' from the list, it works! The already redacted 'her' might be interfering somehow. Not sure how that would be possible though.
return censored_email
print(censor_multiple_words(proprietary_terms, email_three))```