Background
- ran an email campaign for a client, with 3.2k emails
- received 800+ emails back, including manual answers, Out Of Office & Undeliverable
Goal
- provide the client with a list of email addresses to be flagged as "Undeliverable" (i.e. Do Not Email) in Hubspot
Steps
- run Imapee to sweep my email inbox for "Undeliverable" emails. As the Imapee script was run multiple times, emails were logged in `.txt` files in the `/log` folder
- extract all emails logged by Imapee into a list of unique values (= `set` in Python)
- match emails with list of emails in client's Hubspot (to remove all emails collected that were not originally in the list)
- remove any email that was already flagged as "unsubscribe" in Hubspot (perhaps not necessary - there is some value in knowing an email is undeliverable vs a simple unsubscribe)
Script
# default boilerplate here. See /python/boilerplate
####################
# 220910 getting final list of all DNEs from xxxxxx campaign from log files
import csv
### set of emails from logs
# Walk the "log" folder, read every .txt file and collect each logged address
# once. Addresses are lower-cased and stripped so that the same address
# written with different casing/whitespace is only counted once.
set_emails_from_log = set()  # unique normalised addresses found in the logs
count_files = 0  # number of .txt log files read
count_lines = 0  # total number of lines read across all log files

for root, dirs, files in os.walk("log"):
    for name in files:
        if not name.endswith(".txt"):
            continue
        count_files += 1
        file_path = f"{root}/{name}"
        count_lines_in_file = 0
        with open(file_path, 'r') as df:
            # Iterate the handle directly: no need to load the whole file.
            for line in df:
                count_lines += 1
                count_lines_in_file += 1
                # strip() already removes the trailing newline, so no
                # separate rstrip() step is needed.
                line = line.lower().strip()
                # print(repr(line))
                if '@' in line:  # basic but enough here - just taking care of removing any empty row
                    set_emails_from_log.add(line)

        ### DELETE EMPTY FILES / not necessary here, maintenance only
        # Runs once the `with` block has closed the file, so the removal
        # never targets a file this script still holds open.
        if count_lines_in_file == 0:
            os.remove(file_path)
            print(f"DELETED {file_path} with {count_lines_in_file} lines.")

        ### Check
        # print(f"{file_path}:\t{count_lines_in_file} lines.")

# print(f"\n\nset_emails_from_log:")
# pp.pprint(set_emails_from_log)
### Match with existing XX contacts in Hubspot
# Read the Hubspot contact export and build two lists of normalised
# (lower-cased, stripped) addresses: every contact email, and the subset whose
# unsubscribe column is non-empty.
csv_file = "data/220911_hubspot-crm-export_my-XX-contacts.csv"
count_row_csv = 0  # data rows processed (header and malformed rows excluded)
count_unsubscribe = 0  # rows with a non-empty unsubscribe column
all_hubspot_emails = []
all_hubspot_unsubscribe_emails = []

with open(csv_file, 'r', newline='', encoding='UTF-8') as h:
    reader = csv.reader(h, delimiter=",")
    next(reader, None)  # skip the header row; tolerate a completely empty file
    data = list(reader)

for row in data:
    # csv.reader yields [] for blank lines; skip anything too short to hold
    # the email (index 4) and unsubscribe (index 5) columns instead of
    # crashing with IndexError.
    if len(row) < 6:
        continue
    count_row_csv += 1
    email = row[4]
    unsubscribe = row[5]
    if '@' in email:
        normalised_email = email.lower().strip()
        all_hubspot_emails.append(normalised_email)
        # NOTE(review): the pasted source had lost its indentation, so it is
        # not certain whether this check was nested under the '@' check.
        # Nesting keeps non-addresses out of the unsubscribe list - confirm.
        if unsubscribe != '':
            count_unsubscribe += 1
            # print(f"{count_row_csv} = {email}: {unsubscribe}")
            all_hubspot_unsubscribe_emails.append(normalised_email)
# Keep only the logged addresses that exist in Hubspot and are not already
# flagged as unsubscribed there; everything else goes to list_delta.
list_delta = []  # to collect emails not added to final list / for verification or else.
final_list_of_new_unsubscribe_emails = []

# Build the lookup sets once: membership tests on the original lists would
# rescan thousands of entries for every logged address.
hubspot_emails_lookup = set(all_hubspot_emails)
hubspot_unsubscribed_lookup = set(all_hubspot_unsubscribe_emails)

# Iterate in sorted order so the resulting lists (and the file produced from
# them) are identical from one run to the next; raw set order is not stable.
for email_found in sorted(set_emails_from_log):
    is_new_unsubscribe = (
        email_found in hubspot_emails_lookup
        and email_found not in hubspot_unsubscribed_lookup
    )
    if is_new_unsubscribe:
        final_list_of_new_unsubscribe_emails.append(email_found)
    else:
        # NOTE(review): the pasted source had lost its indentation, so which
        # `if` the `else` belonged to is unknown. Following the list_delta
        # comment, every address not added to the final list is recorded.
        list_delta.append(email_found)

# pp.pprint(final_list_of_new_unsubscribe_emails)
### write final file to share with client (CSV)
# One-column CSV ("email" header, then one address per row).
# The default double-quote quotechar is used rather than '|': with '|' any
# value containing '|' or ',' would be wrapped in pipes, which standard CSV
# consumers do not unquote.
with open(f"data/{ts_file}_emails_to_unsubscribe_in_hubspot.csv", 'w', newline='', encoding='utf-8') as i:
    writer = csv.writer(i, delimiter=',', quoting=csv.QUOTE_MINIMAL, lineterminator='\n')
    writer.writerow(['email'])  # header row
    # Each address is wrapped in a one-item list: passing the bare string
    # would make csv treat every character as a separate column.
    writer.writerows([item] for item in final_list_of_new_unsubscribe_emails)
########################################################################################################
# default boilerplate here. See /python/boilerplate
prints:
-------------------------------
220910_XX_list_dne.py
count_files = 12
count_lines = 72090
set_emails_from_log: 762
all_hubspot_emails: 3577
all_hubspot_unsubscribe_emails: 595
final_list_of_new_unsubscribe_emails: 487
-------------------------------
220910_XX_list_dne.py finished in 0.1s.
CSV list of 487 emails to mark as "Do Not Email" in their Hubspot provided to client.