Hi,
I am new here and new to PyInstaller.
My team and I are working on a school project that sends requests to the
OpenAI API. The Python script works perfectly when run with the Python
interpreter, but when I build it as an executable using PyInstaller, it
spawns additional instances of itself and exhausts our rate limit. Each
individual instance still works as intended, but every subsequent instance
tries to overwrite the output of the previous one. We are also building on
macOS on the Arm architecture.
Here is the process we do:
pyinstaller --onefile GPTScanner.py
pyinstaller GPTScanner.spec (listed below)
chmod +x GPTScanner
./GPTScanner
Thanks for the help!
Source code:
import os
import sys
import time
import portalocker
from openai import OpenAI
import extract_msg
import csv
from tqdm import tqdm
import numpy as np
import logging
# Route INFO-and-above records to a log file; each record is prefixed with
# its timestamp and level name. The relative filename is resolved against
# the current working directory at the time this call runs.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s:%(message)s',
    filename='email_classification.log',
)
def ensure_downloaded_emails_dir(script_dir):
    """Return the path of the ``downloadedEmails`` folder, creating it if needed.

    Args:
        script_dir: Directory that should contain the ``downloadedEmails``
            folder.

    Returns:
        The joined path ``<script_dir>/downloadedEmails``.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    folder_path = os.path.join(script_dir, 'downloadedEmails')
    # exist_ok avoids the check-then-create race when several instances of
    # the program start at the same moment.
    os.makedirs(folder_path, exist_ok=True)
    return folder_path
def extract_msg_content(msg_path):
    """Return the sender, subject and body of an Outlook ``.msg`` file as text.

    Args:
        msg_path: Path to the ``.msg`` file to read.

    Returns:
        A string of the form ``"Sender: ...\\nSubject: ...\\n\\n<body>"``.
        Missing (falsy) fields are replaced by the placeholders
        ``"Unknown Sender"``, ``"No Subject"`` and ``"No Body Content"``.
    """
    msg = extract_msg.Message(msg_path)
    try:
        sender = msg.sender or "Unknown Sender"
        subject = msg.subject or "No Subject"
        body = msg.body or "No Body Content"
    finally:
        # Release the underlying file handle even if a field fails to parse;
        # otherwise one handle is leaked per processed email.
        msg.close()
    return f"Sender: {sender}\nSubject: {subject}\n\n{body}"
def read_txt_file(txt_path):
    """Return the entire contents of the UTF-8 text file at ``txt_path``."""
    with open(txt_path, mode='r', encoding='utf-8') as handle:
        return handle.read()
# Lazily created client shared by every classify_email() call.
_openai_client = None


def _get_openai_client():
    """Return the shared OpenAI client, constructing it on first use."""
    global _openai_client
    if _openai_client is None:
        _openai_client = OpenAI()
    return _openai_client


def classify_email(email_content):
    """Ask the OpenAI chat API whether an email is 'valid' or 'invalid'.

    Args:
        email_content: Full text of the email to classify.

    Returns:
        A ``(classification, confidence)`` tuple. ``classification`` is the
        model's reply, stripped and lower-cased (an empty string when the
        reply has no text). ``confidence`` is the probability of the first
        returned token as a percentage rounded to two decimals, or ``0.0``
        when the response carries no log-probabilities.
    """
    prompt = f"""
Classify the following email as valid or invalid based on the criteria
below:
Invalid:
- The sender is on break, returning soon, gone for a few days, on maternity
leave, on holiday or vacation.
- The email is spam or promotional.
- The email is not an autoreply.
- The email contains content involving business transactions or regular
correspondence.
- The email appears to be something random or entirely unrelated
Valid:
- The email states that the sender is permanently gone from the company.
- The email indicates the mailbox is no longer in use or active.
- The email specifies that the mailbox has transitioned to a new owner.
- The email mentions that the sender has switched companies or roles,
retired, or resigned.
- The email signifies the account has been deactivated, closed, disabled,
or is otherwise no longer operational.
Email:'''''
{email_content}
'''''
Classification:"""
    system_message = (
        "You are an email classification assistant "
        "that looks at the body of emails to determine if they fit a specific "
        "criteria. You must always reconfirm your own reasoning when producing an "
        "output. You only respond with the one word answers 'valid' or 'invalid'"
    )
    # Reuse one client instead of building a new one (and a new connection
    # pool) for every email.
    response = _get_openai_client().chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": prompt.strip()},
        ],
        # The reply is capped at a single token; the confidence below is the
        # probability of that one token.
        max_tokens=1,
        temperature=0.0,
        logprobs=True,
    )
    choice = response.choices[0]
    # The message content may be None; treat that as an empty reply rather
    # than crashing on .strip().
    classification = (choice.message.content or "").strip().lower()
    token_logprobs = choice.logprobs.content if choice.logprobs else None
    if not token_logprobs:
        logging.warning("Response contained no log-probabilities; confidence set to 0.")
        return classification, 0.0
    confidence = float(round(np.exp(token_logprobs[0].logprob) * 100, 2))
    return classification, confidence
def process_files(folder_path, output_csv):
    """Classify every ``.msg``/``.txt`` file in a folder and write a CSV report.

    Each file is read, classified with ``classify_email`` and logged. The
    report rows are sorted by confidence in descending order and followed by
    a blank row and a summary row holding the mean confidence.

    Args:
        folder_path: Directory containing the ``.msg`` and ``.txt`` files.
        output_csv: Path of the CSV file to (over)write.
    """
    files = [f for f in os.listdir(folder_path) if f.endswith(('.msg', '.txt'))]
    results = []
    for filename in tqdm(files, desc="Processing files", unit="file"):
        file_path = os.path.join(folder_path, filename)
        # The listing is already filtered, so anything that is not .msg is .txt.
        if filename.endswith('.msg'):
            email_content = extract_msg_content(file_path)
        else:
            email_content = read_txt_file(file_path)
        classification, confidence = classify_email(email_content)
        logging.info(f"The email '{filename}' is classified as: {classification} with confidence: {confidence:.2f}%")
        first_20_words = ' '.join(email_content.split()[:20])
        results.append([filename, first_20_words, classification, confidence])

    results.sort(key=lambda row: row[3], reverse=True)
    # np.mean([]) yields nan plus a RuntimeWarning, so an empty folder gets
    # an explicit placeholder instead.
    if results:
        mean_confidence = f'{np.mean([row[3] for row in results]):.2f}%'
    else:
        mean_confidence = 'N/A'

    with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(['Filename', 'First 20 Words', 'Classification', 'Confidence (%)'])
        csv_writer.writerows(results)
        csv_writer.writerow([])
        # NOTE(review): the label says "Accuracy" but the value is the mean
        # per-email confidence; label kept unchanged for existing consumers.
        csv_writer.writerow(['Total Accuracy', '', '', mean_confidence])
def main():
    """Run one classification pass, guarded by a single-instance lock file.

    The working directory for input and output is the folder containing the
    frozen executable (when ``sys.frozen`` is set) or this script otherwise.
    If another instance already holds the lock, a warning is logged and the
    process exits with status 0 without touching that instance's lock file.
    Errors raised during processing are logged with their traceback and do
    not propagate.
    """
    logging.info("Script started.")
    if getattr(sys, 'frozen', False):
        script_dir = os.path.dirname(sys.executable)
    else:
        script_dir = os.path.dirname(os.path.abspath(__file__))

    # Ensure single instance execution using a lock file
    lock_file = os.path.join(script_dir, 'script.lock')
    with open(lock_file, 'w') as fp:
        try:
            portalocker.lock(fp, portalocker.LOCK_EX | portalocker.LOCK_NB)
        except portalocker.LockException:
            # The lock belongs to another process: leave it (and the lock
            # file) alone so the running instance stays protected.
            logging.warning("Another instance of the script is already running. Exiting.")
            sys.exit(0)

        try:
            folder_path = ensure_downloaded_emails_dir(script_dir)
            output_csv = os.path.join(script_dir, 'email_classification_results.csv')
            process_files(folder_path, output_csv)
            logging.info(f"Results have been saved to {output_csv}")
        except Exception as e:
            # Top-level boundary: record the full traceback, then fall
            # through to cleanup.
            logging.exception(f"An error occurred: {e}")
        finally:
            portalocker.unlock(fp)

    # Only the instance that held the lock reaches this point; the file is
    # closed by now, so removal also works on platforms that refuse to
    # delete open files.
    try:
        os.remove(lock_file)
    except FileNotFoundError:
        pass

    logging.info("Script completed. Exiting.")
    # Delay to prevent immediate restart
    time.sleep(30)
if __name__ == "__main__":
    # This file imports tqdm, which relies on multiprocessing. In a frozen
    # (PyInstaller) build, multiprocessing helper processes are started by
    # re-running the executable itself; without freeze_support() each of
    # those helpers would execute main() again as a new instance of the app.
    # The call is documented as a no-op when the program is not frozen.
    import multiprocessing

    multiprocessing.freeze_support()
    main()
    sys.exit()
GPTScanner.spec:
# -*- mode: python ; coding: utf-8 -*-
# PyInstaller build specification for GPTScanner.py.
# Analysis, PYZ and EXE are not imported here; presumably PyInstaller
# supplies them when it executes the spec file.

# Dependency analysis of the entry script.
a = Analysis(
['GPTScanner.py'],
pathex=[],
binaries=[],
# (source, destination) pair: include the local downloadedEmails folder in
# the build under the same name.
# NOTE(review): main() looks for downloadedEmails next to the executable
# (os.path.dirname(sys.executable)), not inside the bundle - confirm that
# this entry is actually needed.
datas=[('downloadedEmails', 'downloadedEmails')],
hiddenimports=[],
hookspath=[],
hooksconfig={},
runtime_hooks=[],
excludes=[],
noarchive=False,
optimize=0,
)
# Archive built from the pure-Python modules found by the analysis.
pyz = PYZ(a.pure)
# Scripts, binaries and data files are all passed directly to EXE and there
# is no separate COLLECT step, which matches the --onefile build requested
# on the command line.
exe = EXE(
pyz,
a.scripts,
a.binaries,
a.datas,
[],
name='GPTScanner',
debug=False,
bootloader_ignore_signals=False,
strip=False,
# NOTE(review): UPX compression is requested; presumably it only takes
# effect when UPX is installed on the build machine - verify.
upx=True,
upx_exclude=[],
runtime_tmpdir=None,
# Console (terminal) application rather than a windowed one.
console=True,
disable_windowed_traceback=False,
argv_emulation=False,
# No explicit target architecture or code-signing identity is configured.
target_arch=None,
codesign_identity=None,
entitlements_file=None,
)
--
You received this message because you are subscribed to the Google Groups
"PyInstaller" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To view this discussion on the web visit
https://groups.google.com/d/msgid/pyinstaller/bab20e10-bd99-4d15-bbec-b7d1c39bba9cn%40googlegroups.com.