Hi,
I am new here and new to pyinstaller. 

My team and I are working on a school project that sends requests to the 
OpenAI API. The Python script works perfectly when run with the Python 
interpreter, but when I build it as an executable with PyInstaller, it spawns 
additional instances of itself and exhausts our rate limit. Each individual 
instance still works as intended, but every subsequent instance tries to 
overwrite the output of the previous one. We are also building on macOS on 
the Arm architecture.

Here is the process we do:
pyinstaller --onefile GPTScanner.py

pyinstaller GPTScanner.spec  (listed below)

chmod +x GPTScanner

./GPTScanner 


Thanks for the help!


Source code:
 import os
import sys
import time
import portalocker
from openai import OpenAI
import extract_msg
import csv
from tqdm import tqdm
import numpy as np
import logging

# Route INFO-and-above records to a log file, each line stamped with the
# time and severity level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s:%(message)s',
    filename='email_classification.log',
)

def ensure_downloaded_emails_dir(script_dir):
    """Make sure the ``downloadedEmails`` folder exists and return its path.

    Args:
        script_dir: Directory that should contain the ``downloadedEmails``
            folder.

    Returns:
        The path ``<script_dir>/downloadedEmails``.
    """
    folder_path = os.path.join(script_dir, 'downloadedEmails')
    # exist_ok avoids the check-then-create race: two processes starting at
    # the same time can no longer both see "missing" and collide in makedirs.
    os.makedirs(folder_path, exist_ok=True)
    return folder_path

def extract_msg_content(msg_path):
    """Return the sender, subject and body of an Outlook ``.msg`` file as text.

    Args:
        msg_path: Path to the ``.msg`` file to read.

    Returns:
        A single string with a ``Sender:`` line, a ``Subject:`` line, a blank
        line and then the body. Missing fields are replaced by the
        placeholders "Unknown Sender", "No Subject" and "No Body Content".
    """
    msg = extract_msg.Message(msg_path)
    try:
        sender = msg.sender or "Unknown Sender"
        subject = msg.subject or "No Subject"
        body = msg.body or "No Body Content"
    finally:
        # Release the underlying file; otherwise one handle leaks per email
        # when a whole folder is processed.
        msg.close()
    return f"Sender: {sender}\nSubject: {subject}\n\n{body}"

def read_txt_file(txt_path):
    """Read a UTF-8 text file and return its entire contents as a string."""
    with open(txt_path, encoding='utf-8') as txt_handle:
        return txt_handle.read()

def classify_email(email_content):
    """Ask the OpenAI chat API whether an email is 'valid' or 'invalid'.

    The request is limited to a single output token at temperature 0 and asks
    for log-probabilities, so the confidence is derived from the probability
    of that one returned token.

    Args:
        email_content: Full text of the email (sender, subject and body).

    Returns:
        A ``(classification, confidence)`` tuple: the model's reply stripped
        and lower-cased, and ``exp(logprob) * 100`` of the first returned
        token rounded to two decimals.
    """
    system_message = (
        "You are an email classification assistant "
        "that looks at the body of emails to determine if they fit a specific "
        "criteria. You must always reconfirm your own reasoning when producing an "
        "output. You only respond with the one word answers 'valid' or 'invalid'"
    )
    user_prompt = f"""
Classify the following email as valid or invalid based on the criteria 
below:

Invalid:
- The sender is on break, returning soon, gone for a few days, on maternity 
leave, on holiday or vacation.
- The email is spam or promotional.
- The email is not an autoreply.
- The email contains content involving business transactions or regular 
correspondence.
- The email appears to be something random or entirely unrelated

Valid:
- The email states that the sender is permanently gone from the company.
- The email indicates the mailbox is no longer in use or active.
- The email specifies that the mailbox has transitioned to a new owner.
- The email mentions that the sender has switched companies or roles, 
retired, or resigned.
- The email signifies the account has been deactivated, closed, disabled, 
or is otherwise no longer operational.

Email:'''''
{email_content}
'''''
Classification:"""

    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt.strip()},
    ]

    client = OpenAI()
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        max_tokens=1,
        temperature=0.0,
        logprobs=True,
    )

    top_choice = response.choices[0]
    classification = top_choice.message.content.strip().lower()
    # Convert the log-probability of the single returned token to a percentage.
    first_token = top_choice.logprobs.content[0]
    confidence = round(np.exp(first_token.logprob) * 100, 2)
    return classification, confidence

def process_files(folder_path, output_csv):
    """Classify every ``.msg``/``.txt`` file in a folder and write a CSV report.

    Each file is read, sent to ``classify_email`` and logged. The report lists
    the rows sorted by confidence (highest first), followed by a blank row and
    a summary row holding the mean confidence over all files.

    Args:
        folder_path: Directory containing the email files.
        output_csv: Path of the CSV file to (over)write.
    """
    files = [
        name for name in os.listdir(folder_path)
        if name.endswith(('.msg', '.txt'))
    ]
    results = []

    for filename in tqdm(files, desc="Processing files", unit="file"):
        file_path = os.path.join(folder_path, filename)
        # The list above only contains .msg and .txt names, so two branches
        # are enough.
        if filename.endswith('.msg'):
            email_content = extract_msg_content(file_path)
        else:
            email_content = read_txt_file(file_path)

        classification, confidence = classify_email(email_content)
        logging.info(
            "The email '%s' is classified as: %s with confidence: %.2f%%",
            filename, classification, confidence,
        )
        first_20_words = ' '.join(email_content.split()[:20])
        results.append([filename, first_20_words, classification, confidence])

    results.sort(key=lambda row: row[3], reverse=True)

    if results:
        summary = f'{np.mean([row[3] for row in results]):.2f}%'
    else:
        # np.mean([]) would emit a RuntimeWarning and produce "nan%".
        logging.warning("No .msg or .txt files found in %s", folder_path)
        summary = 'N/A'

    with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(
            ['Filename', 'First 20 Words', 'Classification', 'Confidence (%)']
        )
        csv_writer.writerows(results)
        csv_writer.writerow([])
        # The label is kept as-is; the value is the mean confidence.
        csv_writer.writerow(['Total Accuracy', '', '', summary])

def main():
    """Run one classification pass, guarded by a single-instance lock file.

    The working directory is the folder of the frozen executable when running
    under PyInstaller, otherwise the folder of this script. A non-blocking
    exclusive lock on ``script.lock`` in that folder ensures only one instance
    processes the emails; a second instance logs a warning and exits with
    status 0.
    """
    logging.info("Script started.")
    if getattr(sys, 'frozen', False):
        script_dir = os.path.dirname(sys.executable)
    else:
        script_dir = os.path.dirname(os.path.abspath(__file__))

    # Ensure single instance execution using a lock file
    lock_file = os.path.join(script_dir, 'script.lock')
    lock_acquired = False
    with open(lock_file, 'w') as fp:
        try:
            portalocker.lock(fp, portalocker.LOCK_EX | portalocker.LOCK_NB)
            lock_acquired = True
            folder_path = ensure_downloaded_emails_dir(script_dir)
            output_csv = os.path.join(script_dir, 'email_classification_results.csv')
            process_files(folder_path, output_csv)
            logging.info(f"Results have been saved to {output_csv}")
        except portalocker.LockException:
            logging.warning("Another instance of the script is already running. Exiting.")
            sys.exit(0)
        except Exception as e:
            # logging.exception keeps the traceback, which the plain error
            # message used to drop.
            logging.exception("An error occurred: %s", e)
        finally:
            # Only the instance that holds the lock may release and delete it.
            # Deleting it from an instance that failed to lock would remove
            # the running instance's lock file and let further instances start.
            if lock_acquired:
                portalocker.unlock(fp)
                fp.close()
                if os.path.exists(lock_file):
                    os.remove(lock_file)
            logging.info("Script completed. Exiting.")
            # Delay to prevent immediate restart
            time.sleep(30)

if __name__ == "__main__":
    # Imported here so this entry block is self-contained.
    import multiprocessing

    # Required in frozen (PyInstaller) builds: helper processes started by the
    # multiprocessing machinery re-run this executable, and freeze_support()
    # is what diverts them to their real job instead of running main() again.
    # tqdm likely triggers this by creating a multiprocessing lock. Without
    # the call, the executable keeps launching new copies of itself. It is a
    # no-op when run as a normal Python script.
    multiprocessing.freeze_support()
    main()
    sys.exit()


GPTScanner.spec:
# -*- mode: python ; coding: utf-8 -*-
# PyInstaller build specification for GPTScanner.py
# (used as: pyinstaller GPTScanner.spec).
# Analysis, PYZ and EXE are not imported here; presumably PyInstaller provides
# them when it executes the spec file.

# Analyse the entry script to collect the modules and files to bundle.
a = Analysis(
    ['GPTScanner.py'],
    pathex=[],
    binaries=[],
    # Bundles the local 'downloadedEmails' folder under the same name.
    # NOTE(review): when frozen, main() in GPTScanner.py looks for
    # 'downloadedEmails' next to sys.executable, so this bundled copy appears
    # to be unused -- confirm whether it is needed.
    datas=[('downloadedEmails', 'downloadedEmails')],
    hiddenimports=[],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    noarchive=False,
    optimize=0,
)
# Archive of the pure-Python modules found by the analysis.
pyz = PYZ(a.pure)

# Scripts, binaries and data files are all passed directly to EXE and there is
# no COLLECT step, which matches the --onefile build described in the post.
exe = EXE(
    pyz,
    a.scripts,
    a.binaries,
    a.datas,
    [],
    name='GPTScanner',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    upx_exclude=[],
    runtime_tmpdir=None,
    # Console (terminal) application rather than a windowed one.
    console=True,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)


-- 
You received this message because you are subscribed to the Google Groups 
"PyInstaller" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/pyinstaller/bab20e10-bd99-4d15-bbec-b7d1c39bba9cn%40googlegroups.com.

Reply via email to