TODO / ideas to explore:
- Colored terminal output
- tqdm progress bars
- rich menus, tables, etc.
- pandoc for LaTeX-to-PDF conversion
Multi-threaded I/O handling (I/O-bound concurrency)
https://docs.python.org/3/library/concurrent.futures.html
import time
import requests
import concurrent.futures
def get_wiki_page_existence(wiki_page_url, timeout=10):
    """Fetch *wiki_page_url* and report whether the page exists.

    Returns a string "<url> - <status>" where status is "exists"
    (HTTP 200), "does not exist" (HTTP 404), or "unknown" otherwise.
    A 3-second sleep is added to exaggerate per-request latency for
    the threading demo.
    """
    status_by_code = {200: "exists", 404: "does not exist"}
    response = requests.get(url=wiki_page_url, timeout=timeout)
    page_status = status_by_code.get(response.status_code, "unknown")
    time.sleep(3)
    return wiki_page_url + " - " + page_status
# --- Threaded demo: probe 50 wiki pages concurrently ---------------------
wiki_page_urls = ["https://en.wikipedia.org/wiki/" + str(i) for i in range(50)]

print("Running threaded:")
threaded_start = time.time()
with concurrent.futures.ThreadPoolExecutor() as executor:
    # Fan out one request per URL; submit returns immediately.
    futures = [
        executor.submit(get_wiki_page_existence, wiki_page_url=url)
        for url in wiki_page_urls
    ]
    # Print results in completion order with a running counter.
    count = 1
    for future in concurrent.futures.as_completed(futures):
        print(future.result())
        print(count)
        count += 1
print("Threaded time:", time.time() - threaded_start)
Multi-core processing (CPU-bound parallelism via multiprocessing)
import time
import multiprocessing as mp
def multiprocessing_func(x):
    """Stand-in workload: block for three seconds, then report *x* as done."""
    duration = 3
    time.sleep(duration)
    print('{} done'.format(x))
if __name__ == '__main__':
    # One work item per logical CPU so both map() calls saturate the pool.
    cpu_c = mp.cpu_count()
    starttime = time.time()
    work_items = list(range(cpu_c))
    # FIX: the original close()d the pool but never join()ed it; a context
    # manager guarantees the pool is cleaned up even if map() raises.
    with mp.Pool() as pool:
        # map() blocks until every task in the batch has finished.
        pool.map(multiprocessing_func, work_items)
        print("next")
        pool.map(multiprocessing_func, work_items)
    print('That took {} seconds'.format(time.time() - starttime))
Comparison: serial vs. ThreadPoolExecutor vs. multiprocessing for file hashing
import concurrent.futures
import datetime
import hashlib
import multiprocessing as mp
import os
import random
import shutil
import sys
import time
# Root directory whose files are hashed by the benchmark below; adjust locally.
PATH = r'F:\Audio\processed'
def create_checksum(file_path):
    """Return the hex MD5 digest of the file at *file_path*.

    Reads in 4 KiB chunks so arbitrarily large files can be hashed
    without loading them fully into memory.
    """
    # FIX: the original named this local `hash`, shadowing the builtin.
    md5_hash = hashlib.md5()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            md5_hash.update(chunk)
    return md5_hash.hexdigest()
def hash_file(file_path):
    """Hash a single file and return a ``(file_path, md5_hexdigest)`` pair."""
    # (Optional sleep kept commented out, as in the original, for latency tests.)
    #time.sleep(3)
    checksum = create_checksum(file_path)
    return file_path, checksum
def create_chunks(list_name, n):
    """Yield successive *n*-sized slices of *list_name* (last may be shorter)."""
    for start in range(0, len(list_name), n):
        end = start + n
        yield list_name[start:end]
if __name__ == "__main__":
    begin_time = datetime.datetime.now()
    print(begin_time)

    # Collect up to ~1000 file paths under PATH (dict preserves order and
    # deduplicates; the value is a dummy True).
    all_paths = {}
    for root, dirs, files in os.walk(PATH):
        for file in files:
            all_paths[os.path.join(root, file)] = True
            if len(all_paths) > 1000:
                break  # enough sample files for the benchmark
        else:
            continue  # inner loop finished normally; keep walking
        break  # inner loop hit the cap; stop the walk
    print(len(all_paths))
    done_time = datetime.datetime.now()
    print(done_time)

    # --- 1. Serial baseline ------------------------------------------------
    _start = time.time()
    all_hashed = {}
    for file_p in all_paths:
        filepath, hash_of_file = hash_file(file_p)
        all_hashed[filepath] = hash_of_file
    print("Loop took: {}".format(time.time() - _start))
    # print(all_hashed)

    # NOTE: `sys` must be imported at the top of the file for sys.exit().
    confirmation = ''
    while confirmation not in ['Y', 'N']:
        confirmation = input('Continue? (Y/N): ').upper()
    if confirmation == 'N':
        sys.exit()

    # --- 2. ThreadPoolExecutor (threads overlap the file I/O) --------------
    _start = time.time()
    all_hashed = {}
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(hash_file, file_path=filepath)
                   for filepath in all_paths]
        for future in concurrent.futures.as_completed(futures):
            filepath, hash_of_file = future.result()
            all_hashed[filepath] = hash_of_file
    print("ThreadPoolExecutor took: {}".format(time.time() - _start))
    # print(all_hashed)

    confirmation = ''
    while confirmation not in ['Y', 'N']:
        confirmation = input('Continue? (Y/N): ').upper()
    if confirmation == 'N':
        sys.exit()

    # --- 3. multiprocessing.Pool, one chunk of cpu_count files at a time ---
    _start = time.time()
    cpu_c = mp.cpu_count()
    all_hashed = {}
    # FIX: context-manage the pool (original close()d but never join()ed).
    with mp.Pool() as pool:
        for list_of_files in create_chunks(list(all_paths), cpu_c):
            # BUG FIX: the original called pool.map twice per chunk, hashing
            # every file a second time and discarding the first result; each
            # chunk is now mapped exactly once.
            hashes = pool.map(hash_file, list_of_files)
            for file_path, hash_of_file in hashes:
                all_hashed[file_path] = hash_of_file
    print("Multiprocessing took: {}".format(time.time() - _start))
    # print(all_hashed)