diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9905f81 --- /dev/null +++ b/.gitignore @@ -0,0 +1,141 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# VSCode settings +.vscode \ No newline at end of file diff --git a/samples/Test_Procedures.py b/samples/Test_Procedures.py index 2ecaaea..1e43f7b 100644 --- a/samples/Test_Procedures.py +++ b/samples/Test_Procedures.py @@ -1,11 +1,16 @@ +import os import csv +import functools + import pandas as pd import numpy as np + # Decorator for printing function results. We return a results value to enable # automated testing of the methods upon refactoring. def output_decorator(func): + @functools.wraps(func) def inner(*args, **kwargs): print(f'{func.__name__} is now started') t = func(*args, **kwargs) @@ -14,48 +19,64 @@ def inner(*args, **kwargs): return t["results"] return inner - class Test_1_Procedures: - + + def __init__( + self, + general_ledger_df, + log_file_df, + output_folder='Output_Folder' + ): + self.general_ledger_df = general_ledger_df + self.log_file_df = log_file_df + self.output_folder = output_folder + # Creates output directory unless already created + os.makedirs(output_folder, exist_ok=True) + # 3.1.1 - Test 1.1 Check for gaps in journal entry numbers # This method assumes JE's are already sorted in ascending order - @output_decorator - def check_for_gaps_in_JE_ID(GL_Detail, - Journal_ID_Column = 'Journal_ID', - output_file = 'Output_Folder/Test_3_1_1_check_for_gaps_in_JE_ID.csv'): + def check_for_gaps_in_JE_ID( + self, + Journal_ID_Column = 'Journal_ID', + output_file = 'Test_3_1_1_check_for_gaps_in_JE_ID.csv' + ): gaps = [] previous = None - + output_path = f'{self.output_folder}/{output_file}' + # Loop through each Journal ID, compare to previous - for item in GL_Detail[Journal_ID_Column]: + for item in self.general_ledger_df[Journal_ID_Column]: if previous and (item - previous > 1): gaps.append([previous, item]) - previous = item - + previous = item + # Write results to the output csv file, set output_file = None for no # output_file. if output_file: - with open(output_file, 'w') as file: + with open(output_path, 'w') as file: writer = csv.writer(file) writer.writerow([f'Gap identified! Start gap number is followed by end gap number']) writer.writerows(gaps) - writer.writerow(['Test Results:']) + writer.writerow(['Test Results:']) writer.writerow([f'Total of {len(gaps)} gaps found']) - - return ({"results":len(gaps), "output":output_file}) + return ({"results":len(gaps), "output":output_file}) # 3.1.2 Compare listing of journal entry numbers from system to log file @output_decorator - def comparison_of_entries_of_GL_and_log_file(GL_Detail_YYYYMMDD_YYYYMMDD, - Log_File_YYYYMMDD_YYYYMMDD, output_file = "Output_Folder/Test_3_1_2_Comparison_of_Entries_of_GL_and_Log_File.csv"): - - In_GL_not_in_LOG = set(GL_Detail_YYYYMMDD_YYYYMMDD['Journal_ID']) - set(Log_File_YYYYMMDD_YYYYMMDD['Journal_ID']) - In_LOG_not_in_GL = set(Log_File_YYYYMMDD_YYYYMMDD['Journal_ID']) - set(GL_Detail_YYYYMMDD_YYYYMMDD['Journal_ID']) - + def comparison_of_entries_of_GL_and_log_file( + self, + output_file = "Test_3_1_2_Comparison_of_Entries_of_GL_and_Log_File.csv" + ): + output_path = f'{self.output_folder}/{output_file}' + In_GL_not_in_LOG = set(self.general_ledger_df['Journal_ID']) - \ + set(self.log_file_df['Journal_ID']) + In_LOG_not_in_GL = set(self.log_file_df['Journal_ID']) - \ + set(self.general_ledger_df['Journal_ID']) + if output_file: - with open(output_file, 'w') as file: + with open(output_path, 'w') as file: writer = csv.writer(file) writer.writerow(['Following %a journal entries exist in General Ledger, but missing from the Log File:' %(len(In_GL_not_in_LOG))]) @@ -64,49 +85,52 @@ def comparison_of_entries_of_GL_and_log_file(GL_Detail_YYYYMMDD_YYYYMMDD, writer.writerow(['Amounts of following %a journal entries do not match their amounts in Log File:' %(len(In_LOG_not_in_GL))]) writer.writerow(list(In_LOG_not_in_GL)) + return ({"results": (len(In_LOG_not_in_GL) + len(In_GL_not_in_LOG)), "output": output_file}) # 3.1.3 Test 1.3 Compare total debit amounts and credit amounts of journal entries to system control totals by entry type - def comparison_of_amounts_of_GL_and_log_file(GL_Detail_YYYYMMDD_YYYYMMDD, Log_File_YYYYMMDD_YYYYMMDD): - - gl_totals_pivot = GL_Detail_YYYYMMDD_YYYYMMDD.pivot_table(index=['Journal_ID', 'Amount_Credit_Debit_Indicator'], - values='Net', - aggfunc=sum).reset_index() - recon_gl_to_log = gl_totals_pivot.merge(Log_File_YYYYMMDD_YYYYMMDD, on = ['Journal_ID', 'Amount_Credit_Debit_Indicator'], + def comparison_of_amounts_of_GL_and_log_file(self): + + gl_totals_pivot = self.general_ledger_df.pivot_table( + index=['Journal_ID', 'Amount_Credit_Debit_Indicator'], + values='Net', + aggfunc=sum + ).reset_index() + recon_gl_to_log = gl_totals_pivot.merge(self.log_file_df, on = ['Journal_ID', 'Amount_Credit_Debit_Indicator'], how = 'outer').fillna(0) recon_gl_to_log['Comparison'] = round(abs(recon_gl_to_log['Net']), 2) - round(abs(recon_gl_to_log['Total']), 2) recon_gl_to_log = recon_gl_to_log.drop('Entered_Date', axis=1) recon_gl_to_log = recon_gl_to_log.drop('Entered_Time', axis=1) failed_test = recon_gl_to_log.loc[recon_gl_to_log['Comparison'] != 0] - + if output_file: failed_test.to_csv('Output_Folder/Test_3_1_3_comparison_of_amounts_of_GL_and_log_file.csv') - + return ({"results": len(In_LOG_not_in_GL), "output": output_file}) -class Test_2_Procedures: +class Test_2_Procedures: # 3.2.1 - Examine population for missing or incomplete journal entries # Pivot by Journal_ID and make sure Net is 0 for each Journal ID, to check if debits and credits are equal for each entry - def check_for_incomplete_entries(GL_Detail_YYYYMMDD_YYYYMMDD, + def check_for_incomplete_entries(self, GL_Detail_YYYYMMDD_YYYYMMDD, output_file='', Journal_ID_Column = 'Journal_ID'): - + GL_Pivot = GL_Detail_YYYYMMDD_YYYYMMDD.pivot_table(index=Journal_ID_Column, values='Net', aggfunc=sum) failed_test = GL_Pivot.loc[round(GL_Pivot['Net'], 2) != 0] failed_test = pd.DataFrame(failed_test.to_records()) - + if output_file: failed_test.to_csv('Output_Folder/Test_3_2_1_check_for_incomplete_entries.csv') - + return ({"results": len(failed_test[Journal_ID_Column]), "output": output_file}) # 3.2.2 - Examine possible duplicate account entries # Check for Journal Entries that have same account and amount in the same period - def check_for_duplicate_entries(GL_Detail_YYYYMMDD_YYYYMMDD): + def check_for_duplicate_entries(self, GL_Detail_YYYYMMDD_YYYYMMDD): print('Checking for Duplicate Entries is started') import pandas as pd import numpy as np - GL_Pivot = GL_Detail_YYYYMMDD_YYYYMMDD.pivot_table(index=['GL_Account_Number', 'Period', 'Net'], + GL_Pivot = GL_Detail_YYYYMMDD_YYYYMMDD.pivot_table(index=['GL_Account_Number', 'Period', 'Net'], values='Journal_ID', aggfunc= np.count_nonzero) GL_Pivot.columns = ['Journal_Entry_Count'] Duplicates = GL_Pivot.loc[GL_Pivot['Journal_Entry_Count'] != 1] @@ -120,11 +144,11 @@ def check_for_duplicate_entries(GL_Detail_YYYYMMDD_YYYYMMDD): #3.2.3 - Examine round-dollar entries # Devide Amounts by 1000 and look for remainder of 0 to check for journal entries with exact amounts in '000s - def check_for_round_dollar_entries(GL_Detail_YYYYMMDD_YYYYMMDD): + def check_for_round_dollar_entries(self, GL_Detail_YYYYMMDD_YYYYMMDD): print('Checking for Round Dollar Entries is started') GL_Copy = GL_Detail_YYYYMMDD_YYYYMMDD[['Journal_ID', 'GL_Account_Number', 'Period', 'Net']].copy() GL_Copy['1000s Remainder'] = GL_Copy['Net'] % 1000 - failed_test = GL_Copy.loc[GL_Copy['1000s Remainder'] == 0] + failed_test = GL_Copy.loc[GL_Copy['1000s Remainder'] == 0] failed_test.to_csv('Output_Folder/Test_3_2_3_check_for_round_dollar_entries.csv') print('%d instances detected' %len(failed_test['Journal_ID'])) print('Results saved at Output_Folder/Test_3_2_3_check_for_round_dollar_entries.csv') @@ -134,7 +158,7 @@ def check_for_round_dollar_entries(GL_Detail_YYYYMMDD_YYYYMMDD): #Check if Document Date was later than Entry Date #Document_Date does not appear in Data Standards #optimize&clarify - def check_for_post_date_entries(GL_Detail_YYYYMMDD_YYYYMMDD): + def check_for_post_date_entries(self, GL_Detail_YYYYMMDD_YYYYMMDD): print('Checking for Post Date Entries is started') GL_Copy = GL_Detail_YYYYMMDD_YYYYMMDD[['Journal_ID', 'Document_Date', 'Entered_Date', 'Period', 'Net']].copy() failed_test = GL_Copy.loc[GL_Copy['Document_Date'] > (GL_Copy['Entered_Date'] + 100)] #optimize&"accurify" @@ -146,12 +170,12 @@ def check_for_post_date_entries(GL_Detail_YYYYMMDD_YYYYMMDD): #3.2.5 - Examine entries posted on weekends/nights # Check if Entry Date falls on Saturday or Sunday - def check_for_weekend_entries(GL_Detail_YYYYMMDD_YYYYMMDD): + def check_for_weekend_entries(self, GL_Detail_YYYYMMDD_YYYYMMDD): print('Checking for Weekend Entries is started') from datetime import datetime import pandas as pd GL_Copy = GL_Detail_YYYYMMDD_YYYYMMDD[['Journal_ID', 'Entered_Date', 'Entered_Time']].copy() - GL_Copy['Entry_Date_Time_Formatted'] = pd.to_datetime(GL_Copy['Entered_Date'].astype(str) + + GL_Copy['Entry_Date_Time_Formatted'] = pd.to_datetime(GL_Copy['Entered_Date'].astype(str) + GL_Copy['Entered_Time'].astype(str), format='%Y%m%d%H%M%S') GL_Copy['WeekDayNo'] = GL_Copy['Entry_Date_Time_Formatted'].apply(lambda x: x.isoweekday()) failed_test = GL_Copy.loc[GL_Copy['WeekDayNo'] >= 6] @@ -160,11 +184,11 @@ def check_for_weekend_entries(GL_Detail_YYYYMMDD_YYYYMMDD): print('Results saved at Output_Folder/Test_3_2_5.1_check_for_weekend_entries.csv') # Check if Entry Time falls on between 8pm and 6am - def check_for_nights_entries(GL_Detail_YYYYMMDD_YYYYMMDD): + def check_for_nights_entries(self, GL_Detail_YYYYMMDD_YYYYMMDD): print('Checking for Night Entries is started') from datetime import datetime GL_Copy = GL_Detail_YYYYMMDD_YYYYMMDD[['Journal_ID', 'Entered_Date', 'Entered_Time']].copy() - GL_Copy['Entry_Date_Time_Formatted'] = pd.to_datetime(GL_Copy['Entered_Date'].astype(str) + + GL_Copy['Entry_Date_Time_Formatted'] = pd.to_datetime(GL_Copy['Entered_Date'].astype(str) + GL_Copy['Entered_Time'].astype(str), format='%Y%m%d%H%M%S') GL_Copy['Hour'] = GL_Copy['Entry_Date_Time_Formatted'].dt.hour failed_test = GL_Copy.loc[(GL_Copy['Hour'] >= 20) | (GL_Copy['Hour'] <= 5)] @@ -173,14 +197,14 @@ def check_for_nights_entries(GL_Detail_YYYYMMDD_YYYYMMDD): print('Results saved at Output_Folder/Test_3_2_5.2_check_for_nights_entries.csv') - #3.2.6 - Summarize by person, type and period in order to identify individuals who normally do not post entries, + #3.2.6 - Summarize by person, type and period in order to identify individuals who normally do not post entries, #and to identify accounts that are normally not used. #Check for individuals who posted 10 or fewer entries and identify entries made by these individuals - def check_for_rare_users(GL_Detail_YYYYMMDD_YYYYMMDD): + def check_for_rare_users(self, GL_Detail_YYYYMMDD_YYYYMMDD): print('Checking for Rare Users is started') - GL_Pivot = GL_Detail_YYYYMMDD_YYYYMMDD.pivot_table(index=['Entered_By'], values='Journal_ID', + GL_Pivot = GL_Detail_YYYYMMDD_YYYYMMDD.pivot_table(index=['Entered_By'], values='Journal_ID', aggfunc=np.count_nonzero).fillna(0) Rare_Users = GL_Pivot.loc[GL_Pivot['Journal_ID'] <= 10] Rare_Users = pd.DataFrame(Rare_Users.to_records()) @@ -191,10 +215,10 @@ def check_for_rare_users(GL_Detail_YYYYMMDD_YYYYMMDD): print('Results saved at Output_Folder/Test_3_2_6.1_check_for_rare_users.csv') # Check for accounts that were used 3 or fewer times and identify entries made to these accounts - def check_for_rare_accounts(GL_Detail_YYYYMMDD_YYYYMMDD): + def check_for_rare_accounts(self, GL_Detail_YYYYMMDD_YYYYMMDD): print('Checking for Rare Accounts is started') - GL_Pivot = GL_Detail_YYYYMMDD_YYYYMMDD.pivot_table(index=['GL_Account_Number'], values='Journal_ID', + GL_Pivot = GL_Detail_YYYYMMDD_YYYYMMDD.pivot_table(index=['GL_Account_Number'], values='Journal_ID', aggfunc=np.count_nonzero).fillna(0) Rare_Accounts = GL_Pivot.loc[GL_Pivot['Journal_ID'] <= 3] Rare_Accounts = pd.DataFrame(Rare_Accounts.to_records()) diff --git a/samples/__pycache__/Test_Procedures.cpython-38.pyc b/samples/__pycache__/Test_Procedures.cpython-38.pyc deleted file mode 100644 index c6206da..0000000 Binary files a/samples/__pycache__/Test_Procedures.cpython-38.pyc and /dev/null differ diff --git a/samples/journal_entry_testing_final.ipynb b/samples/journal_entry_testing_final.ipynb index a0763cc..807fbdd 100644 --- a/samples/journal_entry_testing_final.ipynb +++ b/samples/journal_entry_testing_final.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -61,82 +61,12 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 8, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Journal_IDAmount_Credit_Debit_IndicatorTotalEntered_DateEntered_Time
0100000000H9,770.5220070122101205
1100000000S9,770.5220070122101205
2100000001H5,875.2020070122101206
3100000001S5,875.2020070122101206
4100000002H244.8020070122101206
\n", - "
" - ], "text/plain": [ " Journal_ID Amount_Credit_Debit_Indicator Total Entered_Date \\\n", "0 100000000 H 9,770.52 20070122 \n", @@ -151,11 +81,11 @@ "2 101206 \n", "3 101206 \n", "4 101206 " - ] + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Journal_IDAmount_Credit_Debit_IndicatorTotalEntered_DateEntered_Time
0100000000H9,770.5220070122101205
1100000000S9,770.5220070122101205
2100000001H5,875.2020070122101206
3100000001S5,875.2020070122101206
4100000002H244.8020070122101206
\n
" }, - "execution_count": 4, "metadata": {}, - "output_type": "execute_result" + "execution_count": 8 } ], "source": [ @@ -164,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 10, "metadata": { "slideshow": { "slide_type": "subslide" @@ -187,7 +117,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -203,59 +133,57 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 16, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", - "text": [ - "Checking for gaps in Journal Entry IDs is started\n", - "12 instances detected\n", - "Results saved at Output_Folder/Test_3_1_1_check_for_gaps_in_JE_ID.csv\n" - ] - } - ], - "source": [ - "Test_1_Procedures.check_for_gaps_in_JE_ID(GL_Detail_20070101_200701231)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { "name": "stdout", - "output_type": "stream", "text": [ - "Comparison of entries in General Ledger and Log File is for gaps in Journal Entry IDs is started\n", - "0 instances detected\n", - "Results saved at Output_Folder/Test_3_1_2_Comparison_of_Entries_of_GL_and_Log_File.csv\n" + "check_for_gaps_in_JE_ID is now started\n12 instances detected\nResults saved at Test_3_1_1_check_for_gaps_in_JE_ID.csv\n" ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "12" + ] + }, + "metadata": {}, + "execution_count": 16 } ], "source": [ - "Test_1_Procedures.comparison_of_entries_of_GL_and_log_file(GL_Detail_20070101_200701231, Log_File_20070101_200701231)" + "first_tester = Test_1_Procedures(GL_Detail_20070101_200701231, Log_File_20070101_200701231)\n", + "first_tester.check_for_gaps_in_JE_ID()" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 21, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "Comparison of entries in General Ledger and Log File is for gaps in Journal Entry IDs is started\n", - "0 instances detected\n", - "Results saved at Output_Folder/Test_3_1_2_Comparison_of_Entries_of_GL_and_Log_File.csv\n" + "comparison_of_entries_of_GL_and_log_file is now started\n0 instances detected\nResults saved at Test_3_1_2_Comparison_of_Entries_of_GL_and_Log_File.csv\n" ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 21 } ], "source": [ - "Test_1_Procedures.comparison_of_entries_of_GL_and_log_file(GL_Detail_20070101_200701231, Log_File_20070101_200701231)" + "first_tester.comparison_of_entries_of_GL_and_log_file()" ] }, { @@ -267,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -452,9 +380,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.8.5 64-bit ('venv': venv)", "language": "python", - "name": "python3" + "name": "python385jvsc74a57bd0e05b5a7c51a49927f4a46b496f45b922e1e4497c2b769e887a16580e2cea8557" }, "language_info": { "codemirror_mode": { @@ -466,9 +394,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file