Shume shume keq data input. Done!

This commit is contained in:
2022-09-12 16:37:18 +02:00
parent b7c71eee3d
commit 991170c510
8 changed files with 33 additions and 13 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -3,6 +3,7 @@ from os import listdir
from os.path import isfile, join
from docx.api import Document
from enum import Enum
from datetime import datetime
class Klinika(Enum):
@@ -12,7 +13,7 @@ class Klinika(Enum):
def convert(klinika: str):
print(klinika)
print(f"Filloi {klinika} me {datetime.now()}")
columns = []
filtered_columns = []
docx_path = './Word/'
@@ -26,6 +27,14 @@ def convert(klinika: str):
for path in list_paths:
document = Document(path)
for table in document.tables:
if "urgjenca_2016.docx" in path:
grid = table._tbl.find("w:tblGrid", table._tbl.nsmap)
for cell in table.column_cells(2):
cell._tc.getparent().remove(cell._tc)
col_elem = grid[2]
grid.remove(col_elem)
print(f"U FSHI PAVIONI")
for cell in table.rows[0].cells:
columns.append(cell.text.strip().replace('\n', ' '))
RowA = table.rows[0]
@@ -34,16 +43,17 @@ def convert(klinika: str):
for row in table.rows:
text = [cell.text for cell in row.cells]
df = df.append([text], ignore_index=True)
print(f'{path} Done')
print(f"Mbaroi {path} me {datetime.now()}")
for word in columns:
if word not in filtered_columns:
filtered_columns.append(word)
print(f'{len(filtered_columns)} Columns found')
print('writing to excel...')
print(f"Perfundoi {klinika} me {datetime.now()}")
df.columns = filtered_columns
excel_path = join(xlsx_path, f"raw_{klinika}_2016-2019.xlsx")
df.to_excel(excel_path, index=False)
convert(Klinika.OBS.value)
convert(Klinika.URGJENCA.value)

View File

@@ -37,7 +37,7 @@ def konverto_data(datat: str, i: int):
# i +=1
datat_pattern = "%d/%m/%y"
try:
if "2572/81" in datat or datat == "?" or datat == "nan" or datat == "Pensionist" or datat == "15/13/92" or datat == "29/2/94":
if "2572/81" in datat or datat == "?" or datat == "nan" or datat == "Pensionist" or datat == "15/13/92" or datat == "29/2/94" or datat == "20/0/81" or "Colicaabdpd" in datat or "2379/11" in datat or "2710/44" in datat or "31/4/56" in datat or "11/1/199" in datat or "27/7/889" in datat or "31/9/84" in datat:
df.iat[i, df.columns.get_loc('MOSHA')] = "ERROR"
df.iat[i, df.columns.get_loc('ERROR')] = datat
return ""
@@ -53,7 +53,6 @@ def konverto_data(datat: str, i: int):
return "DEKLARUAR"
if datat.endswith('/'):
datat = datat[:-1]
print(datat)
datat = datat.split('/')
if len(datat) == 1:
datat.insert(0, '1')
@@ -65,14 +64,17 @@ def konverto_data(datat: str, i: int):
if (len(datat)) > 2:
if datat[2] in ['1', '2', '3', '4', '5', '6', '7', '8', '9']:
datat[2] = f"0{datat[2]}"
if int(datat[2]) > 99:
datat_pattern = "%d/%m/%Y"
# Detyrohem te bej nje supozim qe nuk ka datelindje 1919 e poshte, bazuar mbi datat e shtrimit qe jane max 2019
elif int(datat[2]) > 19 and int(datat[2]) < 100:
datat[2] = f"19{datat[2]}"
datat_pattern = "%d/%m/%Y"
else:
datat_pattern = "%d/%m/%y"
try:
if int(datat[2]) > 99:
datat_pattern = "%d/%m/%Y"
# Detyrohem te bej nje supozim qe nuk ka datelindje 1919 e poshte, bazuar mbi datat e shtrimit qe jane max 2019
elif int(datat[2]) > 19 and int(datat[2]) < 100:
datat[2] = f"19{datat[2]}"
datat_pattern = "%d/%m/%Y"
else:
datat_pattern = "%d/%m/%y"
except:
print(i, datat)
datat = "/".join(datat)
try:
datat = datetime.strptime(datat, datat_pattern)
@@ -92,6 +94,7 @@ def pastro_string(datat: str, i: int):
datat = datat.replace(" ", "")
datat = datat.replace(".", "/")
datat = datat.replace("'", "")
datat = konverto_data(datat, i)
return datat
@@ -120,11 +123,18 @@ for index, row in df.iterrows():
datelindja_korigj.append(datelindja)
i += 1
df.insert(len(df.columns), "DT_SHTRIMI_KORIGJ", data_shtri_korigj)
df.insert(len(df.columns), "DATELINDJA_KORIGJ", datelindja_korigj)
df.drop(columns=df.columns[0], axis=1, inplace=True)
df.drop(columns=df.columns[0], axis=1, inplace=True)
cols = df.columns.tolist()
index = cols.index('MOSHA')
del cols[index]
cols.insert(3,'MOSHA')
df = df[cols]
current_GMT = time.gmtime()
ts = calendar.timegm(current_GMT)