10 Appendix
10.1 Python code
10.1.1 Webcollector Tool
86
87
current_top_level = row.split('\t')[0]
current_mid_level = 1 if row.startswith('■'):
row_list[row_list.index(row)] = row.replace('■',current_top_level + str(current_mid _level) + '.')
current_mid_level += 1 for row in row_list:
if row.split('\t')[0].count('.') == 3:
current_top_level = row.split('\t')[0]
current_mid_level = 1 if row.startswith('●'):
row_list[row_list.index(row)] = row.replace('●',current_top_level + str(current_mid _level) + '.')
current_mid_level += 1
#Een lijst van tuples van index (1.1.1.1) en naam ('Boeddhisme') cat_tuples = []
for row in row_list:
cat_tuples.append((row.split('\t')[0],(row.split('\t')[1]))) #Het omzetten van deze lijst in een dictionary met enkel de index cat_dict = {}
for cat in cat_tuples:
if cat[0].count('.') == 1:
cat_dict[cat[0]] = {}
if cat[0].count('.') == 2:
for key in cat_dict.keys():
if cat[0].startswith(key):
cat_dict[key][cat[0]] = {}
if cat[0].count('.') == 3:
for key_top, value in cat_dict.items():
for key_mid in value.keys():
if cat[0].startswith(key_mid):
cat_dict[key_top][key_mid][cat[0]] = {}
if cat[0].count('.') == 4:
for key_top, value_top in cat_dict.items():
for key_mid, value_mid in value_top.items():
for key_bottom in value_mid.keys():
if cat[0].startswith(key_bottom):
cat_dict[key_top][key_mid][key_bottom][cat[0]] = {}
#Het omzetten van de indexen in de namen van de stromingen/categorieën dict_to_string = str(cat_dict)
for cat in cat_tuples:
dict_to_string = dict_to_string.replace(f"'{cat[0]}'",f"\"{cat[1]}\"") final_cat_dict = json.loads(dict_to_string)
#Het omzetten van de dictionary naar de GUI, in de vorm van checkboxes in een tabblad.
#Hier wordt uitgegaan van een maximale 'diepte' van 4. (Bv Wereldreligie/Christendom/protes tantisme/evangelisme)
checkbox_value_list = []
for key_top, value_top in final_cat_dict.items():
value_top_frame = ttk.Frame(master=scrollbar_frame) value_top_frame.pack(side='top',anchor='nw')
chk_value_top_var = tk.BooleanVar()
chk_value_top = tk.Checkbutton(master=value_top_frame, variable=chk_value_top_var,offva lue=False,onvalue=True)
chk_value_top.pack(side='left')
value_top_label = ttk.Label(value_top_frame, text=key_top) value_top_label.pack(side='left')
checkbox_value_list.append([chk_value_top_var,key_top,chk_value_top]) for key_mid, value_mid in value_top.items():
value_mid_frame = ttk.Frame(master=scrollbar_frame) value_mid_frame.pack(side='top',anchor='nw',padx=(20,0))
88
chk_value_mid_var = tk.BooleanVar()
chk_value_mid = tk.Checkbutton(master=value_mid_frame, variable=chk_value_mid_var,o ffvalue=False,onvalue=True)
chk_value_mid.pack(side='left')
value_mid_label = ttk.Label(value_mid_frame, text=key_mid) value_mid_label.pack(side='left')
checkbox_value_list.append([chk_value_mid_var,f'{key_top}/{key_mid}',chk_value_mid]) for key_low, value_low in value_mid.items():
value_low_frame = ttk.Frame(master=scrollbar_frame) value_low_frame.pack(side='top',anchor='nw',padx=(40,0)) chk_value_low_var = tk.BooleanVar()
chk_value_low = tk.Checkbutton(master=value_low_frame, variable=chk_value_low_v ar,offvalue=False,onvalue=True)
chk_value_low.pack(side='left')
value_low_label = ttk.Label(value_low_frame, text=key_low) value_low_label.pack(side='left')
checkbox_value_list.append([chk_value_low_var,f'{key_top}/{key_mid}/{key_low}', chk_value_low])
for key_lowest, value_lowest in value_low.items():
value_lowest_frame = ttk.Frame(master=scrollbar_frame) value_lowest_frame.pack(side='top',anchor='nw',padx=(60,0)) chk_value_lowest_var = tk.BooleanVar()
chk_value_lowest = tk.Checkbutton(master=value_lowest_frame, variable=chk_v alue_lowest_var,offvalue=False,onvalue=True)
chk_value_lowest.pack(side='left')
value_lowest_label = ttk.Label(value_lowest_frame, text=key_lowest) value_lowest_label.pack(side='left')
checkbox_value_list.append([chk_value_lowest_var,f'{key_top}/{key_mid}/{key _low}/{key_lowest}',chk_value_lowest])
return final_cat_dict, checkbox_value_list
# Keeps the category checkboxes consistent: ticking a box also ticks its
# ancestor categories, and clearing a box also clears its descendants.
def _is_same_or_ancestor(ancestor_path, path):
    """Return True when *ancestor_path* equals *path* or is one of its parents."""
    return path == ancestor_path or path.startswith(ancestor_path + '/')


def de_check_parent_kid_boxes(checkbox_value, checkbox_value_list):
    """Propagate the state of one category checkbox through its tree.

    checkbox_value      -- the variable object of the box that was toggled.
    checkbox_value_list -- entries of the form [variable, 'Top/Mid/Low' path, ...].

    Paths are compared segment-wise ('/'-separated) instead of by substring,
    so 'A/B' no longer affects 'A/BC' and 'B' is not treated as a parent of
    'A/B/C'. Nothing happens when the variable is not in the list.
    """
    toggled = next(
        (entry for entry in checkbox_value_list if entry[0] == checkbox_value),
        None,
    )
    if toggled is None:
        return

    toggled_path = toggled[1]
    if toggled[0].get():
        # Checked: every ancestor (and the box itself) becomes checked.
        for entry in checkbox_value_list:
            if _is_same_or_ancestor(entry[1], toggled_path):
                entry[0].set(True)
    else:
        # Unchecked: every descendant (and the box itself) becomes unchecked.
        for entry in checkbox_value_list:
            if _is_same_or_ancestor(toggled_path, entry[1]):
                entry[0].set(False)
# Generates a value for the 'Name' column from the URL. The original author
# flagged this heuristic as something that definitely needs to be revised.
def name_from_url(current_url):
    """Guess a display name from *current_url* and return it capitalised.

    Rules (unchanged from the original heuristic):
    * exactly one '.' in the whole URL      -> first host label;
    * no 'www' anywhere in the URL          -> first host label, unless it is
      shorter than three characters (e.g. 'nl'), then the second label;
    * otherwise                             -> second host label.

    URLs without a scheme, and hosts without a second label, used to raise;
    they now fall back to the best label available.
    """
    hostname = urllib.parse.urlparse(current_url).hostname
    if hostname is None:
        # Without a scheme urlparse() puts the host in the path component.
        hostname = urllib.parse.urlparse('//' + current_url).hostname or ''

    labels = hostname.split('.')
    first_label = labels[0]
    second_label = labels[1] if len(labels) > 1 else first_label

    if current_url.count('.') == 1:
        chosen = first_label
    elif 'www' not in current_url:
        chosen = second_label if len(first_label) < 3 else first_label
    else:
        chosen = second_label
    return chosen.capitalize()
def init_driver():
    """Start a Chrome Selenium session and store it in the global `driver`."""
    global driver
    # NOTE(review): passing the executable path positionally is the older
    # Selenium calling convention - confirm against the installed version.
    chromedriver_path = r'C:\Program Files\Google\Chromedriver\chromedriver.exe'
    driver = webdriver.Chrome(chromedriver_path)
# Used when Auto Open is enabled: the site is opened in the Selenium browser.
def auto_open():
    """Open `current_url` in the Selenium browser if the Auto-open box is ticked.

    Best effort only: failures (for example the driver has not been started
    yet, or the URL cannot be loaded) are ignored so that loading a row never
    fails because of the browser.
    """
    try:
        if chk_auto_open_url_var.get():
            driver.get(current_url)
    except Exception:
        # Deliberately ignored; `Exception` (not a bare except) so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        return
# Checks for empty values and returns a value of a kind that the consuming
# widget/function accepts without raising an error.
def check_nat(value, _type):
    """Replace a missing *value* by a safe default for the given *_type*.

    _type == "string": missing -> '' ; otherwise the value itself.
    _type == "bool":   missing, or the text of a missing value ('None',
                       'nan', 'NaT', '<NA>') -> 'False'; otherwise the value.
                       Callers pass `str(cell)`, so a missing cell arrives as
                       text and would not be caught by `pd.isnull` alone.
    _type == "choice": missing -> None ; otherwise the value itself.
    Any other *_type* returns None.
    """
    is_missing = pd.isnull(value)
    if _type == "string":
        return '' if is_missing else value
    if _type == "bool":
        if is_missing or value in ('None', 'nan', 'NaT', '<NA>'):
            return 'False'
        return value
    if _type == "choice":
        return None if is_missing else value
    return None
def float_to_bool(val):
    """Map 0.0 / '0.0' to False and 1.0 / '1.0' to True; anything else gives None."""
    if val in (0.0, '0.0'):
        return False
    if val in (1.0, '1.0'):
        return True
    return None
def transform_nullstring_to_nat(value):
    """Return pd.NaT for text made up only of spaces/newlines, else the text without newlines."""
    without_newlines = value.replace('\n', '')
    if without_newlines.replace(' ', '') == '':
        return pd.NaT
    return without_newlines
def return_emptied_dict_level(_dict, dict_path):
    """Return the branch of *_dict* selected by *dict_path*, with that level set to {}.

    The branch is first extracted with dpath's `search`, the value at
    *dict_path* is then replaced by an empty dict, and the result is searched
    once more so that only the selected path is returned.
    """
    branch = dpath.util.search(_dict, dict_path)
    dpath.util.set(branch, dict_path, {})
    return dpath.util.search(branch, dict_path)
def transform_date(date):
    """Return *date* as text with '/' separators and without a midnight time part.

    Missing values are returned unchanged. Non-string values (e.g. pandas
    Timestamps) are converted with `str()` first.

    The original used `.strip(' 00:00:00')`, which strips the *characters*
    ' ', '0' and ':' from both ends and therefore corrupted values such as
    '10/05/2020' (-> '10/05/202'). Only the literal ' 00:00:00' suffix and
    surrounding spaces are removed now.
    """
    if pd.isnull(date):
        return date

    text = date if isinstance(date, str) else str(date)
    text = text.replace('-', '/').strip(' ')
    midnight_suffix = ' 00:00:00'
    if text.endswith(midnight_suffix):
        text = text[:-len(midnight_suffix)]
    return text.strip(' ')
90
# Clears every field when a new row/site is loaded.
def clear_fields():
    """Reset all input widgets to their empty/unchecked state and show the Movement tab."""
    single_line_entries = (
        entry_name, entry_URL, entry_Access, entry_lang, entry_slogan,
        entry_locatie, entry_Email,
    )
    for single_line_entry in single_line_entries:
        single_line_entry.delete(0, tk.END)

    multi_line_texts = (
        text_description_extern, text_description_intern, text_selection_reason,
    )
    for multi_line_text in multi_line_texts:
        multi_line_text.delete('1.0', tk.END)

    for flag_var in (chk_online_var, chk_selected_var, chk_noteworthy_var):
        flag_var.set(False)
    provincie_var.set(None)

    # Every entry in these lists keeps its tkinter variable at index 0.
    checkbox_groups = (
        meta_types_values_list, movements_checkboxes, repr_checkboxes,
        goals_checkboxes, functions_checkboxes, SM_checkboxes,
    )
    for checkbox_group in checkbox_groups:
        for checkbox_entry in checkbox_group:
            checkbox_entry[0].set(False)

    Category_tabs.select(Tab_C2_Movement)
# Fills the category checkboxes by turning the string stored in the csv/excel
# back into a dictionary and reading that dictionary like a file tree.
def _parse_saved_categories(saved_categories):
    """Rebuild the nested category dict from its stored text form."""
    # The leading quote can get lost in the stored cell; put it back.
    if not saved_categories.startswith('\''):
        saved_categories = '\'' + saved_categories
    # A double space stands for an empty level (': {}'), see save_category.
    # NOTE(review): the double space was restored from the comment above
    # save_category; the extracted source showed a single space - confirm.
    text = '{' + saved_categories.replace('  ', ': {}').replace('""', '"') + '}'
    # JSON needs double quotes. A single quote between two letters is kept,
    # because it is an apostrophe inside a name.
    characters = list(text)
    for position, character in enumerate(characters):
        if character != '\'':
            continue
        if characters[position - 1].isalpha() and characters[position + 1].isalpha():
            continue
        characters[position] = '"'
    return json.loads(''.join(characters))


def _iter_category_paths(category_tree, parent_path=None):
    """Yield 'Top/Mid/...' paths for every key of a nested dict, parents first."""
    for name, children in category_tree.items():
        path = name if parent_path is None else f'{parent_path}/{name}'
        yield path
        if isinstance(children, dict):
            yield from _iter_category_paths(children, path)


def fill_category(df_and_column, row, checkboxes):
    """Tick the checkboxes whose category path is stored for *row*.

    df_and_column -- the dataframe column (Series) holding the stored text.
    row           -- positional row index into that column.
    checkboxes    -- entries of the form [variable, 'Top/Mid/Low' path, ...].

    Does nothing when the cell is empty. Nesting of any depth is supported
    (the original walked exactly four levels). A cell that cannot be parsed
    still raises from `json.loads`, as before.
    """
    saved_categories = check_nat(df_and_column.iloc[row], "string")
    if saved_categories == '':
        return

    saved_paths = set(_iter_category_paths(_parse_saved_categories(saved_categories)))
    for category in checkboxes:
        if category[1] in saved_paths:
            category[0].set(True)
# Fills the fields with the values stored in the csv/excel.
def fill_fields():
    """Copy the values of the current dataset row into the input widgets.

    Reads the globals `current_row` and `dataset_pd`. Missing cells are
    converted with `check_nat` so the widgets receive a usable value.
    """
    row = current_row

    def cell(column):
        return dataset_pd[column].iloc[row]

    entry_name.insert(0, check_nat(cell('Name'), "string"))
    chk_online_var.set(check_nat(str(cell('Online')), "bool"))
    entry_Access.insert(0, check_nat(cell('Access date'), "string"))
    entry_lang.insert(0, check_nat(cell('Language(s)'), "string"))
    entry_slogan.insert(0, check_nat(cell('Slogan'), "string"))
    text_description_extern.insert("1.0", check_nat(cell('Description (external)'), "string"))
    text_description_intern.insert("1.0", check_nat(cell('Comments (internal)'), "string"))
    provincie_var.set(check_nat(cell('Province'), "choice"))
    entry_locatie.insert(0, check_nat(cell('Location'), "string"))
    entry_Email.insert(0, check_nat(cell('Email address'), "string"))
    chk_selected_var.set(check_nat(str(cell('Include in collection')), "bool"))
    chk_noteworthy_var.set(check_nat(str(cell('Special interest')), "bool"))
    text_selection_reason.insert("1.0", check_nat(cell('Reason for selection'), "string"))

    # The meta types are stored as "'Religieus', 'Spiritueel'". An empty or
    # non-text cell simply ticks nothing (this replaces a bare `except: pass`).
    meta_text = check_nat(cell('C1: Metatype'), "string")
    if isinstance(meta_text, str) and meta_text:
        saved_meta_types = {
            token.strip() for token in meta_text.replace('\'', '').split(',')
        }
        for meta_check in meta_types_values_list:
            if meta_check[1] in saved_meta_types:
                meta_check[0].set(True)

    fill_category(dataset_pd['C2: Stroming'], row, movements_checkboxes)
    fill_category(dataset_pd['C3: Vertegenwoordigers of eigenaars'], row, repr_checkboxes)
    fill_category(dataset_pd['C4: Doelen'], row, goals_checkboxes)
    fill_category(dataset_pd['C5: Functionaliteit'], row, functions_checkboxes)
    fill_category(dataset_pd['Social media'], row, SM_checkboxes)
# Saves the categories as a string. The dictionary structure ': {}' is
# replaced by a more reader-friendly double space '  '. This must stay
# unchanged, otherwise Python can no longer load the values from the csv.
def save_category(_dict, checkboxes):
    """Return the ticked categories of one tab as text, or pd.NaT when none is ticked.

    _dict      -- the full nested category dict of the tab.
    checkboxes -- entries of the form [variable, 'Top/Mid/Low' path, ...].

    For every ticked box the matching branch is taken from *_dict* (with the
    ticked level emptied) and the branches are merged with dpath. The result
    is `str(dict)` without the outer braces and with ': {}' replaced by a
    double space.
    """
    dict_to_save = {}
    for category in checkboxes:
        if not category[0].get():
            continue
        current_level = return_emptied_dict_level(_dict, category[1])
        if dict_to_save == {}:
            dict_to_save = current_level
        else:
            dict_to_save = dpath.util.merge(dict_to_save, current_level)

    if dict_to_save == {}:
        return pd.NaT
    # NOTE(review): the replacement is a DOUBLE space, as the comment above
    # states; the extracted source showed a single space - confirm.
    return str(dict_to_save).replace(': {}', '  ')[1:-1]
# Saves all entered values to the pandas dataframe (which is exported as csv later).
def _single_line_or_nat(text):
    """Flatten *text* to one line, trim spaces, and map empty text to pd.NaT."""
    return transform_nullstring_to_nat(text.replace('\n', ' ').strip(' '))


def save_values():
    """Write the content of every input widget into row `current_row` of `dataset_pd`.

    The 'Archived in WCT' columns are only written when `start_read` compared
    the row against a loaded web collection (`save_AiWCT_values`). An empty
    differing scheme is stored as pd.NaT rather than '', because `start_read`
    treats any non-null value in that column as a differing scheme.
    """
    row = current_row
    if save_AiWCT_values:
        dataset_pd.loc[row, 'Archived in WCT'] = archived_in_WCT
        dataset_pd.loc[row, 'AiWCT Differing scheme'] = (
            WCT_diff_scheme if WCT_diff_scheme else pd.NaT
        )

    dataset_pd.loc[row, 'Name'] = transform_nullstring_to_nat(entry_name.get())
    dataset_pd.loc[row, 'URL'] = transform_nullstring_to_nat(entry_URL.get())
    dataset_pd.loc[row, 'Online'] = chk_online_var.get()
    dataset_pd.loc[row, 'Access date'] = transform_nullstring_to_nat(entry_Access.get())
    dataset_pd.loc[row, 'Language(s)'] = _single_line_or_nat(entry_lang.get())
    dataset_pd.loc[row, 'Slogan'] = _single_line_or_nat(entry_slogan.get())
    dataset_pd.loc[row, 'Description (external)'] = _single_line_or_nat(
        text_description_extern.get("1.0", tk.END))
    dataset_pd.loc[row, 'Comments (internal)'] = _single_line_or_nat(
        text_description_intern.get("1.0", tk.END))
    dataset_pd.loc[row, 'Province'] = provincie_var.get()
    dataset_pd.loc[row, 'Location'] = transform_nullstring_to_nat(entry_locatie.get())
    dataset_pd.loc[row, 'Email address'] = transform_nullstring_to_nat(entry_Email.get())
    dataset_pd.loc[row, 'Include in collection'] = chk_selected_var.get()
    dataset_pd.loc[row, 'Special interest'] = chk_noteworthy_var.get()
    dataset_pd.loc[row, 'Reason for selection'] = _single_line_or_nat(
        text_selection_reason.get("1.0", tk.END))

    # Stored as the list text without brackets, e.g. "'Religieus', 'Seculier'".
    meta_value_final = [
        meta_value[1] for meta_value in meta_types_values_list if meta_value[0].get()
    ]
    if meta_value_final:
        dataset_pd.loc[row, 'C1: Metatype'] = str(meta_value_final).strip('[]')
    else:
        dataset_pd.loc[row, 'C1: Metatype'] = pd.NaT

    dataset_pd.loc[row, 'C2: Stroming'] = save_category(movement_dict, movements_checkboxes)
    dataset_pd.loc[row, 'C3: Vertegenwoordigers of eigenaars'] = save_category(repr_dict, repr_checkboxes)
    dataset_pd.loc[row, 'C4: Doelen'] = save_category(goals_dict, goals_checkboxes)
    dataset_pd.loc[row, 'C5: Functionaliteit'] = save_category(functions_dict, functions_checkboxes)
    dataset_pd.loc[row, 'Social media'] = save_category(SM_dict, SM_checkboxes)
# Loads the dataset (csv/excel) as a pandas dataframe. An 'open file' window is used.
def open_dataset():
    """Ask for a dataset file, load it into the global `dataset_pd` and update the GUI.

    Returns `(dataset_pd, URL_column_dataset)` after a successful load and
    None when the dialog is cancelled or the file is neither .xlsx nor .csv.

    The globals are only replaced once loading and conversion succeeded, so
    cancelling the dialog no longer wipes `dataset_filepath` of a dataset
    that is already open, and an unsupported file no longer leaves
    `dataset_pd` undefined or stale.
    """
    global dataset_filepath, dataset_pd

    chosen_path = askopenfilename(filetypes=[
        ("Excel file", "*.xlsx"),
        ("Comma separated value", "*.csv"),
        ("All Files", "*.*"),
    ])
    if not chosen_path:
        return None

    lowered_path = chosen_path.lower()
    if lowered_path.endswith('.xlsx'):
        loaded = pd.read_excel(chosen_path)
    elif lowered_path.endswith('.csv'):
        loaded = pd.read_csv(chosen_path, header=0)
    else:
        return None

    if 'Unnamed: 0' in loaded.columns:
        loaded = loaded.drop('Unnamed: 0', axis=1)

    # Data type conversions.
    loaded = loaded.fillna(pd.NaT)
    has_description = ~loaded['Description (external)'].isna()
    loaded.loc[has_description, 'Description (external)'] = (
        loaded.loc[has_description, 'Description (external)']
        .apply(lambda x: x.replace('\n', '').strip('\"'))
    )
    for flag_column in ('Archived in WCT', 'Online', 'Include in collection', 'Special interest'):
        loaded[flag_column] = loaded[flag_column].apply(float_to_bool)
    loaded['Access date'] = loaded['Access date'].apply(transform_date)

    dataset_filepath = chosen_path
    dataset_pd = loaded

    # Show the name of the loaded file in the GUI.
    lbl_load_dataset['text'] = Path(chosen_path).stem
    lbl_load_dataset.config(fg='green')
    # Rows are shown 1-based (start_read uses display row - 1), so the last
    # selectable row is len(dataset_pd); the original used len - 1.
    Current_row_dataset.config(to=max(len(dataset_pd), 1))
    # One column has to be called 'URL' for the links to be found automatically.
    if 'URL' in dataset_pd.columns:
        URL_column_dataset.delete(0, tk.END)
        URL_column_dataset.insert(0, str(list(dataset_pd.columns).index('URL')))
    return dataset_pd, URL_column_dataset
# Opens the full web collection, used to cross-reference whether a site has
# already been archived.
def open_webcollectie():
    """Ask for the web collection Excel file and load it into `webcollectie_pd`.

    Returns `(webcollectie_pd, URL_column_webcollectie)`, or None when the
    dialog is cancelled.
    """
    global webcollectie_pd

    filepath = askopenfilename(
        filetypes=[("Excel file", "*.xlsx"), ("All Files", "*.*")]
    )
    if not filepath:
        return

    webcollectie_pd = pd.read_excel(filepath)
    lbl_load_webcollectie['text'] = Path(filepath).stem
    lbl_load_webcollectie.config(fg='green')

    column_names = list(webcollectie_pd.columns)
    if 'URL' in column_names:
        URL_column_webcollectie.delete(0, tk.END)
        URL_column_webcollectie.insert(0, str(column_names.index('URL')))
    return webcollectie_pd, URL_column_webcollectie
# Loads the designated row/site and cross-references it with the full KB
# dataset to see whether it has already been archived.
def _url_without_scheme(url):
    """Return *url* without a leading '<scheme>://' and with exactly one trailing '/'."""
    url = str(url)
    scheme_prefix = urllib.parse.urlparse(url).scheme + '://'
    if scheme_prefix != '://' and url.lower().startswith(scheme_prefix):
        url = url[len(scheme_prefix):]
    return url.strip('/') + '/'


def start_read():
    """Load the row selected in the row spinbox into the form.

    Does nothing while no dataset is loaded. Sets the globals `current_row`,
    `current_url` and `save_AiWCT_values` (plus `archived_in_WCT` and
    `WCT_diff_scheme` when a web collection is loaded), updates the
    'Archived in WCT' label, fills the fields and optionally opens the site.

    Both sides of the archive comparison are normalised with the same helper.
    Previously the web collection URLs had the scheme text removed wherever it
    occurred and got no trailing slash, so equal sites could fail to match.
    """
    global current_row, current_url, save_AiWCT_values
    global archived_in_WCT, WCT_diff_scheme

    if lbl_load_dataset["text"] == 'None loaded':
        return

    # Clamp the 1-based display row to the rows that exist.
    requested_row = int(Current_row_dataset.get())
    display_row = min(max(requested_row, 1), len(dataset_pd))
    if display_row != requested_row:
        Current_row_dataset.delete(0, tk.END)
        Current_row_dataset.insert(0, display_row)
    current_row = display_row - 1

    clear_fields()
    current_url = dataset_pd.iloc[current_row, int(URL_column_dataset.get())]
    lbl_current_URL['text'] = f'Current URL: {current_url}'

    if lbl_load_webcollectie["text"] != 'None loaded':
        # The full web collection is loaded: compare against it.
        save_AiWCT_values = True
        archived_in_WCT = ''
        WCT_diff_scheme = ''
        webcollectie_urls = list(
            webcollectie_pd.iloc[:, int(URL_column_webcollectie.get())])
        webcollectie_urls_minus_scheme = [
            _url_without_scheme(url) for url in webcollectie_urls
        ]
        url_minus_scheme = _url_without_scheme(current_url)
        url_scheme = urllib.parse.urlparse(str(current_url)).scheme
        if url_minus_scheme in webcollectie_urls_minus_scheme:
            match_position = webcollectie_urls_minus_scheme.index(url_minus_scheme)
            wc_url_scheme = urllib.parse.urlparse(
                str(webcollectie_urls[match_position])).scheme
            archived_in_WCT = True
            if url_scheme != wc_url_scheme:
                WCT_diff_scheme = wc_url_scheme
                lbl_archived_result['text'] = f'True, except: {wc_url_scheme}'
                lbl_archived_result.config(fg='purple')
            else:
                lbl_archived_result['text'] = f'True'
                lbl_archived_result.config(fg='dark blue')
        else:
            archived_in_WCT = False
            lbl_archived_result['text'] = f'False'
            lbl_archived_result.config(fg='green')
    else:
        # No web collection loaded: show the values already stored in the dataset.
        save_AiWCT_values = False
        scheme_text = dataset_pd.loc[current_row, 'AiWCT Differing scheme']
        if not pd.isnull(scheme_text):
            lbl_archived_result['text'] = f'True, except: {scheme_text}'
            lbl_archived_result.config(fg='purple')
        elif dataset_pd.loc[current_row, 'Archived in WCT'] == True:
            lbl_archived_result['text'] = f'True'
            lbl_archived_result.config(fg='dark blue')
        else:
            lbl_archived_result['text'] = f'False'
            lbl_archived_result.config(fg='green')

    # Auto-fill certain values that are still empty after loading the row.
    fill_fields()
    if not entry_URL.get():
        entry_URL.insert(0, current_url)
    if not entry_name.get():
        entry_name.insert(0, name_from_url(current_url))
    if not entry_Access.get():
        entry_Access.insert(0, date.today().strftime("%d/%m/%Y"))
    auto_open()
# Reads the next row/site.
def read_next():
    """Move one row forward (saving first when Auto save is ticked) and load it."""
    shown_row = int(Current_row_dataset.get())
    if shown_row == len(dataset_pd):
        # Already on the last row.
        return
    if chk_auto_save_var.get():
        save_values()
    Current_row_dataset.delete(0, tk.END)
    Current_row_dataset.insert(0, str(shown_row + 1))
    start_read()
# Reads the previous row/site.
def read_prev():
    """Move one row back (saving first when Auto save is ticked) and load it."""
    shown_row = int(Current_row_dataset.get())
    if shown_row == 1:
        # Already on the first row.
        return
    if chk_auto_save_var.get():
        save_values()
    Current_row_dataset.delete(0, tk.END)
    Current_row_dataset.insert(0, str(shown_row - 1))
    start_read()
# Exports the pandas dataframe as csv.
def export_csv():
    """Write `dataset_pd` next to the loaded file as '<name>_<dd-mm-YYYY_HH-MM-SS>.csv'.

    Does nothing when no dataset has been loaded yet. A failed export is now
    reported in an error dialog; the original bare `except: return` hid it,
    so a failed export looked the same as a successful one.
    """
    from tkinter import messagebox  # local import: only needed on failure

    try:
        source_path = Path(dataset_filepath)
        frame = dataset_pd
    except NameError:
        # Nothing loaded yet.
        return

    new_filename = f'{source_path.stem}_{datetime.now().strftime("%d-%m-%Y_%H-%M-%S")}.csv'
    new_path = source_path.parent / new_filename
    try:
        frame.to_csv(new_path, index=False, encoding='utf8')
    except Exception as error:
        messagebox.showerror('Export CSV', f'Could not export to {new_path}:\n{error}')
# Main window: two columns and three rows with fixed minimum sizes.
window = tk.Tk()
window.title("Webcollector Tool")
window.columnconfigure(0, weight=0, minsize=250)
window.columnconfigure(1, weight=0, minsize=150)
window.rowconfigure(0, weight=0, minsize=50)
window.rowconfigure(1, weight=0, minsize=500)
window.rowconfigure(2, weight=0, minsize=80)
# Frame for datasets and buttons
Load_datasets = tk.Frame(master=window, relief=tk.FLAT, borderwidth=5)
Load_datasets.grid(row=0, column=0, padx=5, sticky="nsew")

# Buttons for dataset
frm_load_dataset = tk.Frame(master=Load_datasets)
frm_load_dataset.pack(side='top', anchor='nw', fill=tk.X, padx=5, pady=5)
btn_load_dataset = tk.Button(
    master=frm_load_dataset, text='Load dataset', width=14, command=open_dataset)
btn_load_dataset.pack(side=tk.LEFT)
lbl_load_dataset = tk.Label(
    master=frm_load_dataset, text='None loaded', fg='dark red', padx=5, pady=5)
lbl_load_dataset.pack(side=tk.LEFT)
# Spinbox holding the index of the column that contains the URLs.
URL_column_dataset = tk.Spinbox(master=frm_load_dataset, from_=0, to=40, width=3)
URL_column_dataset.pack(side=tk.RIGHT)

# Buttons for the webcollection dataset
frm_load_webcollectie = tk.Frame(master=Load_datasets)
frm_load_webcollectie.pack(side='top', anchor='nw', fill=tk.X, padx=5)
btn_load_webcollectie = tk.Button(
    master=frm_load_webcollectie, text='Load webcollectie', width=14,
    command=open_webcollectie)
btn_load_webcollectie.pack(side=tk.LEFT, pady=(0, 5))
lbl_load_webcollectie = tk.Label(
    master=frm_load_webcollectie, text='None loaded', fg='dark red', padx=5, pady=5)
lbl_load_webcollectie.pack(side=tk.LEFT)
URL_column_webcollectie = tk.Spinbox(master=frm_load_webcollectie, from_=0, to=40, width=3)
URL_column_webcollectie.pack(side=tk.RIGHT)

# Buttons for starting, next and prev
check_frame = tk.Frame(master=Load_datasets)
check_frame.pack(side='top', anchor='nw', fill=tk.X, pady=2)
#check_frame.grid(row=2,column=0,padx=20,sticky="nsew")
btn_start = tk.Button(master=check_frame, text='Load row', command=start_read, width=19)
btn_start.pack(side=tk.LEFT, padx=5)
Current_row_dataset = tk.Spinbox(master=check_frame, from_=1, to=999999, width=6)
Current_row_dataset.pack(side=tk.LEFT, padx=5)
btn_prev = tk.Button(master=check_frame, text='← Previous', command=read_prev, width=9)
btn_prev.pack(side=tk.LEFT, padx=5)
btn_next = tk.Button(master=check_frame, text='Next →', command=read_next, width=9)
btn_next.pack(side=tk.LEFT, padx=5)
# Frame for the auto-open and save options
auto_frame = tk.Frame(master=Load_datasets)
auto_frame.pack(side='top', anchor='nw', fill=tk.X, pady=5)

# Initialize driver
btn_init_driver = tk.Button(master=auto_frame, text='Start driver', command=init_driver)
btn_init_driver.pack(side='left', padx=(5, 0))

# Auto-open
lbl_auto_open_url = tk.Label(master=auto_frame, text='Auto-open URL', font=("Arial", 10))
lbl_auto_open_url.pack(side='left', padx=(4, 0))
chk_auto_open_url_var = tk.BooleanVar()
chk_auto_open_url = tk.Checkbutton(
    master=auto_frame, variable=chk_auto_open_url_var, offvalue=False, onvalue=True)
chk_auto_open_url.pack(side='left', padx=(0, 19))

# Save button
btn_save = tk.Button(master=auto_frame, text='Save values', command=save_values)
btn_save.pack(side='left')

# Auto-save (enabled by default)
lbl_auto_save = tk.Label(master=auto_frame, text='Auto save', font=("Arial", 10))
lbl_auto_save.pack(side='left', padx=(4, 0))
chk_auto_save_var = tk.BooleanVar()
chk_auto_save_var.set(True)
chk_auto_save = tk.Checkbutton(
    master=auto_frame, variable=chk_auto_save_var, offvalue=False, onvalue=True)
chk_auto_save.pack(side='left')
# Frame for the text fields (placed in grid column 0, below the load frame)
frame_textboxes = tk.Frame(master=window, relief=tk.FLAT, borderwidth=5)
frame_textboxes.grid(row=1, rowspan=2, column=0, padx=5, sticky='nsew')

# Frame for autogenerated values
frame_statics = tk.Frame(master=frame_textboxes, relief=tk.SUNKEN, borderwidth=3)
frame_statics.pack(side='top', anchor='nw', fill=tk.X, pady=2)

# Current URL
lbl_current_URL = tk.Label(
    master=frame_statics, text='Current URL:', font=("Arial", 10), padx=5, pady=5)
lbl_current_URL.pack(side='top', anchor='nw')

# Archived?
frame_archived = tk.Frame(master=frame_statics)
frame_archived.pack(side='top', anchor='nw')
lbl_archived = tk.Label(
    master=frame_archived, text='Archived in WCT:', font=("Arial", 10), padx=5, pady=5)
lbl_archived_result = tk.Label(master=frame_archived, text=' ', font=("Arial", 10))
lbl_archived.pack(side='left')
lbl_archived_result.pack(side='left')

# Name
lbl_name = tk.Label(
    master=frame_textboxes, text='1. Name', font=("Arial", 10), padx=10, pady=5)
lbl_name.pack(side='top', anchor='nw')
entry_name = tk.Entry(master=frame_textboxes, width=57)
entry_name.pack(side='top', anchor='nw', padx=10, pady=2)

# URL
lbl_URL = tk.Label(
    master=frame_textboxes, text='2. URL', font=("Arial", 10), padx=10, pady=2)
lbl_URL.pack(side='top', anchor='nw')
entry_URL = tk.Entry(master=frame_textboxes, width=57)
entry_URL.pack(side='top', anchor='nw', padx=10, pady=3)
# Frame for dates
frame_dates = tk.Frame(master=frame_textboxes)
frame_dates.pack(side='top', anchor='nw')

# Online?
frame_online = tk.Frame(master=frame_dates)
frame_online.pack(side='left')
lbl_online = tk.Label(
    master=frame_online, text='3. Online', font=("Arial", 10), padx=4, pady=2)
lbl_online.pack(side='top', anchor='nw')
chk_online_var = tk.BooleanVar()
chk_online = tk.Checkbutton(
    master=frame_online, variable=chk_online_var, offvalue=False, onvalue=True)
chk_online.pack(side='top', anchor='center', padx=4, pady=3)

# Access date
frame_access = tk.Frame(master=frame_dates)
frame_access.pack(side='left')
lbl_Access = tk.Label(
    master=frame_access, text='4. Access date', font=("Arial", 10), padx=4, pady=2)
lbl_Access.pack(side='top', anchor='nw')
entry_Access = tk.Entry(master=frame_access, width=14)
entry_Access.pack(side='top', anchor='nw', padx=4, pady=3)

# Language
frame_lang = tk.Frame(master=frame_dates)
frame_lang.pack(side='left')
lbl_lang = tk.Label(
    master=frame_lang, text='5. Language(s) [ , as div]', font=("Arial", 10),
    padx=4, pady=2)
lbl_lang.pack(side='top', anchor='nw')
entry_lang = tk.Entry(master=frame_lang, width=30)
entry_lang.pack(side='top', anchor='nw', padx=4, pady=3)

# Slogan
frame_slogan = tk.Frame(master=frame_textboxes)
frame_slogan.pack(side='top', anchor='nw')
lbl_slogan = tk.Label(master=frame_slogan, text='6. Slogan', font=("Arial", 10))
lbl_slogan.pack(side='left', padx=(10, 12), pady=5)
entry_slogan = tk.Entry(master=frame_slogan, width=44)
entry_slogan.pack(side='left', pady=3)
# Description (external)
lbl_description_extern = tk.Label(
    master=frame_textboxes, text='7. Description (external)', font=("Arial", 10),
    padx=10, pady=2)
lbl_description_extern.pack(side='top', anchor='nw')
text_description_extern = tk.Text(master=frame_textboxes, width=43, height=5)
text_description_extern.pack(side='top', anchor='nw', padx=10, pady=3)

# Comments (internal)
lbl_description_intern = tk.Label(
    master=frame_textboxes, text='8. Comments (internal)', font=("Arial", 10),
    padx=10, pady=2)
lbl_description_intern.pack(side='top', anchor='nw')
text_description_intern = tk.Text(master=frame_textboxes, width=43, height=7)
text_description_intern.pack(side='top', anchor='nw', padx=10, pady=3)

# Location frame
frame_location = tk.Frame(master=frame_textboxes)
frame_location.pack(side='top', anchor='nw', padx=10, pady=3)

# Province: drop-down with the Dutch provinces; None means "not set".
frame_province = tk.Frame(master=frame_location)
frame_province.pack(side='left', padx=0, pady=3)
provincie_var = tk.StringVar()
provincies = [
    None, 'Drenthe', 'Flevoland', 'Friesland', 'Gelderland', 'Groningen',
    'Limburg', 'Noord-Brabant', 'Noord-Holland', 'Overijssel', 'Utrecht',
    'Zeeland', 'Zuid-Holland',
]
provincie_var.set(None)
lbl_provincie = tk.Label(
    master=frame_province, text='9. Province', font=("Arial", 10), pady=2)
lbl_provincie.pack(side='top', anchor='nw')
option_province = tk.OptionMenu(frame_province, provincie_var, *provincies)
option_province.config(width=14, direction='above')
option_province.pack(side='top', anchor='nw')

# Location
frame_locatie = tk.Frame(master=frame_location)
frame_locatie.pack(side='left', padx=5, pady=3)
lbl_locatie = tk.Label(
    master=frame_locatie, text='10. Location', font=("Arial", 10), padx=15, pady=2)
lbl_locatie.pack(side='top', anchor='nw')
entry_locatie = tk.Entry(master=frame_locatie, width=30)
entry_locatie.pack(side='top', anchor='nw', padx=15, pady=3)
99
# Frame for email and export button
frame_email_export = tk.Frame(master=frame_textboxes)

# Email address
frame_email = tk.Frame(master=frame_email_export)
lbl_Email = tk.Label(
    master=frame_email, text='11. Email address', font=("Arial", 10), padx=10, pady=2)
lbl_Email.pack(side='top', anchor='nw')
entry_Email = tk.Entry(master=frame_email, width=30)
entry_Email.pack(side='top', anchor='nw', padx=10, pady=2)
frame_email.pack(side='left')

# Export button
frame_export = tk.Frame(master=frame_email_export)
btn_export = tk.Button(
    master=frame_export, text='Export CSV', command=export_csv, height=2, width=19)
button_font = tkFont.Font(size=10)
btn_export['font'] = button_font
btn_export.pack(side='top', anchor='nw', padx=(15, 0))
frame_export.pack(side='left')
frame_email_export.pack(side='top', anchor='nw')
#### The categories frame
frame_categories = tk.Frame(master=window, relief=tk.SUNKEN, borderwidth=5)
frame_categories.grid(row=0, rowspan=2, column=1, pady=5, sticky="ns")

# Category 1: meta type. One label + checkbox per meta type; each entry of
# meta_types_values_list is [BooleanVar, meta type name].
lbl_1_meta = tk.Label(
    master=frame_categories, text='15. Category 1: Meta-type', font=("Arial", 10))
lbl_1_meta.pack(side='top', anchor='nw', padx=5, pady=5)
frame_1_meta = tk.Frame(master=frame_categories)
frame_1_meta.pack(side='top', anchor='nw', padx=(30, 0))
meta_types = ['Religieus', 'Spiritueel', 'Seculier']
meta_types_values_list = []
for value in meta_types:
    lbl_check = tk.Label(master=frame_1_meta, text=value, font=("Arial", 10))
    lbl_check.pack(side='left', anchor='nw')
    meta_var = tk.BooleanVar()
    chk_meta = tk.Checkbutton(
        master=frame_1_meta, variable=meta_var, offvalue=False, onvalue=True)
    chk_meta.pack(side='left')
    meta_types_values_list.append([meta_var, value])
# Tab section for checkboxes: one notebook tab per category group.
Category_tabs = ttk.Notebook(master=frame_categories)
Tab_C2_Movement = ttk.Frame(master=Category_tabs)
Tab_C3_Representatives = ttk.Frame(master=Category_tabs)
Tab_C4_Goals = ttk.Frame(master=Category_tabs)
Tab_C5_Functions = ttk.Frame(master=Category_tabs)
Tab_C6_SM = ttk.Frame(master=Category_tabs)
Category_tabs.add(Tab_C2_Movement, text='16. Movement')
Category_tabs.add(Tab_C3_Representatives, text='17. Repr.')
Category_tabs.add(Tab_C4_Goals, text='18. Goals')
Category_tabs.add(Tab_C5_Functions, text='19. Funct.')
Category_tabs.add(Tab_C6_SM, text='20. SM')
Category_tabs.pack(expand=1, fill="both", side='top', anchor='nw')
# Tab 1 movement: a canvas with a vertical scrollbar that hosts the checkbox frame.
Movement_canvas = tk.Canvas(master=Tab_C2_Movement)
Movement_scrollbar = ttk.Scrollbar(
    Tab_C2_Movement, orient="vertical", command=Movement_canvas.yview)
Movement_scrollbar_frame = ttk.Frame(Movement_canvas)
# Keep the scroll region in sync with the size of the checkbox frame.
Movement_scrollbar_frame.bind(
    "<Configure>",
    lambda e: Movement_canvas.configure(scrollregion=Movement_canvas.bbox("all")))
Movement_canvas.create_window((0, 0), window=Movement_scrollbar_frame, anchor="nw")
Movement_canvas.configure(yscrollcommand=Movement_scrollbar.set)

# Create movement categories
movement_dict, movements_checkboxes = create_category_list(
    'C:/Users/Gebruiker/Documents/Webarchivering_Project/Notebooks_Codes/Source files/cat2_stromingen.txt',
    Movement_scrollbar_frame)
# Every checkbox propagates its state to its parent/child boxes when clicked.
for check_set in movements_checkboxes:
    check_set[2].configure(
        command=partial(de_check_parent_kid_boxes, check_set[0], movements_checkboxes))
Movement_canvas.pack(side="left", fill="both")
Movement_scrollbar.pack(side="right", fill="y")
# Tab 2 representatives: a canvas with a vertical scrollbar that hosts the checkbox frame.
Repr_canvas = tk.Canvas(master=Tab_C3_Representatives)
Repr_scrollbar = ttk.Scrollbar(
    Tab_C3_Representatives, orient="vertical", command=Repr_canvas.yview)
Repr_scrollbar_frame = ttk.Frame(Repr_canvas)
# Keep the scroll region in sync with the size of the checkbox frame.
Repr_scrollbar_frame.bind(
    "<Configure>",
    lambda e: Repr_canvas.configure(scrollregion=Repr_canvas.bbox("all")))
Repr_canvas.create_window((0, 0), window=Repr_scrollbar_frame, anchor="nw")
Repr_canvas.configure(yscrollcommand=Repr_scrollbar.set)

# Create Repr categories
repr_dict, repr_checkboxes = create_category_list(
    'C:/Users/Gebruiker/Documents/Webarchivering_Project/Notebooks_Codes/Source files/cat3_vertegenwoordigers.txt',
    Repr_scrollbar_frame)
# Every checkbox propagates its state to its parent/child boxes when clicked.
for check_set in repr_checkboxes:
    check_set[2].configure(
        command=partial(de_check_parent_kid_boxes, check_set[0], repr_checkboxes))
Repr_canvas.pack(side="left", fill="both")
Repr_scrollbar.pack(side="right", fill="y")
# Tab 3: goals
# Scrollable area: a canvas hosts an inner frame, and a vertical scrollbar
# placed in the same tab drives the canvas' vertical view.
Goals_canvas = tk.Canvas(master=Tab_C4_Goals)
Goals_scrollbar = ttk.Scrollbar(
    Tab_C4_Goals, orient="vertical", command=Goals_canvas.yview
)
Goals_scrollbar_frame = ttk.Frame(Goals_canvas)
# Recompute the scroll region every time the inner frame is (re)configured,
# so that everything placed in the frame stays reachable by scrolling.
Goals_scrollbar_frame.bind(
    "<Configure>",
    lambda e: Goals_canvas.configure(scrollregion=Goals_canvas.bbox("all")),
)
Goals_canvas.create_window((0, 0), window=Goals_scrollbar_frame, anchor="nw")
Goals_canvas.configure(yscrollcommand=Goals_scrollbar.set)

# Create goals categories
# NOTE(review): absolute, machine-specific path -- the source file must exist
# at exactly this location for the tab to be populated.
goals_dict, goals_checkboxes = create_category_list(
    'C:/Users/Gebruiker/Documents/Webarchivering_Project/Notebooks_Codes/Source files/cat4_doelen.txt',
    Goals_scrollbar_frame,
)
for check_set in goals_checkboxes:
    # check_set[2] is the widget that receives the click command (presumably
    # the checkbox built by create_category_list -- confirm); check_set[0] and
    # the full checkbox list are bound as arguments of the handler.
    check_set[2].configure(
        command=partial(de_check_parent_kid_boxes, check_set[0], goals_checkboxes)
    )

Goals_canvas.pack(side="left", fill="both")
Goals_scrollbar.pack(side="right", fill="y")
# Tab 4: functions
# Scrollable area: a canvas hosts an inner frame, and a vertical scrollbar
# placed in the same tab drives the canvas' vertical view.
Functions_canvas = tk.Canvas(master=Tab_C5_Functions)
Functions_scrollbar = ttk.Scrollbar(
    Tab_C5_Functions, orient="vertical", command=Functions_canvas.yview
)
Functions_scrollbar_frame = ttk.Frame(Functions_canvas)
# Recompute the scroll region every time the inner frame is (re)configured,
# so that everything placed in the frame stays reachable by scrolling.
Functions_scrollbar_frame.bind(
    "<Configure>",
    lambda e: Functions_canvas.configure(scrollregion=Functions_canvas.bbox("all")),
)
Functions_canvas.create_window((0, 0), window=Functions_scrollbar_frame, anchor="nw")
Functions_canvas.configure(yscrollcommand=Functions_scrollbar.set)

# Create Functions categories
# NOTE(review): absolute, machine-specific path -- the source file must exist
# at exactly this location for the tab to be populated.
functions_dict, functions_checkboxes = create_category_list(
    'C:/Users/Gebruiker/Documents/Webarchivering_Project/Notebooks_Codes/Source files/cat5_functies.txt',
    Functions_scrollbar_frame,
)
for check_set in functions_checkboxes:
    # check_set[2] is the widget that receives the click command (presumably
    # the checkbox built by create_category_list -- confirm); check_set[0] and
    # the full checkbox list are bound as arguments of the handler.
    check_set[2].configure(
        command=partial(de_check_parent_kid_boxes, check_set[0], functions_checkboxes)
    )

Functions_canvas.pack(side="left", fill="both")
Functions_scrollbar.pack(side="right", fill="y")
# Tab 5: social media
# Scrollable area: a canvas hosts an inner frame, and a vertical scrollbar
# placed in the same tab drives the canvas' vertical view.
SM_canvas = tk.Canvas(master=Tab_C6_SM)
SM_scrollbar = ttk.Scrollbar(Tab_C6_SM, orient="vertical", command=SM_canvas.yview)
SM_scrollbar_frame = ttk.Frame(SM_canvas)
# Recompute the scroll region every time the inner frame is (re)configured,
# so that everything placed in the frame stays reachable by scrolling.
SM_scrollbar_frame.bind(
    "<Configure>",
    lambda e: SM_canvas.configure(scrollregion=SM_canvas.bbox("all")),
)
SM_canvas.create_window((0, 0), window=SM_scrollbar_frame, anchor="nw")
SM_canvas.configure(yscrollcommand=SM_scrollbar.set)

# Create SM categories
# NOTE(review): absolute, machine-specific path -- the source file must exist
# at exactly this location for the tab to be populated.
SM_dict, SM_checkboxes = create_category_list(
    'C:/Users/Gebruiker/Documents/Webarchivering_Project/Notebooks_Codes/Source files/cat6_social_media.txt',
    SM_scrollbar_frame,
)
for check_set in SM_checkboxes:
    # check_set[2] is the widget that receives the click command (presumably
    # the checkbox built by create_category_list -- confirm); check_set[0] and
    # the full checkbox list are bound as arguments of the handler.
    check_set[2].configure(
        command=partial(de_check_parent_kid_boxes, check_set[0], SM_checkboxes)
    )

SM_canvas.pack(side="left", fill="both")
SM_scrollbar.pack(side="right", fill="y")
# Section for selection and reason
# Container placed in grid cell (row 2, column 1) of the main window; it
# stretches in all directions within that cell (sticky="nsew").
frame_selection = tk.Frame(master=window, relief=tk.FLAT, borderwidth=5)
frame_selection.grid(row=2, column=1, padx=5, pady=2, sticky="nsew")

# Frame for selection boxes: holds the two label/checkbox pairs side by side.
frame_selection_boxes = tk.Frame(master=frame_selection)
frame_selection_boxes.pack(side='top', anchor='nw')

# Select? -- field 12, state is kept in chk_selected_var (True when ticked).
frame_selected = tk.Frame(master=frame_selection_boxes)
frame_selected.pack(side='left', padx=2)
lbl_selected = tk.Label(
    master=frame_selected,
    text='12. Include in collection',
    font=("Arial", 10),
    padx=4,
)
lbl_selected.pack(side='left')
chk_selected_var = tk.BooleanVar()
chk_selected = tk.Checkbutton(
    master=frame_selected,
    variable=chk_selected_var,
    offvalue=False,
    onvalue=True,
)
chk_selected.pack(side='left', padx=2)

# Noteworthy? -- field 13, state is kept in chk_noteworthy_var (True when ticked).
frame_noteworthy = tk.Frame(master=frame_selection_boxes)
frame_noteworthy.pack(side='left', padx=2)
lbl_noteworthy = tk.Label(
    master=frame_noteworthy,
    text='13. Special interest',
    font=("Arial", 10),
    padx=2,
)
lbl_noteworthy.pack(side='left')
chk_noteworthy_var = tk.BooleanVar()
chk_noteworthy = tk.Checkbutton(
    master=frame_noteworthy,
    variable=chk_noteworthy_var,
    offvalue=False,
    onvalue=True,
)
chk_noteworthy.pack(side='left', padx=2)
# Reason for selection -- field 14: a caption followed by a 41x3 character
# free-text box, both stacked at the top-left of frame_selection.
lbl_selection_reason = tk.Label(
    master=frame_selection,
    text='14. Reason for selection / rejection',
    font=("Arial", 10),
    padx=5,
    pady=2,
)
lbl_selection_reason.pack(side='top', anchor='nw')
text_selection_reason = tk.Text(master=frame_selection, width=41, height=3)
text_selection_reason.pack(side='top', anchor='nw', padx=15, pady=2)

# Hand control to the Tk event loop; all widgets above must be built before
# this call because it does not return while the GUI is running.
window.mainloop()