• No results found

10 Appendix

10.1 Python code

10.1.1 Webcollector Tool

86

87

current_top_level = row.split('\t')[0]

current_mid_level = 1 if row.startswith('■'):

row_list[row_list.index(row)] = row.replace('■',current_top_level + str(current_mid _level) + '.')

current_mid_level += 1 for row in row_list:

if row.split('\t')[0].count('.') == 3:

current_top_level = row.split('\t')[0]

current_mid_level = 1 if row.startswith('●'):

row_list[row_list.index(row)] = row.replace('●',current_top_level + str(current_mid _level) + '.')

current_mid_level += 1

#Een lijst van tuples van index (1.1.1.1) en naam ('Boeddhisme') cat_tuples = []

for row in row_list:

cat_tuples.append((row.split('\t')[0],(row.split('\t')[1]))) #Het omzetten van deze lijst in een dictionary met enkel de index cat_dict = {}

for cat in cat_tuples:

if cat[0].count('.') == 1:

cat_dict[cat[0]] = {}

if cat[0].count('.') == 2:

for key in cat_dict.keys():

if cat[0].startswith(key):

cat_dict[key][cat[0]] = {}

if cat[0].count('.') == 3:

for key_top, value in cat_dict.items():

for key_mid in value.keys():

if cat[0].startswith(key_mid):

cat_dict[key_top][key_mid][cat[0]] = {}

if cat[0].count('.') == 4:

for key_top, value_top in cat_dict.items():

for key_mid, value_mid in value_top.items():

for key_bottom in value_mid.keys():

if cat[0].startswith(key_bottom):

cat_dict[key_top][key_mid][key_bottom][cat[0]] = {}

#Het omzetten van de indexen in de namen van de stromingen/categorieën dict_to_string = str(cat_dict)

for cat in cat_tuples:

dict_to_string = dict_to_string.replace(f"'{cat[0]}'",f"\"{cat[1]}\"") final_cat_dict = json.loads(dict_to_string)

#Het omzetten van de dictionary naar de GUI, in de vorm van checkboxes in een tabblad.

#Hier wordt uitgegaan van een maximale 'diepte' van 4. (Bv Wereldreligie/Christendom/protes tantisme/evangelisme)

checkbox_value_list = []

for key_top, value_top in final_cat_dict.items():

value_top_frame = ttk.Frame(master=scrollbar_frame) value_top_frame.pack(side='top',anchor='nw')

chk_value_top_var = tk.BooleanVar()

chk_value_top = tk.Checkbutton(master=value_top_frame, variable=chk_value_top_var,offva lue=False,onvalue=True)

chk_value_top.pack(side='left')

value_top_label = ttk.Label(value_top_frame, text=key_top) value_top_label.pack(side='left')

checkbox_value_list.append([chk_value_top_var,key_top,chk_value_top]) for key_mid, value_mid in value_top.items():

value_mid_frame = ttk.Frame(master=scrollbar_frame) value_mid_frame.pack(side='top',anchor='nw',padx=(20,0))

88

chk_value_mid_var = tk.BooleanVar()

chk_value_mid = tk.Checkbutton(master=value_mid_frame, variable=chk_value_mid_var,o ffvalue=False,onvalue=True)

chk_value_mid.pack(side='left')

value_mid_label = ttk.Label(value_mid_frame, text=key_mid) value_mid_label.pack(side='left')

checkbox_value_list.append([chk_value_mid_var,f'{key_top}/{key_mid}',chk_value_mid]) for key_low, value_low in value_mid.items():

value_low_frame = ttk.Frame(master=scrollbar_frame) value_low_frame.pack(side='top',anchor='nw',padx=(40,0)) chk_value_low_var = tk.BooleanVar()

chk_value_low = tk.Checkbutton(master=value_low_frame, variable=chk_value_low_v ar,offvalue=False,onvalue=True)

chk_value_low.pack(side='left')

value_low_label = ttk.Label(value_low_frame, text=key_low) value_low_label.pack(side='left')

checkbox_value_list.append([chk_value_low_var,f'{key_top}/{key_mid}/{key_low}', chk_value_low])

for key_lowest, value_lowest in value_low.items():

value_lowest_frame = ttk.Frame(master=scrollbar_frame) value_lowest_frame.pack(side='top',anchor='nw',padx=(60,0)) chk_value_lowest_var = tk.BooleanVar()

chk_value_lowest = tk.Checkbutton(master=value_lowest_frame, variable=chk_v alue_lowest_var,offvalue=False,onvalue=True)

chk_value_lowest.pack(side='left')

value_lowest_label = ttk.Label(value_lowest_frame, text=key_lowest) value_lowest_label.pack(side='left')

checkbox_value_list.append([chk_value_lowest_var,f'{key_top}/{key_mid}/{key _low}/{key_lowest}',chk_value_lowest])

return final_cat_dict, checkbox_value_list

# Keeps the category checkboxes consistent: ticking a box also ticks its
# ancestors, and unticking a box also unticks its descendants.
def de_check_parent_kid_boxes(checkbox_value, checkbox_value_list):
    """Propagate a checkbox toggle through the category tree.

    Args:
        checkbox_value: The variable object of the checkbox that was toggled.
            It is located in ``checkbox_value_list`` with ``==``.
        checkbox_value_list: Entries of the form ``[variable, path, widget]``
            where ``path`` is a '/'-separated category path such as
            ``'top/mid/low'``.

    Paths are compared per path component. A plain substring test would treat
    'B' as an ancestor of 'A/B' and 'A/B' as an ancestor of 'A/BB'.
    If the variable is not in the list, nothing is changed.
    """
    toggled = None
    for entry in checkbox_value_list:
        if entry[0] == checkbox_value:
            toggled = entry
            break
    if toggled is None:
        return

    toggled_path = toggled[1]
    if toggled[0].get():
        # Ticked: tick the box itself and every ancestor on its path.
        for entry in checkbox_value_list:
            path = entry[1]
            if path == toggled_path or toggled_path.startswith(path + '/'):
                entry[0].set(True)
    else:
        # Unticked: untick the box itself and everything below it.
        child_prefix = toggled_path + '/'
        for entry in checkbox_value_list:
            path = entry[1]
            if path == toggled_path or path.startswith(child_prefix):
                entry[0].set(False)

# Generates a value for the 'Name' column from the URL. This is a heuristic;
# the original author notes that it will most likely need adjusting.
def name_from_url(current_url):
    """Guess a capitalised site name from the host part of *current_url*.

    Rules, applied to the dot-separated host labels only (path and query are
    ignored):

    * a single label is returned as is;
    * if the first label starts with ``www``, or the host has three or more
      labels and the first one is shorter than three characters (for example
      a language prefix such as ``nl``), the second label is used;
    * otherwise the first label is used.

    A URL without a scheme (``example.com/page``) is parsed as if it started
    with ``//`` so that the host is still recognised. An empty string is
    returned when no host can be found.
    """
    hostname = urllib.parse.urlparse(current_url).hostname
    if hostname is None:
        # Without a scheme urlparse() files everything under .path.
        hostname = urllib.parse.urlparse('//' + current_url).hostname or ''

    labels = hostname.split('.')
    first_label = labels[0]
    if len(labels) == 1:
        return first_label.capitalize()

    short_prefix = len(labels) >= 3 and len(first_label) < 3
    if first_label.startswith('www') or short_prefix:
        return labels[1].capitalize()
    return first_label.capitalize()

def init_driver():
    """Start a Chrome WebDriver and store it in the module-level ``driver``.

    The chromedriver location is hard-coded for the author's Windows machine.
    """
    global driver
    PATH = r'C:\Program Files\Google\Chromedriver\chromedriver.exe'
    driver = webdriver.Chrome(PATH)

# Used when 'Auto-open URL' is ticked: the site is opened in the Selenium browser.
def auto_open():
    """Open ``current_url`` in the Selenium browser when auto-open is enabled.

    Opening the page is best effort: a driver that has not been started or a
    page that fails to load must not interrupt loading the row, so errors are
    ignored here.
    """
    try:
        if chk_auto_open_url_var.get():
            driver.get(current_url)
    except Exception:
        # Deliberately ignored (see docstring). KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        pass

# Checks for empty values and returns a substitute that the receiving widget
# or variable accepts without raising.
def check_nat(value,_type):
    """Replace a missing value by a placeholder suited to its target.

    Args:
        value: The cell value to check.
        _type: ``"string"`` (missing becomes ``''``), ``"bool"`` (missing
            becomes the string ``'False'``) or ``"choice"`` (missing becomes
            ``None``).

    Returns:
        The placeholder for a missing value, otherwise *value* unchanged.
        ``None`` for an unknown *_type*.

    For ``"bool"`` the textual forms ``'None'``, ``'nan'``, ``'NaT'`` and
    ``'<NA>'`` also count as missing, because callers in this file pass
    ``str(cell)``.
    """
    missing = pd.isnull(value)
    if _type == "string":
        return '' if missing else value
    if _type == "bool":
        textual_missing = isinstance(value, str) and value in ('None', 'nan', 'NaT', '<NA>')
        return 'False' if missing or textual_missing else value
    if _type == "choice":
        return None if missing else value
    return None

def float_to_bool(val):
    """Map 0.0/'0.0' to ``False`` and 1.0/'1.0' to ``True``.

    Because ``True == 1.0`` and ``False == 0.0`` in Python, real booleans pass
    through unchanged. Any other value yields ``None``.
    """
    if val == 0.0 or val == '0.0':
        return False
    if val == 1.0 or val == '1.0':
        return True
    return None

def transform_nullstring_to_nat(value):
    """Return ``pd.NaT`` for a blank string, else the string without newlines.

    A string counts as blank when it contains only spaces and newlines.
    """
    without_newlines = value.replace('\n', '')
    if without_newlines.replace(' ', '') == '':
        return pd.NaT
    return without_newlines

def return_emptied_dict_level(_dict,dict_path):
    """Return the branch of *_dict* at *dict_path* with that level emptied.

    The steps are: search *_dict* for *dict_path*, set the value at that path
    in the search result to ``{}``, then search the result again for the same
    path and return it.

    NOTE(review): this presumably leaves *_dict* untouched because
    ``dpath.util.search`` builds a new dictionary; confirm against the
    installed dpath version.
    """
    dict_empty = dpath.util.search(_dict, dict_path)
    dpath.util.set(dict_empty, dict_path, {})
    return dpath.util.search(dict_empty, dict_path)

def transform_date(date):
    """Format a date cell as text with '/' separators and no midnight time.

    Missing values are returned unchanged. Anything else is converted with
    ``str()``, '-' is replaced by '/', and a trailing ``' 00:00:00'`` is
    removed.

    The suffix is removed explicitly. ``str.strip(' 00:00:00')`` treats its
    argument as a set of characters and would also eat leading and trailing
    zeros of the date itself ('10/03/2020' became '10/03/202').
    """
    if pd.isnull(date):
        return date
    text = str(date).replace('-', '/')
    midnight = ' 00:00:00'
    if text.endswith(midnight):
        text = text[:-len(midnight)]
    return text.strip()

90

# Empties all fields when a new row/site is loaded.
def clear_fields():
    """Reset every input widget and checkbox and show the movement tab."""
    for entry in (entry_name, entry_URL):
        entry.delete(0, tk.END)
    chk_online_var.set(False)
    for entry in (entry_Access, entry_lang, entry_slogan):
        entry.delete(0, tk.END)
    text_description_extern.delete('1.0', tk.END)
    text_description_intern.delete('1.0', tk.END)
    provincie_var.set(None)
    entry_locatie.delete(0, tk.END)
    entry_Email.delete(0, tk.END)
    chk_selected_var.set(False)
    chk_noteworthy_var.set(False)
    text_selection_reason.delete('1.0', tk.END)

    # Element 0 of every entry in these lists is the checkbox variable.
    checkbox_groups = (
        meta_types_values_list,
        movements_checkboxes,
        repr_checkboxes,
        goals_checkboxes,
        functions_checkboxes,
        SM_checkboxes,
    )
    for group in checkbox_groups:
        for box in group:
            box[0].set(False)

    Category_tabs.select(Tab_C2_Movement)

# Fills the category checkboxes by turning the string stored in the csv/excel
# back into a dictionary and reading that dictionary as a file tree.
def _collect_category_paths(tree, parent_path=None):
    """Return every '/'-joined key path found in the nested dict *tree*."""
    paths = []
    for key, subtree in tree.items():
        path = key if parent_path is None else f'{parent_path}/{key}'
        paths.append(path)
        if isinstance(subtree, dict):
            paths.extend(_collect_category_paths(subtree, path))
    return paths


def fill_category(df_and_column,row,checkboxes):
    """Tick the checkboxes whose path occurs in the saved category string.

    Args:
        df_and_column: Column holding the saved category strings; read with
            ``.iloc[row]``.
        row: Positional row index.
        checkboxes: Entries ``[variable, path, widget]``; the variable of
            every entry whose path was saved is set to ``True``.

    The saved text is the ``str()`` of a nested dict without its outer braces,
    with every ``': {}'`` replaced by two spaces. This function reverses that.
    An empty or missing cell leaves all checkboxes untouched.
    """
    saved_categories = check_nat(df_and_column.iloc[row],"string")
    if saved_categories == '':
        return

    if not saved_categories[0] == '\'':
        saved_categories = '\'' + saved_categories
    # NOTE: the search text is a DOUBLE space, the readable stand-in for
    # ': {}' in the saved string.
    chars = list('{' + saved_categories.replace('  ', ': {}').replace('""', '"') + '}')
    for index, char in enumerate(chars):
        if char != '\'':
            continue
        # A quote between two letters is an apostrophe inside a name and is
        # kept; every other single quote delimits a key and becomes a JSON
        # double quote.
        if chars[index - 1].isalpha() and chars[index + 1].isalpha():
            continue
        chars[index] = '\"'
    saved_cat_dict = json.loads(''.join(chars))

    saved_paths = set(_collect_category_paths(saved_cat_dict))
    for category in checkboxes:
        if category[1] in saved_paths:
            category[0].set(True)

# Fills the fields with the values stored in the csv/excel.
def fill_fields():
    """Populate all widgets from row ``current_row`` of ``dataset_pd``."""
    row = current_row

    def cell(column, kind="string"):
        # Cell value with missing data replaced by a widget-safe placeholder.
        return check_nat(dataset_pd[column].iloc[row], kind)

    def bool_cell(column):
        # Boolean columns are passed on in their textual form.
        return check_nat(str(dataset_pd[column].iloc[row]), "bool")

    entry_name.insert(0, cell('Name'))
    chk_online_var.set(bool_cell('Online'))
    entry_Access.insert(0, cell('Access date'))
    entry_lang.insert(0, cell('Language(s)'))
    entry_slogan.insert(0, cell('Slogan'))
    text_description_extern.insert("1.0", cell('Description (external)'))
    text_description_intern.insert("1.0", cell('Comments (internal)'))
    provincie_var.set(cell('Province', "choice"))
    entry_locatie.insert(0, cell('Location'))
    entry_Email.insert(0, cell('Email address'))
    chk_selected_var.set(bool_cell('Include in collection'))
    chk_noteworthy_var.set(bool_cell('Special interest'))
    text_selection_reason.insert("1.0", cell('Reason for selection'))

    # The metatype cell holds a comma-separated list of quoted names. A
    # missing (non-string) cell simply ticks nothing.
    meta_raw = dataset_pd['C1: Metatype'].iloc[row]
    if isinstance(meta_raw, str):
        if ',' in meta_raw:
            meta_list = meta_raw.replace(' ','').replace('\'','').split(',')
        else:
            meta_list = [meta_raw.replace('\'','')]
        for meta_token in meta_list:
            for meta_check in meta_types_values_list:
                if meta_token == meta_check[1]:
                    meta_check[0].set(True)

    fill_category(dataset_pd['C2: Stroming'],row,movements_checkboxes)
    fill_category(dataset_pd['C3: Vertegenwoordigers of eigenaars'],row,repr_checkboxes)
    fill_category(dataset_pd['C4: Doelen'],row,goals_checkboxes)
    fill_category(dataset_pd['C5: Functionaliteit'],row,functions_checkboxes)
    fill_category(dataset_pd['Social media'],row,SM_checkboxes)

# Saves the categories as a string. The dictionary notation ': {}' is replaced
# by a more reader-friendly double space '  '. This must stay unchanged,
# otherwise Python can no longer load the values from the csv correctly.
def save_category(_dict,checkboxes):
    """Serialise the ticked categories of one tab into a single string.

    Args:
        _dict: The full category tree of the tab.
        checkboxes: Entries ``[variable, path, widget]``.

    Returns:
        ``pd.NaT`` when nothing is ticked. Otherwise the ``str()`` of the
        merged tree of ticked paths, with every ``': {}'`` replaced by two
        spaces and the outer braces removed.
    """
    dict_to_save = {}
    for category in checkboxes:
        if not category[0].get():
            continue
        current_level = return_emptied_dict_level(_dict, category[1])
        if dict_to_save == {}:
            dict_to_save = current_level
        else:
            dict_to_save = dpath.util.merge(dict_to_save, current_level)

    if dict_to_save == {}:
        return pd.NaT
    # The replacement is a DOUBLE space; fill_category relies on exactly that.
    return str(dict_to_save).replace(': {}', '  ')[1:-1]

# Saves all entered values to the pandas dataframe (exported later as csv).
def save_values():
    """Write the current widget contents into row ``current_row``.

    Does nothing when no row has been loaded yet, because ``current_row`` does
    not exist until then.
    """
    if 'current_row' not in globals():
        return
    row = current_row

    def text_or_nat(text):
        # Newlines become spaces, outer spaces are trimmed, blank becomes NaT.
        return transform_nullstring_to_nat(text.replace('\n',' ').strip(' '))

    if save_AiWCT_values == True:
        dataset_pd.loc[row,'Archived in WCT'] = archived_in_WCT
        dataset_pd.loc[row,'AiWCT Differing scheme'] = WCT_diff_scheme

    dataset_pd.loc[row,'Name'] = transform_nullstring_to_nat(entry_name.get())
    dataset_pd.loc[row,'URL'] = transform_nullstring_to_nat(entry_URL.get())
    dataset_pd.loc[row,'Online'] = chk_online_var.get()
    dataset_pd.loc[row,'Access date'] = transform_nullstring_to_nat(entry_Access.get())
    dataset_pd.loc[row,'Language(s)'] = text_or_nat(entry_lang.get())
    dataset_pd.loc[row,'Slogan'] = text_or_nat(entry_slogan.get())
    dataset_pd.loc[row,'Description (external)'] = text_or_nat(text_description_extern.get("1.0",tk.END))
    dataset_pd.loc[row,'Comments (internal)'] = text_or_nat(text_description_intern.get("1.0",tk.END))
    dataset_pd.loc[row,'Province'] = provincie_var.get()
    dataset_pd.loc[row,'Location'] = transform_nullstring_to_nat(entry_locatie.get())
    dataset_pd.loc[row,'Email address'] = transform_nullstring_to_nat(entry_Email.get())
    dataset_pd.loc[row,'Include in collection'] = chk_selected_var.get()
    dataset_pd.loc[row,'Special interest'] = chk_noteworthy_var.get()
    dataset_pd.loc[row,'Reason for selection'] = text_or_nat(text_selection_reason.get("1.0",tk.END))

    # Ticked metatypes are stored as the list's str() without its brackets.
    meta_value_final = [
        meta_value[1]
        for meta_value in meta_types_values_list
        if meta_value[0].get() == True
    ]
    if meta_value_final == []:
        dataset_pd.loc[row,'C1: Metatype'] = pd.NaT
    else:
        dataset_pd.loc[row,'C1: Metatype'] = str(meta_value_final).strip('[]')

    dataset_pd.loc[row,'C2: Stroming'] = save_category(movement_dict,movements_checkboxes)
    dataset_pd.loc[row,'C3: Vertegenwoordigers of eigenaars'] = save_category(repr_dict,repr_checkboxes)
    dataset_pd.loc[row,'C4: Doelen'] = save_category(goals_dict,goals_checkboxes)
    dataset_pd.loc[row,'C5: Functionaliteit'] = save_category(functions_dict,functions_checkboxes)
    dataset_pd.loc[row,'Social media'] = save_category(SM_dict,SM_checkboxes)

# Loads the dataset (csv/excel) as a pandas dataframe via an 'open file' dialog.
def open_dataset():
    """Ask for a dataset file, load it and prepare it for editing.

    Returns:
        ``(dataset_pd, URL_column_dataset)`` after a successful load, ``None``
        when the dialog is cancelled or the file is neither .xlsx nor .csv.

    The globals ``dataset_filepath`` and ``dataset_pd`` are only replaced once
    a file has actually been read, so cancelling the dialog keeps the dataset
    that is currently loaded usable.
    """
    global dataset_filepath
    global dataset_pd

    chosen_path = askopenfilename(filetypes=[("Excel file", "*.xlsx"),("Comma separated value", "*.csv"), ("All Files", "*.*")])
    if not chosen_path:
        return

    suffix = Path(chosen_path).suffix.lower()
    if suffix == '.xlsx':
        loaded = pd.read_excel(chosen_path)
    elif suffix == '.csv':
        loaded = pd.read_csv(chosen_path,header=0)
    else:
        # Unsupported type picked through "All Files": nothing to load.
        return

    if 'Unnamed: 0' in loaded.columns:
        loaded = loaded.drop('Unnamed: 0',axis=1)

    # Normalise data types.
    loaded = loaded.fillna(pd.NaT)
    has_description = ~loaded['Description (external)'].isna()
    loaded.loc[has_description,'Description (external)'] = loaded.loc[has_description,'Description (external)'].apply(lambda x: x.replace('\n','').strip('\"'))
    loaded['Archived in WCT'] = loaded['Archived in WCT'].apply(float_to_bool)
    loaded['Online'] = loaded['Online'].apply(float_to_bool)
    loaded['Access date'] = loaded['Access date'].apply(transform_date)
    loaded['Include in collection'] = loaded['Include in collection'].apply(float_to_bool)
    loaded['Special interest'] = loaded['Special interest'].apply(float_to_bool)

    dataset_filepath = chosen_path
    dataset_pd = loaded

    # Show the name of the loaded file in the GUI.
    lbl_load_dataset['text'] = Path(chosen_path).stem
    lbl_load_dataset.config(fg='green')
    # Displayed rows run from 1 to len(dataset_pd), so the last row must be
    # reachable with the spinbox arrows as well.
    Current_row_dataset.config(to=max(len(dataset_pd), 1))

    # One column must be called 'URL' for the links to be found automatically.
    if 'URL' in dataset_pd.columns:
        URL_column_dataset.delete(0,tk.END)
        URL_column_dataset.insert(0,str(list(dataset_pd.columns).index('URL')))
    return dataset_pd, URL_column_dataset

# Opens the complete web collection, used to cross-reference whether a site
# has already been archived.
def open_webcollectie():
    """Ask for the web collection Excel file and load it.

    Returns:
        ``(webcollectie_pd, URL_column_webcollectie)`` after loading, ``None``
        when the dialog is cancelled.
    """
    global webcollectie_pd

    filepath = askopenfilename(
        filetypes=[("Excel file", "*.xlsx"), ("All Files", "*.*")]
    )
    if not filepath:
        return

    webcollectie_pd = pd.read_excel(filepath)
    lbl_load_webcollectie['text'] = Path(filepath).stem
    lbl_load_webcollectie.config(fg='green')
    # Preselect the index of the column named 'URL' when there is one.
    if 'URL' in webcollectie_pd.columns:
        URL_column_webcollectie.delete(0,tk.END)
        URL_column_webcollectie.insert(0,str(list(webcollectie_pd.columns).index('URL')))
    return webcollectie_pd, URL_column_webcollectie

# Loads the selected row/site and cross-references it with the complete KB
# dataset to see whether it has already been archived.
def _url_without_scheme(url):
    """Return *url* without its leading 'scheme://', ending in exactly one '/'."""
    prefix = urllib.parse.urlparse(url).scheme + '://'
    if url[:len(prefix)].lower() == prefix:
        url = url[len(prefix):]
    return url.strip('/') + '/'


def start_read():
    """Load the row selected in the spinbox into the form.

    Steps: clamp the row number to the dataset, clear the form, determine the
    "Archived in WCT" status (computed from the web collection when it is
    loaded, otherwise read from the dataset), fill the fields, pre-fill an
    empty URL/name/access date, and optionally open the site in the browser.
    Does nothing when no dataset is loaded or the row field is not a number.
    """
    global current_row
    global current_url
    global save_AiWCT_values
    global archived_in_WCT
    global WCT_diff_scheme

    if lbl_load_dataset["text"] == 'None loaded':
        return
    try:
        current_display_row = int(Current_row_dataset.get())
    except ValueError:
        return
    row_count = len(dataset_pd)
    if row_count == 0:
        return

    # Displayed rows are 1-based; keep the value inside the dataset.
    clamped_row = min(max(current_display_row, 1), row_count)
    if clamped_row != current_display_row:
        Current_row_dataset.delete(0,tk.END)
        Current_row_dataset.insert(0,clamped_row)
    current_row = clamped_row - 1

    clear_fields()
    dataset_urls = list(dataset_pd.iloc[:,int(URL_column_dataset.get())])
    current_url = dataset_urls[current_row]
    lbl_current_URL['text'] = f'Current URL: {current_url}'

    if lbl_load_webcollectie["text"] != 'None loaded':
        # The full web collection is loaded: compare against it.
        save_AiWCT_values = True
        archived_in_WCT = ''
        WCT_diff_scheme = ''
        webcollectie_urls = list(webcollectie_pd.iloc[:,int(URL_column_webcollectie.get())])
        # Both sides are normalised the same way. Non-text cells can never match.
        webcollectie_urls_minus_scheme = [
            _url_without_scheme(url) if isinstance(url, str) else None
            for url in webcollectie_urls
        ]
        url_minus_scheme = _url_without_scheme(current_url)
        url_scheme = urllib.parse.urlparse(current_url).scheme
        if url_minus_scheme in webcollectie_urls_minus_scheme:
            match_index = webcollectie_urls_minus_scheme.index(url_minus_scheme)
            wc_url_scheme = urllib.parse.urlparse(webcollectie_urls[match_index]).scheme
            archived_in_WCT = True
            if url_scheme != wc_url_scheme:
                # Same site, archived under a different scheme.
                lbl_archived_result['text'] = f'True, except: {wc_url_scheme}'
                WCT_diff_scheme = wc_url_scheme
                lbl_archived_result.config(fg='purple')
            else:
                lbl_archived_result['text'] = 'True'
                lbl_archived_result.config(fg='dark blue')
        else:
            lbl_archived_result['text'] = 'False'
            lbl_archived_result.config(fg='green')
            archived_in_WCT = False
    else:
        # No web collection loaded: show the values stored in the dataset.
        save_AiWCT_values = False
        if not pd.isnull(dataset_pd.loc[current_row,'AiWCT Differing scheme']):
            scheme_text = dataset_pd.loc[current_row,'AiWCT Differing scheme']
            lbl_archived_result['text'] = f'True, except: {scheme_text}'
            lbl_archived_result.config(fg='purple')
        elif dataset_pd.loc[current_row,'Archived in WCT'] == True:
            lbl_archived_result['text'] = 'True'
            lbl_archived_result.config(fg='dark blue')
        else:
            lbl_archived_result['text'] = 'False'
            lbl_archived_result.config(fg='green')

    # Auto-fill certain values.
    fill_fields()
    if not entry_URL.get():
        entry_URL.insert(0,current_url)
    if not entry_name.get():
        entry_name.insert(0, name_from_url(current_url))
    if not entry_Access.get():
        entry_Access.insert(0,date.today().strftime("%d/%m/%Y"))
    auto_open()

# Reads the next row/site.
def read_next():
    """Save the current row if auto-save is on, then load the next row.

    Does nothing when no dataset is loaded or the last row is already shown.
    """
    if lbl_load_dataset["text"] == 'None loaded':
        return
    displayed_row = int(Current_row_dataset.get())
    if displayed_row >= len(dataset_pd):
        return
    if chk_auto_save_var.get():
        save_values()
    Current_row_dataset.delete(0,tk.END)
    Current_row_dataset.insert(0,str(displayed_row + 1))
    start_read()

# Reads the previous row/site.
def read_prev():
    """Save the current row if auto-save is on, then load the previous row.

    Does nothing when no dataset is loaded or the first row is already shown.
    """
    if lbl_load_dataset["text"] == 'None loaded':
        return
    displayed_row = int(Current_row_dataset.get())
    if displayed_row <= 1:
        return
    if chk_auto_save_var.get():
        save_values()
    Current_row_dataset.delete(0,tk.END)
    Current_row_dataset.insert(0,str(displayed_row - 1))
    start_read()

# Exports the pandas dataframe as csv.
def export_csv():
    """Write ``dataset_pd`` next to the loaded file as a timestamped csv.

    The file is named ``<original stem>_<dd-mm-YYYY_HH-MM-SS>.csv``. Nothing
    happens when no dataset has been loaded. A failure to write the file is
    shown to the user instead of being dropped silently.
    """
    from tkinter import messagebox

    try:
        source_path = Path(dataset_filepath)
        frame = dataset_pd
    except NameError:
        # No dataset loaded yet: nothing to export.
        return

    new_filename = f'{source_path.stem}_{datetime.now().strftime("%d-%m-%Y_%H-%M-%S")}.csv'
    new_path = source_path.parent / new_filename
    try:
        frame.to_csv(new_path,index=False,encoding='utf8')
    except OSError as error:
        messagebox.showerror('Export CSV', f'Could not write {new_path}:\n{error}')

# Main application window and its grid layout (two columns, three rows).
window = tk.Tk()
window.title("Webcollector Tool")
window.columnconfigure(0, weight=0, minsize=250)
window.columnconfigure(1, weight=0, minsize=150)
window.rowconfigure(0, weight=0, minsize=50)
window.rowconfigure(1, weight=0, minsize=500)
window.rowconfigure(2, weight=0, minsize=80)

# Frame for datasets and buttons
Load_datasets = tk.Frame(master=window, relief=tk.FLAT, borderwidth=5)
Load_datasets.grid(row=0, column=0, padx=5, sticky="nsew")

# Buttons for dataset: load button, name of the loaded file, URL column index
frm_load_dataset = tk.Frame(master=Load_datasets)
frm_load_dataset.pack(side='top', anchor='nw', fill=tk.X, padx=5, pady=5)
btn_load_dataset = tk.Button(master=frm_load_dataset, text='Load dataset', width=14, command=open_dataset)
btn_load_dataset.pack(side=tk.LEFT)
# start_read, read_next and read_prev treat the text 'None loaded' as
# "no dataset loaded".
lbl_load_dataset = tk.Label(master=frm_load_dataset, text='None loaded', fg='dark red', padx=5, pady=5)
lbl_load_dataset.pack(side=tk.LEFT)
URL_column_dataset = tk.Spinbox(master=frm_load_dataset, from_=0, to=40, width=3)
URL_column_dataset.pack(side=tk.RIGHT)

# Buttons for the webcollection dataset
frm_load_webcollectie = tk.Frame(master=Load_datasets)
frm_load_webcollectie.pack(side='top', anchor='nw', fill=tk.X, padx=5)
btn_load_webcollectie = tk.Button(master=frm_load_webcollectie, text='Load webcollectie', width=14,
                                  command=open_webcollectie)
btn_load_webcollectie.pack(side=tk.LEFT, pady=(0, 5))
lbl_load_webcollectie = tk.Label(master=frm_load_webcollectie, text='None loaded', fg='dark red', padx=5, pady=5)
lbl_load_webcollectie.pack(side=tk.LEFT)
URL_column_webcollectie = tk.Spinbox(master=frm_load_webcollectie, from_=0, to=40, width=3)
URL_column_webcollectie.pack(side=tk.RIGHT)

# Buttons for starting, next and prev
check_frame = tk.Frame(master=Load_datasets)
check_frame.pack(side='top', anchor='nw', fill=tk.X, pady=2)
btn_start = tk.Button(master=check_frame, text='Load row', command=start_read, width=19)
btn_start.pack(side=tk.LEFT, padx=5)
# Row numbers shown here are 1-based.
Current_row_dataset = tk.Spinbox(master=check_frame, from_=1, to=999999, width=6)
Current_row_dataset.pack(side=tk.LEFT, padx=5)
btn_prev = tk.Button(master=check_frame, text='← Previous', command=read_prev, width=9)
btn_prev.pack(side=tk.LEFT, padx=5)
btn_next = tk.Button(master=check_frame, text='Next →', command=read_next, width=9)
btn_next.pack(side=tk.LEFT, padx=5)

# Frame for auto-open options
auto_frame = tk.Frame(master=Load_datasets)
auto_frame.pack(side='top', anchor='nw', fill=tk.X, pady=5)

# Initialize driver
btn_init_driver = tk.Button(master=auto_frame, text='Start driver', command=init_driver)
btn_init_driver.pack(side='left', padx=(5, 0))

# Auto-open
lbl_auto_open_url = tk.Label(master=auto_frame, text='Auto-open URL', font=("Arial", 10))
lbl_auto_open_url.pack(side='left', padx=(4, 0))
chk_auto_open_url_var = tk.BooleanVar()
chk_auto_open_url = tk.Checkbutton(master=auto_frame, variable=chk_auto_open_url_var, offvalue=False, onvalue=True)
chk_auto_open_url.pack(side='left', padx=(0, 19))

# Save button
btn_save = tk.Button(master=auto_frame, text='Save values', command=save_values)
btn_save.pack(side='left')

# Auto-save (enabled by default)
lbl_auto_save = tk.Label(master=auto_frame, text='Auto save', font=("Arial", 10))
lbl_auto_save.pack(side='left', padx=(4, 0))
chk_auto_save_var = tk.BooleanVar()
chk_auto_save_var.set(True)
chk_auto_save = tk.Checkbutton(master=auto_frame, variable=chk_auto_save_var, offvalue=False, onvalue=True)
chk_auto_save.pack(side='left')

# Frame for the text fields (placed in grid column 0, below the load buttons)
frame_textboxes = tk.Frame(master=window, relief=tk.FLAT, borderwidth=5)
frame_textboxes.grid(row=1, rowspan=2, column=0, padx=5, sticky='nsew')

# Frame for autogenerated values
frame_statics = tk.Frame(master=frame_textboxes, relief=tk.SUNKEN, borderwidth=3)
frame_statics.pack(side='top', anchor='nw', fill=tk.X, pady=2)

# Current URL
lbl_current_URL = tk.Label(master=frame_statics, text='Current URL:', font=("Arial", 10), padx=5, pady=5)
lbl_current_URL.pack(side='top', anchor='nw')

# Archived?
frame_archived = tk.Frame(master=frame_statics)
frame_archived.pack(side='top', anchor='nw')
lbl_archived = tk.Label(master=frame_archived, text='Archived in WCT:', font=("Arial", 10), padx=5, pady=5)
lbl_archived_result = tk.Label(master=frame_archived, text=' ', font=("Arial", 10))
lbl_archived.pack(side='left')
lbl_archived_result.pack(side='left')

# Name
lbl_name = tk.Label(master=frame_textboxes, text='1. Name', font=("Arial", 10), padx=10, pady=5)
lbl_name.pack(side='top', anchor='nw')
entry_name = tk.Entry(master=frame_textboxes, width=57)
entry_name.pack(side='top', anchor='nw', padx=10, pady=2)

# URL
lbl_URL = tk.Label(master=frame_textboxes, text='2. URL', font=("Arial", 10), padx=10, pady=2)
lbl_URL.pack(side='top', anchor='nw')
entry_URL = tk.Entry(master=frame_textboxes, width=57)
entry_URL.pack(side='top', anchor='nw', padx=10, pady=3)

# Frame for dates: online checkbox, access date and language(s) side by side
frame_dates = tk.Frame(master=frame_textboxes)
frame_dates.pack(side='top', anchor='nw')

# Online?
frame_online = tk.Frame(master=frame_dates)
frame_online.pack(side='left')
lbl_online = tk.Label(master=frame_online, text='3. Online', font=("Arial", 10), padx=4, pady=2)
lbl_online.pack(side='top', anchor='nw')
chk_online_var = tk.BooleanVar()
chk_online = tk.Checkbutton(master=frame_online, variable=chk_online_var, offvalue=False, onvalue=True)
chk_online.pack(side='top', anchor='center', padx=4, pady=3)

# Access date
frame_access = tk.Frame(master=frame_dates)
frame_access.pack(side='left')
lbl_Access = tk.Label(master=frame_access, text='4. Access date', font=("Arial", 10), padx=4, pady=2)
lbl_Access.pack(side='top', anchor='nw')
entry_Access = tk.Entry(master=frame_access, width=14)
entry_Access.pack(side='top', anchor='nw', padx=4, pady=3)

# Language
frame_lang = tk.Frame(master=frame_dates)
frame_lang.pack(side='left')
lbl_lang = tk.Label(master=frame_lang, text='5. Language(s) [ , as div]', font=("Arial", 10), padx=4, pady=2)
lbl_lang.pack(side='top', anchor='nw')
entry_lang = tk.Entry(master=frame_lang, width=30)
entry_lang.pack(side='top', anchor='nw', padx=4, pady=3)

# Slogan
frame_slogan = tk.Frame(master=frame_textboxes)
frame_slogan.pack(side='top', anchor='nw')
lbl_slogan = tk.Label(master=frame_slogan, text='6. Slogan', font=("Arial", 10))
lbl_slogan.pack(side='left', padx=(10, 12), pady=5)
entry_slogan = tk.Entry(master=frame_slogan, width=44)
entry_slogan.pack(side='left', pady=3)

# Description (external)
lbl_description_extern = tk.Label(master=frame_textboxes, text='7. Description (external)', font=("Arial", 10),
                                  padx=10, pady=2)
lbl_description_extern.pack(side='top', anchor='nw')
text_description_extern = tk.Text(master=frame_textboxes, width=43, height=5)
text_description_extern.pack(side='top', anchor='nw', padx=10, pady=3)

# Comments (internal)
lbl_description_intern = tk.Label(master=frame_textboxes, text='8. Comments (internal)', font=("Arial", 10),
                                  padx=10, pady=2)
lbl_description_intern.pack(side='top', anchor='nw')
text_description_intern = tk.Text(master=frame_textboxes, width=43, height=7)
text_description_intern.pack(side='top', anchor='nw', padx=10, pady=3)

# Location frame
frame_location = tk.Frame(master=frame_textboxes)
frame_location.pack(side='top', anchor='nw', padx=10, pady=3)

# Province (drop-down; the first option, None, stands for "no province")
frame_province = tk.Frame(master=frame_location)
frame_province.pack(side='left', padx=0, pady=3)
provincie_var = tk.StringVar()
provincies = [None, 'Drenthe', 'Flevoland', 'Friesland', 'Gelderland', 'Groningen',
              'Limburg', 'Noord-Brabant', 'Noord-Holland', 'Overijssel', 'Utrecht',
              'Zeeland', 'Zuid-Holland']
provincie_var.set(None)
lbl_provincie = tk.Label(master=frame_province, text='9. Province', font=("Arial", 10), pady=2)
lbl_provincie.pack(side='top', anchor='nw')
option_province = tk.OptionMenu(frame_province, provincie_var, *provincies)
option_province.config(width=14, direction='above')
option_province.pack(side='top', anchor='nw')

# Location
frame_locatie = tk.Frame(master=frame_location)
frame_locatie.pack(side='left', padx=5, pady=3)
lbl_locatie = tk.Label(master=frame_locatie, text='10. Location', font=("Arial", 10), padx=15, pady=2)
lbl_locatie.pack(side='top', anchor='nw')
entry_locatie = tk.Entry(master=frame_locatie, width=30)
entry_locatie.pack(side='top', anchor='nw', padx=15, pady=3)

# Frame for email and export button
frame_email_export = tk.Frame(master=frame_textboxes)

# Email address
frame_email = tk.Frame(master=frame_email_export)
lbl_Email = tk.Label(master=frame_email, text='11. Email address', font=("Arial", 10), padx=10, pady=2)
lbl_Email.pack(side='top', anchor='nw')
entry_Email = tk.Entry(master=frame_email, width=30)
entry_Email.pack(side='top', anchor='nw', padx=10, pady=2)
frame_email.pack(side='left')

# Export button
frame_export = tk.Frame(master=frame_email_export)
btn_export = tk.Button(master=frame_export, text='Export CSV', command=export_csv, height=2, width=19)
button_font = tkFont.Font(size=10)
btn_export['font'] = button_font
btn_export.pack(side='top', anchor='nw', padx=(15, 0))
frame_export.pack(side='left')
frame_email_export.pack(side='top', anchor='nw')

#### The categories frame
frame_categories = tk.Frame(master=window, relief=tk.SUNKEN, borderwidth=5)
frame_categories.grid(row=0, rowspan=2, column=1, pady=5, sticky="ns")

# Category 1: meta-type, one labelled checkbox per meta type
lbl_1_meta = tk.Label(master=frame_categories, text='15. Category 1: Meta-type', font=("Arial", 10))
lbl_1_meta.pack(side='top', anchor='nw', padx=5, pady=5)
frame_1_meta = tk.Frame(master=frame_categories)
frame_1_meta.pack(side='top', anchor='nw', padx=(30, 0))
meta_types = ['Religieus', 'Spiritueel', 'Seculier']
# Holds [BooleanVar, meta type name] pairs, read by the save/fill functions.
meta_types_values_list = []
for value in meta_types:
    lbl_check = tk.Label(master=frame_1_meta, text=value, font=("Arial", 10))
    lbl_check.pack(side='left', anchor='nw')
    meta_var = tk.BooleanVar()
    chk_meta = tk.Checkbutton(master=frame_1_meta, variable=meta_var, offvalue=False, onvalue=True)
    chk_meta.pack(side='left')
    meta_types_values_list.append([meta_var, value])

# Tab section for checkboxes
Category_tabs = ttk.Notebook(master=frame_categories)
Tab_C2_Movement = ttk.Frame(master=Category_tabs)
Tab_C3_Representatives = ttk.Frame(master=Category_tabs)
Tab_C4_Goals = ttk.Frame(master=Category_tabs)
Tab_C5_Functions = ttk.Frame(master=Category_tabs)
Tab_C6_SM = ttk.Frame(master=Category_tabs)
Category_tabs.add(Tab_C2_Movement, text='16. Movement')
Category_tabs.add(Tab_C3_Representatives, text='17. Repr.')
Category_tabs.add(Tab_C4_Goals, text='18. Goals')
Category_tabs.add(Tab_C5_Functions, text='19. Funct.')
Category_tabs.add(Tab_C6_SM, text='20. SM')
Category_tabs.pack(expand=1, fill="both", side='top', anchor='nw')

# Tab 1 movement: scrollable canvas holding the checkbox frame
Movement_canvas = tk.Canvas(master=Tab_C2_Movement)
Movement_scrollbar = ttk.Scrollbar(Tab_C2_Movement, orient="vertical", command=Movement_canvas.yview)
Movement_scrollbar_frame = ttk.Frame(Movement_canvas)
# Update the scrollable area whenever the inner frame changes size.
Movement_scrollbar_frame.bind("<Configure>", lambda e: Movement_canvas.configure(
    scrollregion=Movement_canvas.bbox("all")))
Movement_canvas.create_window((0, 0), window=Movement_scrollbar_frame, anchor="nw")
Movement_canvas.configure(yscrollcommand=Movement_scrollbar.set)

# Create movement categories (absolute path on the author's machine)
movement_dict, movements_checkboxes = create_category_list('C:/Users/Gebruiker/Documents/Webarchivering_Project/Notebooks_Codes/Source files/cat2_stromingen.txt', Movement_scrollbar_frame)
# Each entry is [variable, path, widget]; toggling a box updates its
# parents/children through de_check_parent_kid_boxes.
for check_set in movements_checkboxes:
    check_set[2].configure(command=partial(de_check_parent_kid_boxes, check_set[0], movements_checkboxes))
Movement_canvas.pack(side="left", fill="both")
Movement_scrollbar.pack(side="right", fill="y")

# Tab 2 representatives: scrollable canvas holding the checkbox frame
Repr_canvas = tk.Canvas(master=Tab_C3_Representatives)
Repr_scrollbar = ttk.Scrollbar(Tab_C3_Representatives, orient="vertical", command=Repr_canvas.yview)
Repr_scrollbar_frame = ttk.Frame(Repr_canvas)
# Update the scrollable area whenever the inner frame changes size.
Repr_scrollbar_frame.bind("<Configure>", lambda e: Repr_canvas.configure(
    scrollregion=Repr_canvas.bbox("all")))
Repr_canvas.create_window((0, 0), window=Repr_scrollbar_frame, anchor="nw")
Repr_canvas.configure(yscrollcommand=Repr_scrollbar.set)

# Create Repr categories (absolute path on the author's machine)
repr_dict, repr_checkboxes = create_category_list('C:/Users/Gebruiker/Documents/Webarchivering_Project/Notebooks_Codes/Source files/cat3_vertegenwoordigers.txt', Repr_scrollbar_frame)
# Each entry is [variable, path, widget]; toggling a box updates its
# parents/children through de_check_parent_kid_boxes.
for check_set in repr_checkboxes:
    check_set[2].configure(command=partial(de_check_parent_kid_boxes, check_set[0], repr_checkboxes))
Repr_canvas.pack(side="left", fill="both")
Repr_scrollbar.pack(side="right", fill="y")

#Tab 3 goals
# Scrollable area: a canvas hosts an inner frame and is driven by a vertical
# scrollbar placed in the same tab.
Goals_canvas = tk.Canvas(master=Tab_C4_Goals)
Goals_scrollbar = ttk.Scrollbar(
    Tab_C4_Goals, orient="vertical", command=Goals_canvas.yview)
Goals_scrollbar_frame = ttk.Frame(Goals_canvas)

# Recompute the scroll region each time the inner frame is (re)configured,
# so the scrollbar covers everything drawn on the canvas.
Goals_scrollbar_frame.bind(
    "<Configure>",
    lambda e: Goals_canvas.configure(
        scrollregion=Goals_canvas.bbox("all")))
Goals_canvas.create_window((0, 0), window=Goals_scrollbar_frame, anchor="nw")
Goals_canvas.configure(yscrollcommand=Goals_scrollbar.set)

#Create goals categories
# create_category_list is defined elsewhere in this file; its result is
# unpacked as (category dict, list of checkbox sets).
goals_dict, goals_checkboxes = create_category_list(
    'C:/Users/Gebruiker/Documents/Webarchivering_Project/Notebooks_Codes/Source files/cat4_doelen.txt',
    Goals_scrollbar_frame)

# Element 2 of every set receives a click callback. Element 0 is passed on as
# the first argument (presumably the identifier of the box - confirm against
# create_category_list), together with the complete list of sets.
for check_set in goals_checkboxes:
    check_set[2].configure(
        command=partial(
            de_check_parent_kid_boxes, check_set[0], goals_checkboxes))

Goals_canvas.pack(side="left", fill="both")
Goals_scrollbar.pack(side="right", fill="y")

#Tab 4 functions
# Scrollable area: a canvas hosts an inner frame and is driven by a vertical
# scrollbar placed in the same tab.
Functions_canvas = tk.Canvas(master=Tab_C5_Functions)
Functions_scrollbar = ttk.Scrollbar(
    Tab_C5_Functions, orient="vertical", command=Functions_canvas.yview)
Functions_scrollbar_frame = ttk.Frame(Functions_canvas)

# Recompute the scroll region each time the inner frame is (re)configured,
# so the scrollbar covers everything drawn on the canvas.
Functions_scrollbar_frame.bind(
    "<Configure>",
    lambda e: Functions_canvas.configure(
        scrollregion=Functions_canvas.bbox("all")))
Functions_canvas.create_window(
    (0, 0), window=Functions_scrollbar_frame, anchor="nw")
Functions_canvas.configure(yscrollcommand=Functions_scrollbar.set)

#Create Functions categories
# create_category_list is defined elsewhere in this file; its result is
# unpacked as (category dict, list of checkbox sets).
functions_dict, functions_checkboxes = create_category_list(
    'C:/Users/Gebruiker/Documents/Webarchivering_Project/Notebooks_Codes/Source files/cat5_functies.txt',
    Functions_scrollbar_frame)

# Element 2 of every set receives a click callback. Element 0 is passed on as
# the first argument (presumably the identifier of the box - confirm against
# create_category_list), together with the complete list of sets.
for check_set in functions_checkboxes:
    check_set[2].configure(
        command=partial(
            de_check_parent_kid_boxes, check_set[0], functions_checkboxes))

Functions_canvas.pack(side="left", fill="both")
Functions_scrollbar.pack(side="right", fill="y")

#Tab 5 social media
# Scrollable area: a canvas hosts an inner frame and is driven by a vertical
# scrollbar placed in the same tab.
SM_canvas = tk.Canvas(master=Tab_C6_SM)
SM_scrollbar = ttk.Scrollbar(
    Tab_C6_SM, orient="vertical", command=SM_canvas.yview)
SM_scrollbar_frame = ttk.Frame(SM_canvas)

# Recompute the scroll region each time the inner frame is (re)configured,
# so the scrollbar covers everything drawn on the canvas.
SM_scrollbar_frame.bind(
    "<Configure>",
    lambda e: SM_canvas.configure(
        scrollregion=SM_canvas.bbox("all")))
SM_canvas.create_window((0, 0), window=SM_scrollbar_frame, anchor="nw")
SM_canvas.configure(yscrollcommand=SM_scrollbar.set)

#Create SM categories
# create_category_list is defined elsewhere in this file; its result is
# unpacked as (category dict, list of checkbox sets).
SM_dict, SM_checkboxes = create_category_list(
    'C:/Users/Gebruiker/Documents/Webarchivering_Project/Notebooks_Codes/Source files/cat6_social_media.txt',
    SM_scrollbar_frame)

# Element 2 of every set receives a click callback. Element 0 is passed on as
# the first argument (presumably the identifier of the box - confirm against
# create_category_list), together with the complete list of sets.
for check_set in SM_checkboxes:
    check_set[2].configure(
        command=partial(
            de_check_parent_kid_boxes, check_set[0], SM_checkboxes))

SM_canvas.pack(side="left", fill="both")
SM_scrollbar.pack(side="right", fill="y")

# Section for selection and reason
# Placed in the main window's grid at row 2, column 1; stretches in all
# directions within its cell.
frame_selection = tk.Frame(master=window, relief=tk.FLAT, borderwidth=5)
frame_selection.grid(row=2, column=1, padx=5, pady=2, sticky="nsew")

#Frame for selection boxes
# Holds the two label + checkbox pairs side by side.
frame_selection_boxes = tk.Frame(master=frame_selection)
frame_selection_boxes.pack(side='top', anchor='nw')

#Select?
frame_selected = tk.Frame(master=frame_selection_boxes)
frame_selected.pack(side='left', padx=2)
lbl_selected = tk.Label(
    master=frame_selected, text='12. Include in collection',
    font=("Arial", 10), padx=4)
lbl_selected.pack(side='left')
# Boolean state of the checkbox: True when ticked, False otherwise.
chk_selected_var = tk.BooleanVar()
chk_selected = tk.Checkbutton(
    master=frame_selected, variable=chk_selected_var,
    offvalue=False, onvalue=True)
chk_selected.pack(side='left', padx=2)

#Noteworthy?
frame_noteworthy = tk.Frame(master=frame_selection_boxes)
frame_noteworthy.pack(side='left', padx=2)
lbl_noteworthy = tk.Label(
    master=frame_noteworthy, text='13. Special interest',
    font=("Arial", 10), padx=2)
lbl_noteworthy.pack(side='left')
# Boolean state of the checkbox: True when ticked, False otherwise.
chk_noteworthy_var = tk.BooleanVar()
chk_noteworthy = tk.Checkbutton(
    master=frame_noteworthy, variable=chk_noteworthy_var,
    offvalue=False, onvalue=True)
chk_noteworthy.pack(side='left', padx=2)

#Reason for selection
lbl_selection_reason = tk.Label(
    master=frame_selection, text='14. Reason for selection / rejection',
    font=("Arial", 10), padx=5, pady=2)
lbl_selection_reason.pack(side='top', anchor='nw')
# Free-text box, 41 characters wide and 3 lines high.
text_selection_reason = tk.Text(master=frame_selection, width=41, height=3)
text_selection_reason.pack(side='top', anchor='nw', padx=15, pady=2)

# Start the Tk event loop; this call blocks until the window is closed.
window.mainloop()