• No results found

Prerequisites

Import relevant libraries

In [ ]:

Mount drive

In [ ]:

Prepare macro data using JST

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

Create master country data

import pandas as pd import numpy as np import datetime as dt

import matplotlib.pyplot as plt import os

import statsmodels.formula.api as smf

%cd "C:\Users\Calvin\iCloudDrive\Documents\Finance\Master's Thesis\DATA"

# Read data

jst = pd.read_excel("JSTdatasetR5.xlsx", sheet_name="Data")

# Filter variables

jst = jst[["year", "iso","rgdpmad", "cpi", "iy", "tloans", "expenditure", "revenue", "gdp", "exports", "imports"]]

# Restrict data to 1985 and later jst = jst[jst['year']>1984]

# Calculate log returns

jst['gdpgrowth'] = jst.groupby("iso")['rgdpmad'].apply(lambda x: np.log(x) - np.log(x.shift()))*100

# Calculate economic openness according to Bae et al. (2021) jst['openness'] = (jst['exports'] + jst['imports'])/jst['gdp']

# Calculate government expenditures by gdp jst['exp_gdp'] = jst['expenditure']/jst['gdp']

# Calculate domestic credit by gdp

jst['credit_gdp'] = jst['tloans']/jst['gdp']

# Replace missing values with NaN jst = jst.replace('..', np.nan)

# Change variable name jst['loc']=jst['iso']

# Drop unnecessary variables

jst = jst[['year','loc','rgdpmad', 'iy', 'credit_gdp', 'exp_gdp', 'openness', 'gdpgrowth']]

#jst = jst[jst['loc']!='GBR']

# Set multiindex

jst = jst.set_index(['loc','year'])

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

Concentration data

In [ ]:

US stock data

def end_year_data(df):

"""

Convert DataFrame to end of year data args:

df(DataFrame): DataFrame that needs to be converted returns:

df_price (DataFrame): DataFrame with only end-of-year values """

df = df.dropna()

df['date'] = pd.to_datetime(df['datadate'].astype(str), format= '%Y%m%d') df['year'] = df['date'].dt.year

df['month_year'] = df['date'].dt.to_period('M')

df_price = pd.DataFrame(df.groupby(['gvkey','year']).last()) return df_price

# Loop through files and append to list

for i in ['JPN','AUS','DEU','FRA', 'CHE', 'NLD','BEL', 'NOR', 'ESP','DNK','FIN','ITA','PRT','SWE','IRL','NA']

file_name = "NA{}.csv"

df_list = []

for i in range(1,9):

df_list.append(pd.read_csv(file_name.format(i), low_memory=False))

# Create list of DataFrames df_list2 = []

for i in df_list:

df_list2.append(end_year_data(i))

# Concatenate country returns df_uk = pd.concat(df_list2)

countries = ['USA','Canada','Sweden','Portugal','Ireland','Italy','Finland','Denmark','Belgium','Spain','Switzerland','Aus file_name = "{}.csv"

df_list = []

for i in countries:

df_list.append(pd.read_csv(file_name.format(i), low_memory=False)) countries_data = pd.concat(df_list)

countries_data = countries_data.set_index('gvkey') countries_data.to_csv('Master.csv')

# Lists to use

countries = ['USA','GBR','DEU','FIN','IRL','FRA','CAN','NLD','ESP','ITA','NOR','SWE','CHE','AUS','JPN','BEL','DNK','PRT']

list_tech_oregon = ['3570', '3571', '3572','3575','3576','3577','3578','3579','3600','3612','3613','3620','3621','3630','3 list_tech_census = ['2833', '2834','2835','2836','3570','3571','3572','3575','3577','3578','3579','3661','3663','3669','36

In [ ]:

Concentration top 20 In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

Concentration top 15 In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

Concentration 10 In [ ]:

In [ ]:

# Read file

US_stock_data = pd.read_csv('US stock data.csv', low_memory=False)

# Format date

US_stock_data['date'] = pd.to_datetime(US_stock_data['date'], format="%Y%m%d")

# Get year and month

US_stock_data['year'] = US_stock_data['date'].dt.year US_stock_data['month'] = US_stock_data['date'].dt.month

# Drop negative stock values

US_stock_data = US_stock_data.drop(US_stock_data[US_stock_data.PRC<0].index)

# Drop missing data

US_stock_data = US_stock_data.dropna()

# Calculate market cap per firm per index

US_stock_data['Market_cap'] = US_stock_data['SHROUT']*US_stock_data['PRC']

# Keep end year values

US_stock_data = US_stock_data.drop(US_stock_data[US_stock_data.month!=12].index)

# Drop dates before 1962

US_stock_data = US_stock_data.drop(US_stock_data[US_stock_data.year<1985].index)

# Set index to year

US_stock_data= US_stock_data.set_index('year')

# Assign rank to largest 5 firms for the year

US_stock_data['rank'] = US_stock_data.groupby('year')['Market_cap'].rank(method='max', ascending = False) US_stock_data['total_cap'] = US_stock_data.groupby('year')['Market_cap'].sum()

US_stock_data['loc'] = 'USA'

# Filter top 20

market_20 = US_stock_data[US_stock_data['rank']<21]

# Calculate total market concentration

total_cap2 = pd.DataFrame(US_stock_data.groupby(['loc','year'])['Market_cap'].sum()) total_cap2.columns=['total_cap']

market_20['tech'] = market_20.SICCD.apply(lambda x: 1 if x in list_tech_census else 0)

market_20['market_tech_20_sum'] = market_20.groupby('year').apply(lambda x: x[x['tech']==1]['Market_cap'].sum()) market_20['market_non_tech_20_sum'] = market_20.groupby('year').apply(lambda x: x[x['tech']==0]['Market_cap'].sum())

# Calculate total market capitalization of top 20 tech firms

total_cap2['market_tech_20_sum'] = market_20.groupby(['loc','year'])['market_tech_20_sum'].mean()

# Calculate total market capitalization of top 20 non-tech firms

total_cap2['market_non_tech_20_sum'] = market_20.groupby(['loc','year'])['market_non_tech_20_sum'].mean()

# Filter top 15

market_15 = US_stock_data[US_stock_data['rank']<16]

market_15['tech'] = market_15.SICCD.apply(lambda x: 1 if x in list_tech_census else 0)

market_15['market_tech_15_sum'] = market_15.groupby('year').apply(lambda x: x[x['tech']==1]['Market_cap'].sum()) market_15['market_non_tech_15_sum'] = market_15.groupby('year').apply(lambda x: x[x['tech']==0]['Market_cap'].sum())

# Calculate total market capitalization of top 15 tech firms

total_cap2['market_tech_15_sum'] = market_15.groupby(['loc','year'])['market_tech_15_sum'].mean()

# Calculate total market capitalization of top 15 non-tech firms

total_cap2['market_non_tech_15_sum'] = market_15.groupby(['loc','year'])['market_non_tech_15_sum'].mean() total_cap2

total_cap2.rename(columns={'total_cap':'market_cap'}, inplace=True)

# Filter top 10

market_10 = US_stock_data[US_stock_data['rank']<11]

market_10['tech'] = market_10.SICCD.apply(lambda x: 1 if x in list_tech_census else 0)

market_10['market_tech_10_sum'] = market_10.groupby('year').apply(lambda x: x[x['tech']==1]['Market_cap'].sum()) market_10['market_non_tech_10_sum'] = market_10.groupby('year').apply(lambda x: x[x['tech']==0]['Market_cap'].sum())

In [ ]:

In [ ]:

Concentration 5 In [ ]:

In [ ]:

In [ ]:

In [ ]:

Non-US stock data

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

Create regression dataset In [ ]:

Market concentration for 20 firms In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

# Calculate total market capitalization of top 10 tech firms

total_cap2['market_tech_10_sum'] = market_10.groupby(['loc','year'])['market_tech_10_sum'].mean()

# Calculate total market capitalization of top 10 non-tech firms

total_cap2['market_non_tech_10_sum'] = market_10.groupby(['loc','year'])['market_non_tech_10_sum'].mean()

# Filter top 15

market_5 = US_stock_data[US_stock_data['rank']<6]

market_5['tech'] = market_5.SICCD.apply(lambda x: 1 if x in list_tech_census else 0)

market_5['market_tech_5_sum'] = market_5.groupby('year').apply(lambda x: x[x['tech']==1]['Market_cap'].sum()) market_5['market_non_tech_5_sum'] = market_5.groupby('year').apply(lambda x: x[x['tech']==0]['Market_cap'].sum())

# Calculate total market capitalization of top 15 tech firms

total_cap2['market_tech_5_sum'] = market_5.groupby(['loc','year'])['market_tech_5_sum'].mean()

# Calculate total market capitalization of top 15 non-tech firms

total_cap2['market_non_tech_5_sum'] = market_5.groupby(['loc','year'])['market_non_tech_5_sum'].mean()

master = pd.read_csv("Master.csv")

# Calculate market cap of company each year in each country master['market_cap'] = master['cshoc']*master['prccd']

# Assign ranks per year per country to companies

master['rank'] = master.groupby(['year','loc'])['market_cap'].rank(method='max', ascending = False)

# Domestic firms only

master = master[master['loc'].isin(countries)]

# Exclude 2022

master = master[master['year']!=2022]

# Make dataframe of total market cap by year and country

total_cap=pd.DataFrame(master.groupby(['loc','year'])['market_cap'].sum())

# Make DataFrame of largest five companies

master_20 = master.drop(master[master['rank']>20].index) master_20

master_20['sic']=master_20['sic'].astype(int) master_20['sic']=master_20['sic'].astype(str)

master_20['tech'] = master_20.sic.apply(lambda x: 1 if x in list_tech_census else 0) master_20

list_master = []

for i in countries:

list_master.append(master_20[master_20['loc']==i])

def market_con_maker(df):

df = df.set_index('year')

df['market_tech_20_sum'] = df.groupby('year').apply(lambda x: x[x['tech']==1]['market_cap'].sum()) df['market_non_tech_20_sum'] = df.groupby('year').apply(lambda x: x[x['tech']==0]['market_cap'].sum()) return df

market = []

In [ ]:

Market concentration top 15 In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

Concentration top 10 In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

Concentration top 5 In [ ]:

In [ ]:

df_market

# Make DataFrame of largest 15 companies

master_15 = master.drop(master[master['rank']>15].index) master_15

master_15['sic']=master_15['sic'].astype(int) master_15['sic']=master_15['sic'].astype(str)

master_15['tech'] = master_15.sic.apply(lambda x: 1 if x in list_tech_census else 0) master_15

list_master = []

for i in countries:

list_master.append(master_15[master_15['loc']==i])

def market_con_maker_15(df):

df = df.set_index('year')

df['market_tech_15_sum'] = df.groupby('year').apply(lambda x: x[x['tech']==1]['market_cap'].sum()) df['market_non_tech_15_sum'] = df.groupby('year').apply(lambda x: x[x['tech']==0]['market_cap'].sum()) return df

market_15 = []

for i in list_master:

market_15.append(market_con_maker_15(i)) df_market_15 = pd.DataFrame()

df_market_15 = pd.concat(market_15) df_market_15

# Make DataFrame of largest 10 companies

master_10 = master.drop(master[master['rank']>10].index) master_10

master_10['sic']=master_10['sic'].astype(int) master_10['sic']=master_10['sic'].astype(str)

master_10['tech'] = master_10.sic.apply(lambda x: 1 if x in list_tech_census else 0) master_15

list_master = []

for i in countries:

list_master.append(master_10[master_10['loc']==i])

def market_con_maker_10(df):

df = df.set_index('year')

df['market_tech_10_sum'] = df.groupby('year').apply(lambda x: x[x['tech']==1]['market_cap'].sum()) df['market_non_tech_10_sum'] = df.groupby('year').apply(lambda x: x[x['tech']==0]['market_cap'].sum()) return df

market_10 = []

for i in list_master:

market_10.append(market_con_maker_10(i)) df_market_10 = pd.DataFrame()

df_market_10 = pd.concat(market_10)

# Make DataFrame of largest 5 companies

master_5 = master.drop(master[master['rank']>5].index) master_5

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

Final dataset In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

master_5['sic']=master_5['sic'].astype(int) master_5['sic']=master_5['sic'].astype(str)

master_5['tech'] = master_5.sic.apply(lambda x: 1 if x in list_tech_census else 0) master_5

list_master = []

for i in countries:

list_master.append(master_5[master_5['loc']==i])

def market_con_maker_5(df):

df = df.set_index('year')

df['market_tech_5_sum'] = df.groupby('year').apply(lambda x: x[x['tech']==1]['market_cap'].sum()) df['market_non_tech_5_sum'] = df.groupby('year').apply(lambda x: x[x['tech']==0]['market_cap'].sum()) return df

market_5 = []

for i in list_master:

market_5.append(market_con_maker_5(i)) df_market_5 = pd.DataFrame()

df_market_5 = pd.concat(market_5)

total_cap['market_tech_20_sum'] = df_market.groupby(['loc','year'])['market_tech_20_sum'].mean()

total_cap['market_non_tech_20_sum'] = df_market.groupby(['loc','year'])['market_non_tech_20_sum'].mean() total_cap['market_tech_15_sum'] = df_market_15.groupby(['loc','year'])['market_tech_15_sum'].mean()

total_cap['market_non_tech_15_sum'] = df_market_15.groupby(['loc','year'])['market_non_tech_15_sum'].mean() total_cap['market_tech_10_sum'] = df_market_10.groupby(['loc','year'])['market_tech_10_sum'].mean()

total_cap['market_non_tech_10_sum'] = df_market_10.groupby(['loc','year'])['market_non_tech_10_sum'].mean() total_cap['market_tech_5_sum'] = df_market_5.groupby(['loc','year'])['market_tech_5_sum'].mean()

total_cap['market_non_tech_5_sum'] = df_market_5.groupby(['loc','year'])['market_non_tech_5_sum'].mean()

# Remove US

total_cap.drop('USA', inplace=True)

# Add US

total_cap = total_cap.append(total_cap2)

total_cap['market_cap_20'] = total_cap[['market_non_tech_20_sum','market_tech_20_sum']].sum(axis=1) total_cap['market_con_tech_20'] = total_cap['market_tech_20_sum']/total_cap['market_cap']

total_cap['market_con_non_tech_20'] = total_cap['market_non_tech_20_sum']/total_cap['market_cap']

total_cap['market_con_20'] = total_cap['market_cap_20']/total_cap['market_cap']

total_cap['market_cap_15'] = total_cap[['market_non_tech_15_sum','market_tech_15_sum']].sum(axis=1) total_cap['market_con_tech_15'] = total_cap['market_tech_15_sum']/total_cap['market_cap']

total_cap['market_con_non_tech_15'] = total_cap['market_non_tech_15_sum']/total_cap['market_cap']

total_cap['market_con_15'] = total_cap['market_cap_15']/total_cap['market_cap']

total_cap['market_cap_10'] = total_cap[['market_non_tech_10_sum','market_tech_10_sum']].sum(axis=1) total_cap['market_con_tech_10'] = total_cap['market_tech_10_sum']/total_cap['market_cap']

total_cap['market_con_non_tech_10'] = total_cap['market_non_tech_10_sum']/total_cap['market_cap']

total_cap['market_con_10'] = total_cap['market_cap_10']/total_cap['market_cap']

total_cap['market_cap_5'] = total_cap[['market_non_tech_5_sum','market_tech_5_sum']].sum(axis=1) total_cap['market_con_tech_5'] = total_cap['market_tech_5_sum']/total_cap['market_cap']

total_cap['market_con_non_tech_5'] = total_cap['market_non_tech_5_sum']/total_cap['market_cap']

total_cap['market_con_5'] = total_cap['market_cap_5']/total_cap['market_cap']

total_cap

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

Turnover data

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

Merge datasets

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

inflation = pd.read_csv("Inflation rates.csv") inflation = inflation[['LOCATION', 'TIME', 'Value']]

inflation['TIME'] = pd.to_datetime(inflation['TIME'], format= '%Y') inflation['TIME'] = inflation['TIME'].dt.year

inflation['year'] = inflation['TIME']

inflation['loc'] = inflation['LOCATION']

inflation = inflation[['loc','year','Value']]

#inflation = inflation[inflation['loc']!='GBR']

inflation = inflation.set_index(['loc','year']) inflation

turnover = pd.read_excel("Turnover.xlsx")

#turnover = turnover[turnover['loc']!='GBR']

turnover = turnover.set_index(['loc','year']) turnover = turnover.replace('..', np.nan) turnover

# Add market concentration to macro data

df_dataset = pd.concat([jst, total_cap, inflation, turnover], axis = 1) df_dataset.to_csv('final_dataset.csv')

df_dataset = pd.read_csv('final_dataset.csv') df_dataset = df_dataset.set_index(['loc','year'])

# Create lag variables

df_dataset['market_con_tech_20_lag'] = df_dataset.groupby('loc')['market_con_tech_20'].shift(5) df_dataset['credit_lag'] = df_dataset.groupby('loc')['credit_gdp'].shift(5)

df_dataset['turnover_lag'] = df_dataset.groupby('loc')['turnover'].shift(5)

df_dataset['market_con_non_tech_20_lag'] = df_dataset.groupby('loc')['market_con_non_tech_20'].shift(5) df_dataset['market_cap_lag'] = df_dataset.groupby('loc')['market_cap_gdp'].shift(5)

df_dataset['market_cap_lag'] = df_dataset.market_cap_lag.astype(float)

df_dataset['market_con_20_lag'] = df_dataset.groupby('loc')['market_con_20'].shift(5)

df_dataset['market_con_non_tech_15_lag'] = df_dataset.groupby('loc')['market_con_non_tech_15'].shift(5) df_dataset['market_con_tech_15_lag'] = df_dataset.groupby('loc')['market_con_tech_15'].shift(5)

df_dataset['market_con_15_lag'] = df_dataset.groupby('loc')['market_con_15'].shift(5)

df_dataset['market_con_non_tech_10_lag'] = df_dataset.groupby('loc')['market_con_non_tech_10'].shift(5) df_dataset['market_con_tech_10_lag'] = df_dataset.groupby('loc')['market_con_tech_10'].shift(5)

df_dataset['market_con_10_lag'] = df_dataset.groupby('loc')['market_con_10'].shift(5)

df_dataset['market_con_non_tech_5_lag'] = df_dataset.groupby('loc')['market_con_non_tech_5'].shift(5) df_dataset['market_con_tech_5_lag'] = df_dataset.groupby('loc')['market_con_tech_5'].shift(5)

df_dataset['market_con_5_lag'] = df_dataset.groupby('loc')['market_con_5'].shift(5)

# Rename inflation variable

df_dataset = df_dataset.rename(columns={'Value':'inflation'})

# Filter variables

df_dataset2 = df_dataset[['market_con_tech_20_lag', 'market_con_non_tech_20_lag','market_con_20_lag','market_con_tech_15_l

# Turn percentages into normal numbers

df_dataset2['gdpgrowth'] = df_dataset2['gdpgrowth']/100 df_dataset2['turnover_lag'] = df_dataset2['turnover_lag']/100 df_dataset2['inflation'] = df_dataset2['inflation']/100

df_dataset2['market_cap_lag'] = df_dataset2['market_cap_lag']/100

Descriptive statistics

Summary statistics

In [ ]:

In [ ]:

In [ ]:

In [ ]:

Graphs

In [ ]:

In [ ]:

Figure 1a In [ ]:

Figure 2a In [ ]:

Figure 2b In [ ]:

df_dataset.groupby('loc')[['market_con_tech_10', 'market_con_non_tech_10','market_con_10','market_con_tech_5','market_con_

# Turn percentages into normal numbers

df_dataset['gdpgrowth'] = df_dataset['gdpgrowth']/100 df_dataset['turnover_lag'] = df_dataset['turnover_lag']/100 df_dataset['inflation'] = df_dataset['inflation']/100

df_dataset['market_cap_lag'] = df_dataset['market_cap_lag']/100

df_dataset2.groupby('loc')['market_cap_lag','gdpgrowth','iy','credit_lag','openness','exp_gdp','turnover_lag','inflation']

df_dataset3 = df_dataset2[['market_con_tech_5_lag', 'market_con_non_tech_5_lag','market_con_5_lag','market_con_tech_10_la

# Average market concentration picture = df_dataset.reset_index()

picture['mktcontech10'] = picture['market_con_tech_10']

picture['mktconnontech10'] = picture['market_con_non_tech_10']

mconmean=pd.DataFrame(picture.groupby('year')['market_con_tech_10'].mean()) mconmean['non_tech10']=picture.groupby('year')['market_con_non_tech_10'].mean() mconmean['tech5']=picture.groupby('year')['market_con_tech_5'].mean()

mconmean['non_tech5']=picture.groupby('year')['market_con_non_tech_5'].mean() mconmean['all5']=picture.groupby('year')['market_con_5'].mean()

mconmean['all10']=picture.groupby('year')['market_con_10'].mean() picture

figure = plt.figure(figsize=(8,6))

plt.plot(mconmean['market_con_tech_10'], label='Tech firms in top 10') plt.plot(mconmean['non_tech10'], label='Non-tech firms in top 10') plt.plot(mconmean['all10'], label='Top 10 firms')

plt.plot(mconmean['tech5'], label='Tech firms in top 5')

plt.plot(mconmean['non_tech5'], label='Non-tech firms in top 5') plt.plot(mconmean['all5'], label='Top 5 firms')

plt.xlabel('Year')

plt.ylabel(r'Market concentration$_{t-5}$') plt.legend()

plt.savefig('averagemktcon.jpg') plt.show()

figure =plt.figure(figsize=(8,6)) for i in countries:

plt.plot(picture[picture['loc']==i][['year','market_con_tech_5']].tail(35).set_index('year'),label=i) plt.legend(loc='upper right', bbox_to_anchor=(1.15, 1))

plt.xlabel('Year')

plt.ylabel('Market concentration of top 5 firms at t-5') plt.savefig('tech5.jpg',replace=True)

plt.show()

figure =plt.figure(figsize=(8,6)) for i in countries:

plt.plot(picture[picture['loc']==i][['year','market_con_non_tech_5']].tail(34).set_index('year'),label=i) plt.legend(loc='upper right', bbox_to_anchor=(1.15, 1))

plt.xlabel('Year')

Figure 3a In [ ]:

Figure 3b In [ ]:

Correlations In [ ]:

In [ ]:

In [ ]:

GERELATEERDE DOCUMENTEN