Prerequisites
Import relevant libraries
In [ ]:
Mount drive
In [ ]:
Prepare macro data using JST
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Create master country data
import pandas as pd import numpy as np import datetime as dt
import matplotlib.pyplot as plt import os
import statsmodels.formula.api as smf
%cd "C:\Users\Calvin\iCloudDrive\Documents\Finance\Master's Thesis\DATA"
# Read data
jst = pd.read_excel("JSTdatasetR5.xlsx", sheet_name="Data")
# Filter variables
jst = jst[["year", "iso","rgdpmad", "cpi", "iy", "tloans", "expenditure", "revenue", "gdp", "exports", "imports"]]
# Restrict data to 1985 and later jst = jst[jst['year']>1984]
# Calculate log returns
jst['gdpgrowth'] = jst.groupby("iso")['rgdpmad'].apply(lambda x: np.log(x) - np.log(x.shift()))*100
# Calculate economic openness according to Bae et al. (2021) jst['openness'] = (jst['exports'] + jst['imports'])/jst['gdp']
# Calculate government expenditures by gdp jst['exp_gdp'] = jst['expenditure']/jst['gdp']
# Calculate domestic credit by gdp
jst['credit_gdp'] = jst['tloans']/jst['gdp']
# Replace missing values with NaN jst = jst.replace('..', np.nan)
# Change variable name jst['loc']=jst['iso']
# Drop unnecessary variables
jst = jst[['year','loc','rgdpmad', 'iy', 'credit_gdp', 'exp_gdp', 'openness', 'gdpgrowth']]
#jst = jst[jst['loc']!='GBR']
# Set multiindex
jst = jst.set_index(['loc','year'])
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Concentration data
In [ ]:
US stock data
def end_year_data(df):
"""
Convert DataFrame to end of year data args:
df(DataFrame): DataFrame that needs to be converted returns:
df_price (DataFrame): DataFrame with only end-of-year values """
df = df.dropna()
df['date'] = pd.to_datetime(df['datadate'].astype(str), format= '%Y%m%d') df['year'] = df['date'].dt.year
df['month_year'] = df['date'].dt.to_period('M')
df_price = pd.DataFrame(df.groupby(['gvkey','year']).last()) return df_price
# Loop through files and append to list
for i in ['JPN','AUS','DEU','FRA', 'CHE', 'NLD','BEL', 'NOR', 'ESP','DNK','FIN','ITA','PRT','SWE','IRL','NA']
file_name = "NA{}.csv"
df_list = []
for i in range(1,9):
df_list.append(pd.read_csv(file_name.format(i), low_memory=False))
# Create list of DataFrames df_list2 = []
for i in df_list:
df_list2.append(end_year_data(i))
# Concatenate country returns df_uk = pd.concat(df_list2)
countries = ['USA','Canada','Sweden','Portugal','Ireland','Italy','Finland','Denmark','Belgium','Spain','Switzerland','Aus file_name = "{}.csv"
df_list = []
for i in countries:
df_list.append(pd.read_csv(file_name.format(i), low_memory=False)) countries_data = pd.concat(df_list)
countries_data = countries_data.set_index('gvkey') countries_data.to_csv('Master.csv')
# Lists to use
countries = ['USA','GBR','DEU','FIN','IRL','FRA','CAN','NLD','ESP','ITA','NOR','SWE','CHE','AUS','JPN','BEL','DNK','PRT']
list_tech_oregon = ['3570', '3571', '3572','3575','3576','3577','3578','3579','3600','3612','3613','3620','3621','3630','3 list_tech_census = ['2833', '2834','2835','2836','3570','3571','3572','3575','3577','3578','3579','3661','3663','3669','36
In [ ]:
Concentration top 20 In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Concentration top 15 In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Concentration 10 In [ ]:
In [ ]:
# Read file
US_stock_data = pd.read_csv('US stock data.csv', low_memory=False)
# Format date
US_stock_data['date'] = pd.to_datetime(US_stock_data['date'], format="%Y%m%d")
# Get year and month
US_stock_data['year'] = US_stock_data['date'].dt.year US_stock_data['month'] = US_stock_data['date'].dt.month
# Drop negative stock values
US_stock_data = US_stock_data.drop(US_stock_data[US_stock_data.PRC<0].index)
# Drop missing data
US_stock_data = US_stock_data.dropna()
# Calculate market cap per firm per index
US_stock_data['Market_cap'] = US_stock_data['SHROUT']*US_stock_data['PRC']
# Keep end year values
US_stock_data = US_stock_data.drop(US_stock_data[US_stock_data.month!=12].index)
# Drop dates before 1962
US_stock_data = US_stock_data.drop(US_stock_data[US_stock_data.year<1985].index)
# Set index to year
US_stock_data= US_stock_data.set_index('year')
# Assign rank to largest 5 firms for the year
US_stock_data['rank'] = US_stock_data.groupby('year')['Market_cap'].rank(method='max', ascending = False) US_stock_data['total_cap'] = US_stock_data.groupby('year')['Market_cap'].sum()
US_stock_data['loc'] = 'USA'
# Filter top 20
market_20 = US_stock_data[US_stock_data['rank']<21]
# Calculate total market concentration
total_cap2 = pd.DataFrame(US_stock_data.groupby(['loc','year'])['Market_cap'].sum()) total_cap2.columns=['total_cap']
market_20['tech'] = market_20.SICCD.apply(lambda x: 1 if x in list_tech_census else 0)
market_20['market_tech_20_sum'] = market_20.groupby('year').apply(lambda x: x[x['tech']==1]['Market_cap'].sum()) market_20['market_non_tech_20_sum'] = market_20.groupby('year').apply(lambda x: x[x['tech']==0]['Market_cap'].sum())
# Calculate total market capitalization of top 20 tech firms
total_cap2['market_tech_20_sum'] = market_20.groupby(['loc','year'])['market_tech_20_sum'].mean()
# Calculate total market capitalization of top 20 non-tech firms
total_cap2['market_non_tech_20_sum'] = market_20.groupby(['loc','year'])['market_non_tech_20_sum'].mean()
# Filter top 15
market_15 = US_stock_data[US_stock_data['rank']<16]
market_15['tech'] = market_15.SICCD.apply(lambda x: 1 if x in list_tech_census else 0)
market_15['market_tech_15_sum'] = market_15.groupby('year').apply(lambda x: x[x['tech']==1]['Market_cap'].sum()) market_15['market_non_tech_15_sum'] = market_15.groupby('year').apply(lambda x: x[x['tech']==0]['Market_cap'].sum())
# Calculate total market capitalization of top 15 tech firms
total_cap2['market_tech_15_sum'] = market_15.groupby(['loc','year'])['market_tech_15_sum'].mean()
# Calculate total market capitalization of top 15 non-tech firms
total_cap2['market_non_tech_15_sum'] = market_15.groupby(['loc','year'])['market_non_tech_15_sum'].mean() total_cap2
total_cap2.rename(columns={'total_cap':'market_cap'}, inplace=True)
# Filter top 10
market_10 = US_stock_data[US_stock_data['rank']<11]
market_10['tech'] = market_10.SICCD.apply(lambda x: 1 if x in list_tech_census else 0)
market_10['market_tech_10_sum'] = market_10.groupby('year').apply(lambda x: x[x['tech']==1]['Market_cap'].sum()) market_10['market_non_tech_10_sum'] = market_10.groupby('year').apply(lambda x: x[x['tech']==0]['Market_cap'].sum())
In [ ]:
In [ ]:
Concentration 5 In [ ]:
In [ ]:
In [ ]:
In [ ]:
Non-US stock data
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Create regression dataset In [ ]:
Market concentration for 20 firms In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
# Calculate total market capitalization of top 10 tech firms
total_cap2['market_tech_10_sum'] = market_10.groupby(['loc','year'])['market_tech_10_sum'].mean()
# Calculate total market capitalization of top 10 non-tech firms
total_cap2['market_non_tech_10_sum'] = market_10.groupby(['loc','year'])['market_non_tech_10_sum'].mean()
# Filter top 15
market_5 = US_stock_data[US_stock_data['rank']<6]
market_5['tech'] = market_5.SICCD.apply(lambda x: 1 if x in list_tech_census else 0)
market_5['market_tech_5_sum'] = market_5.groupby('year').apply(lambda x: x[x['tech']==1]['Market_cap'].sum()) market_5['market_non_tech_5_sum'] = market_5.groupby('year').apply(lambda x: x[x['tech']==0]['Market_cap'].sum())
# Calculate total market capitalization of top 15 tech firms
total_cap2['market_tech_5_sum'] = market_5.groupby(['loc','year'])['market_tech_5_sum'].mean()
# Calculate total market capitalization of top 15 non-tech firms
total_cap2['market_non_tech_5_sum'] = market_5.groupby(['loc','year'])['market_non_tech_5_sum'].mean()
master = pd.read_csv("Master.csv")
# Calculate market cap of company each year in each country master['market_cap'] = master['cshoc']*master['prccd']
# Assign ranks per year per country to companies
master['rank'] = master.groupby(['year','loc'])['market_cap'].rank(method='max', ascending = False)
# Domestic firms only
master = master[master['loc'].isin(countries)]
# Exclude 2022
master = master[master['year']!=2022]
# Make dataframe of total market cap by year and country
total_cap=pd.DataFrame(master.groupby(['loc','year'])['market_cap'].sum())
# Make DataFrame of largest five companies
master_20 = master.drop(master[master['rank']>20].index) master_20
master_20['sic']=master_20['sic'].astype(int) master_20['sic']=master_20['sic'].astype(str)
master_20['tech'] = master_20.sic.apply(lambda x: 1 if x in list_tech_census else 0) master_20
list_master = []
for i in countries:
list_master.append(master_20[master_20['loc']==i])
def market_con_maker(df):
df = df.set_index('year')
df['market_tech_20_sum'] = df.groupby('year').apply(lambda x: x[x['tech']==1]['market_cap'].sum()) df['market_non_tech_20_sum'] = df.groupby('year').apply(lambda x: x[x['tech']==0]['market_cap'].sum()) return df
market = []
In [ ]:
Market concentration top 15 In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Concentration top 10 In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Concentration top 5 In [ ]:
In [ ]:
df_market
# Make DataFrame of largest 15 companies
master_15 = master.drop(master[master['rank']>15].index) master_15
master_15['sic']=master_15['sic'].astype(int) master_15['sic']=master_15['sic'].astype(str)
master_15['tech'] = master_15.sic.apply(lambda x: 1 if x in list_tech_census else 0) master_15
list_master = []
for i in countries:
list_master.append(master_15[master_15['loc']==i])
def market_con_maker_15(df):
df = df.set_index('year')
df['market_tech_15_sum'] = df.groupby('year').apply(lambda x: x[x['tech']==1]['market_cap'].sum()) df['market_non_tech_15_sum'] = df.groupby('year').apply(lambda x: x[x['tech']==0]['market_cap'].sum()) return df
market_15 = []
for i in list_master:
market_15.append(market_con_maker_15(i)) df_market_15 = pd.DataFrame()
df_market_15 = pd.concat(market_15) df_market_15
# Make DataFrame of largest 10 companies
master_10 = master.drop(master[master['rank']>10].index) master_10
master_10['sic']=master_10['sic'].astype(int) master_10['sic']=master_10['sic'].astype(str)
master_10['tech'] = master_10.sic.apply(lambda x: 1 if x in list_tech_census else 0) master_15
list_master = []
for i in countries:
list_master.append(master_10[master_10['loc']==i])
def market_con_maker_10(df):
df = df.set_index('year')
df['market_tech_10_sum'] = df.groupby('year').apply(lambda x: x[x['tech']==1]['market_cap'].sum()) df['market_non_tech_10_sum'] = df.groupby('year').apply(lambda x: x[x['tech']==0]['market_cap'].sum()) return df
market_10 = []
for i in list_master:
market_10.append(market_con_maker_10(i)) df_market_10 = pd.DataFrame()
df_market_10 = pd.concat(market_10)
# Make DataFrame of largest 5 companies
master_5 = master.drop(master[master['rank']>5].index) master_5
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Final dataset In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
master_5['sic']=master_5['sic'].astype(int) master_5['sic']=master_5['sic'].astype(str)
master_5['tech'] = master_5.sic.apply(lambda x: 1 if x in list_tech_census else 0) master_5
list_master = []
for i in countries:
list_master.append(master_5[master_5['loc']==i])
def market_con_maker_5(df):
df = df.set_index('year')
df['market_tech_5_sum'] = df.groupby('year').apply(lambda x: x[x['tech']==1]['market_cap'].sum()) df['market_non_tech_5_sum'] = df.groupby('year').apply(lambda x: x[x['tech']==0]['market_cap'].sum()) return df
market_5 = []
for i in list_master:
market_5.append(market_con_maker_5(i)) df_market_5 = pd.DataFrame()
df_market_5 = pd.concat(market_5)
total_cap['market_tech_20_sum'] = df_market.groupby(['loc','year'])['market_tech_20_sum'].mean()
total_cap['market_non_tech_20_sum'] = df_market.groupby(['loc','year'])['market_non_tech_20_sum'].mean() total_cap['market_tech_15_sum'] = df_market_15.groupby(['loc','year'])['market_tech_15_sum'].mean()
total_cap['market_non_tech_15_sum'] = df_market_15.groupby(['loc','year'])['market_non_tech_15_sum'].mean() total_cap['market_tech_10_sum'] = df_market_10.groupby(['loc','year'])['market_tech_10_sum'].mean()
total_cap['market_non_tech_10_sum'] = df_market_10.groupby(['loc','year'])['market_non_tech_10_sum'].mean() total_cap['market_tech_5_sum'] = df_market_5.groupby(['loc','year'])['market_tech_5_sum'].mean()
total_cap['market_non_tech_5_sum'] = df_market_5.groupby(['loc','year'])['market_non_tech_5_sum'].mean()
# Remove US
total_cap.drop('USA', inplace=True)
# Add US
total_cap = total_cap.append(total_cap2)
total_cap['market_cap_20'] = total_cap[['market_non_tech_20_sum','market_tech_20_sum']].sum(axis=1) total_cap['market_con_tech_20'] = total_cap['market_tech_20_sum']/total_cap['market_cap']
total_cap['market_con_non_tech_20'] = total_cap['market_non_tech_20_sum']/total_cap['market_cap']
total_cap['market_con_20'] = total_cap['market_cap_20']/total_cap['market_cap']
total_cap['market_cap_15'] = total_cap[['market_non_tech_15_sum','market_tech_15_sum']].sum(axis=1) total_cap['market_con_tech_15'] = total_cap['market_tech_15_sum']/total_cap['market_cap']
total_cap['market_con_non_tech_15'] = total_cap['market_non_tech_15_sum']/total_cap['market_cap']
total_cap['market_con_15'] = total_cap['market_cap_15']/total_cap['market_cap']
total_cap['market_cap_10'] = total_cap[['market_non_tech_10_sum','market_tech_10_sum']].sum(axis=1) total_cap['market_con_tech_10'] = total_cap['market_tech_10_sum']/total_cap['market_cap']
total_cap['market_con_non_tech_10'] = total_cap['market_non_tech_10_sum']/total_cap['market_cap']
total_cap['market_con_10'] = total_cap['market_cap_10']/total_cap['market_cap']
total_cap['market_cap_5'] = total_cap[['market_non_tech_5_sum','market_tech_5_sum']].sum(axis=1) total_cap['market_con_tech_5'] = total_cap['market_tech_5_sum']/total_cap['market_cap']
total_cap['market_con_non_tech_5'] = total_cap['market_non_tech_5_sum']/total_cap['market_cap']
total_cap['market_con_5'] = total_cap['market_cap_5']/total_cap['market_cap']
total_cap
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Turnover data
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Merge datasets
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
inflation = pd.read_csv("Inflation rates.csv") inflation = inflation[['LOCATION', 'TIME', 'Value']]
inflation['TIME'] = pd.to_datetime(inflation['TIME'], format= '%Y') inflation['TIME'] = inflation['TIME'].dt.year
inflation['year'] = inflation['TIME']
inflation['loc'] = inflation['LOCATION']
inflation = inflation[['loc','year','Value']]
#inflation = inflation[inflation['loc']!='GBR']
inflation = inflation.set_index(['loc','year']) inflation
turnover = pd.read_excel("Turnover.xlsx")
#turnover = turnover[turnover['loc']!='GBR']
turnover = turnover.set_index(['loc','year']) turnover = turnover.replace('..', np.nan) turnover
# Add market concentration to macro data
df_dataset = pd.concat([jst, total_cap, inflation, turnover], axis = 1) df_dataset.to_csv('final_dataset.csv')
df_dataset = pd.read_csv('final_dataset.csv') df_dataset = df_dataset.set_index(['loc','year'])
# Create lag variables
df_dataset['market_con_tech_20_lag'] = df_dataset.groupby('loc')['market_con_tech_20'].shift(5) df_dataset['credit_lag'] = df_dataset.groupby('loc')['credit_gdp'].shift(5)
df_dataset['turnover_lag'] = df_dataset.groupby('loc')['turnover'].shift(5)
df_dataset['market_con_non_tech_20_lag'] = df_dataset.groupby('loc')['market_con_non_tech_20'].shift(5) df_dataset['market_cap_lag'] = df_dataset.groupby('loc')['market_cap_gdp'].shift(5)
df_dataset['market_cap_lag'] = df_dataset.market_cap_lag.astype(float)
df_dataset['market_con_20_lag'] = df_dataset.groupby('loc')['market_con_20'].shift(5)
df_dataset['market_con_non_tech_15_lag'] = df_dataset.groupby('loc')['market_con_non_tech_15'].shift(5) df_dataset['market_con_tech_15_lag'] = df_dataset.groupby('loc')['market_con_tech_15'].shift(5)
df_dataset['market_con_15_lag'] = df_dataset.groupby('loc')['market_con_15'].shift(5)
df_dataset['market_con_non_tech_10_lag'] = df_dataset.groupby('loc')['market_con_non_tech_10'].shift(5) df_dataset['market_con_tech_10_lag'] = df_dataset.groupby('loc')['market_con_tech_10'].shift(5)
df_dataset['market_con_10_lag'] = df_dataset.groupby('loc')['market_con_10'].shift(5)
df_dataset['market_con_non_tech_5_lag'] = df_dataset.groupby('loc')['market_con_non_tech_5'].shift(5) df_dataset['market_con_tech_5_lag'] = df_dataset.groupby('loc')['market_con_tech_5'].shift(5)
df_dataset['market_con_5_lag'] = df_dataset.groupby('loc')['market_con_5'].shift(5)
# Rename inflation variable
df_dataset = df_dataset.rename(columns={'Value':'inflation'})
# Filter variables
df_dataset2 = df_dataset[['market_con_tech_20_lag', 'market_con_non_tech_20_lag','market_con_20_lag','market_con_tech_15_l
# Turn percentages into normal numbers
df_dataset2['gdpgrowth'] = df_dataset2['gdpgrowth']/100 df_dataset2['turnover_lag'] = df_dataset2['turnover_lag']/100 df_dataset2['inflation'] = df_dataset2['inflation']/100
df_dataset2['market_cap_lag'] = df_dataset2['market_cap_lag']/100
Descriptive statistics
Summary statistics
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Graphs
In [ ]:
In [ ]:
Figure 1a In [ ]:
Figure 2a In [ ]:
Figure 2b In [ ]:
df_dataset.groupby('loc')[['market_con_tech_10', 'market_con_non_tech_10','market_con_10','market_con_tech_5','market_con_
# Turn percentages into normal numbers
df_dataset['gdpgrowth'] = df_dataset['gdpgrowth']/100 df_dataset['turnover_lag'] = df_dataset['turnover_lag']/100 df_dataset['inflation'] = df_dataset['inflation']/100
df_dataset['market_cap_lag'] = df_dataset['market_cap_lag']/100
df_dataset2.groupby('loc')['market_cap_lag','gdpgrowth','iy','credit_lag','openness','exp_gdp','turnover_lag','inflation']
df_dataset3 = df_dataset2[['market_con_tech_5_lag', 'market_con_non_tech_5_lag','market_con_5_lag','market_con_tech_10_la
# Average market concentration picture = df_dataset.reset_index()
picture['mktcontech10'] = picture['market_con_tech_10']
picture['mktconnontech10'] = picture['market_con_non_tech_10']
mconmean=pd.DataFrame(picture.groupby('year')['market_con_tech_10'].mean()) mconmean['non_tech10']=picture.groupby('year')['market_con_non_tech_10'].mean() mconmean['tech5']=picture.groupby('year')['market_con_tech_5'].mean()
mconmean['non_tech5']=picture.groupby('year')['market_con_non_tech_5'].mean() mconmean['all5']=picture.groupby('year')['market_con_5'].mean()
mconmean['all10']=picture.groupby('year')['market_con_10'].mean() picture
figure = plt.figure(figsize=(8,6))
plt.plot(mconmean['market_con_tech_10'], label='Tech firms in top 10') plt.plot(mconmean['non_tech10'], label='Non-tech firms in top 10') plt.plot(mconmean['all10'], label='Top 10 firms')
plt.plot(mconmean['tech5'], label='Tech firms in top 5')
plt.plot(mconmean['non_tech5'], label='Non-tech firms in top 5') plt.plot(mconmean['all5'], label='Top 5 firms')
plt.xlabel('Year')
plt.ylabel(r'Market concentration$_{t-5}$') plt.legend()
plt.savefig('averagemktcon.jpg') plt.show()
figure =plt.figure(figsize=(8,6)) for i in countries:
plt.plot(picture[picture['loc']==i][['year','market_con_tech_5']].tail(35).set_index('year'),label=i) plt.legend(loc='upper right', bbox_to_anchor=(1.15, 1))
plt.xlabel('Year')
plt.ylabel('Market concentration of top 5 firms at t-5') plt.savefig('tech5.jpg',replace=True)
plt.show()
figure =plt.figure(figsize=(8,6)) for i in countries:
plt.plot(picture[picture['loc']==i][['year','market_con_non_tech_5']].tail(34).set_index('year'),label=i) plt.legend(loc='upper right', bbox_to_anchor=(1.15, 1))
plt.xlabel('Year')
Figure 3a In [ ]:
Figure 3b In [ ]:
Correlations In [ ]:
In [ ]:
In [ ]: