# Pandas for managing datasets
import pandas as pd

# Display floats with 2 decimal places
pd.options.display.float_format = '{:,.2f}'.format

# Expand display limits
pd.options.display.max_rows = 200
pd.options.display.max_columns = 100

# Read BNC2 sample dataset
df = pd.read_csv('BNC2_sample.csv',
                 names=['Code', 'Date', 'Open', 'High', 'Low', 
                        'Close', 'Volume', 'VWAP', 'TWAP'])
 
# Display first 5 observations
df.head()

# Unique codes in the dataset
print( df.Code.unique() )

['GWA_BTC' 'GWA_ETH' 'GWA_LTC' 'GWA_XLM' 'GWA_XRP' 'MWA_BTC_CNY'
 'MWA_BTC_EUR' 'MWA_BTC_GBP' 'MWA_BTC_JPY' 'MWA_BTC_USD' 'MWA_ETH_CNY'
 'MWA_ETH_EUR' 'MWA_ETH_GBP' 'MWA_ETH_JPY' 'MWA_ETH_USD' 'MWA_LTC_CNY'
 'MWA_LTC_EUR' 'MWA_LTC_GBP' 'MWA_LTC_JPY' 'MWA_LTC_USD' 'MWA_XLM_CNY'
 'MWA_XLM_EUR' 'MWA_XLM_USD' 'MWA_XRP_CNY' 'MWA_XRP_EUR' 'MWA_XRP_GBP'
 'MWA_XRP_JPY' 'MWA_XRP_USD']

# Example of GWA and MWA relationship
df[df.Code.isin(['GWA_BTC', 'MWA_BTC_JPY', 'MWA_BTC_EUR']) & (df.Date == '2018-01-01')]

# Number of observations in dataset
print( 'Before:', len(df) )
 
# Get all the GWA codes
gwa_codes = [code for code in df.Code.unique() if 'GWA_' in code]
 
# Only keep GWA observations
df = df[df.Code.isin(gwa_codes)]
 
# Number of observations left
print( 'After:', len(df) )

Before: 31761
After: 6309

# Pivot dataset
pivoted_df = df.pivot(index='Date', columns='Code', values='VWAP')
 
# Display examples from pivoted dataset
pivoted_df.tail()

print( pivoted_df.tail(3) )

Code         GWA_BTC  GWA_ETH  GWA_LTC  GWA_XLM  GWA_XRP
Date                                                    
2018-01-21 12,326.23 1,108.90   197.36     0.48     1.55
2018-01-22 11,397.52 1,038.21   184.92     0.47     1.43
2018-01-23 10,921.00   992.05   176.95     0.47     1.42

print( pivoted_df.tail(3).shift(1) )

Code         GWA_BTC  GWA_ETH  GWA_LTC  GWA_XLM  GWA_XRP
Date                                                    
2018-01-21       nan      nan      nan      nan      nan
2018-01-22 12,326.23 1,108.90   197.36     0.48     1.55
2018-01-23 11,397.52 1,038.21   184.92     0.47     1.43

# Calculate returns over 7 days prior
delta_7 = pivoted_df / pivoted_df.shift(7) - 1.0
 
# Display examples
delta_7.tail()

# Calculate returns over each window and store them in dictionary
delta_dict = {}
for offset in [7, 14, 21, 28]:
    delta_dict['delta_{}'.format(offset)] = pivoted_df / pivoted_df.shift(offset) - 1.0

# Display result "delta_dict"
delta_dict

{'delta_7': Code        GWA_BTC  GWA_ETH  GWA_LTC  GWA_XLM  GWA_XRP
 Date                                                   
 2014-04-01      nan      nan      nan      nan      nan
 2014-04-02      nan      nan      nan      nan      nan
 2014-04-03      nan      nan      nan      nan      nan
 2014-04-04      nan      nan      nan      nan      nan
 2014-04-05      nan      nan      nan      nan      nan
 ...             ...      ...      ...      ...      ...
 2018-01-19    -0.18    -0.17    -0.18    -0.21    -0.22
 2018-01-20    -0.13    -0.19    -0.18    -0.23    -0.29
 2018-01-21    -0.15    -0.20    -0.22    -0.22    -0.30
 2018-01-22    -0.21    -0.24    -0.24    -0.25    -0.32
 2018-01-23    -0.11    -0.12    -0.13    -0.02    -0.04
 
 [1394 rows x 5 columns],
 'delta_14': Code        GWA_BTC  GWA_ETH  GWA_LTC  GWA_XLM  GWA_XRP
 Date                                                   
 2014-04-01      nan      nan      nan      nan      nan
 2014-04-02      nan      nan      nan      nan      nan
 2014-04-03      nan      nan      nan      nan      nan
 2014-04-04      nan      nan      nan      nan      nan
 2014-04-05      nan      nan      nan      nan      nan
 ...             ...      ...      ...      ...      ...
 2018-01-19    -0.29     0.05    -0.23    -0.27    -0.41
 2018-01-20    -0.26     0.13    -0.29    -0.26    -0.42
 2018-01-21    -0.29    -0.01    -0.32    -0.31    -0.51
 2018-01-22    -0.29    -0.13    -0.30    -0.28    -0.52
 2018-01-23    -0.31    -0.22    -0.32    -0.24    -0.48
 
 [1394 rows x 5 columns],
 'delta_21': Code        GWA_BTC  GWA_ETH  GWA_LTC  GWA_XLM  GWA_XRP
 Date                                                   
 2014-04-01      nan      nan      nan      nan      nan
 2014-04-02      nan      nan      nan      nan      nan
 2014-04-03      nan      nan      nan      nan      nan
 2014-04-04      nan      nan      nan      nan      nan
 2014-04-05      nan      nan      nan      nan      nan
 ...             ...      ...      ...      ...      ...
 2018-01-19    -0.22     0.42    -0.24     0.88     0.02
 2018-01-20    -0.05     0.60    -0.09     0.57    -0.26
 2018-01-21    -0.11     0.51    -0.12     0.47    -0.28
 2018-01-22    -0.19     0.36    -0.19     0.05    -0.35
 2018-01-23    -0.25     0.13    -0.29    -0.10    -0.39
 
 [1394 rows x 5 columns],
 'delta_28': Code        GWA_BTC  GWA_ETH  GWA_LTC  GWA_XLM  GWA_XRP
 Date                                                   
 2014-04-01      nan      nan      nan      nan      nan
 2014-04-02      nan      nan      nan      nan      nan
 2014-04-03      nan      nan      nan      nan      nan
 2014-04-04      nan      nan      nan      nan      nan
 2014-04-05      nan      nan      nan      nan      nan
 ...             ...      ...      ...      ...      ...
 2018-01-19    -0.17     0.57    -0.24     1.30     0.65
 2018-01-20    -0.13     0.60    -0.27     1.23     0.53
 2018-01-21    -0.12     0.65    -0.27     1.22     0.45
 2018-01-22    -0.20     0.38    -0.33     1.09     0.30
 2018-01-23    -0.31     0.29    -0.37     1.16     0.29
 
 [1394 rows x 5 columns]}

# Melt delta_7 returns
melted_7 = delta_7.reset_index().melt(id_vars=['Date'], value_name='delta_7')
 
# Melted dataframe examples
melted_7.tail()

# Melt all the delta dataframes and store in list
melted_dfs = []
for key, delta_df in delta_dict.items():
    melted_dfs.append( delta_df.reset_index().melt(id_vars=['Date'], value_name=key) )

melted_dfs

[            Date     Code  delta_7
 0     2014-04-01  GWA_BTC      nan
 1     2014-04-02  GWA_BTC      nan
 2     2014-04-03  GWA_BTC      nan
 3     2014-04-04  GWA_BTC      nan
 4     2014-04-05  GWA_BTC      nan
 ...          ...      ...      ...
 6965  2018-01-19  GWA_XRP    -0.22
 6966  2018-01-20  GWA_XRP    -0.29
 6967  2018-01-21  GWA_XRP    -0.30
 6968  2018-01-22  GWA_XRP    -0.32
 6969  2018-01-23  GWA_XRP    -0.04
 
 [6970 rows x 3 columns],             Date     Code  delta_14
 0     2014-04-01  GWA_BTC       nan
 1     2014-04-02  GWA_BTC       nan
 2     2014-04-03  GWA_BTC       nan
 3     2014-04-04  GWA_BTC       nan
 4     2014-04-05  GWA_BTC       nan
 ...          ...      ...       ...
 6965  2018-01-19  GWA_XRP     -0.41
 6966  2018-01-20  GWA_XRP     -0.42
 6967  2018-01-21  GWA_XRP     -0.51
 6968  2018-01-22  GWA_XRP     -0.52
 6969  2018-01-23  GWA_XRP     -0.48
 
 [6970 rows x 3 columns],             Date     Code  delta_21
 0     2014-04-01  GWA_BTC       nan
 1     2014-04-02  GWA_BTC       nan
 2     2014-04-03  GWA_BTC       nan
 3     2014-04-04  GWA_BTC       nan
 4     2014-04-05  GWA_BTC       nan
 ...          ...      ...       ...
 6965  2018-01-19  GWA_XRP      0.02
 6966  2018-01-20  GWA_XRP     -0.26
 6967  2018-01-21  GWA_XRP     -0.28
 6968  2018-01-22  GWA_XRP     -0.35
 6969  2018-01-23  GWA_XRP     -0.39
 
 [6970 rows x 3 columns],             Date     Code  delta_28
 0     2014-04-01  GWA_BTC       nan
 1     2014-04-02  GWA_BTC       nan
 2     2014-04-03  GWA_BTC       nan
 3     2014-04-04  GWA_BTC       nan
 4     2014-04-05  GWA_BTC       nan
 ...          ...      ...       ...
 6965  2018-01-19  GWA_XRP      0.65
 6966  2018-01-20  GWA_XRP      0.53
 6967  2018-01-21  GWA_XRP      0.45
 6968  2018-01-22  GWA_XRP      0.30
 6969  2018-01-23  GWA_XRP      0.29
 
 [6970 rows x 3 columns]]

# Calculate 7-day returns after the date
return_df = pivoted_df.shift(-7) / pivoted_df - 1.0
 
# Melt the return dataset and append to list
melted_dfs.append( return_df.reset_index().melt(id_vars=['Date'], value_name='return_7') )

# Merge two dataframes
pd.merge(melted_dfs[0], melted_dfs[1], on=['Date', 'Code']).tail()

from functools import reduce

# Grab features from original dataset
base_df = df[['Date', 'Code', 'Volume', 'VWAP']]
 
# Create a list with all the feature dataframes
feature_dfs = [base_df] + melted_dfs

# Reduce-merge features into analytical base table
abt = reduce(lambda left,right: pd.merge(left,right,on=['Date', 'Code']), feature_dfs)
 
# Display examples from the ABT
abt.tail(10)

# Data from Sept 1st, 2017
abt[abt.Date == '2017-09-01']

max_momentum_id = abt[abt.Date == '2017-09-01'].delta_28.idxmax()
abt.loc[max_momentum_id, ['Code','return_7']]

Code        GWA_LTC
return_7        NaN
Name: 3543, dtype: object

# Create 'month' feature
abt['month'] = abt.Date.apply(lambda x: x[:7])
 
# Group by 'Code' and 'month' and keep first date
gb_df = abt.groupby(['Code', 'month']).first().reset_index()
 
# Display examples
gb_df.tail()

	Code	Date	Open	High	Low	Close	Volume	VWAP	TWAP
0	GWA_BTC	2014-04-01	467.28	488.62	467.28	479.56	74,776.48	482.76	482.82
1	GWA_BTC	2014-04-02	479.20	494.30	431.32	437.08	114,052.96	460.19	465.93
2	GWA_BTC	2014-04-03	437.33	449.74	414.41	445.60	91,415.08	432.29	433.28
3	GWA_BTC	2014-04-04	445.18	456.10	429.16	449.81	51,147.27	443.46	443.93
4	GWA_BTC	2014-04-05	450.08	464.09	445.16	461.70	28,449.19	452.53	452.95

	Code	Date	Open	High	Low	Close	Volume	VWAP	TWAP
1371	GWA_BTC	2018-01-01	14,505.89	14,505.89	13,617.46	14,092.74	225,906.21	14,103.18	14,093.73
9074	MWA_BTC_EUR	2018-01-01	11,859.35	11,859.35	11,111.07	11,403.92	14,933.73	11,488.45	11,478.08
11838	MWA_BTC_JPY	2018-01-01	1,674,341.45	1,678,567.55	1,572,173.90	1,632,657.51	68,611.95	1,632,994.40	1,631,407.66

Code	GWA_BTC	GWA_ETH	GWA_LTC	GWA_XLM	GWA_XRP
Date
2018-01-19	11,826.36	1,068.45	195.00	0.51	1.82
2018-01-20	13,062.68	1,158.71	207.58	0.52	1.75
2018-01-21	12,326.23	1,108.90	197.36	0.48	1.55
2018-01-22	11,397.52	1,038.21	184.92	0.47	1.43
2018-01-23	10,921.00	992.05	176.95	0.47	1.42

Code	GWA_BTC	GWA_ETH	GWA_LTC	GWA_XLM	GWA_XRP
Date
2018-01-19	-0.18	-0.17	-0.18	-0.21	-0.22
2018-01-20	-0.13	-0.19	-0.18	-0.23	-0.29
2018-01-21	-0.15	-0.20	-0.22	-0.22	-0.30
2018-01-22	-0.21	-0.24	-0.24	-0.25	-0.32
2018-01-23	-0.11	-0.12	-0.13	-0.02	-0.04

	Date	Code	delta_7
6965	2018-01-19	GWA_XRP	-0.22
6966	2018-01-20	GWA_XRP	-0.29
6967	2018-01-21	GWA_XRP	-0.30
6968	2018-01-22	GWA_XRP	-0.32
6969	2018-01-23	GWA_XRP	-0.04

	Date	Code	Volume	VWAP	delta_7	delta_14	delta_21	delta_28	return_7_x	return_7_y
6299	2018-01-14	GWA_XRP	912,107,674.18	2.20	-0.31	0.02	1.06	1.97	-0.30	-0.30
6300	2018-01-15	GWA_XRP	823,491,754.55	2.11	-0.29	-0.04	0.92	1.84	-0.32	-0.32
6301	2018-01-16	GWA_XRP	3,872,977,355.95	1.48	-0.46	-0.36	0.35	0.85	-0.04	-0.04
6302	2018-01-17	GWA_XRP	5,111,390,628.85	1.20	-0.47	-0.57	-0.06	0.58	nan	nan
6303	2018-01-18	GWA_XRP	5,156,172,462.44	1.68	-0.22	-0.51	0.23	0.59	nan	nan
6304	2018-01-19	GWA_XRP	2,126,239,927.56	1.82	-0.22	-0.41	0.02	0.65	nan	nan
6305	2018-01-20	GWA_XRP	1,346,913,296.52	1.75	-0.29	-0.42	-0.26	0.53	nan	nan
6306	2018-01-21	GWA_XRP	1,886,060,450.81	1.55	-0.30	-0.51	-0.28	0.45	nan	nan
6307	2018-01-22	GWA_XRP	1,784,992,299.63	1.43	-0.32	-0.52	-0.35	0.30	nan	nan
6308	2018-01-23	GWA_XRP	2,118,335,564.32	1.42	-0.04	-0.48	-0.39	0.29	nan	nan

	Date	Code	Volume	VWAP	delta_7	delta_14	delta_21	delta_28	return_7_x	return_7_y
1249	2017-09-01	GWA_BTC	275,034.79	4,798.06	0.10	0.12	0.35	0.69	-0.09	-0.09
2149	2017-09-01	GWA_ETH	2,076,778.42	387.55	0.17	0.28	0.29	0.72	-0.21	-0.21
3543	2017-09-01	GWA_LTC	18,553,463.67	78.76	0.55	0.71	0.68	0.82	-0.10	-0.10
4770	2017-09-01	GWA_XLM	372,143,342.95	0.02	0.19	0.34	0.05	0.10	-0.19	-0.19
6164	2017-09-01	GWA_XRP	1,138,500,431.07	0.25	0.15	0.57	0.39	0.44	-0.14	-0.14

	Code	month	Date	Volume	VWAP	delta_7	delta_14	delta_21	delta_28	return_7_x	return_7_y
204	GWA_XRP	2017-09	2017-09-01	1,138,500,431.07	0.25	0.15	0.57	0.39	0.44	-0.14	-0.14
205	GWA_XRP	2017-10	2017-10-01	242,650,661.68	0.20	0.12	0.12	-0.05	-0.13	0.31	0.31
206	GWA_XRP	2017-11	2017-11-01	344,634,241.50	0.20	-0.03	-0.11	-0.25	-0.05	0.07	0.07
207	GWA_XRP	2017-12	2017-12-01	705,807,839.14	0.25	0.03	0.09	0.17	0.19	-0.00	-0.00
208	GWA_XRP	2018-01	2018-01-01	813,772,647.20	2.20	1.01	1.97	7.96	7.77	0.35	0.35