首页 > 解决方案 > 在约翰森测试中获得相同的临界值

问题描述

所以我正在尝试制作一个测试协整的算法:

import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as web
import datetime as dt
from datetime import date
import numpy as np
import os
from statsmodels.tsa.stattools import adfuller
import statsmodels.api as sm
import statsmodels.tsa.stattools as ts
from statsmodels.tsa.vector_ar.vecm import coint_johansen

# Shared price table: get_data() adds one column per ticker, ADF_Test() drops
# columns from it, and get_spread() reads pairs of columns out of it.
df = pd.DataFrame()
# Per-ticker frames appended by get_data().
data = []
# NOTE(review): csv_file is not referenced by any of the code shown here, and
# os.path.expanduser() only rewrites paths that start with "~", so it leaves
# this absolute path unchanged.
csv_file = pd.read_csv(os.path.expanduser("/Users/benitocano/Downloads/copyOfSandP500.csv"), delimiter = ',')
# Ticker list; get_data() reads its 'Symbol' column.
# NOTE(review): column names are supplied via names= and no header= is given,
# so if the CSV has its own header line it will presumably be read as the first
# data row -- confirm against the actual file.
tickers = pd.read_csv("/Users/benitocano/Downloads/copyOfSandP500.csv", delimiter=',', names = ['Symbol', 'Name', 'Sector'])
# Get the asset data
def get_data():
    """Download adjusted close prices for the first three tickers into ``df``.

    For each symbol in ``tickers['Symbol'][:3]`` the price history from
    2020-05-30 until now is requested from the 'yahoo' source of
    pandas_datareader.  The frame, reduced by the High/Low/Open/Close/Volume
    columns, is appended to the module-level ``data`` list and its
    'Adj Close' column is stored in the module-level ``df`` under the
    ticker's name.  Gaps are forward-filled once all columns are in place.

    Tickers whose download or column selection raises ``KeyError`` are
    skipped entirely.  Previously the code fell through after the exception
    and stored the prices of an earlier stock under the failed ticker's name.

    Returns:
        None.  ``df`` and ``data`` are modified in place.
    """
    start = dt.datetime(2020, 5, 30)
    end = dt.datetime.now()
    for ticker in tickers['Symbol'][:3]:
        try:
            prices = web.DataReader(ticker, 'yahoo', start, end)
            prices = prices.drop(['High', 'Low', 'Open', 'Close', 'Volume'], axis=1)
            adj_close = prices['Adj Close']
        except KeyError:
            # No usable data for this ticker: leave it out rather than
            # creating a column that holds some other stock's prices.
            continue
        data.append(prices)
        # Assign only the frame that was just downloaded; re-scanning all of
        # ``data`` for every ticker is unnecessary because only the last
        # assignment to df[ticker] would survive anyway.
        df[ticker] = adj_close
    # Column assignment aligns on the existing index, which can introduce
    # NaNs; a single forward-fill after the loop covers every column.
    df.ffill(inplace=True)

# Test whether each asset is non-stationary; drop the ones that are not.
def ADF_Test():
    """Remove from ``df`` every column that the ADF test flags as stationary.

    Each column of the module-level ``df`` is passed to
    ``ts.adfuller(prices, maxlag=1)``.  A column is dropped when both of
    these hold:

    * the p-value (result[1]) is not greater than 0.05, and
    * the test statistic (result[0]) is not greater than the last entry of
      the critical-values dict (result[4]) -- presumably the 10% level;
      verify against the statsmodels version in use.

    Returns:
        None.  ``df`` is modified in place.
    """
    stationary = []
    # Iterate over a snapshot and drop afterwards: removing columns from the
    # frame while iterating over it shifts the remaining column positions, so
    # labels and data can get out of step.  ``items()`` is used because
    # ``iteritems()`` no longer exists in pandas 2.x.
    for ticker, prices in list(df.items()):
        adf_result = ts.adfuller(prices, maxlag=1)
        test_statistic = adf_result[0]
        p_value = adf_result[1]
        # Same value the original obtained by looping to the last dict item.
        last_critical = list(adf_result[4].values())[-1]

        above_critical = last_critical < test_statistic
        p_insignificant = p_value > .05
        if not p_insignificant and not above_critical:
            stationary.append(ticker)

    if stationary:
        df.drop(stationary, axis=1, inplace=True)
            

def get_spread():
    """Run the Johansen cointegration test on every pair of columns in ``df``.

    For each unordered pair of columns in the module-level ``df`` a
    two-column frame is built and passed to ``coint_johansen(frame, 0, 1)``
    (det_order=0, k_ar_diff=1).  The first trace statistic
    (``result.lr1[0]``) and the trace critical-value table (``result.cvt``)
    are printed.

    Each pair is tested exactly once.  The previous index arithmetic always
    restarted the inner loop at column 1, so with three columns the pair
    (1, 2) was tested again as (2, 1) and printed a duplicate result.

    Returns:
        None.  Results are printed only.
    """
    columns = list(df.columns)
    for position, ticker_2 in enumerate(columns):
        # Pair each column only with the columns after it, so that every
        # unordered pair appears once and a column is never paired with itself.
        for ticker_1 in columns[position + 1:]:
            ticker_frame = pd.DataFrame({
                ticker_1: df[ticker_1],
                ticker_2: df[ticker_2],
            })
            result = coint_johansen(ticker_frame, 0, 1)
            trace_statistic = result.lr1[0]
            # NOTE: the critical values are tabulated constants selected by
            # the number of series and the deterministic-term order, not
            # estimated from the data, so this table is expected to be the
            # same for every pair.  Only the trace statistic varies.
            critical_value = result.cvt
            print("trace statistic: {}".format(trace_statistic))
            print("Critcal values: {}".format(critical_value))


# Order matters: get_data() fills the shared df, ADF_Test() then drops columns
# from it, and get_spread() runs the pairwise test on whatever columns remain.
get_data()
ADF_Test()
get_spread()

该算法所做的是:先从 csv 文件中收集数据,并将所有数据放入一个数据帧中;然后判断哪些公司的价格序列是平稳的,哪些不是;最后运行一个 for 循环,生成所有可能的配对,并将每一对放入单独的数据帧中,再对这些数据帧运行 Johansen 检验。问题是,当我运行代码并打印结果时,所有的临界值都是相同的。输出是:

trace statistic: 9.736276687870111
Critcal values: [[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
trace statistic: 14.374497415885136
Critcal values: [[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
trace statistic: 8.388038242212934
Critcal values: [[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
trace statistic: 8.388038242212934
Critcal values: [[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]

鉴于这在统计上几乎是不可能的,为什么所有的临界值都一样?此外,为简单起见,我只取了前 3 家公司来运行这段代码,但无论取多少家公司,我仍然会遇到同样的问题。谢谢!

可以在以下位置找到 csv 文件:https://datahub.io/core/s-and-p-500-companies

标签: python pandas dataframe statistical-test

解决方案


推荐阅读