K-Nearest Neighbour Algo – Find Closest Period in History

Happy New Year! This post is going to continue on from my last post on the K-nearest algo http://gekkoquant.com/2013/12/02/k-nearest-neighbour-algo-fail/.

In the last post I speculated that the poor performance of the algo was potentially down to trying to compare the current day and find the most similar days in history, rather we should try to take the last N days and find the most similar period in history.

The code below does exactly that use windowSize to control how big the periods are that we compare.

gspc knearest groupsize50 windowsize10 sharpe -0.537182

Sharpe ratio: -0.537182 gspc knearest groupsize50 windowsize30 sharpe -0.5918642

Sharpe ratio: -0.591864

The performance is still poor, perhaps the similarity measure i’m using is rubbish. Maybe using implied vol would be good for identifying market regimes and should be used in the similarity measure.

Unfortunately this algo is very very slow (and gets worse over time since we have more history to look back over), this makes it difficult / time consuming to optimise variables.

Onto the code:

?View Code RSPLUS
marketSymbol <- "^GSPC"
nFastLookback <- 30 #The fast signal lookback used in linear regression curve
nSlowLookback <- 50 #The slow signal lookback used in linear regression curve
nFastVolLookback <- 30 #The fast signal lookback used to calculate the stdev
nSlowVolLookback <- 50 #The slow signal lookback used calculate the stdev
nFastRSILookback <- 30 #The fast signal lookback used to calculate the stdev
nSlowRSILookback <- 50 #The slow signal lookback used calculate the stdev
kNearestGroupSize <- 30 #How many neighbours to use
normalisedStrengthVolWeight <- 2 #Make some signals more important than others in the MSE
normalisedStrengthRegressionWeight <- 1
fastRSICurveWeight <- 2
slowRSICurveWeight <- 0.8
windowSize <- 10 #Compare the last 10 days with the most similar 10 day period in history
#Specify dates for downloading data, training models and running simulation
startDate = as.Date("2006-08-01") #Specify what date to get the prices from
symbolData <- new.env() #Make a new environment for quantmod to store data in
stockCleanNameFunc <- function(name){
getSymbols(marketSymbol, env = symbolData, src = "yahoo", from = startDate)
cleanName <- stockCleanNameFunc(marketSymbol)
mktData <- get(cleanName,symbolData)
linearRegressionCurve <- function(data,n){
    regression <- function(dataBlock){
           fit <-lm(dataBlock~seq(1,length(dataBlock),1))
    return (rollapply(data,width=n,regression,align="right",by.column=FALSE,na.pad=TRUE))
volCurve <- function(data,n){
    stdev <- function(dataBlock){
    return (rollapply(data,width=n,stdev,align="right",by.column=FALSE,na.pad=TRUE))^2
fastRegression <- linearRegressionCurve(Cl(mktData),nFastLookback)
slowRegression <- linearRegressionCurve(Cl(mktData),nSlowLookback)
normalisedStrengthRegression <- slowRegression / (slowRegression+fastRegression)
fastVolCurve <- volCurve(Cl(mktData),nFastVolLookback)
slowVolCurve <- volCurve(Cl(mktData),nSlowVolLookback)
normalisedStrengthVol <- slowVolCurve / (slowVolCurve+fastVolCurve)
fastRSICurve <-RSI(Cl(mktData),nFastRSILookback)/100 #rescale it to be in the same range as the other indicators
slowRSICurve <-RSI(Cl(mktData),nSlowRSILookback)/100
      #Lets plot the signals just to see what they look like
#DataMeasure will be used to determine how similar other days are to today
#It is used later on for calculate the days which are most similar to today according to MSE measure
dataMeasure <- cbind(normalisedStrengthVol*normalisedStrengthVolWeight,normalisedStrengthRegression*normalisedStrengthRegression,fastRSICurve*fastRSICurveWeight,slowRSICurve*slowRSICurveWeight)
colnames(dataMeasure) <- c("normalisedStrengthVol","normalisedStrengthRegression","fastRSICurve","slowRSICurve")
#Finds the nearest neighbour and calculates the trade signal
calculateNearestNeighbourTradeSignal <- function(dataMeasure,K,mktReturns,windowSize){
        findKNearestNeighbours <- function(dataMeasure,K,windowSize){
             calculateMSE <- function(dataA,dataB){
             if(length(dataA) != length(dataB)){ return (Inf) }
                            se <- ((as.vector(as.matrix(dataA)) - as.vector(as.matrix(dataB)))^2)
                            res <- mean(se)
                              res <- Inf
                            return (res)
              mseScores <- as.data.frame(dataMeasure[,1])
              mseScores[,1] <- Inf #Default all the mse scores to inf (we've not calculated them yet)
             colnames(mseScores) <- c("MSE")
              indexOfTheMostRecentWindowSizeDays <- seq(max(1,length(dataMeasure[,1])-windowSize),length(dataMeasure[,1]))
              mostRecentWindowDataMeasure <- dataMeasure[indexOfTheMostRecentWindowSizeDays,]
              for(i in seq(1,length(dataMeasure[,1]))){
                 indexHistoricalWindowDataMeasure <- seq(max(1,i-windowSize),i)
                 historicalWindowDataMeasure <- dataMeasure[indexHistoricalWindowDataMeasure,]
                  mseScores[i,1] <- calculateMSE(mostRecentWindowDataMeasure,historicalWindowDataMeasure)
                # print(paste("MSE is",mseScores[i,1]))
             rowNum <- seq(1,length(dataMeasure[,1]),1)
             tmp <- c("MSE", colnames(dataMeasure))
             dataMeasureWithMse <- as.data.frame(cbind(mseScores[,1],dataMeasure))
             colnames(dataMeasureWithMse) <- tmp
             tmp <- c("rowNum", colnames(dataMeasureWithMse))
             dataMeasureWithMse <- cbind(rowNum,dataMeasureWithMse)
             colnames(dataMeasureWithMse) <- tmp
            dataMeasureWithMse <- dataMeasureWithMse[order(dataMeasureWithMse[,"MSE"]),]
             #Starting from the 2nd item as the 1st is the current day (MSE will be 0) want to drop it
             return (dataMeasureWithMse[seq(2,min(K,length(dataMeasureWithMse[,1]))),])
    calculateTradeSignalFromKNeighbours <- function(mktReturns,kNearestNeighbours){
         rowNums <- kNearestNeighbours[,"rowNum"]
         rowNums <- na.omit(rowNums)
         if(length(rowNums) <= 1) { return (0) }
         print("The kNearestNeighbours are:")
         #So lets see what happened on the day AFTER our nearest match
         mktRet <- mktReturns[rowNums+1]
        # return (sign(sum(mktRet)))
        return (SharpeRatio.annualized(mktRet))
    kNearestNeighbours <- findKNearestNeighbours(dataMeasure,K,windowSize)
    tradeSignal <- calculateTradeSignalFromKNeighbours(mktReturns,kNearestNeighbours)
ret <- (Cl(mktData)/Op(mktData))-1
signalLog <- as.data.frame(ret)
signalLog[,1] <- 0
colnames(signalLog) <- c("TradeSignal")
#Loop through all the days we have data for, and calculate a signal for them using nearest neighbour
for(i in seq(1,length(ret))){
    print (paste("Simulating trading for day",i,"out of",length(ret),"@",100*i/length(ret),"%"))
    index <- seq(1,i)
    signal <- calculateNearestNeighbourTradeSignal(dataMeasure[index,],kNearestGroupSize,ret,windowSize)
    signalLog[i,1] <- signal
tradeRet <- Lag(signalLog[,1])*ret[,1] #Combine todays signal with tomorrows return (no lookforward issues)
totalRet <- cbind(tradeRet,ret)
colnames(totalRet) <- c("Algo",paste(marketSymbol," Long OpCl Returns"))
charts.PerformanceSummary(totalRet,main=paste("K nearest trading algo for",marketSymbol,"kNearestGroupSize=",kNearestGroupSize,"windowSize=",windowSize),geometric=FALSE)