K-Nearest Neighbour Algo – Find Closest Period in History

Happy New Year! This post is going to continue on from my last post on the K-nearest algo http://gekkoquant.com/2013/12/02/k-nearest-neighbour-algo-fail/.

In the last post I speculated that the poor performance of the algo was potentially down to trying to compare the current day and find the most similar days in history, rather we should try to take the last N days and find the most similar period in history.

The code below does exactly that use windowSize to control how big the periods are that we compare.

gspc knearest groupsize50 windowsize10 sharpe -0.537182

Sharpe ratio: -0.537182 gspc knearest groupsize50 windowsize30 sharpe -0.5918642

Sharpe ratio: -0.591864

The performance is still poor, perhaps the similarity measure i’m using is rubbish. Maybe using implied vol would be good for identifying market regimes and should be used in the similarity measure.

Unfortunately this algo is very very slow (and gets worse over time since we have more history to look back over), this makes it difficult / time consuming to optimise variables.

Onto the code:

?View Code RSPLUS
library("quantmod")
library("PerformanceAnalytics")
library("zoo")
 
#INPUTS
marketSymbol <- "^GSPC"
 
nFastLookback <- 30 #The fast signal lookback used in linear regression curve
nSlowLookback <- 50 #The slow signal lookback used in linear regression curve
 
nFastVolLookback <- 30 #The fast signal lookback used to calculate the stdev
nSlowVolLookback <- 50 #The slow signal lookback used calculate the stdev
 
nFastRSILookback <- 30 #The fast signal lookback used to calculate the stdev
nSlowRSILookback <- 50 #The slow signal lookback used calculate the stdev
 
kNearestGroupSize <- 30 #How many neighbours to use
normalisedStrengthVolWeight <- 2 #Make some signals more important than others in the MSE
normalisedStrengthRegressionWeight <- 1
fastRSICurveWeight <- 2
slowRSICurveWeight <- 0.8
 
 
windowSize <- 10 #Compare the last 10 days with the most similar 10 day period in history
 
 
#Specify dates for downloading data, training models and running simulation
startDate = as.Date("2006-08-01") #Specify what date to get the prices from
symbolData <- new.env() #Make a new environment for quantmod to store data in
 
stockCleanNameFunc <- function(name){
     return(sub("^","",name,fixed=TRUE))
}
 
getSymbols(marketSymbol, env = symbolData, src = "yahoo", from = startDate)
cleanName <- stockCleanNameFunc(marketSymbol)
mktData <- get(cleanName,symbolData)
 
linearRegressionCurve <- function(data,n){
    regression <- function(dataBlock){
           fit <-lm(dataBlock~seq(1,length(dataBlock),1))
           return(last(fit$fitted.values))
    }
    return (rollapply(data,width=n,regression,align="right",by.column=FALSE,na.pad=TRUE))
}
 
volCurve <- function(data,n){
    stdev <- function(dataBlock){
           sd(dataBlock)
    }
    return (rollapply(data,width=n,stdev,align="right",by.column=FALSE,na.pad=TRUE))^2
}
 
fastRegression <- linearRegressionCurve(Cl(mktData),nFastLookback)
slowRegression <- linearRegressionCurve(Cl(mktData),nSlowLookback)
normalisedStrengthRegression <- slowRegression / (slowRegression+fastRegression)
 
fastVolCurve <- volCurve(Cl(mktData),nFastVolLookback)
slowVolCurve <- volCurve(Cl(mktData),nSlowVolLookback)
normalisedStrengthVol <- slowVolCurve / (slowVolCurve+fastVolCurve)
 
fastRSICurve <-RSI(Cl(mktData),nFastRSILookback)/100 #rescale it to be in the same range as the other indicators
slowRSICurve <-RSI(Cl(mktData),nSlowRSILookback)/100
 
      #Lets plot the signals just to see what they look like
      dev.new()
      par(mfrow=c(2,2))
      plot(normalisedStrengthVol,type="l")
      plot(normalisedStrengthRegression,type="l")
      plot(fastRSICurve,type="l")
      plot(slowRSICurve,type="l")
 
 
 
#DataMeasure will be used to determine how similar other days are to today
#It is used later on for calculate the days which are most similar to today according to MSE measure
dataMeasure <- cbind(normalisedStrengthVol*normalisedStrengthVolWeight,normalisedStrengthRegression*normalisedStrengthRegression,fastRSICurve*fastRSICurveWeight,slowRSICurve*slowRSICurveWeight)
colnames(dataMeasure) <- c("normalisedStrengthVol","normalisedStrengthRegression","fastRSICurve","slowRSICurve")
 
 
 
#Finds the nearest neighbour and calculates the trade signal
calculateNearestNeighbourTradeSignal <- function(dataMeasure,K,mktReturns,windowSize){
        findKNearestNeighbours <- function(dataMeasure,K,windowSize){
             calculateMSE <- function(dataA,dataB){
 
             if(length(dataA) != length(dataB)){ return (Inf) }
                            se <- ((as.vector(as.matrix(dataA)) - as.vector(as.matrix(dataB)))^2)
                            res <- mean(se)
                            if(is.na(res)){
                              res <- Inf
                            }
                            return (res)
             }
 
              mseScores <- as.data.frame(dataMeasure[,1])
              mseScores[,1] <- Inf #Default all the mse scores to inf (we've not calculated them yet)
             colnames(mseScores) <- c("MSE")
              indexOfTheMostRecentWindowSizeDays <- seq(max(1,length(dataMeasure[,1])-windowSize),length(dataMeasure[,1]))
              mostRecentWindowDataMeasure <- dataMeasure[indexOfTheMostRecentWindowSizeDays,]
 
              for(i in seq(1,length(dataMeasure[,1]))){
                 indexHistoricalWindowDataMeasure <- seq(max(1,i-windowSize),i)
                 historicalWindowDataMeasure <- dataMeasure[indexHistoricalWindowDataMeasure,]
                  mseScores[i,1] <- calculateMSE(mostRecentWindowDataMeasure,historicalWindowDataMeasure)
                # print(paste("MSE is",mseScores[i,1]))
              }
 
             rowNum <- seq(1,length(dataMeasure[,1]),1)
             tmp <- c("MSE", colnames(dataMeasure))
             dataMeasureWithMse <- as.data.frame(cbind(mseScores[,1],dataMeasure))
             colnames(dataMeasureWithMse) <- tmp
            #print(head(mseScores))
             #print(head(dataMeasureWithMse))
             tmp <- c("rowNum", colnames(dataMeasureWithMse))
             dataMeasureWithMse <- cbind(rowNum,dataMeasureWithMse)
             colnames(dataMeasureWithMse) <- tmp
            dataMeasureWithMse <- dataMeasureWithMse[order(dataMeasureWithMse[,"MSE"]),]
             #Starting from the 2nd item as the 1st is the current day (MSE will be 0) want to drop it
             return (dataMeasureWithMse[seq(2,min(K,length(dataMeasureWithMse[,1]))),])
 
        }
 
    calculateTradeSignalFromKNeighbours <- function(mktReturns,kNearestNeighbours){
         rowNums <- kNearestNeighbours[,"rowNum"]
         rowNums <- na.omit(rowNums)
         if(length(rowNums) <= 1) { return (0) }
         print("The kNearestNeighbours are:")
         print(rowNums)
 
         #So lets see what happened on the day AFTER our nearest match
         mktRet <- mktReturns[rowNums+1]
 
        # return (sign(sum(mktRet)))
        return (SharpeRatio.annualized(mktRet))
    }
 
    kNearestNeighbours <- findKNearestNeighbours(dataMeasure,K,windowSize)
    tradeSignal <- calculateTradeSignalFromKNeighbours(mktReturns,kNearestNeighbours)
    return(tradeSignal)
 
}
 
ret <- (Cl(mktData)/Op(mktData))-1
signalLog <- as.data.frame(ret)
signalLog[,1] <- 0
colnames(signalLog) <- c("TradeSignal")
 
#Loop through all the days we have data for, and calculate a signal for them using nearest neighbour
for(i in seq(1,length(ret))){
    print (paste("Simulating trading for day",i,"out of",length(ret),"@",100*i/length(ret),"%"))
    index <- seq(1,i)
    signal <- calculateNearestNeighbourTradeSignal(dataMeasure[index,],kNearestGroupSize,ret,windowSize)
    signalLog[i,1] <- signal
}
 
dev.new()
tradeRet <- Lag(signalLog[,1])*ret[,1] #Combine todays signal with tomorrows return (no lookforward issues)
totalRet <- cbind(tradeRet,ret)
colnames(totalRet) <- c("Algo",paste(marketSymbol," Long OpCl Returns"))
charts.PerformanceSummary(totalRet,main=paste("K nearest trading algo for",marketSymbol,"kNearestGroupSize=",kNearestGroupSize,"windowSize=",windowSize),geometric=FALSE)
print(SharpeRatio.annualized(tradeRet))
Share on FacebookShare on TwitterSubmit to StumbleUponhttp://gekkoquant.com/wp-content/uploads/2014/01/gspc-knearest-groupsize50-windowsize10-sharpe-0.537182-1024x521.pngDigg ThisSubmit to redditShare via email

K-Nearest Neighbour Algo : Fail

In this post I’m going to look at an implementation of the k-nearest neighbour algorithm.

The algorithm is very simple and can be split into 3 components:

1. A Data Measure – What features / observation describe the current trading day? (vol, rsi, moving avg etc…, don’t forget to normalise your measurements) (variable dataMeasure)

2. Error Measure – How to measure the similarity between data measures (just use MSE) this identifies the K-most similar trading days to today (function calculateMSE)

3. Convert k-nearest neighbors to trading signal (function calculateTradeSignalFromKNeighbours)

In the data measure we look to come up with some quantitative measures that capture information about the current trading today. In the example presented below I’ve used a normalised volatility measure (vol(fast)/(vol(fast)+vol(slow)) where fast and slow indicate the window size, slower = longer window. The same procedure but for linear regression curves is used, additionally i’ve included a fast / slow rsi. We take this measure and compare it to the measures on all of the previous trading days, trying to identify the most similar K days in history.

You should look to normalise your signals in some fashion. The reason you need to do this is so that during the MSE calculation you haven’t unexpectedly put a large weight on one of your measurement variables.

Now that you’ve found a set of trading days that are most similar to your current trading day you still have to determine how to convert those days into a trading signal. In the code I take the Knearest neighbours and look at what occurred on the day after after them. I take the open close return and calculate the sharpe ratio of the K neighbors and use this as the number of contracts to buy the following day. If the K neighbors are unrelated their trading will be erratic and the sharpe ratio close to 0, hence we will only trade a small number of contracts.

This algo is potentially interesting when using vol as one of the data measures, it naturally captures the different regimes in the market. If today is a high vol day, it’ll be compared to the historical days that also have high vol. It is hoped that todays market still behaves in the same fashion as in a historically similar day.

gspc trading knearest neighbors

ftse trading knearest neighbors

 

arm trading knearest neighbors

Sadly the performance of this strategy is terrible (could just be poor input parameter selection / poor data measures). I suspect that there are better forms of K-nearest neighbour to use. I take today, and compare it to single days in history. There could be significant gains to be had if I take 1 month of data and find the historical most similar month. This will identify patterns of similar behavior which may be more tradeable. I will investigate this in my next post.

On to the code:

?View Code RSPLUS
library("quantmod")
library("PerformanceAnalytics")
library("zoo")
 
#INPUTS
marketSymbol <- "ARM.L"
 
nFastLookback <- 30 #The fast signal lookback used in linear regression curve
nSlowLookback <- 50 #The slow signal lookback used in linear regression curve
 
nFastVolLookback <- 30 #The fast signal lookback used to calculate the stdev
nSlowVolLookback <- 50 #The slow signal lookback used calculate the stdev
 
nFastRSILookback <- 30 #The fast signal lookback used to calculate the stdev
nSlowRSILookback <- 50 #The slow signal lookback used calculate the stdev
 
kNearestGroupSize <- 50 #How many neighbours to use
normalisedStrengthVolWeight <- 2 #Make some signals more important than others in the MSE
normalisedStrengthRegressionWeight <- 1
fastRSICurveWeight <- 2
slowRSICurveWeight <- 0.8
 
 
#Specify dates for downloading data, training models and running simulation
startDate = as.Date("2006-08-01") #Specify what date to get the prices from
symbolData <- new.env() #Make a new environment for quantmod to store data in
 
stockCleanNameFunc <- function(name){
     return(sub("^","",name,fixed=TRUE))
}
 
getSymbols(marketSymbol, env = symbolData, src = "yahoo", from = startDate)
cleanName <- stockCleanNameFunc(marketSymbol)
mktData <- get(cleanName,symbolData)
 
linearRegressionCurve <- function(data,n){
    regression <- function(dataBlock){
           fit <-lm(dataBlock~seq(1,length(dataBlock),1))
           return(last(fit$fitted.values))
    }
    return (rollapply(data,width=n,regression,align="right",by.column=FALSE,na.pad=TRUE))
}
 
volCurve <- function(data,n){
    stdev <- function(dataBlock){
           sd(dataBlock)
    }
    return (rollapply(data,width=n,stdev,align="right",by.column=FALSE,na.pad=TRUE))^2
}
 
fastRegression <- linearRegressionCurve(Cl(mktData),nFastLookback)
slowRegression <- linearRegressionCurve(Cl(mktData),nSlowLookback)
normalisedStrengthRegression <- slowRegression / (slowRegression+fastRegression)
 
fastVolCurve <- volCurve(Cl(mktData),nFastVolLookback)
slowVolCurve <- volCurve(Cl(mktData),nSlowVolLookback)
normalisedStrengthVol <- slowVolCurve / (slowVolCurve+fastVolCurve)
 
fastRSICurve <-RSI(Cl(mktData),nFastRSILookback)/100 #rescale it to be in the same range as the other indicators
slowRSICurve <-RSI(Cl(mktData),nSlowRSILookback)/100
 
      #Lets plot the signals just to see what they look like
      dev.new()
      par(mfrow=c(2,2))
      plot(normalisedStrengthVol,type="l")
      plot(normalisedStrengthRegression,type="l")
      plot(fastRSICurve,type="l")
      plot(slowRSICurve,type="l")
 
 
 
#DataMeasure will be used to determine how similar other days are to today
#It is used later on for calculate the days which are most similar to today according to MSE measure
dataMeasure <- cbind(normalisedStrengthVol*normalisedStrengthVolWeight,normalisedStrengthRegression*normalisedStrengthRegression,fastRSICurve*fastRSICurveWeight,slowRSICurve*slowRSICurveWeight)
colnames(dataMeasure) <- c("normalisedStrengthVol","normalisedStrengthRegression","fastRSICurve","slowRSICurve")
 
#Finds the nearest neighbour and calculates the trade signal
calculateNearestNeighbourTradeSignal <- function(dataMeasure,K,mktReturns){
        findKNearestNeighbours <- function(dataMeasure,K){
              calculateMSE <- function(dataMeasure){
                      calculateMSEInner <- function(dataA,dataB){
                        se <- ((as.matrix(dataA) - as.matrix(dataB))^2)
                        apply(se,1,mean)
                      }
 
                      #Repeat the last row of dataMeasure multiple times
                      #This is so we can compare dataMeasure[today] with all the previous dates
                      lastMat <- last(dataMeasure)
                      setA <-  lastMat[rep(1, length(dataMeasure[,1])),]
                      setB <- dataMeasure
 
                      mse <- calculateMSEInner(setB,setA)
                      mse[is.na(mse)] <- Inf #Give it a terrible MSE if it's NA
                      colName <-  c(colnames(dataMeasure),"MSE")
                      dataMeasure <- cbind(dataMeasure,mse)
                      colnames(dataMeasure) <- colName
                      return (dataMeasure)
              }
 
             rowNum <- seq(1,length(dataMeasure[,1]),1)
             dataMeasureWithMse <- as.data.frame(calculateMSE(dataMeasure))
             tmp <- c("rowNum", colnames(dataMeasureWithMse))
             dataMeasureWithMse <- cbind(rowNum,dataMeasureWithMse)
             colnames(dataMeasureWithMse) <- tmp
             dataMeasureWithMse <- dataMeasureWithMse[order(dataMeasureWithMse[,"MSE"]),]
             #Starting from the 2nd item as the 1st is the current day (MSE will be 0) want to drop it
             return (dataMeasureWithMse[seq(2,min(K,length(dataMeasureWithMse[,1]))),])
    }
 
    calculateTradeSignalFromKNeighbours <- function(mktReturns,kNearestNeighbours){
         rowNums <- kNearestNeighbours[,"rowNum"]
         rowNums <- na.omit(rowNums)
         if(length(rowNums) <= 1) { return (0) }
         print("The kNearestNeighbours are:")
         print(rowNums)
 
         #So lets see what happened on the day AFTER our nearest match
         mktRet <- mktReturns[rowNums+1]
 
         #return (sign(sum(mktRet)))
         return (SharpeRatio.annualized(mktRet))
    }
 
    kNearestNeighbours <- findKNearestNeighbours(dataMeasure,K)
    tradeSignal <- calculateTradeSignalFromKNeighbours(mktReturns,kNearestNeighbours)
    return(tradeSignal)
 
}
 
ret <- (Cl(mktData)/Op(mktData))-1
signalLog <- as.data.frame(ret)
signalLog[,1] <- 0
colnames(signalLog) <- c("TradeSignal")
 
#Loop through all the days we have data for, and calculate a signal for them using nearest neighbour
for(i in seq(1,length(ret))){
    print (paste("Simulating trading for day",i,"out of",length(ret),"@",100*i/length(ret),"%"))
    index <- seq(1,i)
    signal <- calculateNearestNeighbourTradeSignal(dataMeasure[index,],kNearestGroupSize,ret)
    signalLog[i,1] <- signal
}
 
dev.new()
tradeRet <- Lag(signalLog[,1])*ret[,1] #Combine todays signal with tomorrows return (no lookforward issues)
totalRet <- cbind(tradeRet,ret)
colnames(totalRet) <- c("Algo",paste(marketSymbol," Long OpCl Returns"))
charts.PerformanceSummary(totalRet,main=paste("K nearest trading algo for",marketSymbol),geometric=FALSE)
print(SharpeRatio.annualized(tradeRet))
Share on FacebookShare on TwitterSubmit to StumbleUponhttp://gekkoquant.com/wp-content/uploads/2013/12/gspc-trading-knearest-neighbors-1024x521.jpegDigg ThisSubmit to redditShare via email

Linear Regression Curves vs Bollinger Bands

In my last post I showed what a linear regression curve was, this post will use it as part of a mean reverting trading strategy.

The strategy is simple:

  • Calculate a rolling ‘average’ and a rolling ‘deviation’
  • If the Close price is greater than the average+n*deviation go short (and close when you cross the mean)
  • If the Close price is less than the average-n*deviation go long (and close when you cross the mean)

Two cases will be analysed, one strategy will use a simple moving average(SMA), the other will use the linear regression curve(LRC) for the average. The deviation function will be Standard Devation, Average True Range, and LRCDeviation (same as standard deviation but replace the mean with the LRC).

Results (Lookback = 20 and Deviation Multiplier = 2:

mean reversion linear regression curves

Annualized Sharpe Ratio (Rf=0%)

  • GSPC = 0.05257118
  • Simple Moving Avg – Standard Deviation = 0.2535342
  • Simple Moving Avg – Average True Range = 0.1165512
  • Simple Moving Avg – LRC Deviation 0.296234
  • Linear Regression Curve – Standard Deviation = 0.2818447
  • Linear Regression Curve – Average True Range = 0.5824727
  • Linear Regression Curve – LRC Deviation = 0.04672071

Optimisation analysis:

Annoyingly the colour scale is different between the two charts, however the sharpe ratio is written in each cell. Lighter colours indicate better performance.

Over a 13year period and trading the GSPC the LRC achieved a sharpe of ~0.6 where as the SMA achieved a sharpe of ~0.3. The LRC appears superior to the SMA.

Mean Reversion LRC STDEV Mean Reversion SMA STDEVI will update this post at a later point in time when my optimisation has finished running for the other strategies.

?View Code RSPLUS
library("quantmod")
library("PerformanceAnalytics")
library("zoo")
library("gplots")
 
#INPUTS
marketSymbol <- "^GSPC"
 
nLookback <- 20 #The lookback to calcute the moving average / linear regression curve / average true range / standard deviation
nDeviation <- 2
 
#Specify dates for downloading data, training models and running simulation
startDate = as.Date("2000-01-01") #Specify what date to get the prices from
symbolData <- new.env() #Make a new environment for quantmod to store data in
 
stockCleanNameFunc <- function(name){
     return(sub("^","",name,fixed=TRUE))
}
 
getSymbols(marketSymbol, env = symbolData, src = "yahoo", from = startDate)
cleanName <- stockCleanNameFunc(marketSymbol)
mktData <- get(cleanName,symbolData)
 
linearRegressionCurve <- function(data,n){
 
    regression <- function(dataBlock){
           fit <-lm(dataBlock~seq(1,length(dataBlock),1))
           return(last(fit$fitted.values))
    }
    return (rollapply(data,width=n,regression,align="right",by.column=FALSE,na.pad=TRUE))
}
 
linearRegressionCurveStandardDeviation <- function(data,n){
 
    deviation <- function(dataBlock){
        fit <-lm(dataBlock~seq(1,length(dataBlock),1))
        quasiMean <- (last(fit$fitted.values))
        quasiMean <- rep(quasiMean,length(dataBlock))
        stDev <- sqrt((1/length(dataBlock))* sum((dataBlock - quasiMean)^2))
        return (stDev)
    }
    return (rollapply(data,width=n,deviation,align="right",by.column=FALSE,na.pad=TRUE))
}
 
reduceLongTradeEntriesToTradOpenOrClosedSignal <- function(trades){
    #Takes something like
    #000011110000-1-1000011 (1 = go long, -1 = go short)
    #and turns it into
    #00001111111100000011
 
    #trades[is.na(trades)] <- 0
    out <- trades #copy the datastructure over
    currentPos <-0
    for(i in 1:length(out[,1])){
      if((currentPos == 0) & (trades[i,1]==1)){
        currentPos <- 1
        out[i,1] <- currentPos
        next
      }
      if((currentPos == 1) & (trades[i,1]==-1)){
        currentPos <- 0
        out[i,1] <- currentPos
        next
      }
      out[i,1] <- currentPos
    }
 
    return(out)
}
 
reduceShortTradeEntriesToTradOpenOrClosedSignal <- function(trades){
    return(-1*reduceLongTradeEntriesToTradOpenOrClosedSignal(-1*trades))
}
 
generateTradingReturns <- function(mktPrices, nLookback, nDeviation, avgFunction, deviationFunction,title,showGraph=TRUE){
    quasiMean <- avgFunction(mktPrices,n=nLookback)
    quasiDeviation <- deviationFunction(mktPrices,n=nLookback)
    colnames(quasiMean) <- "QuasiMean"
    colnames(quasiDeviation) <- "QuasiDeviation"
    price <- Cl(mktPrices)
 
    upperThreshold = quasiMean + nDeviation*quasiDeviation
    lowerThreshold = quasiMean - nDeviation*quasiDeviation
 
    aboveUpperBand <- price>upperThreshold
    belowLowerBand <- price<lowerThreshold
 
    aboveMAvg <- price>quasiMean
    belowMAvg <- price<quasiMean
 
    aboveUpperBand[is.na(aboveUpperBand)]<-0
    belowLowerBand[is.na(belowLowerBand)]<-0
    aboveMAvg[is.na(aboveMAvg)]<-0
    belowMAvg[is.na(belowMAvg)]<-0
 
 
    rawShort <- (-1)*aboveUpperBand+belowMAvg
    shortPositions <- reduceShortTradeEntriesToTradOpenOrClosedSignal(rawShort)
    rawLong <- (-1)*aboveMAvg+belowLowerBand
    longPositions <- reduceLongTradeEntriesToTradOpenOrClosedSignal(rawLong)
    positions = longPositions + shortPositions
 
    signal <- positions
 
   if(showGraph){
      dev.new()
      par(mfrow=c(2,1))
      plot(Cl(mktPrices),type="l",main=paste(marketSymbol, "close prices"))
      lines(upperThreshold,col="red",type="l")
      lines(lowerThreshold,col="red",type="l")
      lines(quasiMean,col="blue",type="l")
      legend('bottomright',c("Close",paste("Band - ",title),paste("Average - ",title)),lty=1, col=c('black', 'red', 'blue'), bty='n', cex=.75)
      plot(signal)
    }
 
    mktReturns <- Cl(mktPrices)/Lag(Cl(mktPrices)) - 1
    tradingReturns <- Lag(signal)*mktReturns
    tradingReturns[is.na(tradingReturns)] <- 0
    colnames(tradingReturns) <- title
    return (tradingReturns)
}
 
strategySMAandSTDEV <- function(mktData,nLookback,nDeviation){
       generateTradingReturns(mktData,nLookback,nDeviation,function(x,n) { SMA(Cl(x),n) },function(x,n) { rollapply(Cl(x),width=n, align="right",sd) },"Simple Moving Avg - Standard Deviation",FALSE)
}
 
strategySMAandATR <- function(mktData,nLookback,nDeviation){
       generateTradingReturns(mktData,nLookback,nDeviation,function(x,n) { SMA(Cl(x),n) },function(x,n) { atr <- ATR(x,n); return(atr$atr) },"Simple Moving Avg - Average True Range",FALSE)
}
 
strategySMAandLRCDev <- function(mktData,nLookback,nDeviation){
        generateTradingReturns(mktData,nLookback,nDeviation,function(x,n) { SMA(Cl(x),n) },function(x,n) { linearRegressionCurveStandardDeviation(Cl(x),n) },"Simple Moving Avg - LRC Deviation",FALSE)
}
 
strategyLRCandSTDEV <- function(mktData,nLookback,nDeviation){
       generateTradingReturns(mktData,nLookback,nDeviation,function(x,n) { linearRegressionCurve(Cl(x),n) },function(x,n) { rollapply(Cl(x),width=n, align="right",sd) },"Linear Regression Curve - Standard Deviation",FALSE)
}
 
strategyLRCandATR <- function(mktData,nLookback,nDeviation){
       generateTradingReturns(mktData,nLookback,nDeviation,function(x,n) { linearRegressionCurve(Cl(x),n) },function(x,n) { atr <- ATR(x,n); return(atr$atr) },"Linear Regression Curve - Average True Range",FALSE)
}
 
strategyLRCandLRCDev <- function(mktData,nLookback,nDeviation){
       generateTradingReturns(mktData,nLookback,nDeviation,function(x,n) { linearRegressionCurve(Cl(x),n) },function(x,n) { linearRegressionCurveStandardDeviation(Cl(x),n) },"Linear Regression Curve - LRC Deviation",FALSE)
}
 
if(TRUE){
bollingerBandsSMAandSTDEVTradingReturns <- strategySMAandSTDEV(mktData,nLookback,nDeviation)
bollingerBandsSMAandATRTradingReturns <- strategySMAandATR(mktData,nLookback,nDeviation)
bollingerBandsSMAandLRCDevTradingReturns <- strategySMAandLRCDev(mktData,nLookback,nDeviation)
 
bollingerBandsLRCandSTDEVTradingReturns <- strategyLRCandSTDEV(mktData,nLookback,nDeviation)
bollingerBandsLRCandATRTradingReturns <- strategyLRCandATR(mktData,nLookback,nDeviation)
bollingerBandsLRCandLRCDevTradingReturns <- strategyLRCandLRCDev(mktData,nLookback,nDeviation)
 
 
mktClClRet <- Cl(mktData)/Lag(Cl(mktData))-1
tradingReturns <- merge(as.zoo(mktClClRet),
                  as.zoo(bollingerBandsSMAandSTDEVTradingReturns),
                  as.zoo(bollingerBandsSMAandATRTradingReturns),
                  as.zoo(bollingerBandsSMAandLRCDevTradingReturns),
                  as.zoo(bollingerBandsLRCandSTDEVTradingReturns),
                  as.zoo(bollingerBandsLRCandATRTradingReturns),
                  as.zoo(bollingerBandsLRCandLRCDevTradingReturns))
 
dev.new()
charts.PerformanceSummary(tradingReturns,main=paste("Mean Reversion using nLookback",nLookback,"and nDeviation",nDeviation,"bands"),geometric=FALSE)
print(table.Stats(tradingReturns))
cat("Sharpe Ratio")
print(SharpeRatio.annualized(tradingReturns))
 }
 
 
colorFunc <- function(x){
  x <- max(-4,min(4,x))
  if(x > 0){
  colorFunc <- rgb(0,(255*x/4)/255 , 0/255, 1)
  } else {
  colorFunc <- rgb((255*(-1*x)/4)/255,0 , 0/255, 1)
  }
}
 
optimiseTradingStrat <- function(mktData,lookbackStart,lookbackEnd,lookbackStep,deviationStart,deviationEnd,deviationStep,strategy,title){
      lookbackRange <- seq(lookbackStart,lookbackEnd,lookbackStep)
      deviationRange <- seq(deviationStart,deviationEnd,deviationStep)
      combinations <- length(lookbackRange)*length(deviationRange)
      combLookback <- rep(lookbackRange,each=combinations/length(lookbackRange))
      combDeviation <- rep(deviationRange,combinations/length(deviationRange))
 
      optimisationMatrix <- t(rbind(t(combLookback),t(combDeviation),rep(NA,combinations),rep(NA,combinations),rep(NA,combinations)))
      colnames(optimisationMatrix) <- c("Lookback","Deviation","SharpeRatio","CumulativeReturns","MaxDrawDown")
 
        for(i in 1:length(optimisationMatrix[,1])){
            print(paste("On run",i,"out of",length(optimisationMatrix[,1]),"nLookback=",optimisationMatrix[i,"Lookback"],"nDeviation=",optimisationMatrix[i,"Deviation"]))
            runReturns <- strategy(mktData,optimisationMatrix[i,"Lookback"],optimisationMatrix[i,"Deviation"])
            optimisationMatrix[i,"SharpeRatio"] <- SharpeRatio.annualized(runReturns)
            optimisationMatrix[i,"CumulativeReturns"] <- sum(runReturns)
            optimisationMatrix[i,"MaxDrawDown"] <-  maxDrawdown(runReturns,geometric=FALSE)
            print(optimisationMatrix)
          }
          print(optimisationMatrix)
 
 
 
          dev.new()
          z <- matrix(optimisationMatrix[,"SharpeRatio"],nrow=length(lookbackRange),ncol=length(deviationRange),byrow=TRUE)
          colors <- colorFunc(optimisationMatrix[,"SharpeRatio"])
 
          rownames(z) <- lookbackRange
          colnames(z) <-deviationRange
          heatmap.2(z, key=TRUE,trace="none",cellnote=round(z,digits=2),Rowv=NA, Colv=NA, scale="column", margins=c(5,10),xlab="Deviation",ylab="Lookback",main=paste("Sharpe Ratio for Strategy",title))
 
}
 
if(FALSE){
  dev.new()
  plot(Cl(mktData),type="l",main=paste(marketSymbol, "close prices"))
  lines(SMA(Cl(mktData),n=50),col="red",type="l")
  lines(linearRegressionCurve(Cl(mktData),n=50),col="blue",type="l")
  legend('bottomright',c("Close",paste("Simple Moving Average Lookback=50"),paste("Linear Regression Curve Lookback=50")),lty=1, col=c('black', 'red', 'blue'), bty='n', cex=.75)
}
 
nLookbackStart <- 20
nLookbackEnd <- 200
nLookbackStep <- 20
nDeviationStart <- 1
nDeviationEnd <- 2.5
nDeviationStep <- 0.1
#optimiseTradingStrat(mktData,nLookbackStart,nLookbackEnd,nLookbackStep,nDeviationStart,nDeviationEnd,nDeviationStep,strategySMAandSTDEV,"AvgFunc=SMA and DeviationFunc=STDEV")
#optimiseTradingStrat(mktData,nLookbackStart,nLookbackEnd,nLookbackStep,nDeviationStart,nDeviationEnd,nDeviationStep,strategySMAandATR,"AvgFunc=SMA and DeviationFunc=ATR")
#optimiseTradingStrat(mktData,nLookbackStart,nLookbackEnd,nLookbackStep,nDeviationStart,nDeviationEnd,nDeviationStep,strategySMAandLRCDev,"AvgFunc=SMA and DeviationFunc=LRCDev")
#optimiseTradingStrat(mktData,nLookbackStart,nLookbackEnd,nLookbackStep,nDeviationStart,nDeviationEnd,nDeviationStep,strategyLRCandSTDEV,"AvgFunc=LRC and DeviationFunc=STDEV")
#optimiseTradingStrat(mktData,nLookbackStart,nLookbackEnd,nLookbackStep,nDeviationStart,nDeviationEnd,nDeviationStep,strategyLRCandATR,"AvgFunc=LRC and DeviationFunc=ATR")
#doptimiseTradingStrat(mktData,nLookbackStart,nLookbackEnd,nLookbackStep,nDeviationStart,nDeviationEnd,nDeviationStep,strategyLRCandLRCDev,"AvgFunc=LRC and DeviationFunc=LRCDev")
Share on FacebookShare on TwitterSubmit to StumbleUponhttp://gekkoquant.com/wp-content/uploads/2013/09/mean-reversion-linear-regression-curves-1024x521.jpegDigg ThisSubmit to redditShare via email

Genetic Algorithm in R – Trend Following

This post is going to explain what genetic algorithms are, it will also present R code for performing genetic optimisation.

A genetic algo consists of three things:

  1. A gene
  2. A fitness function
  3. Methods to breed/mate genes

The Gene

The gene is typically a binary number, each bit in the binary number controls various parts of your trading strategy. The gene below contains 4 sub gene, a stock gene to select what stock to trade, a strategy gene to select what strategy to use, paramA sets a parameter used in your strategy and paramB sets another parameter to use in your strategy.

Gene = [StockGene,StrategyGene,ParamA,ParamB]

Stock Gene
00 Google
01 Facebook
10 IBM
11 LinkedIn

Strategy Gene
0 Simple Moving Average
1 Exponential Moving Average

ParamA Gene – Moving Average 1 Lookback
00 10
01 20
10 30
11 40

ParamB Gene – Moving Average 2 Lookback
00 15
01 25
10 35
11 45

So Gene = [01,1,00,11]

Would be stock=Facebook, strategy=Exponential Moving Average,paramA=10,paramB=45].

The strategy rules are simple, if the moving average(length=paramA) > moving average(length=paramB) then go long, and vice versa.

The fitness function

A gene is quantified as a good or bad gene using a fitness function. The success of a genetic trading strategy depends heavily upon your choice of fitness function and whether it makes sense with the strategies you intend to use. You will trade each of the strategies outlined by your active genes and then rank them by their fitness. A good starting point would be to use the sharp ratio as the fitness function.

You need to be careful that you apply the fitness function to statistically significant data. For example if you used a mean reverting strategy that might trade once a month (or what ever your retraining window is), then your fitness is determined by 1 or 2 datapoints!!! This will result in poor genetic optimisation (in my code i’ve commented out a mean reversion strategy test for yourself). Typically what happens is your sharpe ratio from 2 datapoints is very very high merely down to luck. You then mark this as a good gene and trade it the next month with terrible results.

Breeding Genes

With a genetic algo you need to breed genes, for the rest of this post i’ll assume you are breeding once a month. During breeding you take all of the genes in your gene pool and rank them according to the fitness function. You then select the top N genes and breed them (discard all the other genes they’re of no use).

Breeding consists of two parts:

Hybridisation – Take a gene and cut a chunk out of it, you can use whatever random number generator you want to determine the cut locations, swap this chunk with a corresponding chunk from another gene.

Eg.
Old gene: 00110010 and 11100110 (red is the randomly select bits to cut)
New gene: 00100110 and 11110010

You do this for every possible pair of genes in your top N list.

Mutation – After hybridisation go through all your genes and randomly flip the bits with an fixed probability. The mutation prevents your strategy from getting locked into an every shrinking gene pool.

For a more detailed explanation with diagrams please see:

http://blog.equametrics.com/ scroll down to Genetic Algorithms and its Application in Trading

genetic algo sharpe 1.14

Annualized Sharpe Ratio (Rf=0%) 1.15

On to the code:

?View Code RSPLUS
library("quantmod")
library("PerformanceAnalytics")
library("zoo")
 
#INPUTS
topNToSelect <- 5   #Top n genes are selected during the mating, these will be mated with each other
mutationProb <- 0.05 #A mutation can occur during the mating, this is the probability of a mutation for individual chromes
symbolLst <- c("^GDAXI","^FTSE","^GSPC","^NDX","AAPL","ARMH","JPM","GS")
#symbolLst <- c("ADN.L","ADM.L","AGK.L","AMEC.L","AAL.L","ANTO.L","ARM.L","ASHM.L","ABF.L","AZN.L","AV.L","BA.L","BARC.L","BG.L","BLT.L","BP.L","BATS.L","BLND.L","BSY.L","BNZL.L","BRBY.L","CSCG.L","CPI.L","CCL.L","CNA.L","CPG.L","CRH.L","CRDA.L","DGE.L","ENRC.L","EXPN.L","FRES.L","GFS.L","GKN.L","GSK.L","HMSO.L","HL.L","HSBA.L","IAP.L","IMI.L","IMT.L","IHG.L","IAG.L","IPR.L","ITRK.L","ITV.L","JMAT.L","KAZ.L","KGF.L","LAND.L","LGEN.L","LLOY.L","EMG.L","MKS.L","MGGT.L","MRW.L","NG.L","NXT.L","OML.L","PSON.L","PFC.L","PRU.L","RRS.L","RB.L","REL.L","RSL.L","REX.L","RIO.L","RR.L","RBS.L","RDSA.L","RSA.L","SAB.L","SGE.L","SBRY.L","SDR.L","SRP.L","SVT.L","SHP.L","SN.L","SMIN.L","SSE.L","STAN.L","SL.L","TATE.L","TSCO.L","TLW.L","ULVR.L","UU.L","VED.L","VOD.L","WEIR.L","WTB.L","WOS.L","WPP.L","XTA.L")
 
#END INPUTS
 
 
 
#Stock gene
stockGeneLength <- 3 #8stocks
#stockGeneLength<-6 #Allows 2^6 stocks (64)
 
#Strategy gene
strateyGeneLength<-2
 
#Paramter lookback gene
parameterLookbackGeneLength<-6
 
#Calculate the length of our chromozone, chromozone=[gene1,gene2,gene3...]
chromozoneLength <- stockGeneLength+strateyGeneLength+parameterLookbackGeneLength
 
#TradingStrategies
signalMACross <- function(mktdata, paramA, paramB, avgFunc=SMA){
 signal = avgFunc(mktdata,n=paramA)/avgFunc(mktdata,n=paramB)
 signal[is.na(signal)] <- 0
 signal <- (signal>1)*1 #converts bools into ints
 signal[signal==0] <- (-1)
 return (signal)
}
 
signalBollingerReversion <- function(mktdata, paramA, paramB){
  avg <- SMA(mktdata,paramB)
  std <- 1*rollapply(mktdata, paramB,sd,align="right")
  shortSignal <- (mktdata > avg+std)*-1
  longSignal <- (mktdata < avg-std)*1
  signal <- shortSignal+longSignal
  signal[is.na(signal)]<-0
  return (signal)
}
 
signalRSIOverBoughtOrSold <- function(mktdata, paramA, paramB){
  upperLim <- min(60*(1+paramB/100),90)
  lowerLim <- max(40*(1-paramB/100),10)
  rsisignal <- RSI(mktdata,paramB)
  signal <- ((rsisignal>upperLim)*-1)+((rsisignal<lowerLim)*1)
  return (signal)
}
 
 
#Gene = [StockGene,StrategyGene,ParamAGene,ParamBGene]
#The following functions extract specific parts of the gene
getStockGeneFromChromozone <- function(chrome){
     return(chrome[,seq(1,stockGeneLength)])
}
 
getStrategyGeneFromChromozone <- function(chrome){
     return(chrome[,seq(stockGeneLength+1,stockGeneLength+strateyGeneLength)])
}
 
getParameterLookbackGeneFromChromozone <- function(chrome){
     return(chrome[,seq(stockGeneLength+strateyGeneLength+1,stockGeneLength+strateyGeneLength+parameterLookbackGeneLength)])
}
 
#Once parts of the gene have been extracted they are then converted into
#lookback values, what stocks to trade, or what strategy to use
getStockDataFromChromozone<- function(chrome){
      #Basically a binary number to decimal converter
      gene <- getStockGeneFromChromozone(chrome)
      index <-sum(chrome*(2^(seq(1,length(gene),1)-1)))+1 #The +1 is to stop 0 since not a valid index
      cleanName <- sub("^","",symbolLst[index],fixed=TRUE)
     return (get(cleanName,symbolData))
}
 
getStrategyFromChromozone <- function(chrome){
      gene <- matrix(getStrategyGeneFromChromozone(chrome))
 
      if(all(gene==matrix(c(0,0)))){
       return (signalMACross)
      }
 
      if(all(gene==matrix(c(0,1)))){
       return (function(mktdata,paramA,paramB) {signalMACross(mktdata,paramA,paramB,avgFunc=EMA)})
      }
 
      if(all(gene==matrix(c(1,0)))){
       return (function(mktdata,paramA,paramB) {signalMACross(mktdata,paramA,paramB,avgFunc=ZLEMA)})
      # return (signalBollingerReversion)
      }
 
      if(all(gene==matrix(c(1,1)))){
       return (function(mktdata,paramA,paramB) {signalMACross(mktdata,paramA,paramB,avgFunc=WMA)})
      # return (signalRSIOverBoughtOrSold)
      }
      print("nothing found")
 
}
 
getLookbackAFromChromozone <- function(chrome){
    gene <- getParameterLookbackGeneFromChromozone(chrome)
    gene <- gene[,1:3]
    gene <- matrix(gene)
 
      if(all(gene==matrix(c(0,0,0)))){
        return (10)
      }
      if(all(gene==matrix(c(0,0,1)))){
        return (20)
      }
      if(all(gene==matrix(c(0,1,0)))){
        return (30)
      }
      if(all(gene==matrix(c(0,1,1)))){
        return (40)
      }
      if(all(gene==matrix(c(1,0,0)))){
        return (50)
      }
      if(all(gene==matrix(c(1,0,1)))){
        return (60)
      }
      if(all(gene==matrix(c(1,1,0)))){
        return (70)
      }
      if(all(gene==matrix(c(1,1,1)))){
        return (80)
      }
 
}
 
getLookbackBFromChromozone <- function(chrome){
    gene <- getParameterLookbackGeneFromChromozone(chrome)
    gene <- gene[,4:6]
    gene <- matrix(gene)
 
      if(all(gene==matrix(c(0,0,0)))){
        return (15)
      }
      if(all(gene==matrix(c(0,0,1)))){
        return (25)
      }
      if(all(gene==matrix(c(0,1,0)))){
        return (35)
      }
      if(all(gene==matrix(c(0,1,1)))){
        return (45)
      }
      if(all(gene==matrix(c(1,0,0)))){
        return (55)
      }
      if(all(gene==matrix(c(1,0,1)))){
        return (65)
      }
      if(all(gene==matrix(c(1,1,0)))){
        return (75)
      }
      if(all(gene==matrix(c(1,1,1)))){
        return (85)
      }
}
 
#The more positive the fitness, the better the gene
calculateGeneFitnessFromTradingReturns <- function(tradingRet){
  tradingFitness <- SharpeRatio.annualized(tradingRet)
  #tradingFitness <- SharpeRatio.annualized(tradingRet) * (1/maxDrawdown(tradingRet))
  #tradingFitness <- max(cumsum(tradingRet))/maxDrawdown(tradingRet)
  #tradingFitness <- sum((tradingRet>0)*1)/length(tradingRet) #% of trades profitable
  #tradingFitness <- -1*maxDrawdown(tradingRet)
  return(tradingFitness)
}
 
#This function performs the mating between two chromozones
genetricMating <- function(chromozoneFitness,useTopNPerformers,mutationProb){
        selectTopNPerformers <- function(chromozoneFitness,useTopNPerformers){
              #Ranks the chromozones by their fitness and select the topNPerformers
              orderedChromozones <- order(chromozoneFitness[,"Fitness"],decreasing=TRUE)
              orderedChromozones <- chromozoneFitness[orderedChromozones,]
 
              ##Often there are lots of overlapping strategies with the same fitness
              ##We should filter by unique fitness to stop the overweighting of lucky high fitness
              orderedChromozones <- subset(orderedChromozones, !duplicated(Fitness))
 
              print(orderedChromozones)
              return(orderedChromozones[seq(1,min(nrow(orderedChromozones),useTopNPerformers)),])
        }
 
        hybridize <- function(topChromozones,mutationProb){
            crossoverFunc <- function(chromeA,chromeB){
 
            chromeA <- chromeA[,!colnames(chromeA) %in% c("Fitness")]
            chromeB <- chromeB[,!colnames(chromeB) %in% c("Fitness")]
 
                  #Takes a number of chromes from B and swaps them in to A
                  nCross <- runif(min=0,max=ncol(chromeA)-1,1) #the number of individual chromes to swap
                  swapStartLocation = round(runif(min=1,max=ncol(chromeA),1))
                  swapLocations <- seq(swapStartLocation,swapStartLocation+nCross) #Can run over the end of our vector, need to wrap around back to start
                  swapLocations <- swapLocations %% ncol(chromeA)+1 #Performs the wrapping
                  chromeA[1,swapLocations] <- chromeB[1,swapLocations] #Performs the swap
                  return (chromeA)
            }
 
            mutateFunc <- function(chrome,mutationProb){
                return((round(runif(min=0,max=1,ncol(chrome))<mutationProb)+chrome) %% 2)
            }
 
            #Take each chromozone and mate it with all the others (and it's self)
            a <- topChromozones[rep(seq(1,nrow(topChromozones)),each=nrow(topChromozones)),] #Repeat each row nrow times
            b <- topChromozones[rep(seq(1,nrow(topChromozones)),nrow(topChromozones)),] #Repeat whole matrix nrow times
 
 
            #Can this be vectorised (not huge amounts of data anyway so probs not an issue)?
            res <- matrix(nrow=0,ncol=ncol(a)-1) #The minus 1 is to drop the "Fitness" column
            for(i in 1:nrow(a)){
                res <- rbind(res,mutateFunc(crossoverFunc(a[i,],b[i,]),mutationProb))
            }
            return (res)
        }
 
        topChromozones <- selectTopNPerformers(chromozoneFitness,useTopNPerformers)
        #return ((hybridize(topChromozones,mutationProb))) #You may want duplicates to give more weight to 'good' genes
        return (unique(hybridize(topChromozones,mutationProb))) #Remove duplicate genes
}
 
#This function takes a chrome/gene and does the according trades
#It takes market data and a start and an end date
#It does not take responsibility for the mating and ranking of genes
doGeneticTrading <- function(mktdata,chrome, startDate, endDate){
    signalFunc <-getStrategyFromChromozone(chrome)
    paramA <- getLookbackAFromChromozone(chrome)
    paramB <- getLookbackBFromChromozone(chrome)
 
    signal <- signalFunc(Op(mktdata),paramA,paramB)
    opClRet <- (Cl(mktdata)/Op(mktdata)) - 1
    tradingReturns = opClRet * signal
    dataWin <- (paste(startDate,"::",endDate,sep=""))
    tradingReturns <- tradingReturns[dataWin]
    colnames(tradingReturns) <- c("TradingRet")
    return(tradingReturns)
}
 
#This function mates genes every month
#It also passes those genes into the doGeneticTrading function
doTrading <- function(chromelist){
  #Function for taking a year and a month and spitting out a clean date
  cleanDate <- function(y,m){
      if(m == 13){
       m <- 1
       y <- y+1
      }
       if(m < 10){
         return(paste(y,paste("0",m,sep=""),sep="-"))
       } else {
         return(paste(y,m,sep="-"))
       }
  }
  year <- 2002
  month <- 1
  totalRet <- 0
  fitnessEvoltion <- 0
 
  dev.new()
  par(mfrow=c(2,1))
  #Loop through many years and months
  for(y in 2002:2010){
    for(m in 1:12){
        chromeFitness <-  as.data.frame(matrix(nrow=0,ncol=ncol(chromelist)))
 
        startD <- cleanDate(y-2,m) #Subtracting off 2 years to ensure we pass enough data in(should really be calculated from MA lookback)
        liveStart <- cleanDate(y,m)
        liveEnd <- cleanDate(y,m+1)
        print(paste("Start",startD,"LiveStart",liveStart,"LiveEnd",liveEnd))
        dataWin <- (paste(startD,"::",liveEnd,sep=""))
        monthReturn <- data.frame()
        #Look through all the active chromes and use them for trading
        for(cn in 1:nrow(chromelist)){
        #USE a try catch just incase there are data issue etc...
         try({
            mktdata <- getStockDataFromChromozone(chromelist[cn,])
            tradingRet <- doGeneticTrading(mktdata[dataWin],chromelist[cn,],liveStart,liveEnd)
            tradingRet <- tradingRet*(1/nrow(chromelist)) #even money given to each strategy
            tradingFitness <- calculateGeneFitnessFromTradingReturns(tradingRet)
            if(!is.nan(tradingFitness) && !is.nan(max(tradingRet)) && !is.nan(min(tradingRet))){
              if(length(monthReturn) == 0 ){
               monthReturn <- tradingRet
              } else {
               monthReturn <- cbind(monthReturn,tradingRet)
              }
 
            res <- cbind(chromelist[cn,],tradingFitness)
            colnames(res) <- c(colnames(chromelist[cn,]),"Fitness")
            chromeFitness <- rbind(chromeFitness,res)
            }
           },silent=FALSE)
        }
        print("Month return")
        #Collapse all the trades from each chromozone into a single P&L for each day in the month
        monthReturn <- apply(monthReturn,1,sum,na.rm=TRUE)
        print(monthReturn)
        currentMonthFitness <- calculateGeneFitnessFromTradingReturns(monthReturn)
        #Update the running total of P&L
        totalRet <- c(totalRet,monthReturn)
        fitnessEvoltion <- c(fitnessEvoltion,currentMonthFitness)
        plot(cumsum(totalRet))
        plot(fitnessEvoltion)
        #print(chromeFitness)
        #print(chromeFitness[,"Fitness"])
        chromelist <- genetricMating(chromeFitness,topNToSelect,mutationProb)
        print(paste("There are",nrow(chromelist), "chromes active"))
        print(paste("Min Fitness:",min(chromeFitness[,"Fitness"])))
        print(paste("Max Fitness:",max(chromeFitness[,"Fitness"])))
        print(paste("Average Fitness:",mean(chromeFitness[,"Fitness"])))
        print(paste("Current Month Fitness:",currentMonthFitness))
 
 
    }
  }
   return (totalRet)
 
}
 
 
 
#Specify dates for downloading data, training models and running simulation
startDate = as.Date("2000-01-01") #Specify what date to get the prices from
symbolData <- new.env() #Make a new environment for quantmod to store data in
getSymbols(symbolLst, env = symbolData, src = "yahoo", from = startDate)
 
 
#Create some genes at random
#Make a diag matrix so that each chrome gets activated atleast once
startingChromozones <- diag(chromozoneLength)
rownames(startingChromozones) <- apply(t(seq(1,chromozoneLength)),2,function(x) { paste("Chrome",x,sep="") } )
fitness <- matrix(runif(min=-1,max=1,nrow(startingChromozones)),nrow=nrow(startingChromozones),ncol=1)
colnames(fitness) <- c("Fitness")
startingChromozones <- as.data.frame(cbind(startingChromozones,fitness))
 
 
 
 
print("Before mating")
print(startingChromozones)
print("After mating")
startingChromozones <- genetricMating(startingChromozones,topNToSelect,mutationProb)
print(startingChromozones)
 
 
tradingReturns <- doTrading(startingChromozones)
tradingReturns <- as.data.frame((as.matrix(tradingReturns[-1])))
tradingReturns<-as.zoo(tradingReturns)
 
dev.new()
charts.PerformanceSummary(tradingReturns,main=paste("Arithmetic Genetic Trading Returns"),geometric=FALSE)
print(table.Stats(tradingReturns))
cat("Sharpe Ratio")
print(SharpeRatio.annualized(tradingReturns))
Share on FacebookShare on TwitterSubmit to StumbleUponhttp://gekkoquant.com/wp-content/uploads/2013/08/genetic-algo-sharpe-1.14.jpegDigg ThisSubmit to redditShare via email

Is ‘risk’ rewarded in the equity markets?

This post looks to examine if the well known phrase “the higher the risk the higher the reward” applies to the FTSE 100 constituents. Numerous models have tried to capture risk reward metrics, the best known is the Capital Allocation Pricing Model (CAPM). CAPM tries to quantify the return on an investment an investor must receive in order to be adequately compensated for the risk they’ve taken.

The code below calculates the rolling standard deviation of returns, ‘the risk’, for the FTSE 100 constituents. It then groups stocks into quartiles by this risk metric, the groups are updated daily. Quartile 1 is the lowest volatility stocks, quartile 2 the highest. An equally weighted ($ amt) index is created for each quartile. According to the above theory Q4 (high vol) should produce the highest cumulative returns.

When using a 1 month lookback for the stdev calculation there is a clear winning index, the lowest vol index (black). Interestingly the 2nd best index is the highest vol index (blue). The graph above is calculated using arithmetic returns.

When using a longer lookback of 250 days, a trading year, the highest vol index is the best performer and the lowest vol index the worst performer.

For short lookback (30days) low vol index was the best performer

For long lookback (250days) high vol index was the best performer

One possible explanation (untested) is that for a short lookback the volatility risk metric is more sensitive to moves in the stock and hence on a news announcement / earnings the stock has a higher likelihood of moving from it’s current index into a higher vol index. Perhaps it isn’t unreasonable to assume that the high vol index contains only the stocks that have had a recent announcement / temporary volatility and are in a period of consolidation or mean reversion. Or to put it another way for short lookbacks the high vol index doesn’t contain the stocks that are permanently highly vol, whereas for long lookbacks any temporary vol deviations are smoothed out.

Below are the same charts as above but for geometric returns.

On to the code:

?View Code RSPLUS
library("quantmod")
library("PerformanceAnalytics")
library("zoo")
 
#Script parameters
symbolLst <- c("ADN.L","ADM.L","AGK.L","AMEC.L","AAL.L","ANTO.L","ARM.L","ASHM.L","ABF.L","AZN.L","AV.L","BA.L","BARC.L","BG.L","BLT.L","BP.L","BATS.L","BLND.L","BSY.L","BNZL.L","BRBY.L","CSCG.L","CPI.L","CCL.L","CNA.L","CPG.L","CRH.L","CRDA.L","DGE.L","ENRC.L","EXPN.L","FRES.L","GFS.L","GKN.L","GSK.L","HMSO.L","HL.L","HSBA.L","IAP.L","IMI.L","IMT.L","IHG.L","IAG.L","IPR.L","ITRK.L","ITV.L","JMAT.L","KAZ.L","KGF.L","LAND.L","LGEN.L","LLOY.L","EMG.L","MKS.L","MGGT.L","MRW.L","NG.L","NXT.L","OML.L","PSON.L","PFC.L","PRU.L","RRS.L","RB.L","REL.L","RSL.L","REX.L","RIO.L","RR.L","RBS.L","RDSA.L","RSA.L","SAB.L","SGE.L","SBRY.L","SDR.L","SRP.L","SVT.L","SHP.L","SN.L","SMIN.L","SSE.L","STAN.L","SL.L","TATE.L","TSCO.L","TLW.L","ULVR.L","UU.L","VED.L","VOD.L","WEIR.L","WTB.L","WOS.L","WPP.L","XTA.L")
#Specify dates for downloading data
startDate = as.Date("2000-01-01") #Specify what date to get the prices from
symbolData <- new.env() #Make a new environment for quantmod to store data in
clClRet <- new.env()
downloadedSymbols <- list()
for(i in 1:length(symbolLst)){
  #Download one stock at a time
  print(paste(i,"/",length(symbolLst),"Downloading",symbolLst[i]))
  tryCatch({
    getSymbols(symbolLst[i], env = symbolData, src = "yahoo", from = startDate)
     cleanName <- sub("^","",symbolLst[i],fixed=TRUE)
     mktData <- get(cleanName,symbolData)
     print(paste("-Calculating close close returns for:",cleanName))
      ret <-(Cl(mktData)/Lag(Cl(mktData)))-1
      if(max(abs(ret),na.rm=TRUE)>0.5){
      print("-There is a abs(return) > 50% the data is odd lets not use this stock")
      next;
      }
      downloadedSymbols <- c(downloadedSymbols,symbolLst[i])
 
      assign(cleanName,ret,envir = clClRet)
    }, error = function(e) {
    print(paste("Couldn't download: ", symbolLst[i]))
    })
 
 
}
 
 
#Combine all the returns into a zoo object (joins the returns by date)
#Not a big fan of this loop, think it's suboptimal
zooClClRet <- zoo()
for(i in 1:length(downloadedSymbols)){
  cleanName <- sub("^","",downloadedSymbols[i],fixed=TRUE)
  print(paste("Combining the close close returns to the zoo:",cleanName))
  if(length(zooClClRet)==0){
    zooClClRet <- as.zoo(get(cleanName,clClRet))
  } else {
    zooClClRet <- merge(zooClClRet,as.zoo(get(cleanName,clClRet)))
  }
}
print(head(zooClClRet))
 
 
#This will take inzoo or data frame
#And convert each row into quantiles
#Quantile 1 = 0-0.25
#Quantile 2 = 0.25-0.5 etc...
quasiQuantileFunction <- function(dataIn){
    quantileFun <- function(rowIn){
        quant <- quantile(rowIn,na.rm=TRUE)
        #print(quant)
        a <- (rowIn<=quant[5])
        b <- (rowIn<=quant[4])
        c <- (rowIn<=quant[3])
        d <- (rowIn<=quant[2])
        rowIn[a] <- 4
        rowIn[b] <- 3
        rowIn[c] <- 2
        rowIn[d] <- 1
        return(rowIn)
    }
 
  return (apply(dataIn,2,quantileFun))
}
 
avgReturnPerQuantile <- function(returnsData,quantileData){
      q1index <- (clClQuantiles==1)
      q2index <- (clClQuantiles==2)
      q3index <- (clClQuantiles==3)
      q4index <- (clClQuantiles==4)
 
      q1dat <- returnsData
      q1dat[!q1index] <- NaN
      q2dat <- returnsData
      q2dat[!q2index] <- NaN
      q3dat <- returnsData
      q3dat[!q3index] <- NaN
      q4dat <- returnsData
      q4dat[!q4index] <- NaN
 
      avgFunc <- function(x) {
           #apply(x,1,median,na.rm=TRUE) #median is more resistant to outliers
            apply(x,1,mean,na.rm=TRUE)
      }
      res <- returnsData[,1:4] #just to maintain the time series (there must be a better way)
      res[,1] <- avgFunc(q1dat)
      res[,2] <- avgFunc(q2dat)
      res[,3] <- avgFunc(q3dat)
      res[,4] <- avgFunc(q4dat)
 
      colnames(res) <- c("Q1","Q2","Q3","Q4")
      return(res)
}
 
nLookback <- 250 #~1year trading calendar
clClVol <- rollapply(zooClClRet,nLookback,sd,na.rm=TRUE)
clClQuantiles <- quasiQuantileFunction(clClVol)
returnPerVolQuantile <- avgReturnPerQuantile(zooClClRet,clClQuantiles)
colnames(returnPerVolQuantile) <- c("Q1 min vol","Q2","Q3","Q4 max vol")
returnPerVolQuantile[is.nan(returnPerVolQuantile)]<-0 #Assume if there is no return data that it's return is 0
#returnPerVolQuantile[returnPerVolQuantile>0.2] <- 0 #I was having data issues leading to days with 150% returns! This filters them out
cumulativeReturnsByQuantile <- apply(returnPerVolQuantile,2,cumsum)
dev.new()
charts.PerformanceSummary(returnPerVolQuantile,main=paste("Arithmetic Cumulative Returns per Vol Quantile - Lookback=",nLookback),geometric=FALSE)
print(table.Stats(returnPerVolQuantile))
cat("Sharpe Ratio")
print(SharpeRatio.annualized(returnPerVolQuantile))
 
dev.new()
par(oma=c(0,0,2,0))
par(mfrow=c(3,3))
 
for(i in seq(2012,2004,-1)){
print(as.Date(paste(i,"-01-01",sep="")))
print(as.Date(paste(i+1,"-01-01",sep="")))
  windowedData <- window(as.zoo(returnPerVolQuantile),start=as.Date(paste(i,"-01-01",sep="")),end=as.Date(paste(i+1,"-01-01",sep="")))
  chart.CumReturns(windowedData,main=paste("Year",i,"to",i+1),geometric=FALSE)
}
title(main=paste("Arithmetic Cumulative Returns per Vol Quantile - Lookback=",nLookback),outer=T)
 
dev.new()
charts.PerformanceSummary(returnPerVolQuantile,main=paste("Geometric Cumulative Returns per Vol Quantile - Lookback=",nLookback),geometric=TRUE)
print(table.Stats(returnPerVolQuantile))
cat("Sharpe Ratio")
print(SharpeRatio.annualized(returnPerVolQuantile))
 
dev.new()
par(oma=c(0,0,2,0))
par(mfrow=c(3,3))
 
for(i in seq(2012,2004,-1)){
print(as.Date(paste(i,"-01-01",sep="")))
print(as.Date(paste(i+1,"-01-01",sep="")))
  windowedData <- window(as.zoo(returnPerVolQuantile),start=as.Date(paste(i,"-01-01",sep="")),end=as.Date(paste(i+1,"-01-01",sep="")))
  chart.CumReturns(windowedData,main=paste("Year",i,"to",i+1),geometric=TRUE)
}
title(main=paste("Geometric Cumulative Returns per Vol Quantile - Lookback=",nLookback),outer=T)
Share on FacebookShare on TwitterSubmit to StumbleUponhttp://gekkoquant.com/wp-content/uploads/2013/07/arithmetic30-1024x512.pngDigg ThisSubmit to redditShare via email

Analysis of returns after n consecutive up/down days – Predicting the Sign of Open to Close Returns

This weekend I was spammed for a “binary option trading system with 90% accuracy”. The advert caught my curiosity  in essence it detailed a method that was a variation of the well known roulette playing strategy that mathematically guarantees a profit (assuming infinite money, and no table limit).

Roulette Strategy

If you double your bet size after a loss and repeat the same bet you are guaranteed a profit, your next winning will cover all the preceding loses.

e.g Bet $1 on red, lose, Bet $2 on red, Win get $4 back ($2 is your stake, and $1 covers the loss from your first bet) giving $1 profit.

Exponential growth of lot size, no thanks :)

Binary options are analogous to betting on red, they offer virtually fixed odds for up or down directional bets. Naturally I want to know whats is the maximum number of consecutive up or down days in the market, how much pain would I have to suffer with this strategy.

Occurrences of n Consecutive Up or Down Days

Analysing the last 12 years of returns data for the S&P 500, the maximum consecutive number of up days is 9 (occurred in 2004-2005), the maximum consecutive number of down days is -8 which, you guessed it, occurred in 2008-2009.

So 9 days of pain should we always short the direction of the market.

Instead of enduring the 9 days of draw down, it is interesting to see what the consecutive number of up/down days says about the probability the next day is an up day. The maximum likelihood probability of an up day is count(up days)/count(up and down days). Naturally we will condition this data on the consecutive number of up/down days.

Consecutive Up or Down Days vs Maximum Likelihood Probability the next day is up

This data is fairly nice looking, for example in 2012-2013 there is a clear relationship. The more down days in a row the higher the likelihood of an up day. 6 down days implied the probability of an up day is 80%! I must raise a note of caution here, 6 down days in a row was seen less than 5 times in the year. Hence the probability estimate is based on 5 points and not statistically significant, perhaps looking at 5 years of returns might be better.

I appreciate that most people don’t trade binary options can we trade the index/stock out right, it is interesting to see what the consecutive number of up/down days says about future Open to Close Returns. The image below regresses Open to Close Returns (time t) with Consecutive Up/Down days (time t-1).

Consecutive Up or Down Days vs Next Day Open to Close Returns

Very disappointing chart, doesn’t really show much relationship between returns and consecutive up/down days. For some of the data points the up move is more probable than a down move but the magnitude of the up moves are significantly smaller than that of the down moves. These charts vary greatly by asset class and by security, single stocks have much more favorable plots.

Prediction Accuracy

The plot below shows the accuracy of using this maximum likelihood estimate approach. The model takes the last 250 days of returns and calculates the probability of an up move given that the current day has seen n consecutive days of trading. If the prob of an up move or is over a certain threshold go long, if its below a certain threshold go short.

Heatmap of Accuracy vs Model Parameters

 

The histogram on the heatmap shows that approximately half of the parameter combinations can predict the direction with accuracy greater than 50%.

Final Comments

The beauty of this approach is that it’s simple, it can be applied to any asset class but most importantly it can be applied across different time frames.

Onto the code:

?View Code RSPLUS
library("quantmod")
library("reshape")
library("gplots")
 
#Control Parameters
dataStartDate = as.Date("2000-01-01")
symbol<- "^GSPC"
longThreshold <- 0.70  #If the probability of an upday is greater than this limit go long
shortThreshold <- 1-longThreshold  #If the probability of an upday is lower than this limit go short
nLookback<-250 #Days to look back when generating the rolling probability distribution (approx 250 trading days in a year)
 
#Function to turn a boolean vector into a vector containing the consecutive num of trues or falses seen
#Will be used to calculate the consecutive number of up and down days
consecutiveTruesExtractor <- function(data){
        genNumOfConsecutiveTrues <- function(x, y) { (x+y)*y  } #Y is either 0 or 1
        upDaysCount <- Reduce(genNumOfConsecutiveTrues,data,accumulate=TRUE)
        upDaysCount <- as.vector(Lag(upDaysCount))
        upDaysCount[is.na(upDaysCount)] <- 0
 
 
        downDaysCount <- Reduce(genNumOfConsecutiveTrues,!data,accumulate=TRUE)
        downDaysCount <- as.vector(Lag(downDaysCount))
        downDaysCount[is.na(downDaysCount)] <- 0
        consecutiveTruesExtractor <- upDaysCount-downDaysCount
}
 
#Function to plot data and add regression line
doPlot <- function(x,y,title,xlabel,ylabel,ylimit){
  x<-as.vector(x)
  y<-as.vector(y)
  boxplot(y~x,main=title, xlab=xlabel,ylab=ylabel,ylim=ylimit)
  abline(lm(y~x),col="red",lwd=1.5)
}
 
#Function to calculate the percentage of updays from returns set
calcPercentageOfUpdaysFromReturnsSet <- function(data){
  calcPercentageOfUpdaysFromReturnsSet <- sum(data>0)/length(data)
}
 
#Function takes a set of returns and consecutive up down day data and aggregates it into a probability distribution
#Generated a matrix of consecutive Direction vs prob of up move
generateProbOfUpDayDistribution <- function(dataBlock){
 
 y <- as.matrix(by(dataBlock[,"OpClRet"],list(ConsecutiveDir = dataBlock[,"ConsecutiveDir"]),calcPercentageOfUpdaysFromReturnsSet)) #Prob of upmove
 x <- as.matrix(as.numeric(as.matrix(rownames(y))))
 
 res <- cbind(x,y)
 colnames(res) <- c("ConsecutiveDir","Prob")
 generateProbOfUpDayDistribution <- res
}
 
#Given current consecutive up down day data, what is the probability the current day is an up day
#For use with the rollapply function (since needs to use the past n days worth of data for generating probability distribution)
probOfUpDayForUseWithRollApply <- function(dataBlock){
  dist <- generateProbOfUpDayDistribution(head(dataBlock,-1)) #Use head to drop the last row, prevents a lookforward issue
 
  currentConsecutiveRun <- last(dataBlock[,"ConsecutiveDir"])
  probOfUpDay <- dist[dist[,"ConsecutiveDir"] == rep(coredata(currentConsecutiveRun), length(dist[,"ConsecutiveDir"])),"Prob"]
  if(!is.numeric(probOfUpDay)) {probOfUpDay <- 0.5 } #Never this many consecutive days before, dont know what will happen make up and down events equally likely
  #print(paste("Current Run:",coredata(currentConsecutiveRun),"Prob of up day:",probOfUpDay ))
  probOfUpDayForUseWithRollApply <- probOfUpDay
}
 
#Just a quick test to check that the consecutiveTruesExtractor is working as expected
#Define the input data, and define the expected output
#Check that the output of the function equals the expected output
data <-           c(0, 0, 0, 1,0, 1,1,0,0)  #0 is down day, 1 is up day
expectedOutput <- c(0,-1,-2,-3,1,-1,1,2,-1)
res <- consecutiveTruesExtractor(data)
if( identical(res,expectedOutput)){
  print("Match consecutiveTruesExtractor is correct")
} else {
  print("Error consecutiveTruesExtractor contains bugs")
}
 
 
 
 
#Download the data
symbolData <- new.env() #Make a new environment for quantmod to store data in
getSymbols(symbol, env = symbolData, src = "yahoo", from = dataStartDate)
mktdata <- eval(parse(text=paste("symbolData$",sub("^","",symbol,fixed=TRUE))))
opClRet <- (Cl(mktdata)/Op(mktdata))-1
consecutiveDir <- consecutiveTruesExtractor(as.matrix(opClRet>0))
completeData<- cbind(opClRet,consecutiveDir)
colnames(completeData) <- c("OpClRet","ConsecutiveDir")
 
 
#Plot of consecutive up down days vs next day Open Close Returns
dev.new()
par(oma=c(0,0,2,0))
par(mfrow=c(3,3))
 
for(i in seq(2012,2004,-1)){
  windowedData <- window(completeData,start=as.Date(paste(i,"-01-01",sep="")),end=as.Date(paste(i+1,"-01-01",sep="")))
  doPlot(windowedData$ConsecutiveDir,windowedData$OpClRet,paste("Consecutive Up / Down days vs Open close Return (",i,",",i+1,")"),"Consecutive Up or Down days","Open Close Returns",c(-0.07,0.07))
}
title(main=paste("Symbol",symbol),outer=T)
 
#Plot of consecutive up down days vs the maximum likelihood probability that the next day is an up day
dev.new()
par(oma=c(0,0,2,0))
par(mfrow=c(3,3))
 
for(i in seq(2012,2004,-1)){
  windowedData <- window(completeData,start=as.Date(paste(i,"-01-01",sep="")),end=as.Date(paste(i+1,"-01-01",sep="")))
  y <-as.matrix(by(as.vector(windowedData$OpClRet),list(ConsecutiveDir = windowedData$ConsecutiveDir),calcPercentageOfUpdaysFromReturnsSet)) #Prob of upmove
  x <- as.matrix(as.numeric(as.matrix(rownames(y)))) #Consecutive up or down days
  plot(cbind(x,y),main=paste("Consecutive Up / Down days vs Next day Dir (",i,",",i+1,")"), xlab="Consecutive Up or Down days",ylab="Conditional Probability of an Up day")
  abline(lm(y~x),col="red",lwd=1.5)
}
title(main=paste("Symbol",symbol),outer=T)
 
#Plot of consecutive up down days vs the number of occurences seen
dev.new()
par(oma=c(0,0,2,0))
par(mfrow=c(3,3))
 
for(i in seq(2012,2004,-1)){
  windowedData <- window(completeData,start=as.Date(paste(i,"-01-01",sep="")),end=as.Date(paste(i+1,"-01-01",sep="")))
  y <-abs(as.matrix(by(windowedData$ConsecutiveDir,list(ConsecutiveDir = windowedData$ConsecutiveDir),sum))) #Count the number of occurences of each consecutive run
  x <- as.matrix(as.numeric(as.matrix(rownames(y))))
  plot(y,xaxt="n",main=paste("Consecutive Up / Down days vs Next day Dir (",i,",",i+1,")"), xlab="Consecutive Up or Down days",ylab="Occurences of consecutive run",type="l")
  axis(1, at=1:length(x),labels=x)
}
title(main=paste("Symbol",symbol),outer=T)
 
predictionPerformance <- function(completeData,longThreshold,shortThreshold,nLookback,displayPlot){
    #Calcuate the probabiliy of an up day using a nLookback day window
    rollingProbOfAnUpday <- rollapply(completeData,FUN=probOfUpDayForUseWithRollApply,align="right",fill=NA,width=nLookback,by.column=FALSE)
    rollingProbOfAnUpday <- as.matrix(rollingProbOfAnUpday)
    print(head(rollingProbOfAnUpday))
    colnames(rollingProbOfAnUpday) <- "ProbTodayIsAnUpDay"
    completeData <- cbind(completeData,rollingProbOfAnUpday)
 
    suggestedTradeDir <- rollingProbOfAnUpday #Just to copy the structure
    colnames(suggestedTradeDir) <- "SuggestedTradeDir"
    suggestedTradeDir[rollingProbOfAnUpday>longThreshold] <- 1  #Long Trade
    suggestedTradeDir[rollingProbOfAnUpday<shortThreshold] <- -1 #Short Trade
    suggestedTradeDir[rollingProbOfAnUpday<longThreshold & rollingProbOfAnUpday>shortThreshold] <- 0 #Do nothing
    completeData <- cbind(completeData,suggestedTradeDir)
 
    isPredictionCorrect <- suggestedTradeDir #Just to copy structure
    isPredictionCorrect <- sign(completeData$SuggestedTradeDir * completeData$OpClRet) #sign(0) is 0 so will capture no trades as well
    isPredictionCorrect[is.na(isPredictionCorrect)] <- 0
    isPredictionCorrect[is.nan(isPredictionCorrect)] <- 0
    if(displayPlot){
      dev.new()
      plot(cumsum(isPredictionCorrect), main=paste("Market Direction Prediction Performance for",symbol,"(Probability Threshold, Long=",longThreshold,"Short=",shortThreshold,"Lookback=",nLookback,")"),xlab="Date",ylab="Cumulative Sum of Correct (+1) and Wrong(-1) Predictions")
      msgIncorrectPred <- (paste("Incorrect Predictions (out of the days when a prediction was made)",100*abs(sum(isPredictionCorrect[isPredictionCorrect==-1]))/length(isPredictionCorrect[isPredictionCorrect!=0]),"%"))
      msgCorrectPred <- (paste("Correct Predictions (out of the days when a prediction was made)",100*sum(isPredictionCorrect[isPredictionCorrect==1])/length(isPredictionCorrect[isPredictionCorrect!=0]),"%"))
      msgPercOfDaysWithPred <- (paste("Percent of days when a prediction was made",100*sum(abs(isPredictionCorrect[isPredictionCorrect!=0]))/length(isPredictionCorrect),"%"))
      legend("topleft",c(msgIncorrectPred,msgCorrectPred,msgPercOfDaysWithPred),bg="lightblue")
    }
    predictionPerformance <- sum(isPredictionCorrect[isPredictionCorrect==1])/length(isPredictionCorrect[isPredictionCorrect!=0])
}
 
 
predictionPerformance(completeData,longThreshold,shortThreshold,nLookback,TRUE)
 
#Parameter search
resultsMat <- matrix(numeric(0), 0,3)
colnames(resultsMat) <- c("Lookback","LongProbThreshold","Accuracy")
for(nLookback in seq(30,240,30)){
  for(longThreshold in seq(0.55,1,0.05)){
    shortThreshold <- 1-longThreshold
    accuracy <- predictionPerformance(completeData,longThreshold,shortThreshold,nLookback*2,FALSE)
    resultsMat <- rbind(resultsMat,as.matrix(cbind(nLookback,longThreshold,accuracy)))
    print(resultsMat)
  }
}
print(resultsMat)
tt <-(as.matrix(as.data.frame(cast(as.data.frame(resultsMat),Lookback~LongProbThreshold))))
rownames(tt)<-tt[,"Lookback"]
heatmap.2(tt[,-1],key=TRUE,Rowv=FALSE,Colv=FALSE,xlab="Prob of Up Day Threshold",ylab="Lookback",trace="none",main=paste("Prediction Accuracy (correct predictions as % of all predictions) for ",symbol))

 

Share on FacebookShare on TwitterSubmit to StumbleUponhttp://gekkoquant.com/wp-includes/images/smilies/icon_smile.gifDigg ThisSubmit to redditShare via email

Skewness Revisited

In my last post http://gekkoquant.com/2013/03/03/trend-following-skewness-signal/ I suggested that measuring the skewness of asset returns can possibly be used to identify trends. Or mathematically more accurately put, it can identify when the return distribution is symmetric and hence NOT in a trending environment (assuming returns are Gaussian with 0 mean).

This post presents a simple regression of Skewness vs Asset returns. The skewness is calculated at time t using OpenCloseReturns[t-1,t-2,.....,t-lookback]. It is then regressed against OpenCloseReturn[t]. Where t donates today, and t-1 donates yesterday.

The data set is 2000 to present.

The red line in the above plot is a linear regression. It’s clear to see that the skewness doesn’t explain the Open Close Returns. I must eat humble pie, the returns seen in the previous post are down to a lucky parameter selection. Thanks must go to Pietro for testing over a longer period.

Onto the code:

?View Code RSPLUS
library("quantmod")
library("PerformanceAnalytics")
library(e1071)     #For the skewness command
 
 
#Script parameters
symbol <- "^GSPC"     #Symbol
 
#Specify dates for downloading data
startDate = as.Date("2000-01-01") #Specify what date to get the prices from
symbolData <- new.env() #Make a new environment for quantmod to store data in
getSymbols(symbol, env = symbolData, src = "yahoo", from = startDate)
mktdata <- eval(parse(text=paste("symbolData$",sub("^","",symbol,fixed=TRUE))))
mktdata <- head(mktdata,-1) #Hack to fix some stupid duplicate date problem with yahoo
OpClRet  <- (Cl(mktdata)/Op(mktdata))    - 1
colnames(OpClRet) <- "OpClRet"
mktdata <- cbind(mktdata,OpClRet)
 
 
getSkewnessFromReturns <- function(mktdata,skewLookback){
   #Calculate the skew
  rollingSkew <- Lag(rollapply(mktdata$OpClRet ,FUN="skewness",width=skewLookback, align="right",na.pad=TRUE),1)
   colnames(rollingSkew) <- "Skew"
  getSkewnessFromReturns <- na.omit(cbind(mktdata$OpClRet,rollingSkew))
}
 
 
 
dev.new()
par(mfrow=c(3,3))
startSkew <-30
stepSize <- 30
numOfSteps <- 9
 
doPlot <- function(x,y,title,xlabel,ylabel){
  #Function to plot data and add regression line
  plotData <-cbind(x,y)
  print(head(plotData))
  colnames(plotData) <-c("x","y")
  plot(coredata(plotData),type="p",main=title, xlab=xlabel,ylab=ylabel)
  abline(lm(y~x),col="red",lwd=1.5)
}
 
 
for(i in seq(startSkew,numOfSteps*stepSize,stepSize)){
dat <- getSkewnessFromReturns(mktdata,i)
colnames(dat) <- c("OpClRet","Skew")
doPlot(dat$Skew,dat$OpClRet,paste("Skewness vs Open Close Returns (lookback=",i,")"),"Skewness","Open Close Returns")
}

 

Share on FacebookShare on TwitterSubmit to StumbleUponhttp://gekkoquant.com/wp-content/uploads/2013/03/skewness-returns-regression.jpegDigg ThisSubmit to redditShare via email

$20mm seed capital tournament, “The Sharpe Ratio Shootout”

Hi, I’ve been asked to advertise this seed capital tournament. I get asked to advertise all sorts of things but this looks very promising for beginner quant traders to launch their careers.

Looking forwards to competing against some of you :)

Announcing: $20mm seed capital tournament, “The Sharpe Ratio Shootout” Registration ends March 15th, 2013.

Battle-Fin is launching its 4th systematic trading tournament.  Tournament Sponsors have pledged up to $20mm in allocations.

Battle-Fin uses cutting edge real-time tournaments to democratically identify tomorrow’s best trading strategies across asset classes.

The idea is to harness the power of the internet to identify trading strategies.

If you are a quantitative trader with a successful investment strategy, register for our next tournament by March 15th, 2013. There is no cost to enter and we do not ask to own trader IP.

REGISTER AT: www.battlefin.com

You can also contact us at (212) 201-5376 or info@battlefin.com

BATTE-FIN IN THE NEWS

Business Week recently wrote a feature article on Battle-Fin highlighting the 4 winners that are currently trading their allocations.
Click: http://www.businessweek.com/articles/2012-12-20/the-hedge-fund-hunger-games

Here is a recent Bloomberg TV interview on Market Makers with Battle Fin.

http://bloom.bg/XFIluz
Like us on Facebook at: http://www.facebook.com/BattleFin

Share on FacebookShare on TwitterSubmit to StumbleUponhttp://gekkoquant.com/wp-includes/images/smilies/icon_smile.gifDigg ThisSubmit to redditShare via email

Trend Following – Skewness Signal

Oxford Dictionary Definition of a trend

  • noun: a general direction in which something is developing or changing
Often people try to capture a trend using technical indicators such as moving average cross overs. If there is a trend this technique is generally good at capturing it. However if there isn’t a trend it becomes difficult to identify side ways markets with moving averages (see http://gekkoquant.com/2012/08/29/parameter-optimisation-backtesting-part-2/ moving averages performing excellently well during 2009 when there was a real trend).

Is there a better way to identify a trend? Potentially. A common modelling assumption in finance is to say that stock returns follow Brownian motion and that over short periods the mean return is 0, or in other words daily returns are Gaussian distributed with 0 mean.

This post will use the same assumption, asset returns are Guassian and hence returns are symmetrically distributed around their mean (assume 0 mean). Symmetric distributions have a skewness of 0, any deviation from 0 indicates that one of the tails is beginning to get bigger and possibly that the stock is trending.

Skewness is a measure of assymetery for a probability distribution, the skewness tells us the amount and direction of the skew. This strategy will take a rolling distribution of returns and calculate the skew for those returns. The skew will then be used to take trades.

Strategy:
  • If skewness > UptrendSkewLimit then go Long
  • If skewness < DowntrendSkewLimit then go Short
  • If skewness between UptrendSkewLimit and DowntrendSkewLimit then returns are symmetrical, it’s a sideways market do nothing
 

 

Trend Following - Annualized Sharpe Ratio (Rf=0%) 0.7162438

S&P500 Long Open Close Returns - Annualized Sharpe Ratio (Rf=0%) 0.2129997

?View Code RSPLUS
library("quantmod")
library("PerformanceAnalytics")
library(e1071)     #For the skewness command
 
#Model paramters
nLookback <- 30 #When calculating the rolling skew use the nLookback number of days
UptrendSkewLimit <- 0.3 #If the rolling skew is greater than this value go long
DowntrendSkewLimit <- -0.5 #If the rolling skew is lower than this value go short
#If the rolling skew is between the two limits do nothing, the skew is too weak to indicate a trend
 
#Script parameters
symbol <- "^GSPC"     #Symbol
 
#Specify dates for downloading data
startDate = as.Date("2005-01-01") #Specify what date to get the prices from
symbolData <- new.env() #Make a new environment for quantmod to store data in
getSymbols(symbol, env = symbolData, src = "yahoo", from = startDate)
mktdata <- eval(parse(text=paste("symbolData$",sub("^","",symbol,fixed=TRUE))))
mktdata <- head(mktdata,-1) #Hack to fix some stupid duplicate date problem with yahoo
dayOpClRet <- Cl(mktdata)/Op(mktdata)    - 1
cat("About to calculate the rolling skew")
 
#Lets calculate the rolling skew
#Lag the rolling skew by one day so the skew measured at the close of day T is sifted to day T+1
#The skew will be used to determine the trade at the open
rollingSkew <- Lag(rollapply(dayOpClRet,FUN="skewness",width=nLookback, align="right"),1)
 
#Possible improvement - Do a exponential moving average on the skew signal to smooth it
#rollingSkew <- EMA(rollingSkew,n=nLookback)
 
longSignals <- (rollingSkew>UptrendSkewLimit)
longReturns <- longSignals*dayOpClRet
shortSignals <- (rollingSkew<DowntrendSkewLimit)
shortReturns <- -1*shortSignals*dayOpClRet
totalReturns <- longReturns + shortReturns
#Uncomment the line below to increase the position size for larger skews
#totalReturns <- totalReturns * (abs(rollingSkew)+1)
totalReturns[is.na(totalReturns)] <- 0
 
GEKKORed <- rgb(255/255, 0/255, 0/255, 0.1)
GEKKOGreen <- rgb(0/255, 255/255, 0/255, 0.1)
 
dev.new()
par(mfrow=c(3,1))
plot(Cl(mktdata), main="Close of S&P 500")
lines(longSignals*max(Cl(mktdata)), col=(GEKKOGreen),type="h")
lines(shortSignals*max(Cl(mktdata)), col=(GEKKORed),type="h")
plot(rollingSkew)
abline(UptrendSkewLimit,0,col="green")
abline(DowntrendSkewLimit,0,col="red")
plot(cumsum(totalReturns), main="Cumulative Returns - Trend Following")
 
 
#### Performance Analysis ###
colnames(dayOpClRet) <- "Long IndexOpCloseRet"
zooTradeVec <- cbind(as.zoo(totalReturns),as.zoo(dayOpClRet)) #Convert to zoo object
colnames(zooTradeVec) <- c("Trend Following","S&P500 Long Open Close Returns")
zooTradeVec <- na.omit(zooTradeVec)
#Lets see how all the strategies faired against the index
dev.new()
charts.PerformanceSummary(zooTradeVec,main="Performance of Trend Following",geometric=FALSE)
 
#Calculate the sharpe ratio
cat("Sharpe Ratio")
print(SharpeRatio.annualized(zooTradeVec))
Share on FacebookShare on TwitterSubmit to StumbleUponhttp://gekkoquant.com/wp-content/uploads/2013/02/trend-following-trade-signals.jpegDigg ThisSubmit to redditShare via email

Statistical Arbitrage – Trading a cointegrated pair

In my last post http://gekkoquant.com/2012/12/17/statistical-arbitrage-testing-for-cointegration-augmented-dicky-fuller/ I demonstrated cointegration, a mathematical test to identify stationary pairs where the spread by definition must be mean reverting.

In this post I intend to show how to trade a cointegrated pair and will continue analysing Royal Dutch Shell A vs B shares (we know they’re cointegrated from my last post). Trading a cointegrated pair is straight forward, we know the mean and variance of the spread, we know that those values are constant. The entry point for a stat arb is to simply look for a large deviation away from the mean.

A basic strategy is:

  • If spread(t) >= Mean Spread + 2*Standard Deviation then go Short
  • If spread(t) <= Mean Spread – 2*Standard Deviation then go Long
There are many variations of this strategy
Moving average / moving standard deviation (this will be explored later):
  • If spread(t) >= nDay Moving Average + 2*nDay Rolling Standard deviation then go Short
  • If spread(t) <= nDay Moving Average – 2*nDay Rolling Standard deviation then go long
Wait for mean reversion:
  • If spread(t) <= Mean Spread + 2*Std AND spread(t-1)> Mean Spread + 2*Std
  • If spread(t) >= Mean Spread – 2*Std AND spread(t-1)< Mean Spread – 2*Std
  • Advantage is that we only trade when we see the mean reversion, where as the other models are hoping for mean reversion on a large deviation from the mean (is the spread blowing up?)
All the above strategies look to exit their position when the spread has reverted to the mean. Personally I wouldn’t trade any of the above as they don’t specify an exit strategy for adverse trades. Ie if there is a 6 standard deviation move in the spread is this an amazing trade opportunity? OR more likely did the spread just blow up.

This post will look at the moving average and rolling standard deviation model for Royal Dutch Shell A vs B shares, it will use the hedge ratio found in the last post.

Sharpe Ratio Shell A & B Stat Arb Shell A
Annualized Sharpe Ratio (Rf=0%):

Shell A&B Stat Arb 0.8224211

Shell A 0.166307

The stat arb has a Superior Sharpe ratio over simply investing in Shell A. At a first glance the sharpe ratio of 0.8 looks disappointing, however since the strategy spends most of it’s time out of the market it will have a low annualized sharpe ratio. To increase the sharpe ratio one can look at trading higher frequencies or have a portfolio pairs so that more time is spent in the market.

Onto the code:

?View Code RSPLUS
library("quantmod")
library("PerformanceAnalytics")
 
 
 
backtestStartDate = as.Date("2010-01-02") #Starting date for the backtest
 
symbolLst<-c("RDS-A","RDS-B")
title<-c("Royal Dutch Shell A vs B Shares")
 
### SECTION 1 - Download Data & Calculate Returns ###
#Download the data
symbolData <- new.env() #Make a new environment for quantmod to store data in
getSymbols(symbolLst, env = symbolData, src = "yahoo", from = backtestStartDate)
 
#We know this pair is cointegrated from the tutorial
#http://gekkoquant.com/2012/12/17/statistical-arbitrage-testing-for-cointegration-augmented-dicky-fuller/
#The tutorial found the hedge ratio to be 0.9653
stockPair <- list(
 a = coredata(Cl(eval(parse(text=paste("symbolData$\"",symbolLst[1],"\"",sep="")))))   #Stock A
,b = coredata(Cl(eval(parse(text=paste("symbolData$\"",symbolLst[2],"\"",sep=""))))) #Stock B
,hedgeRatio = 0.9653
,name=title)
 
simulateTrading <- function(stockPair){
#Generate the spread
spread <- stockPair$a - stockPair$hedgeRatio*stockPair$b
 
#Strategy is if the spread is greater than +/- nStd standard deviations of it's rolling 'lookback' day standard deviation
#Then go long or short accordingly
lookback <- 90 #look back 90 days
nStd <- 1.5 #Number of standard deviations from the mean to trigger a trade
 
movingAvg = rollmean(spread,lookback, na.pad=TRUE) #Moving average
movingStd = rollapply(spread,lookback,sd,align="right", na.pad=TRUE) #Moving standard deviation / bollinger bands
 
upperThreshold = movingAvg + nStd*movingStd
lowerThreshold = movingAvg - nStd*movingStd
 
aboveUpperBand <- spread>upperThreshold
belowLowerBand <- spread<lowerThreshold
 
aboveMAvg <- spread>movingAvg
belowMAvg <- spread<movingAvg
 
aboveUpperBand[is.na(aboveUpperBand)]<-0
belowLowerBand[is.na(belowLowerBand)]<-0
aboveMAvg[is.na(aboveMAvg)]<-0
belowMAvg[is.na(belowMAvg)]<-0
 
#The cappedCumSum function is where the magic happens
#Its a nice trick to avoid writing a while loop
#Hence since using vectorisation is faster than the while loop
#The function basically does a cumulative sum, but caps the sum to a min and max value
#It's used so that if we get many 'short sell triggers' it will only execute a maximum of 1 position
#Short position - Go short if spread is above upper threshold and go long if below the moving avg
#Note: shortPositionFunc only lets us GO short or close the position
cappedCumSum <- function(x, y,max_value,min_value) max(min(x + y, max_value), min_value)
shortPositionFunc <- function(x,y) { cappedCumSum(x,y,0,-1) }
longPositionFunc <- function(x,y) { cappedCumSum(x,y,1,0) }
shortPositions <- Reduce(shortPositionFunc,-1*aboveUpperBand+belowMAvg,accumulate=TRUE)
longPositions <- Reduce(longPositionFunc,-1*aboveMAvg+belowLowerBand,accumulate=TRUE)
positions = longPositions + shortPositions
 
dev.new()
par(mfrow=c(2,1))
plot(movingAvg,col="red",ylab="Spread",type='l',lty=2)
title("Shell A vs B spread with bollinger bands")
lines(upperThreshold, col="red")
lines(lowerThreshold, col="red")
lines(spread, col="blue")
legend("topright", legend=c("Spread","Moving Average","Upper Band","Lower Band"), inset = .02,
lty=c(1,2,1,1),col=c("blue","red","red","red")) # gives the legend lines the correct color and width
 
plot((positions),type='l')
 
#Calculate spread daily ret
 stockPair$a - stockPair$hedgeRatio*stockPair$b
aRet <- Delt(stockPair$a,k=1,type="arithmetic")
bRet <- Delt(stockPair$b,k=1,type="arithmetic")
dailyRet <- aRet - stockPair$hedgeRatio*bRet
dailyRet[is.na(dailyRet)] <- 0
 
tradingRet <- dailyRet * positions
    simulateTrading <- tradingRet
}
 
tradingRet <- simulateTrading(stockPair)
 
 
#### Performance Analysis ###
#Calculate returns for the index
indexRet <- Delt(Cl(eval(parse(text=paste("symbolData$\"",symbolLst[1],"\"",sep="")))),k=1,type="arithmetic") #Daily returns
indexRet <- as.zoo(indexRet)
tradingRetZoo <- indexRet
tradingRetZoo[,1] <- tradingRet
zooTradeVec <- as.zoo(cbind(tradingRetZoo,indexRet)) #Convert to zoo object
colnames(zooTradeVec) <- c("Shell A & B Stat Arb","Shell A")
zooTradeVec <- na.omit(zooTradeVec)
 
#Lets see how all the strategies faired against the index
dev.new()
charts.PerformanceSummary(zooTradeVec,main="Performance of Shell Statarb Strategy",geometric=FALSE)
 
cat("Sharpe Ratio")
print(SharpeRatio.annualized(zooTradeVec))
Share on FacebookShare on TwitterSubmit to StumbleUponhttp://gekkoquant.com/wp-content/uploads/2013/01/shellAvsB-bollinger-bands.jpegDigg ThisSubmit to redditShare via email