/sscheck

ScalaCheck for Spark

Primary language: Scala. License: Apache License 2.0 (Apache-2.0).

sscheck

Utilities for using ScalaCheck with Spark and Spark Streaming, based on Specs2

Build Status

Use linear temporal logic to write ScalaCheck properties for Spark Streaming programs, like this one:

/** Builds a property that checks `testSubject`, a transformation from a stream of
  * `(UserId, Boolean)` records to a stream of `UserId`s, against a temporal-logic formula.
  *
  * Input batches hold 20 records whose ids are drawn from 1 to 50 and whose flag is `true`;
  * a "bad" batch is such a batch plus one extra `(15L, false)` record. The formula requires
  * that every input record carries `true` until id 15 shows up in the output (within
  * `headTimeout`), and that, throughout `tailTimeout`, whenever the bad record is in the
  * input, id 15 is in the output for the following `nestedTimeout`.
  *
  * NOTE(review): the exact semantics of `BatchGen.until`/`BatchGen.always` and of the
  * timeout units are defined by the library, not here -- presumably they count batches.
  *
  * @param testSubject the stream transformation under test
  * @return whatever `DStreamProp.forAll` yields for this generator, subject and formula
  */
def checkExtractBannedUsersList(testSubject : DStream[(UserId, Boolean)] => DStream[UserId]) = {
  // Batch size and the time bounds used by both the generator and the formula.
  val batchSize = 20
  val headTimeout = 10
  val tailTimeout = 10
  val nestedTimeout = 5

  // The id expected to end up in the output, and the generator for ordinary ids.
  val badId = 15L
  val ids = Gen.choose(1L, 50L)

  // A good batch only has `true`-flagged records; a bad batch adds one (badId, false).
  val goodBatch = BatchGen.ofN(batchSize, ids.map(id => (id, true)))
  val badBatch = goodBatch + BatchGen.ofN(1, (badId, false))
  val gen =
    BatchGen.until(goodBatch, badBatch, headTimeout) ++
      BatchGen.always(Gen.oneOf(goodBatch, badBatch), tailTimeout)

  // Each observed instant pairs the input batch with the output batch.
  type U = (RDD[(UserId, Boolean)], RDD[UserId])
  val inBatch = (instant : U) => instant._1
  val outBatch = (instant : U) => instant._2

  // Atomic propositions over a single instant.
  val badInput : Formula[U] =
    at(inBatch) { batch => batch should existsRecord(_ == (badId, false)) }
  val allGoodInputs : Formula[U] =
    at(inBatch) { batch => batch should foreachRecord(record => record._2 == true) }
  val badIdBanned : Formula[U] =
    at(outBatch) { batch => batch should existsRecord(id => id == badId) }

  // Head of the stream: only good inputs until the bad id is reported.
  val goodUntilBanned = allGoodInputs until badIdBanned on headTimeout
  // Tail of the stream: a bad input implies the bad id stays reported for a while.
  val banPersists =
    always { badInput ==> (always(badIdBanned) during nestedTimeout) } during tailTimeout

  val formula : Formula[U] = goodUntilBanned and banPersists

  DStreamProp.forAll(gen)(testSubject)(formula)
}

See the Quickstart for more details on the temporal logic, and for using this library with Spark core.

Acknowledgements

This work has been partially supported by the Spanish MICINN project StrongSoft (TIN2012-39391-C04-04), by the Spanish MINECO project CAVI-ART (TIN2013-44742-C4-3-R), and by the Comunidad de Madrid project N-Greens Software-CM (S2013/ICE-2731).

Some parts of this code are based on Spark Testing Base by Holden Karau.