toPartitionedTextFile is creating too many partitions
raronson opened this issue · 0 comments
raronson commented
Test case:
import org.specs2._
import com.nicta.scoobi.Scoobi._
import com.nicta.scoobi.testing.mutable._
import com.nicta.scoobi.testing.TestFiles._
import org.specs2.matcher.FileMatchers
import com.nicta.scoobi.testing.{TempFiles, SimpleJobs}
import java.io.File
/**
 * Reproduction for `toPartitionedTextFile` creating too many partitions.
 *
 * Three pairs sharing the key "a/b/c" are written with `identity` as the
 * partition function, and the test asserts that the output directory
 * contains exactly one top-level entry, "a".
 */
class Testcase extends HadoopSpecification with SimpleJobs with FileMatchers {
  // Overrides the specification's cluster flag so it reports false.
  override def isCluster = false

  "Too many partitions get created" >> { implicit sc: ScoobiConfiguration =>
    // NOTE(review): `.pp` presumably prints the temp path and passes it through — confirm.
    val base = path(TempFiles.createTempDir("tmp").getPath).pp
    val partitions = base + "/partitions"
    val dlist = DList(("a/b/c", 1), ("a/b/c", 2), ("a/b/c", 3))

    dlist.toPartitionedTextFile(partitions, identity).persist

    // File.list returns null when the path is not a readable directory; wrap it
    // so a missing output directory fails the matcher instead of throwing an NPE.
    val created = Option(new File(partitions).list).map(_.toList).getOrElse(Nil)
    created must_== List("a")
  }
}
Only the `a` directory was expected under `partitions`, but the following directory structure was created:
/var/folders/6h/963cvjm52bq3s809wmz38d300000gp/T/tmp3625713493852866692
└── partitions
    ├── 5
    │   └── a
    │       └── b
    │           └── c
    ├── a
    │   └── b
    │       └── c
    │           └── ch4out5-m-00001
    └── var
        └── folders
            └── 6h
                └── 963cvjm52bq3s809wmz38d300000gp
                    └── T
                        └── tmp3625713493852866692
                            └── partitions
                                └── _SUCCESS