Strange issue in IfRuntimeIterator
ingomueller-net opened this issue · 2 comments
ingomueller-net commented
I get a strange issue in the following query:
import module namespace hep = "../../common/hep.jq";
declare variable $input-path as anyURI external := anyURI("../../../data/Run2012B_SingleMu-1000.parquet");
(: Hyperbolic sine via its exponential definition: (e^x - e^-x) / 2. :)
declare function sinh($x) {
(exp($x) - exp(-$x)) div 2.0
};
(: Hyperbolic cosine via its exponential definition: (e^x + e^-x) / 2. :)
declare function cosh($x) {
(exp($x) + exp(-$x)) div 2.0
};
(: Bins $values into $num-bins equal-width buckets spanning [$lo, $hi] and
   returns one {"x": bucket-center, "y": count} object per non-empty bucket,
   ordered by center. Out-of-range values are clamped into the first/last
   bucket rather than dropped. :)
declare function histogram($values, $lo, $hi, $num-bins) {
let $width := ($hi - $lo) div $num-bins
let $half-width := $width div 2
(: Bucket indices assigned to values below $lo / above $hi (the edge buckets). :)
let $underflow := round(($lo - $half-width) div $width)
let $overflow := round(($hi - $half-width) div $width)
for $v in $values
(: Shift by half a width before rounding so indices land on bucket centers. :)
let $bucket-idx :=
if ($v < $lo) then $underflow
else
if ($v > $hi) then $overflow
else round(($v - $half-width) div $width)
let $center := $bucket-idx * $width + $half-width
group by $center
order by $center
return {"x": $center, "y": count($v)}
};
(: Returns the event's muons and electrons as a single sequence, tagging each
   object with a "type" field ("m" for muons, "e" for electrons) so the two
   flavors remain distinguishable downstream. :)
declare function concat-leptons($event) {
let $muons := (
for $muon in $event.muons[]
return {| $muon, {"type": "m"} |}
)
let $electrons := (
for $electron in $event.electrons[]
return {| $electron, {"type": "e"} |}
)
return ($muons, $electrons)
};
(: Pseudorapidity from cylindrical coordinates: eta = asinh(z / rho),
   expanded via the identity asinh(t) = log(t + sqrt(t^2 + 1)). :)
declare function RhoZ-to-eta($rho, $z) {
let $temp := $z div $rho
return log($temp + sqrt($temp * $temp + 1.0))
};
(: Converts a four-vector from (pt, eta, phi, mass) representation to
   Cartesian (px, py, pz, E): pz = pt*sinh(eta), and E is derived from the
   transverse-plane magnitude pt*cosh(eta) and the invariant mass. :)
declare function PtEtaPhiM-to-PxPyPzE($vect) {
let $x := $vect.pt * cos($vect.phi)
let $y := $vect.pt * sin($vect.phi)
let $z := $vect.pt * sinh($vect.eta)
let $temp := $vect.pt * cosh($vect.eta)
let $e := sqrt($temp * $temp + $vect.mass * $vect.mass)
return {"x": $x, "y": $y, "z": $z, "e": $e}
};
(: Component-wise sum of two Cartesian four-vectors {x, y, z, e}. :)
declare function add-PxPyPzE($particle1, $particle2) {
let $x := $particle1.x + $particle2.x
let $y := $particle1.y + $particle2.y
let $z := $particle1.z + $particle2.z
let $e := $particle1.e + $particle2.e
return {"x": $x, "y": $y, "z": $z, "e": $e}
};
(: Inverse of PtEtaPhiM-to-PxPyPzE: converts a Cartesian four-vector back to
   (pt, eta, phi, mass). The invariant mass is sqrt(E^2 - |p|^2); phi is
   special-cased to 0 when the transverse components are both zero, since
   atan2(0, 0) is otherwise ill-defined. :)
declare function PxPyPzE-to-PtEtaPhiM($particle) {
let $x2 := $particle.x * $particle.x
let $y2 := $particle.y * $particle.y
let $z2 := $particle.z * $particle.z
let $e2 := $particle.e * $particle.e
let $pt := sqrt($x2 + $y2)
let $eta := RhoZ-to-eta($pt, $particle.z)
let $phi := if ($particle.x = 0.0 and $particle.y = 0.0)
then 0.0
else atan2($particle.y, $particle.x)
let $mass := sqrt($e2 - $z2 - $y2 - $x2)
return {"pt": $pt, "eta": $eta, "phi": $phi, "mass": $mass}
};
(: Adds two (pt, eta, phi, mass) four-vectors by converting both to Cartesian
   form, summing component-wise, and converting the result back. :)
declare function add-PtEtaPhiM($particle1, $particle2) {
PxPyPzE-to-PtEtaPhiM(
add-PxPyPzE(
PtEtaPhiM-to-PxPyPzE($particle1),
PtEtaPhiM-to-PxPyPzE($particle2)
)
)
};
(: Zips the event's parallel columnar Muon_* arrays into one object per muon.
   Assumes all Muon_* arrays have the same length as Muon_pt — TODO confirm
   against the input schema. :)
declare function make-muons($event) {
for $i in (1 to size($event.Muon_pt))
return {
"pt": $event.Muon_pt[[$i]],
"eta": $event.Muon_eta[[$i]],
"phi": $event.Muon_phi[[$i]],
"mass": $event.Muon_mass[[$i]],
"charge": $event.Muon_charge[[$i]],
"pfRelIso03_all": $event.Muon_pfRelIso03_all[[$i]],
"pfRelIso04_all": $event.Muon_pfRelIso04_all[[$i]],
"tightId": $event.Muon_tightId[[$i]],
"softId": $event.Muon_softId[[$i]],
"dxy": $event.Muon_dxy[[$i]],
"dxyErr": $event.Muon_dxyErr[[$i]],
"dz": $event.Muon_dz[[$i]],
"dzErr": $event.Muon_dzErr[[$i]],
"jetIdx": $event.Muon_jetIdx[[$i]],
"genPartIdx": $event.Muon_genPartIdx[[$i]]
}
};
(: Zips the event's parallel columnar Electron_* arrays into one object per
   electron. Assumes all Electron_* arrays have the same length as
   Electron_pt — TODO confirm against the input schema. :)
declare function make-electrons($event) {
for $i in (1 to size($event.Electron_pt))
return {
"pt": $event.Electron_pt[[$i]],
"eta": $event.Electron_eta[[$i]],
"phi": $event.Electron_phi[[$i]],
"mass": $event.Electron_mass[[$i]],
"charge": $event.Electron_charge[[$i]],
"pfRelIso03_all": $event.Electron_pfRelIso03_all[[$i]],
"dxy": $event.Electron_dxy[[$i]],
"dxyErr": $event.Electron_dxyErr[[$i]],
"dz": $event.Electron_dz[[$i]],
"dzErr": $event.Electron_dzErr[[$i]],
"cutBasedId": $event.Electron_cutBasedId[[$i]],
"pfId": $event.Electron_pfId[[$i]],
"jetIdx": $event.Electron_jetIdx[[$i]],
"genPartIdx": $event.Electron_genPartIdx[[$i]]
}
};
(: Zips the event's parallel columnar Jet_* arrays into one object per jet.
   Assumes all Jet_* arrays have the same length as Jet_pt — TODO confirm
   against the input schema. :)
declare function make-jets($event) {
for $i in (1 to size($event.Jet_pt))
return {
"pt": $event.Jet_pt[[$i]],
"eta": $event.Jet_eta[[$i]],
"phi": $event.Jet_phi[[$i]],
"mass": $event.Jet_mass[[$i]],
"puId": $event.Jet_puId[[$i]],
"btag": $event.Jet_btag[[$i]]
}
};
(: Augments a flat columnar event with nested "muons", "electrons", and
   "jets" arrays of per-particle objects; the original Muon_*/Electron_*/Jet_*
   fields are kept via the object-merge ({| ... |}) operator. :)
declare function restructure-event($event) {
let $muons := make-muons($event)
let $electrons := make-electrons($event)
let $jets := make-jets($event)
return {| $event,
{
"muons": [ $muons ],
"electrons": [ $electrons ],
"jets": [ $jets ]
}
|}
};
(: Applies restructure-event to every event in an in-memory sequence. :)
declare function restructure-data($data) {
for $event in $data
return restructure-event($event)
};
(: Reads events from a Parquet file at $path and restructures each one.
   parquet-file() is a RumbleDB built-in backed by Spark. :)
declare function restructure-data-parquet($path) {
for $event in parquet-file($path)
return restructure-event($event)
};
(: Main query: for each event with more than two leptons, find the
   same-flavor, opposite-charge lepton pair whose combined invariant mass is
   closest to 91.2 (presumably the Z boson mass in GeV — domain assumption,
   not verified here), take the max pt of the remaining leptons, and
   histogram those values over [15, 60] in 100 bins. :)
let $filtered := (
for $event in restructure-data-parquet($input-path)
(: NOTE(review): this count/where pair restricts processing to the 3rd
   event only — presumably left in to isolate the failing event for this
   bug report; remove it to run over the full dataset. :)
count $c
where $c eq 3
where integer($event.nMuon + $event.nElectron) > 2
let $leptons := concat-leptons($event)
(: Best candidate pair: same flavor, opposite charge, closest to 91.2. :)
let $closest-lepton-pair := (
for $lepton1 at $i in $leptons
for $lepton2 at $j in $leptons
where $i < $j
where $lepton1.type = $lepton2.type and $lepton1.charge != $lepton2.charge
order by abs(91.2 - add-PtEtaPhiM($lepton1, $lepton2).mass) ascending
return {"i": $i, "j": $j}
)[1]
where exists($closest-lepton-pair)
(: Highest pt among the leptons NOT in the chosen pair. :)
return max(
for $lepton at $i in $leptons
where $i != $closest-lepton-pair.i and $i != $closest-lepton-pair.j
return $lepton.pt
)
)
return histogram($filtered, 15, 60, 100)
To reproduce, you may need to adapt the value of `$input-path` so that it points to this input file on your machine.
The error I get is the following one:
21/02/16 13:16:04 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
log4j:WARN No appenders could be found for logger (org.apache.hadoop.security.UserGroupInformation).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
An error has occured: null
We should investigate this 🙈. Please contact us or file an issue on GitHub with your query.
Link: https://github.com/RumbleDB/rumble/issues
For more debug info (e.g., so you can communicate it to us), please try again using --show-error-info yes in your command line.
java.lang.NullPointerException
at org.rumbledb.runtime.control.IfRuntimeIterator.closeLocal(IfRuntimeIterator.java:105)
at org.rumbledb.runtime.HybridRuntimeIterator.close(HybridRuntimeIterator.java:84)
at org.rumbledb.runtime.RuntimeIterator.lambda$0(RuntimeIterator.java:187)
at java.util.ArrayList.forEach(ArrayList.java:1257)
at org.rumbledb.runtime.RuntimeIterator.close(RuntimeIterator.java:187)
at org.rumbledb.runtime.HybridRuntimeIterator.close(HybridRuntimeIterator.java:82)
at org.rumbledb.runtime.RuntimeIterator.materialize(RuntimeIterator.java:274)
at org.rumbledb.runtime.flwor.udfs.LetClauseUDF.call(LetClauseUDF.java:57)
at org.rumbledb.runtime.flwor.udfs.LetClauseUDF.call(LetClauseUDF.java:1)
at org.apache.spark.sql.UDFRegistration.$anonfun$register$283(UDFRegistration.scala:747)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.project_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.sort_addToSorter_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:729)
at org.apache.spark.sql.execution.aggregate.SortAggregateExec.$anonfun$doExecute$2(SortAggregateExec.scala:80)
at org.apache.spark.sql.execution.aggregate.SortAggregateExec.$anonfun$doExecute$2$adapted(SortAggregateExec.scala:77)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndexInternal$2(RDD.scala:859)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndexInternal$2$adapted(RDD.scala:859)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
The issue is strange because if I change basically anything in the query, the error goes away. I have thus not succeeded in producing a shorter test case.
ghislainfourny commented
Thanks for reporting @ingomueller-net
It's a bug that happens rarely, good catch.
The problem was a Higgs boson that decayed into four leptons. I just adjusted the curvature of spacetime a bit and it now works.
ingomueller-net commented
OK, it works. I can see the Higgs boson now!