panic: assignment to entry in nil map
AlamoTNT opened this issue · 6 comments
I got the error `panic: assignment to entry in nil map`
when I was trying to tokenize the following text:
"C:\Program Files\Java\jre1.8. 0_202\bin\java.exe" -Djava.util.logging.config.file="C:\Users\Administrator\Desktop\apache-tomcat-9.0.52\conf\logging.properties" -Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager -Djdk.tls.ephemeralDHKeySize=2048 -Djava.protocol.handler.pkgs=org.apache.catalina.webresources -Dignore.endorsed.dirs="" -classpath "C:\Users\Administrator\Desktop\apache-tomcat-9.0.52\bin\bootstrap.jar;C:\Users\Administrator\Desktop\apache-tomcat-9.0.52\bin\tomcat-juli.jar" -Dcatalina.base="C:\Users\Administrator\Desktop\apache-tomcat-9.0.52" -Dcatalina.home="C:\Users\Administrator\Desktop\apache-tomcat-9.0.52" -Djava.io.tmpdir="C:\Users\Administrator\Desktop\apache-tomcat-9.0.52\temp" org.apache.catalina.startup.Bootstrap start`
I debugged the source code and found that something looks wrong around line 96 of tokenize.go: for each element e
of encoding.GetOverflowing(), e.SequenceRanges is not initialized and appears to be nil. The error therefore occurs when e.SetSequenceIds(i) is called, because it assigns a value to that nil map. Please check and confirm whether this can be fixed soon.
Thanks for reporting. Could you please provide a minimal code sample that reproduces the error?
This package is used in quite a few projects, and without sample code to reproduce the problem it is hard to track down the root cause.
InputMaxLength := 256
file := "xxxx\\bert-base-uncased\\tokenizer.json"
bertTokenizer, err := pretrained.FromFile(file)
if err != nil {
return nil, err
}
truncationParams := tokenizer.TruncationParams{
MaxLength: InputMaxLength,
Strategy: tokenizer.OnlyFirst,
}
bertTokenizer.WithTruncation(&truncationParams)
paddingParams := tokenizer.PaddingParams{
Strategy: *tokenizer.NewPaddingStrategy(tokenizer.WithFixed(InputMaxLength)),
Direction: tokenizer.Right,
}
bertTokenizer.WithPadding(&paddingParams)
text := `"C:\Program Files\Java\jre1.8. 0_202\bin\java.exe" -Djava.util.logging.config.file="C:\Users\Administrator\Desktop\apache-tomcat-9.0.52\conf\logging.properties" -Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager -Djdk.tls.ephemeralDHKeySize=2048 -Djava.protocol.handler.pkgs=org.apache.catalina.webresources -Dignore.endorsed.dirs="" -classpath "C:\Users\Administrator\Desktop\apache-tomcat-9.0.52\bin\bootstrap.jar;C:\Users\Administrator\Desktop\apache-tomcat-9.0.52\bin\tomcat-juli.jar" -Dcatalina.base="C:\Users\Administrator\Desktop\apache-tomcat-9.0.52" -Dcatalina.home="C:\Users\Administrator\Desktop\apache-tomcat-9.0.52" -Djava.io.tmpdir="C:\Users\Administrator\Desktop\apache-tomcat-9.0.52\temp" org.apache.catalina.startup.Bootstrap start`
inputSequence := tokenizer.NewInputSequence(text)
singleEncodeInput := tokenizer.NewSingleEncodeInput(inputSequence)
encoding, _ := bertTokenizer.Encode(singleEncodeInput, true)
I just pushed a fix to master and tested it with the following code, which now runs without a panic:
package main
import (
"flag"
"fmt"
"log"
"github.com/sugarme/tokenizer"
"github.com/sugarme/tokenizer/pretrained"
)
var (
modelName string
)
// init registers the -model command-line flag, which is stored in modelName
// and defaults to "bert-base-uncased".
func init() {
	const defaultModel = "bert-base-uncased"
	flag.StringVar(
		&modelName,
		"model",
		defaultModel,
		"model name as at Huggingface model hub e.g. 'tiiuae/falcon-7b'. Default='bert-base-uncased'",
	)
}
// main is a reproduction program for the "assignment to entry in nil map"
// report. It parses the command-line flags, obtains the tokenizer.json path
// for modelName via tokenizer.CachedPath, builds a tokenizer from that file,
// enables truncation (OnlyFirst) and fixed-length right padding at 256, encodes
// a long Windows command line, and prints the resulting tokens, ids and offsets.
func main() {
	flag.Parse()

	cfgPath, err := tokenizer.CachedPath(modelName, "tokenizer.json")
	if err != nil {
		panic(err)
	}

	tk, err := pretrained.FromFile(cfgPath)
	if err != nil {
		panic(err)
	}

	// The same length is used for both truncation and fixed padding; the
	// sample text below is long enough to exceed it.
	maxLen := 256

	tk.WithTruncation(&tokenizer.TruncationParams{
		MaxLength: maxLen,
		Strategy:  tokenizer.OnlyFirst,
	})
	tk.WithPadding(&tokenizer.PaddingParams{
		Strategy:  *tokenizer.NewPaddingStrategy(tokenizer.WithFixed(maxLen)),
		Direction: tokenizer.Right,
	})

	text := `"C:\Program Files\Java\jre1.8. 0_202\bin\java.exe" -Djava.util.logging.config.file="C:\Users\Administrator\Desktop\apache-tomcat-9.0.52\conf\logging.properties" -Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager -Djdk.tls.ephemeralDHKeySize=2048 -Djava.protocol.handler.pkgs=org.apache.catalina.webresources -Dignore.endorsed.dirs="" -classpath "C:\Users\Administrator\Desktop\apache-tomcat-9.0.52\bin\bootstrap.jar;C:\Users\Administrator\Desktop\apache-tomcat-9.0.52\bin\tomcat-juli.jar" -Dcatalina.base="C:\Users\Administrator\Desktop\apache-tomcat-9.0.52" -Dcatalina.home="C:\Users\Administrator\Desktop\apache-tomcat-9.0.52" -Djava.io.tmpdir="C:\Users\Administrator\Desktop\apache-tomcat-9.0.52\temp" org.apache.catalina.startup.Bootstrap start`

	en, err := tk.EncodeSingle(text, true)
	if err != nil {
		log.Fatal(err)
	}

	// Tokens are printed quoted (%q) so that backslashes and quote characters
	// in the input remain unambiguous.
	fmt.Printf("%-10s: %q\n", "Tokens", en.Tokens)
	fmt.Printf("%-10s: %v\n", "Ids", en.Ids)
	fmt.Printf("%-10s: %v\n", "Offsets", en.Offsets)
}
Output:
Tokens : ["[CLS]" "\"" "c" ":" "\\" "program" "files" "\\" "java" "\\" "jr" "##e" "##1" "." "8" "." "0" "_" "202" "\\" "bin" "\\" "java" "." "ex" "##e" "\"" "-" "dj" "##ava" "." "ut" "##il" "." "logging" "." "con" "##fi" "##g" "." "file" "=" "\"" "c" ":" "\\" "users" "\\" "administrator" "\\" "desktop" "\\" "apache" "-" "tom" "##cat" "-" "9" "." "0" "." "52" "\\" "con" "##f" "\\" "logging" "." "properties" "\"" "-" "dj" "##ava" "." "ut" "##il" "." "logging" "." "manager" "=" "org" "." "apache" "." "jul" "##i" "." "class" "##load" "##er" "##log" "##mana" "##ger" "-" "dj" "##d" "##k" "." "t" "##ls" "." "ep" "##hem" "##eral" "##dh" "##key" "##si" "##ze" "=" "204" "##8" "-" "dj" "##ava" "." "protocol" "." "handler" "." "p" "##k" "##gs" "=" "org" "." "apache" "." "catalina" "." "web" "##res" "##our" "##ces" "-" "dig" "##nor" "##e" "." "endorsed" "." "dir" "##s" "=" "\"" "\"" "-" "class" "##path" "\"" "c" ":" "\\" "users" "\\" "administrator" "\\" "desktop" "\\" "apache" "-" "tom" "##cat" "-" "9" "." "0" "." "52" "\\" "bin" "\\" "boots" "##tra" "##p" "." "jar" ";" "c" ":" "\\" "users" "\\" "administrator" "\\" "desktop" "\\" "apache" "-" "tom" "##cat" "-" "9" "." "0" "." "52" "\\" "bin" "\\" "tom" "##cat" "-" "jul" "##i" "." "jar" "\"" "-" "dc" "##atal" "##ina" "." "base" "=" "\"" "c" ":" "\\" "users" "\\" "administrator" "\\" "desktop" "\\" "apache" "-" "tom" "##cat" "-" "9" "." "0" "." "52" "\"" "-" "dc" "##atal" "##ina" "." "home" "=" "\"" "c" ":" "\\" "users" "\\" "administrator" "\\" "desktop" "\\" "apache" "-" "[SEP]"]
Ids : [101 1000 1039 1024 1032 2565 6764 1032 9262 1032 3781 2063 2487 1012 1022 1012 1014 1035 16798 1032 8026 1032 9262 1012 4654 2063 1000 1011 6520 12462 1012 21183 4014 1012 15899 1012 9530 8873 2290 1012 5371 1027 1000 1039 1024 1032 5198 1032 8911 1032 15363 1032 15895 1011 3419 11266 1011 1023 1012 1014 1012 4720 1032 9530 2546 1032 15899 1012 5144 1000 1011 6520 12462 1012 21183 4014 1012 15899 1012 3208 1027 8917 1012 15895 1012 21650 2072 1012 2465 11066 2121 21197 24805 4590 1011 6520 2094 2243 1012 1056 4877 1012 4958 29122 21673 16425 14839 5332 4371 1027 19627 2620 1011 6520 12462 1012 8778 1012 28213 1012 1052 2243 5620 1027 8917 1012 15895 1012 22326 1012 4773 6072 8162 9623 1011 10667 12131 2063 1012 11763 1012 16101 2015 1027 1000 1000 1011 2465 15069 1000 1039 1024 1032 5198 1032 8911 1032 15363 1032 15895 1011 3419 11266 1011 1023 1012 1014 1012 4720 1032 8026 1032 6879 6494 2361 1012 15723 1025 1039 1024 1032 5198 1032 8911 1032 15363 1032 15895 1011 3419 11266 1011 1023 1012 1014 1012 4720 1032 8026 1032 3419 11266 1011 21650 2072 1012 15723 1000 1011 5887 27815 3981 1012 2918 1027 1000 1039 1024 1032 5198 1032 8911 1032 15363 1032 15895 1011 3419 11266 1011 1023 1012 1014 1012 4720 1000 1011 5887 27815 3981 1012 2188 1027 1000 1039 1024 1032 5198 1032 8911 1032 15363 1032 15895 1011 102]
Offsets : [[0 0] [0 1] [1 2] [2 3] [3 4] [4 11] [12 17] [17 18] [18 22] [22 23] [23 25] [25 26] [26 27] [27 28] [28 29] [29 30] [31 32] [32 33] [33 36] [36 37] [37 40] [40 41] [41 45] [45 46] [46 48] [48 49] [49 50] [52 53] [53 55] [55 58] [58 59] [59 61] [61 63] [63 64] [64 71] [71 72] [72 75] [75 77] [77 78] [78 79] [79 83] [83 84] [84 85] [85 86] [86 87] [87 88] [88 93] [93 94] [94 107] [107 108] [108 115] [115 116] [116 122] [122 123] [123 126] [126 129] [129 130] [130 131] [131 132] [132 133] [133 134] [134 136] [136 137] [137 140] [140 141] [141 142] [142 149] [149 150] [150 160] [160 161] [162 163] [163 165] [165 168] [168 169] [169 171] [171 173] [173 174] [174 181] [181 182] [182 189] [189 190] [190 193] [193 194] [194 200] [200 201] [201 204] [204 205] [205 206] [206 211] [211 215] [215 217] [217 220] [220 224] [224 227] [229 230] [230 232] [232 233] [233 234] [234 235] [235 236] [236 238] [238 239] [239 241] [241 244] [244 248] [248 250] [250 253] [253 255] [255 257] [257 258] [258 261] [261 262] [263 264] [264 266] [266 269] [269 270] [270 278] [278 279] [279 286] [286 287] [287 288] [288 289] [289 291] [291 292] [292 295] [295 296] [296 302] [302 303] [303 311] [311 312] [312 315] [315 318] [318 321] [321 324] [327 328] [328 331] [331 334] [334 335] [335 336] [336 344] [344 345] [345 348] [348 349] [349 350] [350 351] [351 352] [353 354] [354 359] [359 363] [364 365] [365 366] [366 367] [367 368] [368 373] [373 374] [374 387] [387 388] [388 395] [395 396] [396 402] [402 403] [403 406] [406 409] [409 410] [410 411] [411 412] [412 413] [413 414] [414 416] [416 417] [417 420] [420 421] [421 426] [426 429] [429 430] [430 431] [431 434] [434 435] [435 436] [436 437] [437 438] [438 443] [443 444] [444 457] [457 458] [458 465] [465 466] [466 472] [472 473] [473 476] [476 479] [479 480] [480 481] [481 482] [482 483] [483 484] [484 486] [486 487] [487 490] [490 491] [491 494] [494 497] [497 498] [498 501] [501 502] [502 503] [503 506] [506 507] [508 509] [509 
511] [511 515] [515 518] [518 519] [519 523] [523 524] [524 525] [525 526] [526 527] [527 528] [528 533] [533 534] [534 547] [547 548] [548 555] [555 556] [556 562] [562 563] [563 566] [566 569] [569 570] [570 571] [571 572] [572 573] [573 574] [574 576] [576 577] [578 579] [579 581] [581 585] [585 588] [588 589] [589 593] [593 594] [594 595] [595 596] [596 597] [597 598] [598 603] [603 604] [604 617] [617 618] [618 625] [625 626] [626 632] [632 633] [0 0]]
Please check whether these are the expected results.