xitongsys/parquet-go

Convert csv to parquet but getting empty file

IsabelDing00 opened this issue · 1 comments

When converting a CSV file to parquet:

  1. I got a 4-byte parquet file containing only "PAR1".
  2. WriteStop() panics with the error: interface conversion: interface {} is nil, not string

Here is my code:

package main

import (
	"bufio"
	"encoding/csv"
	"fmt"
	"io"
	"log"
	"os"

	"github.com/xitongsys/parquet-go-source/local"
	"github.com/xitongsys/parquet-go/parquet"
	"github.com/xitongsys/parquet-go/writer"
)

// custReport is the row type handed to the parquet writer. Its single field
// is tagged as the BYTE_ARRAY column "phone_number".
//
// NOTE(review): phoneNumber starts with a lowercase letter, so it is an
// unexported field; the maintainer's reply in this thread says the first
// letter should be upper.
type custReport struct {
	phoneNumber string `parquet:"name=phone_number, type=BYTE_ARRAY"`
}

// main reads test.csv and writes the first column of every record to
// test.parquet as a custReport row. No header row is skipped: every record
// returned by the CSV reader is written.
//
// Setup failures (creating the output file, creating the parquet writer,
// opening the CSV file) terminate the program instead of continuing with a
// handle that cannot be used. A failing Write on a single row is reported and
// the conversion carries on with the next row.
func main() {
	fw, err := local.NewLocalFileWriter("test.parquet")
	if err != nil {
		log.Fatalln("Fail to create parquet file path:", err)
	}

	pw, err := writer.NewParquetWriter(fw, new(custReport), 1)
	if err != nil {
		log.Fatalln("Fail to create parquet writer:", err)
	}

	pw.RowGroupSize = 128 * 1024 * 1024 //128M
	pw.CompressionType = parquet.CompressionCodec_SNAPPY

	csvFile, err := os.Open("test.csv")
	if err != nil {
		log.Fatalln("Fail to open csv file:", err)
	}
	defer csvFile.Close()

	reader := csv.NewReader(bufio.NewReader(csvFile))
	for {
		// readErr rather than "error", which would shadow the builtin type.
		line, readErr := reader.Read()
		if readErr == io.EOF {
			break
		}
		if readErr != nil {
			log.Fatal(readErr)
		}

		report := custReport{
			phoneNumber: line[0],
		}
		if err = pw.Write(report); err != nil {
			fmt.Println("Write error", err)
		}
	}

	// WriteStop must succeed before the output can be called finished.
	if err = pw.WriteStop(); err != nil {
		log.Fatalln("WriteStop error:", err)
	}

	if err = fw.Close(); err != nil {
		log.Fatalln("Fail to close parquet file:", err)
	}

	fmt.Println("Write Finished")
}

csv file: test.csv

Phone Number
+13912345678

@xitongsys

The first letter of the struct field name should be uppercase, so that the field is exported:

// custReport with its field renamed to PhoneNumber: the first letter is
// uppercase, which makes the field exported. The parquet tag is unchanged.
type custReport struct {
	PhoneNumber string `parquet:"name=phone_number, type=BYTE_ARRAY"`
}