Fix travis build #46

Merged · 7 commits · Jul 7, 2018
4 changes: 3 additions & 1 deletion .travis.yml
@@ -3,7 +3,8 @@ sudo: false
addons:
postgresql: 9.4
go:
- 1.4
- 1.8
- 1.9
- tip
cache:
directories:
@@ -13,6 +14,7 @@ install:
- go get github.com/lib/pq
- go get github.com/kennygrant/sanitize
- go get github.com/cheggaaa/pb
- go get github.com/JensRantil/go-csv
- ./download_samples.sh
script:
- go install && ./test.sh
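The net effect on the build matrix: Go 1.4 is dropped and builds now run on 1.8, 1.9, and tip. A sketch of the resulting `go:` section, reconstructed from the hunk above:

```yaml
go:
  - 1.8   # replaces the dropped 1.4
  - 1.9
  - tip   # Go's development branch
```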
15 changes: 4 additions & 11 deletions README.md
@@ -2,7 +2,7 @@

<img align="right" alt="elephant" src="elephant.jpg" />

Import CSV and JSON into PostgreSQL the easy way.
Import CSV (RFC 4180) and JSON into PostgreSQL the easy way.
This small tool abstracts all the hassles and swearing you normally
have to deal with when you just want to dump some data into the database.

@@ -13,6 +13,7 @@ Features:
- Easy deployment
- Dealing with import errors
- Import over the network
- Only supports UTF8 encoding

> Check out [pgclimb](https://github.com/lukasmartinelli/pgclimb) for exporting data from PostgreSQL into different data formats.

@@ -232,15 +233,6 @@ This works the same for invalid JSON objects.
pgfutter csv --table violations traffic_violations.csv
```

### Import single JSON object

Instead of using JSON lines you can also [import a single JSON object](https://github.com/lukasmartinelli/pgfutter/issues/9)
into the database. This will load the JSON document into memory first.

```bash
pgfutter jsonobj document.json
```

## Alternatives

For more sophisticated needs you should take a look at [pgloader](http://pgloader.io).
@@ -267,5 +259,6 @@ We use [gox](https://github.com/mitchellh/gox) to create distributable
binaries for Windows, OSX and Linux.

```bash
docker run --rm -v "$(pwd)":/usr/src/pgfutter -w /usr/src/pgfutter tcnksm/gox:1.4.2-light
docker run --rm -v "$(pwd)":/usr/src/pgfutter -w /usr/src/pgfutter tcnksm/gox:1.9

```
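The README changes above do not yet mention the new `--excel` flag introduced in pgfutter.go further down; a usage sketch (the table and file names here are hypothetical):

```bash
# Excel 2008/2011 CSV exports end records with \r instead of \n;
# --excel switches the reader's line terminator to match.
pgfutter csv --excel --table violations excel_export.csv
```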
26 changes: 17 additions & 9 deletions csv.go
@@ -1,7 +1,6 @@
package main

import (
"encoding/csv"
"errors"
"fmt"
"io"
@@ -10,6 +9,7 @@ import (
"unicode/utf8"

"github.com/cheggaaa/pb"
csv "github.com/JensRantil/go-csv"
)

func containsDelimiter(col string) bool {
@@ -48,7 +48,9 @@ func parseColumns(reader *csv.Reader, skipHeader bool, fields string) ([]string,
}
} else {
columns, err = reader.Read()
fmt.Printf("%v columns\n%v\n", len(columns), columns)
if err != nil {
fmt.Printf("FOUND ERR\n")
return nil, err
}
}
@@ -120,14 +122,25 @@ func copyCSVRows(i *Import, reader *csv.Reader, ignoreErrors bool, delimiter str
return nil, success, failed
}

func importCSV(filename string, connStr string, schema string, tableName string, ignoreErrors bool, skipHeader bool, fields string, delimiter string) error {
func importCSV(filename string, connStr string, schema string, tableName string, ignoreErrors bool, skipHeader bool, fields string, delimiter string, excel bool) error {

db, err := connect(connStr, schema)
if err != nil {
return err
}
defer db.Close()

dialect := csv.Dialect{}
dialect.Delimiter, _ = utf8.DecodeRuneInString(delimiter)

// Excel 2008 and 2011 (and possibly other versions) use a carriage return \r
// rather than a line feed \n as the newline
if excel {
dialect.LineTerminator = "\r"
} else {
dialect.LineTerminator = "\n"
}

var reader *csv.Reader
var bar *pb.ProgressBar
if filename != "" {
@@ -138,21 +151,16 @@ func importCSV(filename string, connStr string, schema string, tableName string,
defer file.Close()

bar = NewProgressBar(file)
reader = csv.NewReader(io.TeeReader(file, bar))
reader = csv.NewDialectReader(io.TeeReader(file, bar), dialect)
} else {
reader = csv.NewReader(os.Stdin)
reader = csv.NewDialectReader(os.Stdin, dialect)
}

reader.Comma, _ = utf8.DecodeRuneInString(delimiter)
reader.LazyQuotes = true

columns, err := parseColumns(reader, skipHeader, fields)
if err != nil {
return err
}

reader.FieldsPerRecord = len(columns)

i, err := NewCSVImport(db, schema, tableName, columns)
if err != nil {
return err
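To make the dialect switch concrete, here is a minimal sketch of reading Excel-style \r-terminated records, using only the go-csv API calls that appear in the diff above (the sample input is invented for illustration):

```go
package main

import (
	"fmt"
	"strings"
	"unicode/utf8"

	csv "github.com/JensRantil/go-csv"
)

func main() {
	// Mirror the dialect construction in importCSV above.
	dialect := csv.Dialect{}
	dialect.Delimiter, _ = utf8.DecodeRuneInString(",")
	dialect.LineTerminator = "\r" // Excel-style record terminator

	// Two records separated by carriage returns instead of line feeds.
	input := "a,b,c\rd,e,f\r"
	reader := csv.NewDialectReader(strings.NewReader(input), dialect)

	for {
		record, err := reader.Read() // same []string-per-record shape as encoding/csv
		if err != nil {
			break // io.EOF once the input is exhausted
		}
		fmt.Println(record)
	}
}
```

Handling the terminator at the dialect level keeps the rest of importCSV untouched: the reader can still be wrapped in io.TeeReader for the progress bar, with no preprocessing pass over the input.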
32 changes: 15 additions & 17 deletions download_samples.sh
@@ -5,30 +5,28 @@ SAMPLES_DIR="$CWD/samples"
function download_json_samples() {
mkdir -p $SAMPLES_DIR
cd $SAMPLES_DIR
wget -nc http://data.githubarchive.org/2015-01-01-15.json.gz && gunzip -f 2015-01-01-15.json.gz
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/json_sample_2015-01-01-15.json
cd $CWD
}

function download_csv_samples() {
mkdir -p $SAMPLES_DIR
cd $SAMPLES_DIR
wget -nc -O local_severe_wheather_warning_systems.csv https://data.mo.gov/api/views/n59h-ggai/rows.csv
wget -nc -O montgomery_crime.csv https://data.montgomerycountymd.gov/api/views/icn6-v9z3/rows.csv
wget -nc -O employee_salaries.csv https://data.montgomerycountymd.gov/api/views/54rh-89p8/rows.csv
wget -nc -O residential_permits.csv https://data.montgomerycountymd.gov/api/views/m88u-pqki/rows.csv
wget -nc -O customer_complaints.csv https://data.consumerfinance.gov/api/views/x94z-ydhh/rows.csv
wget -nc -O traffic_violations.csv https://data.montgomerycountymd.gov/api/views/4mse-ku6q/rows.csv
wget -nc -O distribution_of_wealth_switzerland.csv http://bar-opendata-ch.s3.amazonaws.com/Kanton-ZH/Statistik/Distribution_of_wealth.csv
wget -nc http://bar-opendata-ch.s3.amazonaws.com/Kanton-ZH/Statistik/Wealth_groups.csv
wget -nc http://bar-opendata-ch.s3.amazonaws.com/Kanton-ZH/Statistik/Vermoegensklassen.csv
wget -nc http://bar-opendata-ch.s3.amazonaws.com/Kanton-ZH/Statistik/Steuertarife.csv
wget -nc http://bar-opendata-ch.s3.amazonaws.com/Kanton-ZH/Statistik/Tax_rates.csv
wget -nc -O whitehouse_visits_2014.zip https://www.whitehouse.gov/sites/default/files/disclosures/whitehouse_waves-2014_12.csv_.zip && unzip -o whitehouse_visits_2014.zip && rm -f whitehouse_visits_2014.csv && mv whitehouse_waves-2014_12.csv.csv whitehouse_visits_2014.csv
wget -nc http://bar-opendata-ch.s3.amazonaws.com/ch.bag/Spitalstatistikdateien/qip/2012/qip12_tabdaten.csv
wget -nc http://bar-opendata-ch.s3.amazonaws.com/ch.bar.bar-02/Metadatenbank-Vernehmlassungen-OGD-V1-3.csv
wget -nc https://www.data.gov/app/uploads/2015/08/opendatasites.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_distribution_of_wealth_switzerland.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_employee_salaries.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_local_severe_wheather_warning_systems.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_montgomery_crime.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_qip12_tabdaten.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_residential_permits.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_sacramentocrime_jan_2006.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_sacramento_realestate_transactions.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_sales_jan_2009.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_steuertarife.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_techcrunch_continental_usa.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_vermoegensklassen.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_metadatenbank.csv
cd $CWD
}

download_csv_samples
download_json_samples
download_csv_samples
37 changes: 0 additions & 37 deletions json.go
@@ -6,7 +6,6 @@ import (
"errors"
"fmt"
"io"
"io/ioutil"
"os"
)

@@ -67,42 +66,6 @@ func copyJSONRows(i *Import, reader *bufio.Reader, ignoreErrors bool) (error, in
return nil, success, failed
}

func importJSONObject(filename string, connStr string, schema string, tableName string, dataType string) error {
db, err := connect(connStr, schema)
if err != nil {
return err
}
defer db.Close()

// The entire file is read into memory because we need to add
// it into the PostgreSQL transaction, this will hit memory limits
// for big JSON objects
var bytes []byte
if filename == "" {
bytes, err = ioutil.ReadAll(os.Stdin)
} else {
bytes, err = ioutil.ReadFile(filename)
}
if err != nil {
return err
}

i, err := NewJSONImport(db, schema, tableName, "data", dataType)
if err != nil {
return err
}

// The JSON file is not validated at client side
// it is just copied into the database
// If the JSON file is corrupt PostgreSQL will complain when querying
err = i.AddRow(string(bytes))
if err != nil {
return err
}

return i.Commit()
}

func importJSON(filename string, connStr string, schema string, tableName string, ignoreErrors bool, dataType string) error {

db, err := connect(connStr, schema)
26 changes: 6 additions & 20 deletions pgfutter.go
@@ -1,7 +1,6 @@
package main

import (
"fmt"
"log"
"os"
"path/filepath"
@@ -121,27 +120,14 @@ func main() {
return err
},
},
{
Name: "jsonobj",
Usage: "Import single JSON object into database",
Action: func(c *cli.Context) error {
cli.CommandHelpTemplate = strings.Replace(cli.CommandHelpTemplate, "[arguments...]", "<json-file>", -1)

filename := c.Args().First()

schema := c.GlobalString("schema")
tableName := parseTableName(c, filename)
dataType := getDataType(c)

connStr := parseConnStr(c)
err := importJSONObject(filename, connStr, schema, tableName, dataType)
return err
},
},
{
Name: "csv",
Usage: "Import CSV into database",
Flags: []cli.Flag{
cli.BoolFlag{
Name: "excel",
Usage: "support problematic Excel 2008 and Excel 2011 csv line endings",
},
cli.BoolFlag{
Name: "skip-header",
Usage: "skip header row",
@@ -172,10 +158,10 @@ func main() {
skipHeader := c.Bool("skip-header")
fields := c.String("fields")
skipParseheader := c.Bool("skip-parse-delimiter")
excel := c.Bool("excel")
delimiter := parseDelimiter(c.String("delimiter"), skipParseheader)
fmt.Println(delimiter)
connStr := parseConnStr(c)
err := importCSV(filename, connStr, schema, tableName, ignoreErrors, skipHeader, fields, delimiter)
err := importCSV(filename, connStr, schema, tableName, ignoreErrors, skipHeader, fields, delimiter, excel)
return err
},
},