@ -23,8 +23,7 @@ public class SparkReadWriteTest extends SparkTestBase {
List < Person > people = Arrays . asList (
List < Person > people = Arrays . asList (
new Person ( "Alice" , 30 ) ,
new Person ( "Alice" , 30 ) ,
new Person ( "Bob" , 25 ) ,
new Person ( "Bob" , 25 ) ,
new Person ( "Charlie" , 35 )
) ;
new Person ( "Charlie" , 35 ) ) ;
Dataset < Row > df = spark . createDataFrame ( people , Person . class ) ;
Dataset < Row > df = spark . createDataFrame ( people , Person . class ) ;
@ -40,9 +39,9 @@ public class SparkReadWriteTest extends SparkTestBase {
assertEquals ( 2 , readDf . columns ( ) . length ) ;
assertEquals ( 2 , readDf . columns ( ) . length ) ;
List < Row > results = readDf . collectAsList ( ) ;
List < Row > results = readDf . collectAsList ( ) ;
assertTrue ( results . stream ( ) . anyMatch ( r - > "Alice" . equals ( r . getAs ( "name" ) ) & & ( Integer ) r . getAs ( "age" ) = = 30 ) ) ;
assertTrue ( results . stream ( ) . anyMatch ( r - > "Bob" . equals ( r . getAs ( "name" ) ) & & ( Integer ) r . getAs ( "age" ) = = 25 ) ) ;
assertTrue ( results . stream ( ) . anyMatch ( r - > "Charlie" . equals ( r . getAs ( "name" ) ) & & ( Integer ) r . getAs ( "age" ) = = 35 ) ) ;
assertTrue ( results . stream ( ) . anyMatch ( r - > "Alice" . equals ( r . getAs ( "name" ) ) & & ( Integer ) r . getAs ( "age" ) = = 30 ) ) ;
assertTrue ( results . stream ( ) . anyMatch ( r - > "Bob" . equals ( r . getAs ( "name" ) ) & & ( Integer ) r . getAs ( "age" ) = = 25 ) ) ;
assertTrue ( results . stream ( ) . anyMatch ( r - > "Charlie" . equals ( r . getAs ( "name" ) ) & & ( Integer ) r . getAs ( "age" ) = = 35 ) ) ;
}
}
@Test
@Test
@ -52,8 +51,7 @@ public class SparkReadWriteTest extends SparkTestBase {
/ / Create test data
/ / Create test data
List < Person > people = Arrays . asList (
List < Person > people = Arrays . asList (
new Person ( "Alice" , 30 ) ,
new Person ( "Alice" , 30 ) ,
new Person ( "Bob" , 25 )
) ;
new Person ( "Bob" , 25 ) ) ;
Dataset < Row > df = spark . createDataFrame ( people , Person . class ) ;
Dataset < Row > df = spark . createDataFrame ( people , Person . class ) ;
@ -77,8 +75,7 @@ public class SparkReadWriteTest extends SparkTestBase {
List < Person > people = Arrays . asList (
List < Person > people = Arrays . asList (
new Person ( "Alice" , 30 ) ,
new Person ( "Alice" , 30 ) ,
new Person ( "Bob" , 25 ) ,
new Person ( "Bob" , 25 ) ,
new Person ( "Charlie" , 35 )
) ;
new Person ( "Charlie" , 35 ) ) ;
Dataset < Row > df = spark . createDataFrame ( people , Person . class ) ;
Dataset < Row > df = spark . createDataFrame ( people , Person . class ) ;
@ -103,8 +100,7 @@ public class SparkReadWriteTest extends SparkTestBase {
new PersonWithYear ( "Alice" , 30 , 2020 ) ,
new PersonWithYear ( "Alice" , 30 , 2020 ) ,
new PersonWithYear ( "Bob" , 25 , 2021 ) ,
new PersonWithYear ( "Bob" , 25 , 2021 ) ,
new PersonWithYear ( "Charlie" , 35 , 2020 ) ,
new PersonWithYear ( "Charlie" , 35 , 2020 ) ,
new PersonWithYear ( "David" , 28 , 2021 )
) ;
new PersonWithYear ( "David" , 28 , 2021 ) ) ;
Dataset < Row > df = spark . createDataFrame ( people , PersonWithYear . class ) ;
Dataset < Row > df = spark . createDataFrame ( people , PersonWithYear . class ) ;
@ -135,16 +131,14 @@ public class SparkReadWriteTest extends SparkTestBase {
/ / Write first batch
/ / Write first batch
List < Person > batch1 = Arrays . asList (
List < Person > batch1 = Arrays . asList (
new Person ( "Alice" , 30 ) ,
new Person ( "Alice" , 30 ) ,
new Person ( "Bob" , 25 )
) ;
new Person ( "Bob" , 25 ) ) ;
Dataset < Row > df1 = spark . createDataFrame ( batch1 , Person . class ) ;
Dataset < Row > df1 = spark . createDataFrame ( batch1 , Person . class ) ;
df1 . write ( ) . mode ( SaveMode . Overwrite ) . parquet ( outputPath ) ;
df1 . write ( ) . mode ( SaveMode . Overwrite ) . parquet ( outputPath ) ;
/ / Append second batch
/ / Append second batch
List < Person > batch2 = Arrays . asList (
List < Person > batch2 = Arrays . asList (
new Person ( "Charlie" , 35 ) ,
new Person ( "Charlie" , 35 ) ,
new Person ( "David" , 28 )
) ;
new Person ( "David" , 28 ) ) ;
Dataset < Row > df2 = spark . createDataFrame ( batch2 , Person . class ) ;
Dataset < Row > df2 = spark . createDataFrame ( batch2 , Person . class ) ;
df2 . write ( ) . mode ( SaveMode . Append ) . parquet ( outputPath ) ;
df2 . write ( ) . mode ( SaveMode . Append ) . parquet ( outputPath ) ;
@ -179,17 +173,29 @@ public class SparkReadWriteTest extends SparkTestBase {
private String name ;
private String name ;
private int age ;
private int age ;
public Person ( ) { }
public Person ( ) {
}
public Person ( String name , int age ) {
public Person ( String name , int age ) {
this . name = name ;
this . name = name ;
this . age = age ;
this . age = age ;
}
}
public String getName ( ) { return name ; }
public void setName ( String name ) { this . name = name ; }
public int getAge ( ) { return age ; }
public void setAge ( int age ) { this . age = age ; }
public String getName ( ) {
return name ;
}
public void setName ( String name ) {
this . name = name ;
}
public int getAge ( ) {
return age ;
}
public void setAge ( int age ) {
this . age = age ;
}
}
}
public static class PersonWithYear implements java . io . Serializable {
public static class PersonWithYear implements java . io . Serializable {
@ -197,7 +203,8 @@ public class SparkReadWriteTest extends SparkTestBase {
private int age ;
private int age ;
private int year ;
private int year ;
public PersonWithYear ( ) { }
public PersonWithYear ( ) {
}
public PersonWithYear ( String name , int age , int year ) {
public PersonWithYear ( String name , int age , int year ) {
this . name = name ;
this . name = name ;
@ -205,12 +212,28 @@ public class SparkReadWriteTest extends SparkTestBase {
this . year = year ;
this . year = year ;
}
}
public String getName ( ) { return name ; }
public void setName ( String name ) { this . name = name ; }
public int getAge ( ) { return age ; }
public void setAge ( int age ) { this . age = age ; }
public int getYear ( ) { return year ; }
public void setYear ( int year ) { this . year = year ; }
public String getName ( ) {
return name ;
}
public void setName ( String name ) {
this . name = name ;
}
public int getAge ( ) {
return age ;
}
public void setAge ( int age ) {
this . age = age ;
}
}
}
public int getYear ( ) {
return year ;
}
public void setYear ( int year ) {
this . year = year ;
}
}
}