python – PySpark equivalent of a TPC-H query in Scala

I want to implement the following Scala query (TPC-H Q01) in PySpark and do not know how to do it:

package main.scala

import org.apache.spark.SparkContext
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.count
import org.apache.spark.sql.functions.sum
import org.apache.spark.sql.functions.avg
import org.apache.spark.sql.functions.udf

class Q01 extends TpchQuery {

  override def exec(sc: SparkContext, schemaProvider: TpchSchemaProvider): DataFrame = {

    // This is used to implicitly convert an RDD to a DataFrame.
    val sqlContext = new org.apache.spark.sql.SQLContext(sc)
    import sqlContext.implicits._
    import schemaProvider._

    // Helper UDFs: apply a discount or a tax to a price.
    val decrease = udf { (x: Double, y: Double) => x * (1 - y) }
    val increase = udf { (x: Double, y: Double) => x * (1 + y) }

    schemaProvider.lineitem.filter($"l_shipdate" <= "1998-09-02")
      .groupBy($"l_returnflag", $"l_linestatus")
      .agg(sum($"l_quantity"), sum($"l_extendedprice"),
        sum(decrease($"l_extendedprice", $"l_discount")),
        sum(increase(decrease($"l_extendedprice", $"l_discount"), $"l_tax")),
        avg($"l_quantity"), avg($"l_extendedprice"), avg($"l_discount"), count($"l_quantity"))
      .sort($"l_returnflag", $"l_linestatus")
  }

}
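
For reference, here is a minimal sketch of what the PySpark equivalent could look like. It assumes the lineitem table is already available as a DataFrame with the same column names as in the Scala version; the read path below is a placeholder for however your setup loads the TPC-H data. Since the two Scala UDFs are plain arithmetic, ordinary column expressions are used instead of UDFs:

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.appName("tpch-q01").getOrCreate()

# Placeholder path: load the TPC-H lineitem table however your setup provides it.
lineitem = spark.read.parquet("path/to/lineitem")

# Equivalents of the Scala `decrease`/`increase` UDFs as column expressions.
disc_price = F.col("l_extendedprice") * (1 - F.col("l_discount"))
charge = disc_price * (1 + F.col("l_tax"))

result = (
    lineitem
    .filter(F.col("l_shipdate") <= "1998-09-02")
    .groupBy("l_returnflag", "l_linestatus")
    .agg(
        F.sum("l_quantity"),
        F.sum("l_extendedprice"),
        F.sum(disc_price),
        F.sum(charge),
        F.avg("l_quantity"),
        F.avg("l_extendedprice"),
        F.avg("l_discount"),
        F.count("l_quantity"),
    )
    .sort("l_returnflag", "l_linestatus")
)

result.show()

A more literal translation could wrap the same lambdas with pyspark.sql.functions.udf, but built-in column arithmetic avoids the serialization overhead of Python UDFs and is generally faster.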