Home
Trees
Indices
Help
Spark 1.1.0 Python API Docs
[
frames
] | [
no frames
]
Identifier Index
[
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X Y
Z
_
]
A
Accumulator
(in
pyspark.accumulators
)
addFile()
(in
SparkContext
)
aggregateByKey()
(in
RDD
)
accumulator()
(in
SparkContext
)
AddingAccumulatorParam
(in
pyspark.accumulators
)
ALS
(in
pyspark.mllib.recommendation
)
AccumulatorParam
(in
pyspark.accumulators
)
addInPlace()
(in
AccumulatorParam
)
applySchema()
(in
SQLContext
)
accumulators
(in
pyspark
)
addInPlace()
(in
AddingAccumulatorParam
)
ArrayType
(in
pyspark.sql
)
AccumulatorServer
(in
pyspark.accumulators
)
addPyFile()
(in
SparkContext
)
add()
(in
Accumulator
)
aggregate()
(in
RDD
)
B
BinaryType
(in
pyspark.sql
)
broadcast
(in
pyspark
)
broadcast()
(in
SparkContext
)
BooleanType
(in
pyspark.sql
)
Broadcast
(in
pyspark.broadcast
)
ByteType
(in
pyspark.sql
)
C
cache()
(in
RDD
)
clustering
(in
pyspark.mllib
)
contains()
(in
SparkConf
)
cache()
(in
SchemaRDD
)
coalesce()
(in
RDD
)
context
(in
pyspark
)
cacheTable()
(in
SQLContext
)
coalesce()
(in
SchemaRDD
)
context()
(in
RDD
)
cancelAllJobs()
(in
SparkContext
)
cogroup()
(in
RDD
)
copy()
(in
StatCounter
)
cancelJobGroup()
(in
SparkContext
)
collect()
(in
RDD
)
corr()
(in
Statistics
)
cartesian()
(in
RDD
)
collect()
(in
SchemaRDD
)
count()
(in
MultivariateStatisticalSummary
)
checkpoint()
(in
RDD
)
collectAsMap()
(in
RDD
)
count()
(in
RDD
)
checkpoint()
(in
SchemaRDD
)
colStats()
(in
Statistics
)
count()
(in
SchemaRDD
)
classification
(in
pyspark.mllib
)
combineByKey()
(in
RDD
)
count()
(in
StatCounter
)
clearFiles()
(in
SparkContext
)
COMPLEX_ACCUMULATOR_PARAM
(in
pyspark.accumulators
)
countByKey()
(in
RDD
)
clusterCenters()
(in
KMeansModel
)
conf
(in
pyspark
)
countByValue()
(in
RDD
)
D
DecimalType
(in
pyspark.sql
)
dense()
(in
Vectors
)
dot()
(in
SparseVector
)
DecisionTree
(in
pyspark.mllib.tree
)
depth()
(in
DecisionTreeModel
)
DoubleType
(in
pyspark.sql
)
DecisionTreeModel
(in
pyspark.mllib.tree
)
DISK_ONLY
(in
StorageLevel
)
dumps
(in
MarshalSerializer
)
DEFAULT_CONFIGS
(in
pyspark.context
)
DISK_ONLY_2
(in
StorageLevel
)
dumps()
(in
PickleSerializer
)
defaultMinPartitions()
(in
SparkContext
)
distinct()
(in
RDD
)
defaultParallelism()
(in
SparkContext
)
distinct()
(in
SchemaRDD
)
F
files
(in
pyspark
)
flatMapValues()
(in
RDD
)
foldByKey()
(in
RDD
)
filter()
(in
RDD
)
FLOAT_ACCUMULATOR_PARAM
(in
pyspark.accumulators
)
foreach()
(in
RDD
)
first()
(in
RDD
)
FloatType
(in
pyspark.sql
)
foreachPartition()
(in
RDD
)
flatMap()
(in
RDD
)
fold()
(in
RDD
)
G
get()
(in
SparkConf
)
getLocalProperty()
(in
SparkContext
)
groupBy()
(in
RDD
)
get()
(in
SparkFiles
)
getNumPartitions()
(in
RDD
)
groupByKey()
(in
RDD
)
getAll()
(in
SparkConf
)
getRootDirectory()
(in
SparkFiles
)
groupWith()
(in
RDD
)
getCheckpointFile()
(in
RDD
)
getStorageLevel()
(in
RDD
)
getCheckpointFile()
(in
SchemaRDD
)
glom()
(in
RDD
)
H
hadoopFile()
(in
SparkContext
)
histogram()
(in
RDD
)
hiveql()
(in
HiveContext
)
hadoopRDD()
(in
SparkContext
)
HiveContext
(in
pyspark.sql
)
hql()
(in
HiveContext
)
I
id()
(in
RDD
)
IntegerType
(in
pyspark.sql
)
isCheckpointed()
(in
RDD
)
inferSchema()
(in
SQLContext
)
intercept()
(in
LinearModel
)
isCheckpointed()
(in
SchemaRDD
)
insertInto()
(in
SchemaRDD
)
intersection()
(in
RDD
)
INT_ACCUMULATOR_PARAM
(in
pyspark.accumulators
)
intersection()
(in
SchemaRDD
)
J
join()
(in
RDD
)
jsonFile()
(in
SQLContext
)
jsonRDD()
(in
SQLContext
)
K
keyBy()
(in
RDD
)
KMeans
(in
pyspark.mllib.clustering
)
keys()
(in
RDD
)
KMeansModel
(in
pyspark.mllib.clustering
)
L
LabeledPoint
(in
pyspark.mllib.regression
)
LinearRegressionModel
(in
pyspark.mllib.regression
)
loads
(in
PickleSerializer
)
LassoModel
(in
pyspark.mllib.regression
)
LinearRegressionModelBase
(in
pyspark.mllib.regression
)
LocalHiveContext
(in
pyspark.sql
)
LassoWithSGD
(in
pyspark.mllib.regression
)
LinearRegressionWithSGD
(in
pyspark.mllib.regression
)
LogisticRegressionModel
(in
pyspark.mllib.classification
)
leftOuterJoin()
(in
RDD
)
loadLabeledPoints()
(in
MLUtils
)
LogisticRegressionWithSGD
(in
pyspark.mllib.classification
)
linalg
(in
pyspark.mllib
)
loadLibSVMFile()
(in
MLUtils
)
LongType
(in
pyspark.sql
)
LinearModel
(in
pyspark.mllib.regression
)
loads
(in
MarshalSerializer
)
M
map()
(in
RDD
)
max()
(in
StatCounter
)
MEMORY_ONLY_SER
(in
StorageLevel
)
mapPartitions()
(in
RDD
)
maximum()
(in
pyspark.statcounter
)
MEMORY_ONLY_SER_2
(in
StorageLevel
)
mapPartitionsWithIndex()
(in
RDD
)
mean()
(in
MultivariateStatisticalSummary
)
merge()
(in
StatCounter
)
mapPartitionsWithIndex()
(in
SchemaRDD
)
mean()
(in
RDD
)
mergeStats()
(in
StatCounter
)
mapPartitionsWithSplit()
(in
RDD
)
mean()
(in
StatCounter
)
min()
(in
MultivariateStatisticalSummary
)
MapType
(in
pyspark.sql
)
MEMORY_AND_DISK
(in
StorageLevel
)
min()
(in
RDD
)
mapValues()
(in
RDD
)
MEMORY_AND_DISK_2
(in
StorageLevel
)
min()
(in
StatCounter
)
MarshalSerializer
(in
pyspark.serializers
)
MEMORY_AND_DISK_SER
(in
StorageLevel
)
minimum()
(in
pyspark.statcounter
)
MatrixFactorizationModel
(in
pyspark.mllib.recommendation
)
MEMORY_AND_DISK_SER_2
(in
StorageLevel
)
mllib
(in
pyspark
)
max()
(in
MultivariateStatisticalSummary
)
MEMORY_ONLY
(in
StorageLevel
)
MLUtils
(in
pyspark.mllib.util
)
max()
(in
RDD
)
MEMORY_ONLY_2
(in
StorageLevel
)
MultivariateStatisticalSummary
(in
pyspark.mllib.stat
)
N
NaiveBayes
(in
pyspark.mllib.classification
)
newAPIHadoopFile()
(in
SparkContext
)
normalVectorRDD()
(in
RandomRDDs
)
NaiveBayesModel
(in
pyspark.mllib.classification
)
newAPIHadoopRDD()
(in
SparkContext
)
numNodes()
(in
DecisionTreeModel
)
name()
(in
RDD
)
normalRDD()
(in
RandomRDDs
)
numNonzeros()
(in
MultivariateStatisticalSummary
)
O
OFF_HEAP
(in
StorageLevel
)
P
parallelize()
(in
SparkContext
)
pipe()
(in
RDD
)
predict()
(in
LinearRegressionModelBase
)
parquetFile()
(in
SQLContext
)
poissonRDD()
(in
RandomRDDs
)
predict()
(in
DecisionTreeModel
)
partitionBy()
(in
RDD
)
poissonVectorRDD()
(in
RandomRDDs
)
predictAll()
(in
MatrixFactorizationModel
)
persist()
(in
RDD
)
predict()
(in
LogisticRegressionModel
)
PrimitiveTypeSingleton
(in
pyspark.sql
)
persist()
(in
SchemaRDD
)
predict()
(in
NaiveBayesModel
)
printSchema()
(in
SchemaRDD
)
pickleFile()
(in
SparkContext
)
predict()
(in
SVMModel
)
pyspark
pickleSer
(in
pyspark.accumulators
)
predict()
(in
KMeansModel
)
PickleSerializer
(in
pyspark.serializers
)
predict()
(in
MatrixFactorizationModel
)
R
random
(in
pyspark.mllib
)
registerAsTable()
(in
SchemaRDD
)
ResultIterable
(in
pyspark.resultiterable
)
RandomRDDs
(in
pyspark.mllib.random
)
registerFunction()
(in
SQLContext
)
RidgeRegressionModel
(in
pyspark.mllib.regression
)
rdd
(in
pyspark
)
registerRDDAsTable()
(in
SQLContext
)
RidgeRegressionWithSGD
(in
pyspark.mllib.regression
)
RDD
(in
pyspark.rdd
)
registerTempTable()
(in
SchemaRDD
)
rightOuterJoin()
(in
RDD
)
recommendation
(in
pyspark.mllib
)
regression
(in
pyspark.mllib
)
Row
(in
pyspark.sql
)
reduce()
(in
RDD
)
repartition()
(in
RDD
)
runJob()
(in
SparkContext
)
reduceByKey()
(in
RDD
)
repartition()
(in
SchemaRDD
)
reduceByKeyLocally()
(in
RDD
)
resultiterable
(in
pyspark
)
S
s
(in
pyspark
)
setAll()
(in
SparkConf
)
sqrt
(in
pyspark.statcounter
)
sample()
(in
RDD
)
setAppName()
(in
SparkConf
)
squared_distance()
(in
SparseVector
)
sampleByKey()
(in
RDD
)
setCheckpointDir()
(in
SparkContext
)
stat
(in
pyspark.mllib
)
sampleStdev()
(in
RDD
)
setExecutorEnv()
(in
SparkConf
)
statcounter
(in
pyspark
)
sampleStdev()
(in
StatCounter
)
setIfMissing()
(in
SparkConf
)
StatCounter
(in
pyspark.statcounter
)
sampleVariance()
(in
RDD
)
setJobGroup()
(in
SparkContext
)
Statistics
(in
pyspark.mllib.stat
)
sampleVariance()
(in
StatCounter
)
setLocalProperty()
(in
SparkContext
)
stats()
(in
RDD
)
saveAsHadoopDataset()
(in
RDD
)
setMaster()
(in
SparkConf
)
stdev()
(in
RDD
)
saveAsHadoopFile()
(in
RDD
)
setName()
(in
RDD
)
stdev()
(in
StatCounter
)
saveAsLibSVMFile()
(in
MLUtils
)
setSparkHome()
(in
SparkConf
)
stop()
(in
SparkContext
)
saveAsNewAPIHadoopDataset()
(in
RDD
)
setSystemProperty()
(in
SparkContext
)
storagelevel
(in
pyspark
)
saveAsNewAPIHadoopFile()
(in
RDD
)
ShortType
(in
pyspark.sql
)
StorageLevel
(in
pyspark.storagelevel
)
saveAsParquetFile()
(in
SchemaRDD
)
shutdown()
(in
AccumulatorServer
)
stringify()
(in
Vectors
)
saveAsPickleFile()
(in
RDD
)
sortBy()
(in
RDD
)
StringType
(in
pyspark.sql
)
saveAsSequenceFile()
(in
RDD
)
sortByKey()
(in
RDD
)
StructField
(in
pyspark.sql
)
saveAsTable()
(in
SchemaRDD
)
SparkConf
(in
pyspark.conf
)
StructType
(in
pyspark.sql
)
saveAsTextFile()
(in
RDD
)
SparkContext
(in
pyspark.context
)
subtract()
(in
RDD
)
schema()
(in
SchemaRDD
)
SparkFiles
(in
pyspark.files
)
subtract()
(in
SchemaRDD
)
SchemaRDD
(in
pyspark.sql
)
sparkUser()
(in
SparkContext
)
subtractByKey()
(in
RDD
)
schemaString()
(in
SchemaRDD
)
sparse()
(in
Vectors
)
sum()
(in
RDD
)
sequenceFile()
(in
SparkContext
)
SparseVector
(in
pyspark.mllib.linalg
)
sum()
(in
StatCounter
)
serializers
(in
pyspark
)
sql
(in
pyspark
)
SVMModel
(in
pyspark.mllib.classification
)
server_shutdown
(in
AccumulatorServer
)
sql()
(in
SQLContext
)
SVMWithSGD
(in
pyspark.mllib.classification
)
set()
(in
SparkConf
)
SQLContext
(in
pyspark.sql
)
T
table()
(in
SQLContext
)
toDebugString()
(in
SparkConf
)
train()
(in
LassoWithSGD
)
take()
(in
RDD
)
toDebugString()
(in
RDD
)
train()
(in
LinearRegressionWithSGD
)
takeOrdered()
(in
RDD
)
top()
(in
RDD
)
train()
(in
RidgeRegressionWithSGD
)
takeSample()
(in
RDD
)
train()
(in
LogisticRegressionWithSGD
)
trainClassifier()
(in
DecisionTree
)
TestHiveContext
(in
pyspark.sql
)
train()
(in
NaiveBayes
)
trainImplicit()
(in
ALS
)
textFile()
(in
SparkContext
)
train()
(in
SVMWithSGD
)
trainRegressor()
(in
DecisionTree
)
TimestampType
(in
pyspark.sql
)
train()
(in
KMeans
)
tree
(in
pyspark.mllib
)
toArray()
(in
SparseVector
)
train()
(in
ALS
)
U
uncacheTable()
(in
SQLContext
)
union()
(in
SparkContext
)
unpersist()
(in
RDD
)
uniformRDD()
(in
RandomRDDs
)
union()
(in
RDD
)
unpersist()
(in
SchemaRDD
)
uniformVectorRDD()
(in
RandomRDDs
)
unpersist()
(in
Broadcast
)
util
(in
pyspark.mllib
)
V
value()
(in
Accumulator
)
variance()
(in
RDD
)
version()
(in
SparkContext
)
values()
(in
RDD
)
variance()
(in
StatCounter
)
variance()
(in
MultivariateStatisticalSummary
)
Vectors
(in
pyspark.mllib.linalg
)
W
weights()
(in
LinearModel
)
wholeTextFiles()
(in
SparkContext
)
Z
zero()
(in
AccumulatorParam
)
zip()
(in
RDD
)
zipWithUniqueId()
(in
RDD
)
zero()
(in
AddingAccumulatorParam
)
zipWithIndex()
(in
RDD
)
_
__add__()
(in
RDD
)
__init__()
(in
ArrayType
)
__str__()
(in
SparseVector
)
__call__()
(in
PrimitiveTypeSingleton
)
__init__()
(in
HiveContext
)
__str__()
(in
LabeledPoint
)
__call__()
(in
Row
)
__init__()
(in
LocalHiveContext
)
__str__()
(in
DecisionTreeModel
)
__cls
(in
pyspark.serializers
)
__init__()
(in
MapType
)
__str__()
(in
ArrayType
)
__del__()
(in
MatrixFactorizationModel
)
__init__()
(in
SQLContext
)
__str__()
(in
StorageLevel
)
__del__()
(in
MultivariateStatisticalSummary
)
__init__()
(in
SchemaRDD
)
_acceptable_types
(in
pyspark.sql
)
__del__()
(in
DecisionTreeModel
)
__init__()
(in
StructField
)
_accumulatorRegistry
(in
pyspark.accumulators
)
__eq__()
(in
SparseVector
)
__init__()
(in
StructType
)
_active_spark_context
(in
SparkContext
)
__getattr__()
(in
Broadcast
)
__init__()
(in
StatCounter
)
_all_primitive_types
(in
pyspark.sql
)
__getattr__()
(in
Row
)
__init__()
(in
StorageLevel
)
_BRACKETS
(in
pyspark.sql
)
__iadd__()
(in
Accumulator
)
__iter__()
(in
ResultIterable
)
_broadcastRegistry
(in
pyspark.broadcast
)
__init__()
(in
Accumulator
)
__len__()
(in
ResultIterable
)
_cached_cls
(in
pyspark.sql
)
__init__()
(in
AddingAccumulatorParam
)
__ne__()
(in
SparseVector
)
_default_batch_size_for_serialized_input
(in
SparkContext
)
__init__()
(in
Broadcast
)
__new__()
(in
Row
)
_gateway
(in
SparkContext
)
__init__()
(in
SparkConf
)
__reduce__()
(in
Accumulator
)
_instances
(in
PrimitiveTypeSingleton
)
__init__()
(in
SparkContext
)
__reduce__()
(in
Broadcast
)
_is_running_on_worker
(in
SparkFiles
)
__init__()
(in
SparkFiles
)
__reduce__()
(in
Row
)
_jvm
(in
SparkContext
)
__init__()
(in
NaiveBayesModel
)
__repr__()
(in
Accumulator
)
_lock
(in
SparkContext
)
__init__()
(in
KMeansModel
)
__repr__()
(in
SparseVector
)
_next_accum_id
(in
SparkContext
)
__init__()
(in
SparseVector
)
__repr__()
(in
RDD
)
_python_includes
(in
SparkContext
)
__init__()
(in
MatrixFactorizationModel
)
__repr__()
(in
MapType
)
_root_directory
(in
SparkFiles
)
__init__()
(in
LabeledPoint
)
__repr__()
(in
Row
)
_sc
(in
SparkFiles
)
__init__()
(in
LinearModel
)
__repr__()
(in
StructField
)
_spark_stack_depth
(in
pyspark.rdd
)
__init__()
(in
MultivariateStatisticalSummary
)
__repr__()
(in
StructType
)
_type_mappings
(in
pyspark.sql
)
__init__()
(in
DecisionTreeModel
)
__repr__()
(in
StatCounter
)
_writeToFile
(in
SparkContext
)
__init__()
(in
RDD
)
__repr__()
(in
StorageLevel
)
__init__()
(in
ResultIterable
)
__str__()
(in
Accumulator
)
Home
Trees
Indices
Help
Spark 1.1.0 Python API Docs
Generated by Epydoc 3.0.1 on Mon Nov 24 15:21:12 2014
http://epydoc.sourceforge.net