Apache Spark 2 installation on Linux
$ cd ${HOME}
$ wget https://archive.apache.org/dist/spark/spark-2.4.3/spark-2.4.3-bin-hadoop2.7.tgz
$ ls
spark-2.4.3-bin-hadoop2.7.tgz
$ tar xf spark-2.4.3-bin-hadoop2.7.tgz -C ./
$ sudo mkdir -p /opt/spark-2.4.3
$ sudo mv spark-2.4.3-bin-hadoop2.7/* /opt/spark-2.4.3
$ sudo ln -s /opt/spark-2.4.3 /opt/spark    # version-independent symlink used as SPARK_HOME below
$ rm -rf spark-2.4.3-bin-hadoop2.7/
$ rm -rf spark-2.4.3-bin-hadoop2.7.tgz
$ sudo vi /etc/profile.d/spark.sh
#### SPARK 2.4.3 #######################
export SPARK_HOME=/opt/spark
export PATH=${SPARK_HOME}/bin:$PATH
#### SPARK 2.4.3 #######################
$ source /etc/profile.d/spark.sh
Test the installation
$ spark-shell
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/ '_/
   /___/ .__/\_,_/_/ /_/\_\   version 2.4.3
      /_/
$ cd ${SPARK_HOME}/bin
$ ./run-example SparkPi 10
$ pyspark --master local[2]
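SparkPi estimates Pi by Monte Carlo sampling; the argument (10) is the number of partitions to spread the samples over. The same idea makes a quick smoke test inside the pyspark shell, which already provides a SparkContext as sc. A minimal sketch (the sample count of 100000 is arbitrary):
>>> import random
>>> n = 100000
>>> # count random points in the unit square that fall inside the quarter circle
>>> inside = sc.parallelize(range(n)).filter(
...     lambda _: random.random() ** 2 + random.random() ** 2 < 1).count()
>>> print(4.0 * inside / n)   # should print roughly 3.14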
PySpark and Jupyter Notebook installation
# if the JDK is not installed
$ sudo apt install -y openjdk-8-jdk
$ sudo update-alternatives --config java
$ java -version
# if Scala is not installed
$ sudo apt install -y scala
$ sudo apt install -y python3-pip
$ pip3 install py4j
$ pip3 install jupyter
$ sudo vi /etc/profile.d/pyspark.sh
#### SPARK #######################
export PATH=$PATH:~/.local/bin/
export PYTHONPATH=$SPARK_HOME/python:$PYTHONPATH
export PYSPARK_DRIVER_PYTHON=jupyter
export PYSPARK_DRIVER_PYTHON_OPTS='notebook'
export PYSPARK_PYTHON='python3'
#### SPARK #######################
$ source /etc/profile.d/pyspark.sh
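With PYTHONPATH extended this way, pyspark is importable from any plain Python 3 interpreter, not only through the pyspark launcher. A quick check after sourcing the profile (the file name is just an example):
# check_pyspark.py -- hypothetical file name; run with python3 check_pyspark.py
import pyspark
print(pyspark.__version__)   # expect 2.4.3
Note that with PYSPARK_DRIVER_PYTHON=jupyter exported, the pyspark command from the previous section now launches a notebook server instead of the interactive shell; unset the variable first if you want the plain shell back.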
# 777 keeps things simple on a single-user test box; chown to your own user is a stricter alternative
$ sudo chmod -R 777 /opt/spark/
$ pip3 install findspark
$ cd /opt/spark/python/
$ jupyter-notebook --ip 192.168.0.11 --port 8080    # replace with your machine's IP address
In the browser, create a new Python 3 notebook and run:
import findspark
findspark.init('/opt/spark')
import pyspark
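To confirm the notebook can actually talk to Spark, create a session and run a trivial job; a minimal sketch, where the master and app name are arbitrary choices:
from pyspark.sql import SparkSession

# build (or reuse) a local two-core session
spark = SparkSession.builder.master('local[2]').appName('install-test').getOrCreate()

# a trivial DataFrame job to prove the executors are working
df = spark.createDataFrame([(1, 'a'), (2, 'b')], ['id', 'letter'])
df.show()

spark.stop()
If df.show() prints the two-row table, the PySpark, Jupyter, and Spark pieces are all wired together correctly.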