diff --git a/Vagrantfile b/Vagrantfile
index f3d3928a3af01bb894926a70833db2550df32457..76d5beada3b0565338c3d6040de7601ddf43beb0 100644
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -12,7 +12,8 @@ Vagrant.configure(2) do |config|
     master.vm.network :private_network, ip: '192.168.10.10'
     master.vm.network :forwarded_port, guest: 27017, host: 27017 # MongoDB
     master.vm.network :forwarded_port, guest: 8081, host: 8081 # MongoDB Express
-    master.vm.network :forwarded_port, guest: 8080, host: 8888 # Spark MasterUI
+    master.vm.network :forwarded_port, guest: 8001, host: 8001 # Spark MasterUI
+    master.vm.network :forwarded_port, guest: 8002, host: 8002 # Spark WorkerUI
     master.vm.network :forwarded_port, guest: 7077, host: 7077 # Spark
     master.vm.network :forwarded_port, guest: 9200, host: 9200 # ElasticSearch REST API
     master.vm.provision :shell, path: 'vm/bootstrap.sh'
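
Note (not part of the patch): the two new forwards match the web UI ports that vm/config/spark-env.sh, added further down, assigns to the standalone master and worker. Assuming both daemons have been started inside the box after provisioning, a quick reachability check from the host could look like this; the curl probes are illustrative only:

    # assumes the Spark master and worker are already running in the guest
    curl -sf http://localhost:8001 >/dev/null && echo "Spark master UI reachable on host port 8001"
    curl -sf http://localhost:8002 >/dev/null && echo "Spark worker UI reachable on host port 8002"
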
"bh_unmatched_center", - "bh_unmatched_open", - "bh_unmatched_close", - "bh_unmatched_content" - ], - "incomplete_sync": null, - "syntax": "Packages/Text/Plain text.tmLanguage" - }, - "translation.x": 0.0, - "translation.y": 0.0, - "zoom_level": 1.0 - }, - "stack_index": 1, - "type": "text" - }, - { - "buffer": 1, - "file": "Vagrantfile", - "semi_transient": true, - "settings": - { - "buffer_size": 797, - "regions": - { - }, - "selection": - [ - [ - 353, - 353 - ] - ], - "settings": - { - "BracketHighlighterBusy": false, - "bh_regions": - [ - "bh_curly", - "bh_curly_center", - "bh_curly_open", - "bh_curly_close", - "bh_curly_content", - "bh_c_define", - "bh_c_define_center", - "bh_c_define_open", - "bh_c_define_close", - "bh_c_define_content", - "bh_angle", - "bh_angle_center", - "bh_angle_open", - "bh_angle_close", - "bh_angle_content", - "bh_round", - "bh_round_center", - "bh_round_open", - "bh_round_close", - "bh_round_content", - "bh_double_quote", - "bh_double_quote_center", - "bh_double_quote_open", - "bh_double_quote_close", - "bh_double_quote_content", - "bh_default", - "bh_default_center", - "bh_default_open", - "bh_default_close", - "bh_default_content", - "bh_regex", - "bh_regex_center", - "bh_regex_open", - "bh_regex_close", - "bh_regex_content", - "bh_square", - "bh_square_center", - "bh_square_open", - "bh_square_close", - "bh_square_content", - "bh_single_quote", - "bh_single_quote_center", - "bh_single_quote_open", - "bh_single_quote_close", - "bh_single_quote_content", - "bh_unmatched", - "bh_unmatched_center", - "bh_unmatched_open", - "bh_unmatched_close", - "bh_unmatched_content", - "bh_tag", - "bh_tag_center", - "bh_tag_open", - "bh_tag_close", - "bh_tag_content" - ], - "incomplete_sync": null, - "remote_loading": false, - "synced": false, - "syntax": "Packages/Ruby/Ruby.sublime-syntax", - "tab_size": 2, - "translate_tabs_to_spaces": true - }, - "translation.x": 0.0, - "translation.y": 0.0, - "zoom_level": 1.0 - }, - "stack_index": 0, - "type": "text" - } ] } ], diff --git a/vm/bootstrap.sh b/vm/bootstrap.sh index 02685299136bfcd28ebb906ffe16dfadd8349a2f..fb631ebc4e42aeebd63324f9c2ba820ec18115f1 100644 --- a/vm/bootstrap.sh +++ b/vm/bootstrap.sh @@ -1,30 +1,39 @@ #!/bin/sh +# ----------------------------------------------------------------------------- +# add repos +apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 7F0CEB10 +echo "deb http://repo.mongodb.org/apt/ubuntu trusty/mongodb-org/3.0 multiverse" | tee /etc/apt/sources.list.d/mongodb-org-3.0.list + # ----------------------------------------------------------------------------- # upgrade system apt-get update apt-get upgrade -y -apt-get install git npm libkrb5-dev -y + +# ----------------------------------------------------------------------------- +# install packages +apt-get install git gdebi-core openjdk-7-jdk openjdk-7-jre -y # ----------------------------------------------------------------------------- # install scala -wget http://www.scala-lang.org/files/archive/scala-2.11.7.deb -dpkg -i scala-2.11.7.deb -rm scala-2.11.7.deb +wget http://www.scala-lang.org/files/archive/scala-2.10.6.deb +gdebi -n scala-2.10.6.deb +rm scala-2.10.6.deb # ----------------------------------------------------------------------------- -# fix node setup in ubuntu -ln -s $(which nodejs) /bin/node +# install spark +wget http://mirror.netcologne.de/apache.org/spark/spark-1.5.2/spark-1.5.2-bin-hadoop2.6.tgz +tar zxf spark-1.5.2-bin-hadoop2.6.tgz +rm spark-1.5.2-bin-hadoop2.6.tgz +mv spark-1.5.2-bin-hadoop2.6 spark +ln -sf 
 
 # -----------------------------------------------------------------------------
-# install & start mongodb
-apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 7F0CEB10
-echo "deb http://repo.mongodb.org/apt/ubuntu trusty/mongodb-org/3.0 multiverse" | tee /etc/apt/sources.list.d/mongodb-org-3.0.list
-apt-get update
+# install mongodb
 apt-get install -y mongodb-org
 
 # copy configuration
-cp /vagrant/vm/config/master-mongod.conf /etc/mongod.conf
+ln -sf /vagrant/vm/config/master-mongod.conf /etc/mongod.conf
 
 # disable hugepages (https://docs.mongodb.org/manual/tutorial/transparent-huge-pages/)
 cp /vagrant/vm/config/disable-transparent-hugepages /etc/init.d/
@@ -33,38 +42,54 @@
 update-rc.d disable-transparent-hugepages defaults
 echo 'never' > /sys/kernel/mm/transparent_hugepage/enabled
 echo 'never' > /sys/kernel/mm/transparent_hugepage/defrag
-# install node, mongo-express
-apt-get install npm libkrb5-dev
-npm install -g mongo-express forever
-cp /vagrant/vm/config/mongo-express.config.js /usr/local/lib/node_modules/mongo-express/config.js
-
 # restart service
 service mongod restart
 
 # import mongodb data
-mongoimport --db test --collection articles --file /vagrant/data/data.json --jsonArray
-
-# start web ui
-forever start $(which mongo-express)
+mongoimport --db test --collection articles --file /vagrant/vm/data/data.json --jsonArray
 
 # -----------------------------------------------------------------------------
-# install spark
-wget ftp://mirror.netcologne.de/apache.org/spark/spark-1.5.2/spark-1.5.2-bin-hadoop2.6.tgz
-tar zxf spark-1.5.2-bin-hadoop2.6.tgz
-rm spark-1.5.2-bin-hadoop2.6.tgz
-mv spark-1.5.2-bin-hadoop2.6 spark
+# install mongo express
+apt-get install -y npm libkrb5-dev
+ln -sf $(which nodejs) /bin/node
+npm install -g mongo-express forever
+ln -sf /vagrant/vm/config/mongo-express.config.js /usr/local/lib/node_modules/mongo-express/config.js
 
 # -----------------------------------------------------------------------------
-# install elasticsearch
-wget -qO - https://packages.elastic.co/GPG-KEY-elasticsearch | apt-key add -
-echo "deb http://packages.elastic.co/elasticsearch/2.x/debian stable main" | tee -a /etc/apt/sources.list.d/elasticsearch-2.x.list
-apt-get install elasticsearch
-update-rc.d elasticsearch defaults 95 10
+# install maven
+wget http://mirror.netcologne.de/apache.org/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz
+tar zxf apache-maven-3.3.9-bin.tar.gz
+mv apache-maven-3.3.9 maven
+rm apache-maven-3.3.9-bin.tar.gz
 
 # -----------------------------------------------------------------------------
 # set environment
-echo "source /vagrant/vm/env.sh" >> /home/vagrant/.bashrc
+cat /vagrant/vm/config/environment >> /etc/environment
+
+# -----------------------------------------------------------------------------
+# install mahout
+git clone https://github.com/apache/mahout.git mahout
+cd mahout
+../maven/bin/mvn -DskipTests -X clean install
 
 # -----------------------------------------------------------------------------
 # disable firewall
-ufw disable
\ No newline at end of file
+ufw disable
+
+# -----------------------------------------------------------------------------
+# cleanup
+apt-get autoremove -y
+apt-get clean
+dd if=/dev/zero of=/EMPTY bs=1M
+rm -f /EMPTY
+cat /dev/null > ~/.bash_history
+history -c
+
+echo ""
+echo "--------------------------------------------------------------"
+echo "--- Provisioning complete."
+echo "--- Reload box to set environment" +echo "--- `vagrant reload`" +echo "--------------------------------------------------------------" + +exit 0 \ No newline at end of file diff --git a/vm/config/environment b/vm/config/environment new file mode 100644 index 0000000000000000000000000000000000000000..46bfb91d41fbefd2f29a531675d52fed0ee6300b --- /dev/null +++ b/vm/config/environment @@ -0,0 +1,17 @@ +# Java +JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64 +JAVA_TOOL_OPTIONS="-Xmx2048m -XX:MaxPermSize=1024m -Xms1024m" + +# Spark +SPARK_HOME=/home/vagrant/spark +SPARK_BIN=/home/vagrant/spark/bin + +# Mahout +MAHOUT_HOME=/home/vagrant/mahout +MAHOUT_LOCAL=true # for running standalone on your dev machine, unset MAHOUT_LOCAL for running on a cluster + +# Maven +MAVEN_HOME=/home/vagrant/maven +MAVEN_BIN=/home/vagrant/maven/bin + +PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/home/vagrant/maven/bin" \ No newline at end of file diff --git a/vm/config/spark-env.sh b/vm/config/spark-env.sh new file mode 100755 index 0000000000000000000000000000000000000000..4cb916c1ba936a0d915e3d326002991e52cbb641 --- /dev/null +++ b/vm/config/spark-env.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +# This file is sourced when running various Spark programs. +# Copy it as spark-env.sh and edit that to configure Spark for your site. + +# Options read when launching programs locally with +# ./bin/run-example or ./bin/spark-submit +# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files +# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node +# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program +# - SPARK_CLASSPATH, default classpath entries to append + +# Options read by executors and drivers running inside the cluster +# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node +# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program +# - SPARK_CLASSPATH, default classpath entries to append +# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data +# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos + +# Options read in YARN client mode +# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files +# - SPARK_EXECUTOR_INSTANCES, Number of workers to start (Default: 2) +# - SPARK_EXECUTOR_CORES, Number of cores for the workers (Default: 1). +# - SPARK_EXECUTOR_MEMORY, Memory per Worker (e.g. 1000M, 2G) (Default: 1G) +# - SPARK_DRIVER_MEMORY, Memory for Master (e.g. 1000M, 2G) (Default: 1G) +# - SPARK_YARN_APP_NAME, The name of your application (Default: Spark) +# - SPARK_YARN_QUEUE, The hadoop queue to use for allocation requests (Default: ‘default’) +# - SPARK_YARN_DIST_FILES, Comma separated list of files to be distributed with the job. +# - SPARK_YARN_DIST_ARCHIVES, Comma separated list of archives to be distributed with the job. + +# Options for the daemons used in the standalone deploy mode +# - SPARK_MASTER_IP, to bind the master to a different IP address or hostname +# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master +# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y") +# - SPARK_WORKER_CORES, to set the number of cores to use on this machine +# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 
+# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
+# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node
+# - SPARK_WORKER_DIR, to set the working directory of worker processes
+# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
+# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
+# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
+# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
+# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
+# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
+
+# Generic options for the daemons used in the standalone deploy mode
+# - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)
+# - SPARK_LOG_DIR       Where log files are stored. (Default: ${SPARK_HOME}/logs)
+# - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)
+# - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)
+# - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)
+
+SPARK_MASTER_PORT=7077
+SPARK_MASTER_WEBUI_PORT=8001
+SPARK_WORKER_WEBUI_PORT=8002
\ No newline at end of file
diff --git a/vm/env.sh b/vm/env.sh
deleted file mode 100644
index 13f47935d98bc44bbe5f7dafb745814aab2b8b79..0000000000000000000000000000000000000000
--- a/vm/env.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/sh
-
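
Note (not part of the patch): bootstrap.sh now installs Spark, MongoDB, mongo-express, Maven and Mahout, but it no longer starts mongo-express or any Spark daemon, and the appended /etc/environment entries only take effect after a reboot. A plausible post-provisioning workflow, assuming the paths from vm/config/environment and the ports from vm/config/spark-env.sh; the master URL passed to start-slave.sh is an assumption, nothing in the patch pins it:

    vagrant up        # first boot runs vm/bootstrap.sh
    vagrant reload    # pick up the appended /etc/environment entries
    vagrant ssh -c '$SPARK_HOME/sbin/start-master.sh'                           # web UI forwarded to host port 8001
    vagrant ssh -c '$SPARK_HOME/sbin/start-slave.sh spark://$(hostname):7077'   # web UI forwarded to host port 8002
    vagrant ssh -c 'mongo test --eval "db.articles.count()"'                    # data imported by bootstrap.sh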