diff --git a/content/a_tour_of_tfw.tex b/content/a_tour_of_tfw.tex index 5f70976..363a29f 100644 --- a/content/a_tour_of_tfw.tex +++ b/content/a_tour_of_tfw.tex @@ -20,8 +20,8 @@ running in the \texttt{solvable} Docker container and frontend components written in Angular. For instance the built-in code editor requires a frontend component and an event handler to function properly, while the frontend component responsible for -drawing out and managing other components implement no -event handler. +drawing out and managing other components implements no +event handler, so it only exists on the frontend. In the Tutorial Framework most of the built-ins define APIs, which are TFW messages that can be used to interact with them. @@ -205,26 +205,31 @@ certain command are executed by the user. \pic{figures/terminal.png}{The Frontend Terminal of TFW Running top} The implementation of reading command history is quite an exotic one. -The framework need to be able to detect if the user has executed any command in the -container. -This is not an easy thing to accomplish without relying some sort of heavyweight +The framework needs to be able to detect if the user has executed any command in the +container using an interactive bash session. +This is not an easy thing to accomplish without relying on some sort of heavyweight monitoring solution such as Sysdig% \footnote{\href{https://sysdig.comq}{https://sysdig.com}}. I deemed most simiar systems a huge overkill to implement this functionality, and their -memory footprints are not something we can really afford here. -Another way would be to use \texttt{pam_tty_audit.so} in the PAM% +memory footprints are not something we could afford here. 
+Another way would be to use \texttt{pam\_tty\_audit.so} in the PAM% \footnote{Linux Pluggable Authentication Modules: \href{http://man7.org/linux/man-pages/man3/pam.3.html} {http://man7.org/linux/man-pages/man3/pam.3.html}} -configurations responsible for logins, as this allows various TTY auditing, but -I have found an ever simpler approach to the problem in the end. +configurations responsible for logins, as this allows for various TTY auditing functions, +but I have found an even simpler approach to the problem in the end. By using the inotify system built into TFW, I can set up the user's environment in -such a way, that I can enforce or determine the location of the bash \texttt{HISTFILE}% +such a way that I can enforce and determine the location of the bash \texttt{HISTFILE}% \footnote{This environment variable contains the path to the file bash writes command history to} of the user. This way I can monitor changes made to this file and read the commands executed by the user from it. +It is important to keep in mind that the user is able to ``sabotage'' this method% +\footnote{By unsetting the \texttt{HISTFILE} environment variable for example}, +but that should not be an issue as this is not a feature that is intended to be +used in competitive environments (and if the users of a tutorial intentionally +break the system under themselves, well, good for them). \section{Console Component} @@ -240,7 +245,7 @@ experience similar to working in an IDE on your laptop. \pic{figures/console_and_editor.png}{The Console Displaying Live Process Logs Next to the TFW Code Editor} -\section{Process Management} +\section{Process Management}\label{processmanagement} The framework includes an event handler capable of managing processes running inside the \texttt{solvable} Docker container. @@ -302,11 +307,82 @@ digitally signed messages only. 
In the event of a successful FSM step, this component is going to broadcast a message showcased as an example in the beginning of Chapter~\ref{atouroftfw}. -\section{Additional Frontend Features} +\section{Web Component} -The frontend of the framework exposes some additional APIs through -TFW messages. +The web component allows developers to configure what web application should +be displayed on the frontend. +There are two options for doing this: +\begin{enumerate} + \item Supply the URL of a web application served from the + Docker container to be displayed + \item Implement an Angular component on the frontend and display it +\end{enumerate} + +In most cases, when developers make tutorials on Python and Java topics for +example, they will implement a web application in the respective programming +language the exercise is about and most developers have no experience in +Angular development. +This is why more often than not the first option is chosen. +This feature is implemented by embedding a standard HTML iframe% +\footnote{\href{https://developer.mozilla.org/en-US/docs/Web/HTML/Element/iframe} +{https://developer.mozilla.org/en-US/docs/Web/HTML/Element/iframe}} +inside the frontend, which is controllable by TFW +(as an example it can be reloaded or navigated from the framework). + +Watchful readers might now be wondering how this ``controlled by the framework'' +thing can work with the Same Origin Policy% +\footnote{\href{https://developer.mozilla.org/en-US/docs/Web/Security/Same-origin_policy} +{https://developer.mozilla.org/en-US/docs/Web/Security/Same-origin\_policy}} +being in effect. +The answer is that developers must use a \emph{relative URL}, that is, a URL relative +to the entry point of the TFW frontend itself. +To allow serving several web applications from a single port the framework +supports optional reverse-proxy configurations through the nginx% +\footnote{\href{http://nginx.org}{http://nginx.org}} web server run by the framework. 
+More on this in a later chapter. + +\section{Various Frontend Features} + +The Angular frontend features several different layouts. +These layouts are useful to accommodate different workflows for users, +such as the previous example of editing code and being able to view the +result of said code in real time next to the editor. +Another example would be editing Ansible playbooks in the file editor, +and then trying to run them in the terminal. +There are also almost full screen views for each component that makes sense +to be used that way. + +The frontend was designed in a way to be fully responsive in window sizes +that still keep the whole thing usable (i.e.\ it would not be practical to start +solving TFW tutorials on a smart phone, simply because of size limits, so they are +not supported, but the frontend still behaves as expected on small laptops or bigger tablets). +This is not an easy thing to implement and maintain due to the many small +incompatibilities between browsers given the complexity of the frontend. + +Just remember that a few years ago the clearfix% +\footnote{\href{https://stackoverflow.com/questions/8554043/what-is-a-clearfix} +{https://stackoverflow.com/questions/8554043/what-is-a-clearfix}} +hack was the industry standard in creating CSS layouts. +The situation has improved \emph{a lot} since then with flexboxes +and grid layouts despite the sheer chaos that is generally involved in web +standardization efforts, and in CSS especially% +\footnote{\href{https://developer.mozilla.org/en-US/docs/Web/CSS/CSS3} +{https://developer.mozilla.org/en-US/docs/Web/CSS/CSS3}}. + +The framework frontend is built on grid layout and flexboxes% +\footnote{\href{https://developer.mozilla.org/en-US/docs/Web/CSS/CSS\_Grid\_Layout} +{https://developer.mozilla.org/en-US/docs/Web/CSS/CSS\_Grid\_Layout}}, +which gives us the best hopes of being able to maintain it down the line. 
+It would involve unimaginable horrors to support this multi-layout +frontend on older browsers, so browsers without flex and grid +support are not supported by TFW. +Arguably this is a good thing, as people should keep their browsers up to date to +follow frequent security patches anyway, so let this serve as a reminder to +developers looking to get into IT security that the first step is to +keep your software up to date. + +The frontend of the framework exposes some additional APIs. These include the changing of layouts, selecting the terminal or console -component to be displayed, the possibility of dynamically modifying other -frontend configuration (such as the frequency of autosaving the files in the editor) +component to be displayed, the possibility of dynamically modifying +frontend configuration values (such as the frequency of autosaving the files in the editor) and more. diff --git a/content/using_the_framework.tex b/content/using_the_framework.tex index 4171d38..094afe8 100644 --- a/content/using_the_framework.tex +++ b/content/using_the_framework.tex @@ -5,41 +5,328 @@ to use the framework, some of the design decisions behind this and how everythin is integrated into the \texttt{solvable} Docker image. To use the framework one has to do several things to get started. 
-The main poins include: +The main points include: \begin{itemize} - \item define an FSM describing the flow of the tutorial - \item implement required event handlers (that might trigger state transitions in the FSM, - interact with non-TFW code and do various things that might be needed during a challenge) - \item define what processes should be running inside the container besides the TFW - server (which is started automatically) - \item set up reverse proxying for any user-facing network applications such as webservers, - SSH and friends + \item Setting up a development environment + \item Defining an FSM to describe the flow of the tutorial and implementing proper callbacks + for this machine, such as ones that display messages to the user + \item Implementing the required event handlers, which may trigger state transitions in the FSM, + interact with non-TFW code and do various things that might be needed during an exercise + \item Defining what processes should run inside the container besides the things TFW + starts automatically + \item Setting up reverse proxying for any user-facing network applications such as webservers \end{itemize} At first all these tasks can seem quite overwhelming. -Remember that witchcraft is what we practice here after all. +Remember that \emph{witchcraft} is what we practice here after all. To overcome the high initial learning curve of getting familiar with the framework I have created a repository called \emph{test-tutorial-framework} with the purpose of providing a project template for developers looking to create challenges using the framework. This repository is a really simple client codebase that is suitable for -developing TFW itself as well (a good place for the tests of the framework). -I also provides an ``industry standard'' \texttt{hack} directory -containing bash scripts that make the development of challenges and TFW itself very convenient. 
+developing TFW itself as well (a good place to host tests for the framework). + +It also provides an ``industry standard'' \texttt{hack} directory +containing bash scripts that make the development of tutorials and TFW itself very convenient. These scripts span from bootstrapping a complete development environment in one command, to building and running challenges based on the framework. Let us take a quick look at the \emph{test-tutorial-framework} project to acquire a greater -understanding of how the framework operates. +understanding of how the framework interacts with client code. \section{Project Structure} -\section{Implementing a Finite State Machine} +\begin{lstlisting}[ + caption={The project structure of test-tutorial-framework}, + captionpos=b] +. +|--config.yml +| +|--hack/ +|   |--tfw.sh +| |--... +| +|--controller/ +|   |--Dockerfile +| |--... +| +|--solvable/ + |--Dockerfile + |--... +\end{lstlisting} -\section{Defining Processes to Run} +\subsection{Avatao Configuration File} +The \texttt{config.yml} file is an Avatao challenge configuration file, +which is used describe what kind of Docker containers implement a challenge, +what ports do they expose talking what protocols, define the name of the +excercise, it's difficulity, and so on. +Every Avatao challenge must provide such a file. +The Tutorial Framework does not use this file, this is only required to run +the exercise in production, so it is mostly out of scope for this thesis. -\section{Exposing Front-facing Networking Applications} +\subsection{Controller Image} +It was previously mentioned that the \texttt{controller} Docker image is responsible +for the solution checking of challenges (whether the user has completed the exercise or not). +Currently this image is maintained in the test-tutorial-framework repository. +It is a really simple Python server which functions as a TFW event handler as well. 
+It subscribes to the FSM update messages +broadcast by the \texttt{FSMManagingEventHandler} we have previously discussed; +this way it is capable of keeping track of the state of the tutorial, +which allows it to detect if the final state of the FSM is reached. -\section{How This Works} +\subsection{Solvable Image} +Currently the Tutorial Framework is maintained in three git repositories: +\begin{description} + \item[baseimage-tutorial-framework] Docker baseimage (contains all backend logic) + \item[frontend-tutorial-framework] Angular frontend + \item[test-tutorial-framework] An example tutorial built using baseimage and frontend +\end{description} +Every tutorial based on the framework must use the TFW baseimage as the parent of +its own \texttt{solvable} image, using the \texttt{FROM}% +\footnote{\href{https://docs.docker.com/engine/reference/builder/\#from} +{https://docs.docker.com/engine/reference/builder/\#from}} +Dockerfile command. +Being an example project of the framework this is the case with +test-tutorial-framework as well. -\section{Developer Tooling} +\section{Details of the Solvable Image} +Let us dive into greater detail on how the \texttt{solvable} Docker image of the +test-tutorial-framework operates. +The directory structure is as follows: +\begin{lstlisting} +solvable/ +|--Dockerfile +|--frontend/ +|--supervisor/ +|--nginx/ +|--src/ +\end{lstlisting} +I am going to discuss these one by one. -\section{Framework Release Management} +\subsection{Dockerfile} +Since this is a Docker image it must define a \texttt{Dockerfile}. +This image always uses the baseimage of the framework as a parent image. +Besides this developers can use this as a regular \texttt{Dockerfile} to work with as +they see fit to implement their tutorial. + +\subsection{Frontend} +This directory is designed to contain a clone of the frontend repository. +By default it is empty and its contents will be put in place during the +setup of the development environment. 
+ +\subsection{Supervisor} +As previously mentioned, the framework uses supervisor to run several processes +inside a Docker container. +Usually Docker containers only run a single process and developers simply start +more containers instead of processes if required. +This approach is not suitable for TFW, as it would require the framework to orchestrate +Docker containers from another container, which is feasible in theory but +very hard and impractical to do in practice. + +Supervisor is a process control system designed to be able to work with +processes on UNIX-like operating systems. +When a tutorial built on TFW is started, the framework starts supervisor with +PID\footnote{Process ID, on UNIX-like systems the \texttt{init} program is the first +process started} 1, which in turn starts all the programs defined +in this directory using supervisor configuration files. +For example, a developer would use a file similar to this to run a webserver +written in Python: +\begin{lstlisting} +[program:yourprogram] +user=user +directory=/home/user/example/ +command=python3 server.py +autostart=true +\end{lstlisting} +As mentioned earlier in Section~\ref{processmanagement}, any program that is started this way +can be managed by the framework using API messages. + +\subsection{Nginx} +For simplicity, exercises based on the framework only expose a single port from the +\texttt{solvable} container. +This port is required to serve the frontend of the framework. +If this is the case, how do we run additional web applications to showcase vulnerabilities +on during the tutorial? +Since one port can only be bound by one process at a time, we will need to +use a reverse-proxy to bind the port and redirect traffic to other +webservers binding non-exposed ports. + +To support this, TFW automatically runs an nginx webserver (it uses this nginx +process to serve the framework frontend as well) we can supply additional configurations to. 
+Any configuration files placed into this directory will be interpreted by nginx +once the container has started. +To set up the reverse-proxying of a webserver running on port 3333, +one would write a config file similar to this one: +\begin{lstlisting} +location /yoururl { + proxy_pass http://127.0.0.1:3333; +} +\end{lstlisting} +Now the content served by this webserver will be available on ``/yoururl''. +It is very important to understand that developers +have to make sure that their web application \emph{behaves well} behind a reverse proxy. +What this means is that they are going to be served from a ``subdirectory'' of a URL: +for example ``/register'' will be served under ``/yoururl/register''. +This means that all links in the final HTML must refer to the proxied URLs, e.g.\ +``/yoururl/register'' and server side redirects must point to the correct hrefs as well. +Idiomatically this is usually implemented by supplying a \texttt{BASEURL} +to the application through an environment variable, so that it is able to set +itself up correctly. + +\subsection{Copying Configuration Files} +Behind the curtains, the Tutorial Framework uses some Dockerfile trickery to +facilitate the copying of supervisor and nginx configuration files to their correct +locations. +Normally when one uses the \texttt{COPY}% +\footnote{\href{https://docs.docker.com/engine/reference/builder/\#copy} +{https://docs.docker.com/engine/reference/builder/\#copy}} +command to create a layer% +\footnote{\href{https://docs.docker.com/storage/storagedriver/} +{https://docs.docker.com/storage/storagedriver/}} in a Docker image, +this action takes place when building that image (i.e.\ in the \emph{build context} +of that image). +This is not good for this use case: when building the framework baseimage, +these configuration files that will be written by content developers do not even +exist. +How could we copy files into an image layer that will be created in the future? 
+ +It is possible to use a command called \texttt{ONBUILD}% +\footnote{\href{https://docs.docker.com/engine/reference/builder/\#onbuild} +{https://docs.docker.com/engine/reference/builder/\#onbuild}} +in the Dockerfile of a baseimage to delay another command +to the point in time when other images will use the baseimage +as a parent with the \texttt{FROM} command. This makes it possible to execute +commands in the build context of the descendant image. +This is great, because the config files we need \emph{will} exist in the build +context of the \texttt{solvable} image of test-tutorial-framework. +In practice this looks something like this in the baseimage \texttt{Dockerfile}: +\begin{lstlisting} +ONBUILD COPY ${BUILD_CONTEXT}/nginx/ ${TFW_NGINX_COMPONENTS} +ONBUILD COPY ${BUILD_CONTEXT}/supervisor/ ${TFW_SUPERVISORD_COMPONENTS} +\end{lstlisting} + +\subsection{Source Directory} +The \texttt{src} directory usually holds tutorial-specific code, such as +the implementations of event handlers, the framework FSM, additional web applications +served by the exercise and generally anything that won't fit in the other, +framework-specific directories. +The use of this directory is not mandatory, only a good practice, as developers +are free to implement the non-TFW parts of their exercises as they see fit +(the copying of these files into image layers is their responsibility). + +\section{Implementing a Finite State Machine} + +The Tutorial Framework allows developers to define state machines in two ways, +as discussed before. +I am going to display the implementation of the same FSM using these methods +to showcase the capabilities of the framework. + +\subsection{YAML based FSM} +YAML\footnote{YAML Ain't Markup Language \href{http://yaml.org}{http://yaml.org}} +is a human friendly data serialization standard and a superset of JSON. 
+It is possible to use this format to define a state machine like so: +\lstinputlisting[ + caption={A Finite State Machine implemented in YAML}, + captionpos=b +]{listings/test_fsm.yml} +This state machine has 2 states, state 0 and 1. +It defines a single transition between them. +On entering state 1 the FSM will write a message to the frontend messaging component +by invoking TFW library code in Python. +The transition can only occur if the file \texttt{allow\_step\_1} exists. + +YAML based state machine implementations also allow the usage of the Jinja2% +\footnote{\href{http://jinja.pocoo.org/docs/2.10/}{http://jinja.pocoo.org/docs/2.10/}} +templating language to substitute variables into the YAML file. +These substitutions are really powerful, as one could even iterate through arrays +or invoke functions that produce strings to be inserted using this method. + +\subsection{Python based FSM} +Optionally, the same state machine can be implemented like this in Python using +TFW library code: +\lstinputlisting[ + language=python, + caption={A Finite State Machine implemented in Python}, + captionpos=b +]{listings/test_fsm.py} + +As you can see, both implementations are pretty clean and easy. +The advantage of YAML is that it makes it possible to define callbacks using virtually any +command that is available in the container, which means any +programming language is usable to implement said callbacks. +The advantage of the Python version is that since the framework is being developed in +Python as well, it is going to be easier to interface with library code. + +\section{Configuring Components} + +The configuration of built-ins is generally done in two different ways. +For the frontend (Angular) side, developers can edit a \texttt{config.ts} file, +which is full of key-value pairs of configurable frontend functionality. 
+These pairs are generally pretty self-documenting: +\lstinputlisting[ + caption={Example of the frontend \texttt{config.ts} file (stripped down to save space)}, + captionpos=b +]{listings/config.ts} +Configuring built-in event handlers is possible by editing the Python file they are +initialized in, which exposes several communicative options: +\lstinputlisting[ + language=python, + caption={Example of initializing some event handlers}, + captionpos=b +]{listings/event_handler_main.py} + +\section{Setting Up a Developer Environment} + +To make getting started as smooth as possible I have created +a ``bootstrap'' script which is capable of creating a development environment from +scratch. + +This script is distributed as a bash one-liner command, like so: +\begin{lstlisting}[language=bash] +bash -c "$(curl -fsSL https://git.io/vxBfj)" +\end{lstlisting} +This command downloads a script using \texttt{curl}, then executes the downloaded +script in bash. +In the open source community it is quite common to distribute installers this way% +\footnote{A good example of this is oh-my-zsh +\href{https://github.com/robbyrussell/oh-my-zsh}{https://github.com/robbyrussell/oh-my-zsh}}, +which might seem a little scary at first, but is no less safe than +downloading and executing a binary installer from a website with a valid TLS certificate. +This is because both methods place their trust in the PKI~\footnote{Public Key Infrastructure} +to defend against man-in-the-middle% +\footnote{\href{https://www.owasp.org/index.php/Man-in-the-middle_attack} +{https://www.owasp.org/index.php/Man-in-the-middle\_attack}} attacks. +Debating the security of this infrastructure is certainly something that we +as an industry should constantly do, but it is out of scope for this paper. 
+ +Nevertheless I have also created a version of this command that +checks the SHA256 checksum of the bootstrap script before executing it +(I have placed it on several lines to enhance visibility): +\begin{lstlisting}[language=bash] +URL=https://git.io/vxBfj \ +SHA=d81057610588e16666251a4167f05841fc8b66ccd6988490c1a2d2deb6de8ffa \ +bash -c 'cmd="$(curl -fsSL $URL)" && \ + [ $(echo "$cmd" | sha256sum | cut -d " " -f1) == $SHA ] && \ + echo "$cmd" | bash || echo Checksum mismatch!' +\end{lstlisting} +This essentially downloads the script, places it inside a variable as a string, +then pipes it into a bash interpreter \emph{only if} the checksum +of the downloaded string matches the one provided, otherwise it displays +an error message. +Software projects distributing their product as binary installers often +display such checksums on their download pages. + +The bootstrap script clones the three TFW repositories and does several steps +to create a working environment: +\begin{itemize} + \item It builds the newest version of the TFW baseimage locally + \item It pins the version tag in \texttt{solvable/Dockerfile}, + so that this newly-built version will be used by the tutorial + \item It places the latest frontend in \texttt{solvable/frontend} with + dependencies installed +\end{itemize} +It is important to note that this script \emph{does not} install anything system-wide, +it only works in the directory it is being executed from. + +It would be a lot easier to simply use Docker Hub% +\footnote{\href{https://hub.docker.com}{https://hub.docker.com}}, +but since the code base is currently proprietary we cannot distribute +it using a public medium. 
diff --git a/listings/config.ts b/listings/config.ts new file mode 100644 index 0000000..a36b516 --- /dev/null +++ b/listings/config.ts @@ -0,0 +1,39 @@ +export const config = { + documentTitle: 'Avatao Tutorials', + dashboard: { + route: 'dashboard', + triggerFirstFSMStep: 'step_1', + askReloadSite: false, + recoverAfterPageReload: true, + terminalOrConsole: 'terminal', + currentLayout: 'terminal-ide-web', + iframeUrl: '/webservice', + hideMessages: false + }, + ide: { + route: 'ide', + autoSaveInterval: 444, + defaultCode: 'Loading your file...', + defaultLanguage: 'text', + deployProcessName: 'webservice', + showDeployButton: true, + reloadIframeOnDeploy: false, + showConsoleOnDeploy: true, + autoDetectFileLanguage: true, + }, + messages: { + route: 'messages', + showNextButton: false, + messageQueueWPM: 150 + }, + console: { + route: 'console', + defaultContent: '', + rewriteContentWithProcessLogsOnDeploy: 'stdout', + showLiveLogs: true, + defaultLogs: { + stdout: '', + stderr: '' + } + }, +}; diff --git a/listings/event_handler_main.py b/listings/event_handler_main.py new file mode 100644 index 0000000..b578c28 --- /dev/null +++ b/listings/event_handler_main.py @@ -0,0 +1,15 @@ +fsm = FSMManagingEventHandler( # TFW FSM + key='fsm', + fsm_type=TestFSM +) +ide = IdeEventHandler( # Web IDE backend + key='ide', + allowed_directories=[TFWENV.IDE_WD, TFWENV.WEBSERVICE_DIR], + directory=TFWENV.IDE_WD, + exclude=['*.pyc'] +) +logmonitor = LogMonitoringEventHandler( # Sends live logs of webservice process to frontend + key='logmonitor', + process_name='webservice', + log_tail=2000 +) diff --git a/listings/test_fsm.py b/listings/test_fsm.py new file mode 100644 index 0000000..ebe1b69 --- /dev/null +++ b/listings/test_fsm.py @@ -0,0 +1,16 @@ +from os.path import exists +from tfw.fsm import LinearFSM +from tfw.networking import MessageSender + +class TestFSM(LinearFSM): + def __init__(self): + super().__init__(2) + self.subscribe_predicate('step_1', self.step_1_allowed) + 
+ @staticmethod + def step_1_allowed(): + return exists('allow_step_1') + + def on_enter_1(self, event_data): + MessageSender().message_sender.send('FSM', 'Entered state 1!') + diff --git a/listings/test_fsm.yml b/listings/test_fsm.yml new file mode 100644 index 0000000..97a54b0 --- /dev/null +++ b/listings/test_fsm.yml @@ -0,0 +1,12 @@ +states: + - name: '0' + - name: '1' + on_enter: > + python3 -c "from tfwconnector import MessageSender; + MessageSender().send('FSM', 'Entered state 1!')" +transitions: + - trigger: step_1 + source: '0' + dest: '1' + predicates: + - '[ -f allow_step_1 ]' # in bash -f means that the file exists diff --git a/listings/test_tfw_structure.txt b/listings/test_tfw_structure.txt new file mode 100644 index 0000000..6270329 --- /dev/null +++ b/listings/test_tfw_structure.txt @@ -0,0 +1,16 @@ +. +├── config.yml +│ +├── hack/ +│   └── tfw.sh +│ +├── controller/ +│   └── Dockerfile +│ +└── solvable/ + ├── Dockerfile + ├── frontend + ├── nginx + ├── src + └── supervisor +