% Preamble for the "AutoML for Robust Anomaly Detection" case-study slides.
\UseRawInputEncoding
%\documentclass[hyperref={pdfpagelabels=false}]{beamer}
\documentclass[hyperref={pdfpagelabels=false},aspectratio=169]{beamer}
% The hyperref option hyperref={pdfpagelabels=false} suppresses the warning:
% Package hyperref Warning: Option `pdfpagelabels' is turned off
% (hyperref) because \thepage is undefined.
% Hyperref stopped early
%

\usepackage{lmodern}
% The lmodern package avoids the following warnings:
% LaTeX Font Warning: Font shape `OT1/cmss/m/n' in size <4> not available
% (Font) size <5> substituted on input line 22.
% LaTeX Font Warning: Size substitutions with differences
% (Font) up to 1.0pt have occurred.
%

% If \title{\ldots} \author{\ldots} only come after \begin{document},
% the following warning appears:
% Package hyperref Warning: Option `pdfauthor' has already been used,
% (hyperref) ...
% That is why they are placed here, before \begin{document}.

\title[AutoML4Rad]{Case Study - AutoML for Robust Anomaly Detection}
\author{Simon Kluettermann}
\date{\today}

\institute{ls9 tu Dortmund}

% This prevents the navigation symbols from being displayed.
\setbeamertemplate{navigation symbols}{}

% In addition, the package beamerthemeshadow is loaded.
\usepackage{beamerthemeshadow}

\hypersetup{pdfstartview={Fit}} % fits the presentation to the window when first displayed

\usepackage{appendixnumberbeamer}
\usepackage{listings}

\usetheme{CambridgeUS}
% NOTE(review): ngerman presumably switches hyphenation and the \today format
% to German although the slide text is English -- confirm this is intended.
\usepackage{ngerman}
\usecolortheme{dolphin}

% \beamersetuncovermixins{\opaqueness<1>{25}}{\opaqueness<2->{15}}
% makes elements that are still to come (in the future)
% appear only faintly
%\beamersetuncovermixins{\opaqueness<1>{25}}{\opaqueness<2->{15}}%here disabled
% also works for tables when teTeX is used\ldots
\renewcommand{\figurename}{}

% Custom footline made of three colour boxes laid out side by side:
%   40%  of the paper width: short title (\insertshorttitle)
%   25%  of the paper width: current section (\insertsection)
%   ~35% of the paper width: short date and "frame / total" counter
% The widths add up to 0.9999\paperwidth so the row stays within the page.
% NOTE(review): the counter is wrapped in \hyperlink{toc}, but no target named
% `toc' is defined in this file -- confirm one exists, otherwise the link is dead.
\setbeamertemplate{footline}
{
  \leavevmode%
  \hbox{%
  \begin{beamercolorbox}[wd=.4\paperwidth,ht=2.25ex,dp=1ex,center]{author in head/foot}%
    \usebeamerfont{author in head/foot}\insertshorttitle
  \end{beamercolorbox}%
  \begin{beamercolorbox}[wd=.25\paperwidth,ht=2.25ex,dp=1ex,center]{title in head/foot}%
    \usebeamerfont{title in head/foot}\insertsection
  \end{beamercolorbox}%
  \begin{beamercolorbox}[wd=.3499\paperwidth,ht=2.25ex,dp=1ex,right]{date in head/foot}%
    \usebeamerfont{date in head/foot}\insertshortdate{}\hspace*{2em}
    \hyperlink{toc}{\insertframenumber{} / \inserttotalframenumber\hspace*{2ex}}
  \end{beamercolorbox}}%
  \vskip0pt%
}

\usepackage[absolute,overlay]{textpos}
\usepackage{graphicx}

% \source{<text>}: prints "Source: <text>" in a 9cm wide block placed at an
% absolute position near the bottom-left corner of the slide, using the beamer
% font and colour named `framesource' (foreground mixed at 66%).
% The vertical offset is measured from the bottom of the paper rather than
% hard-coded: the former fixed 8.9cm equals \paperheight-0.7cm on a 4:3 slide
% (9.6cm tall) but lies at the very bottom edge of the 16:9 slide (9cm tall)
% selected by aspectratio=169.
\newcommand{\source}[1]{\begin{textblock*}{9cm}(0.1cm,\dimexpr\paperheight-0.7cm\relax)
  \begin{beamercolorbox}[ht=0.5cm,left]{framesource}
    \usebeamerfont{framesource}\usebeamercolor[fg!66]{framesource} Source: {#1}
  \end{beamercolorbox}
\end{textblock*}}

\begin{document}

%from file ../case1/data/000.txt
% Title slide, rendered by beamer from the \title, \author, \institute and
% \date given in the preamble.
% The previous body was a hand-written article-class title page (titlepage
% environment, \pagenumbering, \thispagestyle, \newpage, \setcounter{page})
% with an empty title line and thesis boilerplate (RWTH Aachen, November 2020)
% that contradicted the preamble metadata; none of that belongs in a frame.
\begin{frame}
  \titlepage
\end{frame}

%from file ../case1/data/001Anomaly Detection.txt
% Two-column slide: bullet list on the left, illustration on the right.
% The frame label is numbered because several frames share this title and
% beamer frame labels have to be unique.
\begin{frame}[label=anomaly-detection-1]
  \frametitle{Anomaly Detection}
  \begin{columns}[c] % vertically centre the columns
    \begin{column}{0.48\textwidth}
      \begin{itemize}
        \item Two distributions
        \begin{itemize}
          \item One known (=normal)
          \item One unknown (=anomalies)
        \end{itemize}
        \item Separate them
      \end{itemize}
    \end{column}%
    \hfill%
    \begin{column}{0.48\textwidth}
      \begin{figure}[H]
        \centering
        \includegraphics[width=0.9\textwidth]{../prep/01Anomaly_Detection/anomalies.pdf}
        \label{fig:prep01Anomaly_Detectionanomaliespdf}
      \end{figure}
    \end{column}%
    \hfill%
  \end{columns}
\end{frame}

%from file ../case1/data/002Anomaly Detection.txt
% Two-column slide: bullet list on the left, illustration on the right.
% The frame label is numbered because several frames share this title and
% beamer frame labels have to be unique.
\begin{frame}[label=anomaly-detection-2]
  \frametitle{Anomaly Detection}
  \begin{columns}[c] % vertically centre the columns
    \begin{column}{0.48\textwidth}
      \begin{itemize}
        \item Two distributions
        \begin{itemize}
          \item One known (=normal)
          \item One unknown (=anomalies)
        \end{itemize}
        \item Separate them
        \item Problem: few anomalies
      \end{itemize}
    \end{column}%
    \hfill%
    \begin{column}{0.48\textwidth}
      \begin{figure}[H]
        \centering
        \includegraphics[width=0.9\textwidth]{../prep/02Anomaly_Detection/difference.pdf}
        \label{fig:prep02Anomaly_Detectiondifferencepdf}
      \end{figure}
    \end{column}%
    \hfill%
  \end{columns}
\end{frame}

%from file ../case1/data/003Anomaly Detection.txt
% Two-column slide: bullet list on the left, illustration on the right.
% The frame label is numbered because several frames share this title and
% beamer frame labels have to be unique.
\begin{frame}[label=anomaly-detection-3]
  \frametitle{Anomaly Detection}
  \begin{columns}[c] % vertically centre the columns
    \begin{column}{0.48\textwidth}
      \begin{itemize}
        \item Anomalies are rare, so often only a few datapoints are known (e.g.\ Machine Failure in an Aircraft)
        \item In practice, anomalies might appear that are not known during testing
        \item $\Rightarrow$ So train the model only on normal samples
        \item Unsupervised Machine Learning
        \begin{itemize}
          \item What can we say without knowing anomalies?
          \item ``Understand your dataset''
        \end{itemize}
      \end{itemize}
    \end{column}%
    \hfill%
    \begin{column}{0.48\textwidth}
      \begin{figure}[H]
        \centering
        \includegraphics[width=0.9\textwidth]{../prep/03Anomaly_Detection/usup.pdf}
        \label{fig:prep03Anomaly_Detectionusuppdf}
      \end{figure}
    \end{column}%
    \hfill%
  \end{columns}
\end{frame}

%from file ../case1/data/004Anomaly Detection.txt
% Two-column slide: bullet list on the left, illustration on the right.
% The frame label is numbered because several frames share this title and
% beamer frame labels have to be unique.
\begin{frame}[label=anomaly-detection-4]
  \frametitle{Anomaly Detection}
  \begin{columns}[c] % vertically centre the columns
    \begin{column}{0.48\textwidth}
      \begin{itemize}
        \item Anomalies are rare, so often only a few datapoints are known (e.g.\ Machine Failure in an Aircraft)
        \item In practice, anomalies might appear that are not known during testing
        \item $\Rightarrow$ So train the model only on normal samples
        \item Unsupervised Machine Learning
        \begin{itemize}
          \item What can we say without knowing anomalies?
          \item ``Understand your dataset''
        \end{itemize}
      \end{itemize}
    \end{column}%
    \hfill%
    \begin{column}{0.48\textwidth}
      \begin{figure}[H]
        \centering
        \includegraphics[width=0.9\textwidth]{../prep/04Anomaly_Detection/circle.pdf}
        \label{fig:prep04Anomaly_Detectioncirclepdf}
      \end{figure}
    \end{column}%
    \hfill%
  \end{columns}
\end{frame}

%from file ../case1/data/005Anomaly Detection.txt
% Two-column slide: bullet list on the left, illustration on the right.
% The frame label is numbered because several frames share this title and
% beamer frame labels have to be unique.
\begin{frame}[label=anomaly-detection-5]
  \frametitle{Anomaly Detection}
  \begin{columns}[c] % vertically centre the columns
    \begin{column}{0.48\textwidth}
      \begin{itemize}
        \item Seems easy? Now do this
        \begin{itemize}
          \item in thousands of dimensions
          \item with complicated distributions
          \item and overlap between anomalies and normal points
        \end{itemize}
      \end{itemize}
    \end{column}%
    \hfill%
    \begin{column}{0.48\textwidth}
      \begin{figure}[H]
        \centering
        \includegraphics[width=0.9\textwidth]{../prep/05Anomaly_Detection/anomaly_detection.png}
        \label{fig:prep05Anomaly_Detectionanomaly_detectionpng}
      \end{figure}
    \end{column}%
    \hfill%
  \end{columns}
\end{frame}

%from file ../case1/data/006AutoML.txt
% Two-column slide: bullet list on the left, illustration on the right.
\begin{frame}[label=AutoML]
  \frametitle{AutoML}
  \begin{columns}[c] % vertically centre the columns
    \begin{column}{0.48\textwidth}
      \begin{itemize}
        \item Most machine learning requires Hyperparameter Optimisation
        \item (Find model parameters that result in the best results)
        \item $\Rightarrow$ AutoML: Do this automatically as fast as possible
      \end{itemize}
    \end{column}%
    \hfill%
    \begin{column}{0.48\textwidth}
      \begin{figure}[H]
        \centering
        \includegraphics[width=0.9\textwidth]{../prep/06AutoML/Download.png}
        \label{fig:prep06AutoMLDownloadpng}
      \end{figure}
    \end{column}%
    \hfill%
  \end{columns}
\end{frame}

%from file ../case1/data/007AutoAD.txt
% Single-column slide: why AutoML and anomaly detection are hard to combine.
\begin{frame}[label=AutoAD]
  \frametitle{AutoAD}
  \begin{itemize}
    \item So let's combine both (Auto Anomaly Detection)
    \item $\Rightarrow$ Problem
    \begin{itemize}
      \item AutoML requires Evaluation (loss, accuracy, AUC) to optimize
      \item AD can only be evaluated with regard to the anomalies
      \item $\Rightarrow$ no longer unsupervised
    \end{itemize}
    \item So most Anomaly Detection is ``unoptimized''
  \end{itemize}
\end{frame}

%from file ../case1/data/008Solution 1 Metrics.txt
% Two-column slide: bullet list on the left, illustration on the right.
\begin{frame}[label=Solution 1 Metrics]
  \frametitle{Solution 1 Metrics}
  \begin{columns}[c] % vertically centre the columns
    \begin{column}{0.48\textwidth}
      \begin{itemize}
        \item So how to solve this?
        \item One option: Think of some function to evaluate only the normal points
        \item $\Rightarrow$ A bit hard to do in a case study
      \end{itemize}
    \end{column}%
    \hfill%
    \begin{column}{0.48\textwidth}
      \begin{figure}[H]
        \centering
        \includegraphics[width=0.9\textwidth]{../prep/08Solution_1_Metrics/circle2.pdf}
        \label{fig:prep08Solution_1_Metricscircle2pdf}
      \end{figure}
    \end{column}%
    \hfill%
  \end{columns}
\end{frame}

%from file ../case1/data/009Solution 2 OneShot Learning.txt
% Single-column slide presenting the second approach.
% NOTE(review): the frame title says "OneShot" while the bullet says
% "Zero Shot" -- confirm which term is intended.
\begin{frame}[label=Solution 2 OneShot Learning]
  \frametitle{Solution 2 OneShot Learning}
  \begin{itemize}
    \item So how to solve this?
    \item One option: ``Just find the best solution directly''
    \item $\Rightarrow$ Zero Shot AutoML
    \item Find best practices for hyperparameters
    \item Requires optimisation for each model separately $\Rightarrow$ matches the case study structure quite well!
  \end{itemize}
\end{frame}

%from file ../case1/data/010Course.txt
% Two-column slide: course outline on the left, table image on the right.
\begin{frame}[label=Course]
  \frametitle{Course}
  \begin{columns}[c] % vertically centre the columns
    \begin{column}{0.48\textwidth}
      \begin{itemize}
        \item Basics of Scientific Computing
        \item Basics of AD
        \item Basics of AutoML
        \item Build groups for each algorithm
        \begin{itemize}
          \item Choose a set of Hyperparameters
          \item Find ``best practices'' for them
          \item Maybe consider more complicated Transformations (Preprocessing, Ensemble)
        \end{itemize}
        \item Compare between groups (best algorithm for current situation)
        \item Evaluate on new datasets
        \item Write a report/Present your work
      \end{itemize}
    \end{column}%
    \hfill%
    \begin{column}{0.48\textwidth}
      \begin{figure}[H]
        \centering
        \includegraphics[width=0.9\textwidth]{../prep/09Course/table.png}
        \label{fig:prep09Coursetablepng}
      \end{figure}
    \end{column}%
    \hfill%
  \end{columns}
\end{frame}

%from file ../case1/data/011Questions.txt
% Closing slide listing the participation requirements.
\begin{frame}[label=Questions]
  \frametitle{Questions}
  \begin{itemize}
    \item Requirements:
    \begin{itemize}
      \item MD Req 1 $\Rightarrow$ MD Req 8
      \item Basic Python/Math Knowledge
      \item Motivation to learn something new ;)
    \end{itemize}
  \end{itemize}
\end{frame}

\end{document}