% Beamer slide deck "Anomaly Detection and AutoML": document class and fonts.
\UseRawInputEncoding
%\documentclass[hyperref={pdfpagelabels=false}]{beamer}
\documentclass[hyperref={pdfpagelabels=false},aspectratio=169]{beamer}
% The hyperref option hyperref={pdfpagelabels=false} suppresses the warning:
% Package hyperref Warning: Option `pdfpagelabels' is turned off
% (hyperref) because \thepage is undefined.
% Hyperref stopped early
%

\usepackage{lmodern}
% The lmodern package avoids the following warnings:
% LaTeX Font Warning: Font shape `OT1/cmss/m/n' in size <4> not available
% (Font) size <5> substituted on input line 22.
% LaTeX Font Warning: Size substitutions with differences
% (Font) up to 1.0pt have occurred.
%

% If \title{\ldots} \author{\ldots} only come after \begin{document},
% the following warning appears:
% Package hyperref Warning: Option `pdfauthor' has already been used,
% (hyperref) ...
% Therefore the metadata is set here, before \begin{document}.

\title[Anomaly Detection and AutoML]{Anomaly Detection and AutoML}
\author{Simon Kluettermann}
\date{\today}


\institute{ls9 tu Dortmund}


% Prevents the navigation symbols from being displayed.
\setbeamertemplate{navigation symbols}{}

% Additionally the package beamerthemeshadow is loaded.
% NOTE(review): \usetheme{CambridgeUS} is selected further down; presumably it
% overrides most of what this theme sets up -- confirm whether it is still needed.
\usepackage{beamerthemeshadow}

\hypersetup{pdfstartview={Fit}} % fits the presentation to the window when first displayed

\usepackage{appendixnumberbeamer}
\usepackage{listings}


\usetheme{CambridgeUS}
% NOTE(review): the slides are written in English while this loads German
% language support; the standalone ngerman package is also commonly replaced
% by babel -- left unchanged here, confirm what is intended.
\usepackage{ngerman}
\usecolortheme{dolphin}


% \beamersetuncovermixins{\opaqueness<1>{25}}{\opaqueness<2$\Rightarrow${15}}
% makes elements that are still to come (in the future)
% appear only faintly
%\beamersetuncovermixins{\opaqueness<1>{25}}{\opaqueness<2$\Rightarrow${15}}%here disabled
% also works for tables when teTeX is used\ldots
% Empty figure name (presumably so captions carry no "Figure" word -- confirm).
\renewcommand{\figurename}{}

% Custom footline made of three colour boxes placed side by side in one \hbox:
%   1. 40% of the paper width, "author in head/foot" colours, prints the short title;
%   2. 25% of the paper width, "title in head/foot" colours, prints the current section;
%   3. 34.99% of the paper width, "date in head/foot" colours, prints the short date
%      and a "frame / total frames" counter wrapped in a hyperlink.
% NOTE(review): the hyperlink points at a target named `toc', but no frame or
% label with that name is defined in this file -- confirm the intended target.
\setbeamertemplate{footline}
{
  \leavevmode%
  \hbox{%
    \begin{beamercolorbox}[wd=.4\paperwidth,ht=2.25ex,dp=1ex,center]{author in head/foot}%
      \usebeamerfont{author in head/foot}\insertshorttitle
    \end{beamercolorbox}%
    \begin{beamercolorbox}[wd=.25\paperwidth,ht=2.25ex,dp=1ex,center]{title in head/foot}%
      \usebeamerfont{title in head/foot}\insertsection
    \end{beamercolorbox}%
    \begin{beamercolorbox}[wd=.3499\paperwidth,ht=2.25ex,dp=1ex,right]{date in head/foot}%
      \usebeamerfont{date in head/foot}\insertshortdate{}\hspace*{2em}
      \hyperlink{toc}{\insertframenumber{} / \inserttotalframenumber\hspace*{2ex}}
    \end{beamercolorbox}}%
  \vskip0pt%
}

\usepackage[absolute,overlay]{textpos}
\usepackage{graphicx}

% \source{<text>}: prints "Source: <text>" in a 9cm wide textpos block placed at
% the absolute position (0.1cm, 8.9cm), inside a beamercolorbox that uses the
% `framesource' font and colour.
% NOTE(review): no `framesource' beamer colour/font is set in this file --
% confirm it is defined before this macro is used.
\newcommand{\source}[1]{\begin{textblock*}{9cm}(0.1cm,8.9cm)
  \begin{beamercolorbox}[ht=0.5cm,left]{framesource}
    \usebeamerfont{framesource}\usebeamercolor[fg!66]{framesource} Source: {#1}
  \end{beamercolorbox}
\end{textblock*}}


\begin{document}



%from file ../case2/data/000.txt
% Opening frame. The titlepage block is followed by hand-set lines of text.
% NOTE(review): the text below names a master thesis at RWTH Aachen dated
% November 2020, whereas the preamble sets a different title and institute --
% it looks like leftover template content; confirm before presenting.
% NOTE(review): titlepage as an environment and the \pagenumbering /
% \thispagestyle / \newpage / \setcounter{page} commands after it are
% article-style constructs; presumably not needed inside a beamer frame -- confirm.
\begin{frame}[label=]
  \frametitle{}
  \begin{titlepage}

    \centering
    {\huge\bfseries \par}
    \vspace{2cm}
    {\LARGE\itshape Simon Kluettermann\par}
    \vspace{1.5cm}
    {\scshape\Large Master Thesis in Physics\par}
    \vspace{0.2cm}
    {\Large submitted to the \par}
    \vspace{0.2cm}
    {\scshape\Large Faculty of Mathematics Computer Science and Natural Sciences \par}
    \vspace{0.2cm}
    {\Large \par}
    \vspace{0.2cm}
    {\scshape\Large RWTH Aachen University}
    \vspace{1cm}

    \vfill
    {\scshape\Large Department of Physics\par}
    \vspace{0.2cm}
    {\scshape\Large Institute for theoretical Particle Physics and Cosmology\par}
    \vspace{0.2cm}
    { \Large\par}
    \vspace{0.2cm}
    {\Large First Referee: Prof. Dr. Michael Kraemer \par}
    {\Large Second Referee: Prof. Dr. Felix Kahlhoefer}

    \vfill

    % Bottom of the page
    {\large November 2020 \par}
  \end{titlepage}
  \pagenumbering{roman}
  \thispagestyle{empty}
  \null
  \newpage
  \setcounter{page}{1}
  \pagenumbering{arabic}
\end{frame}


%from file ../case2/data/001Anomaly Detection.txt
% Motivation slide: what an anomaly is, illustrated with a traffic light.
\begin{frame}[label=Anomaly Detection]
  \frametitle{Anomaly Detection}
  \begin{itemize}
    \item Find strange (unexpected) samples.
    \item $\Rightarrow$ If a traffic light is constantly yellow, probably something broke
    \item But this could happen in a lot of different ways
    \item $\Rightarrow$ Most likely the traffic light is just off. But it could also fluctuate quickly or start smoking
    \item How to cover all possible anomalies?
    \item $\Rightarrow$ Unsupervised Machine Learning
  \end{itemize}
\end{frame}


%from file ../case2/data/002Unsupervised Machine Learning.txt
% Contrasts supervised learning (input plus label) with the unsupervised
% setting and lists the arrays named on the slide (x, tx, ty).
\begin{frame}[label=Unsupervised Machine Learning]
  \frametitle{Unsupervised Machine Learning}
  \begin{itemize}
    \item Normal machine learning: Input - Label
    \item Here: Only Input.
    \item $\Rightarrow$ Instead of classifying different types, try to understand your given dataset
    \item Deviations from this understanding are anomalies
    \begin{itemize}
      \item x: training samples
      \item tx: test samples
      \item ty: test labels (is a certain sample an anomaly or not)
    \end{itemize}
    \item Useful: \emph{peak /global/cardio.npz}
  \end{itemize}
\end{frame}


%from file ../case2/data/003kNN.txt
% Two-column slide: kNN anomaly score on the left, illustration on the right.
\begin{frame}[label=kNN]
  \frametitle{kNN}
  \begin{columns}[c] % align columns
    \begin{column}{0.48\textwidth}%.48
      \begin{itemize}
        \item How to do this? Here one algorithm: kNN
        \item Goal: Generate an anomaly score (high value $\Rightarrow$ highly anomalous)
        \item Here: The anomaly score is the distance to the kth closest sample
      \end{itemize}
    \end{column}%
    \hfill%
    \begin{column}{0.48\textwidth}%.48
      \begin{figure}[H]
        \centering
        \includegraphics[width=0.9\textwidth]{..//prep/03kNN/yanghuang 08.png}
        % \label has to follow \caption so that it refers to this figure.
        \caption{[Yang, Huang 08]}
        \label{fig:prep03kNNyanghuang 08png}
      \end{figure}

    \end{column}%
    \hfill%
  \end{columns}

\end{frame}


%from file ../case2/data/004kNN.txt
% Same kNN text as the previous slide, paired with the dist0 plot.
% The frame label is made unique: an earlier frame in this file already uses
% label=kNN, and repeating it yields a multiply-defined label.
\begin{frame}[label=kNN 2]
  \frametitle{kNN}
  \begin{columns}[c] % align columns
    \begin{column}{0.48\textwidth}%.48
      \begin{itemize}
        \item How to do this? Here one algorithm: kNN
        \item Goal: Generate an anomaly score (high value $\Rightarrow$ highly anomalous)
        \item Here: The anomaly score is the distance to the kth closest sample
      \end{itemize}
    \end{column}%
    \hfill%
    \begin{column}{0.48\textwidth}%.48
      \begin{figure}[H]
        \centering
        \includegraphics[width=0.9\textwidth]{..//prep/04kNN/dist0.pdf}
        \label{fig:prep04kNNdist0pdf}
      \end{figure}

    \end{column}%
    \hfill%
  \end{columns}

\end{frame}


%from file ../case2/data/005.txt
% Untitled slide showing only the dist0 plot.
\begin{frame}[label=]
  \frametitle{}
  \begin{figure}[H]
    \centering
    \includegraphics[width=0.8\textwidth]{..//prep/05/dist0.pdf}
    \label{fig:prep05dist0pdf}
  \end{figure}

\end{frame}


%from file ../case2/data/006AUC Score.txt
% Two-column slide: confusion-matrix image on the left, dist0 plot on the right.
\begin{frame}[label=AUC Score]
  \frametitle{AUC Score}
  \begin{columns}[c] % align columns
    \begin{column}{0.47619047619047616\textwidth}%.48
      \begin{figure}[H]
        \centering
        \includegraphics[width=0.9\textwidth]{..//prep/06AUC_Score/02confusion.png}
        \label{fig:prep06AUC_Score02confusionpng}
      \end{figure}

    \end{column}%
    \hfill%
    \begin{column}{0.47619047619047616\textwidth}%.48
      \begin{figure}[H]
        \centering
        \includegraphics[width=0.9\textwidth]{..//prep/06AUC_Score/01dist0.pdf}
        \label{fig:prep06AUC_Score01dist0pdf}
      \end{figure}

    \end{column}%
    \hfill%
  \end{columns}

\end{frame}


%from file ../case2/data/007AUC Score.txt
% Defines false/true positive rate and the ROC-AUC next to the ROC plot.
% The frame label is made unique: an earlier frame in this file already uses
% label=AUC Score, and repeating it yields a multiply-defined label.
\begin{frame}[label=AUC Score 2]
  \frametitle{AUC Score}
  \begin{columns}[c] % align columns
    \begin{column}{0.48\textwidth}%.48
      \begin{itemize}
        \item Iterate every threshold
        \item Plot fpr vs tpr
        \item False Positive Rate
        \begin{itemize}
          % \mathrm keeps the two-letter abbreviations from being set as products of variables.
          \item $\frac{\mathrm{FP}}{\mathrm{FP}+\mathrm{TN}}$
        \end{itemize}
        \item True Positive Rate
        \begin{itemize}
          \item $\frac{\mathrm{TP}}{\mathrm{TP}+\mathrm{FN}}$
        \end{itemize}
        \item ROC-AUC: Integral of this curve!
      \end{itemize}
    \end{column}%
    \hfill%
    \begin{column}{0.48\textwidth}%.48
      \begin{figure}[H]
        \centering
        \includegraphics[width=0.8\textwidth]{..//prep/07AUC_Score/roc.pdf}
        \label{fig:prep07AUC_Scorerocpdf}
      \end{figure}

    \end{column}%
    \hfill%
  \end{columns}

\end{frame}


%from file ../case2/data/008AUC Score.txt
% How to compute the AUC and how to read the values 1.0, 0.5 and 0.0.
% The frame label is made unique: earlier frames in this file already use
% label=AUC Score, and repeating it yields a multiply-defined label.
\begin{frame}[label=AUC Score 3]
  \frametitle{AUC Score}
  \begin{itemize}
    \item calculate with \emph{sklearn.metrics.roc\_auc\_score}
    \item Higher AUC score $\Rightarrow$ better
    \item $\mathrm{AUC}=1.0$ $\Rightarrow$ Perfect separation
    \item $\mathrm{AUC}=0.5$ $\Rightarrow$ Random model
    \item $\mathrm{AUC}=0.0$ $\Rightarrow$ Inverse separation (every anomaly is normal, and every normal sample is anomalous)
  \end{itemize}
\end{frame}


%from file ../case2/data/009AUC Scores.txt
% Single-image slide (students.png).
\begin{frame}[label=AUC Scores]
  \frametitle{AUC Scores}
  \begin{figure}[H]
    \centering
    \includegraphics[width=0.9\textwidth]{..//prep/09AUC_Scores/students.png}
    \label{fig:prep09AUC_Scoresstudentspng}
  \end{figure}

\end{frame}


%from file ../case2/data/010AutoML.txt
% Introduces hyperparameters, with k in kNN as the example to tune.
\begin{frame}[label=AutoML]
  \frametitle{AutoML}
  \begin{itemize}
    \item But: We can beat this!
    \item How? Hyperparameters
    \begin{itemize}
      \item Every algorithm has hyperparameters that control how it works
      \item For example: k in kNN (number of close points considered)
    \end{itemize}
    \item Let's take the worst algorithm (kNN: $0.927$) and try to improve it
  \end{itemize}
\end{frame}


%from file ../case2/data/011Optimize.txt
% Single-image slide (baseline.png).
\begin{frame}[label=Optimize]
  \frametitle{Optimize}
  \begin{figure}[H]
    \centering
    \includegraphics[width=0.9\textwidth]{..//prep/11Optimize/baseline.png}
    \label{fig:prep11Optimizebaselinepng}
  \end{figure}

\end{frame}


%from file ../case2/data/012Optimize.txt
% Single-image slide (optimize.png).
% The frame label is made unique: an earlier frame in this file already uses
% label=Optimize, and repeating it yields a multiply-defined label.
\begin{frame}[label=Optimize 2]
  \frametitle{Optimize}
  \begin{figure}[H]
    \centering
    \includegraphics[width=0.7\textwidth]{..//prep/12Optimize/optimize.png}
    \label{fig:prep12Optimizeoptimizepng}
  \end{figure}

\end{frame}


%from file ../case2/data/013flaml.txt
% Two-column slide: shell commands shown on the left, forflaml.png on the right.
\begin{frame}[label=flaml]
  \frametitle{flaml}
  \begin{columns}[c] % align columns
    \begin{column}{0.48\textwidth}%.48
      \begin{itemize}
        \item \emph{source folder/bin/activate}
        \item \emph{pip install flaml}
      \end{itemize}
    \end{column}%
    \hfill%
    \begin{column}{0.48\textwidth}%.48
      \begin{figure}[H]
        \centering
        \includegraphics[width=0.9\textwidth]{..//prep/15flaml/forflaml.png}
        \label{fig:prep15flamlforflamlpng}
      \end{figure}

    \end{column}%
    \hfill%
  \end{columns}

\end{frame}


%from file ../case2/data/014flaml.txt
% Single-image slide (flaml.png).
% The frame label is made unique: an earlier frame in this file already uses
% label=flaml, and repeating it yields a multiply-defined label.
\begin{frame}[label=flaml 2]
  \frametitle{flaml}
  \begin{figure}[H]
    \centering
    \includegraphics[width=0.9\textwidth]{..//prep/16flaml/flaml.png}
    \label{fig:prep16flamlflamlpng}
  \end{figure}

\end{frame}


%from file ../case2/data/015.txt
% Untitled slide showing only the hist plot.
\begin{frame}[label=]
  \frametitle{}
  \begin{figure}[H]
    \centering
    \includegraphics[width=0.7\textwidth]{..//prep/17/hist.pdf}
    \label{fig:prep17histpdf}
  \end{figure}

\end{frame}


%from file ../case2/data/016Your Turn.txt
% Closing exercise slide, followed by the end of the document.
\begin{frame}[label=Your Turn]
  \frametitle{Your Turn}
  \begin{itemize}
    \item Remember your last algorithm
    \item Find its hyperparameters (Tip: pyod website)
    \item Optimize your algorithm and give me a new AUC!
    \item Bonus Question: Is there a problem with what we are doing?
  \end{itemize}
\end{frame}



\end{document}