% thesis1/out/main.tex
%
% 586 lines
% 15 KiB
% TeX
%
\UseRawInputEncoding
%\documentclass[hyperref={pdfpagelabels=false}]{beamer}
\documentclass[hyperref={pdfpagelabels=false},aspectratio=169]{beamer}
% Die Hyperref Option hyperref={pdfpagelabels=false} verhindert die Warnung:
% Package hyperref Warning: Option `pdfpagelabels' is turned off
% (hyperref) because \thepage is undefined.
% Hyperref stopped early
%
\usepackage{lmodern}
% Das Paket lmodern erspart die folgenden Warnungen:
% LaTeX Font Warning: Font shape `OT1/cmss/m/n' in size <4> not available
% (Font) size <5> substituted on input line 22.
% LaTeX Font Warning: Size substitutions with differences
% (Font) up to 1.0pt have occurred.
%
% Wenn \titel{\ldots} \author{\ldots} erst nach \begin{document} kommen,
% kommt folgende Warnung:
% Package hyperref Warning: Option `pdfauthor' has already been used,
% (hyperref) ...
% Daher steht es hier vor \begin{document}
\title[Thesis Simon]{Open Thesis Topics}
\author{Simon.Kluettermann@cs.tu-dortmund.de}
\date{\today}
\institute{ls9 tu Dortmund}
% Dadurch wird verhindert, dass die Navigationsleiste angezeigt wird.
\setbeamertemplate{navigation symbols}{}
% zusaetzlich ist das usepackage{beamerthemeshadow} eingebunden
\usepackage{beamerthemeshadow}
\hypersetup{pdfstartview={Fit}} % fits the presentation to the window when first displayed
\usepackage{appendixnumberbeamer}
\usepackage{listings}
\usetheme{CambridgeUS}
\usepackage[ngerman]{babel}% babel replaces the obsolete standalone ngerman package
\usecolortheme{dolphin}
% \beamersetuncovermixins{\opaqueness<1>{25}}{\opaqueness<2$\Rightarrow${15}}
% sorgt dafuer das die Elemente die erst noch (zukuenftig) kommen
% nur schwach angedeutet erscheinen
%\beamersetuncovermixins{\opaqueness<1>{25}}{\opaqueness<2$\Rightarrow${15}}%here disabled
% klappt auch bei Tabellen, wenn teTeX verwendet wird\ldots
\renewcommand{\figurename}{}
% Custom footline: three colored boxes side by side — short title | current
% section | date + frame counter. Widths sum to ~.9999\paperwidth to avoid an
% overfull hbox from rounding.
\setbeamertemplate{footline}
{
\leavevmode%
\hbox{%
% Left box (40% of paper width): the short document title.
\begin{beamercolorbox}[wd=.4\paperwidth,ht=2.25ex,dp=1ex,center]{author in head/foot}%
\usebeamerfont{author in head/foot}\insertshorttitle
\end{beamercolorbox}%
% Middle box (25%): the current section name.
\begin{beamercolorbox}[wd=.25\paperwidth,ht=2.25ex,dp=1ex,center]{title in head/foot}%
\usebeamerfont{title in head/foot}\insertsection
\end{beamercolorbox}%
% Right box (~35%): short date plus "frame / total" counter. The counter is a
% hyperlink to a slide labelled "toc" — NOTE(review): no frame with label=toc
% is visible in this file; verify the target exists or the link is dead.
\begin{beamercolorbox}[wd=.3499\paperwidth,ht=2.25ex,dp=1ex,right]{date in head/foot}%
\usebeamerfont{date in head/foot}\insertshortdate{}\hspace*{2em}
\hyperlink{toc}{\insertframenumber{} / \inserttotalframenumber\hspace*{2ex}}
\end{beamercolorbox}}%
\vskip0pt%
}
\usepackage[absolute,overlay]{textpos}
\usepackage{graphicx}
% \source{<text>}: overlays a small "Source: <text>" credit line near the
% bottom-left corner of the current frame, using an absolutely positioned
% textpos block (requires the [absolute,overlay] textpos options loaded above).
% Fix: terminate each line inside the macro with "%" — without it, the
% end-of-line characters become spurious spaces in the typeset box.
\newcommand{\source}[1]{\begin{textblock*}{9cm}(0.1cm,8.9cm)%
\begin{beamercolorbox}[ht=0.5cm,left]{framesource}%
\usebeamerfont{framesource}\usebeamercolor[fg!66]{framesource} Source: {#1}%
\end{beamercolorbox}%
\end{textblock*}}
\begin{document}
%from file ../knn1//data/000.txt
\begin{frame}% title page frame: empty label= and empty \frametitle{} removed
\begin{titlepage}
\centering
{\huge\bfseries \par}
\vspace{2cm}
{\LARGE\itshape Simon Kluettermann\par}
\vspace{1.5cm}
{\scshape\Large Master Thesis in Physics\par}
\vspace{0.2cm}
{\Large submitted to the \par}
\vspace{0.2cm}
{\scshape\Large Faculty of Mathematics, Computer Science and Natural Sciences \par}
\vspace{0.2cm}
{\Large \par}
\vspace{0.2cm}
{\scshape\Large RWTH Aachen University}
\vspace{1cm}
\vfill
{\scshape\Large Department of Physics\par}
\vspace{0.2cm}
{\scshape\Large Institute for Theoretical Particle Physics and Cosmology\par}
\vspace{0.2cm}
{ \Large\par}
\vspace{0.2cm}
{\Large First Referee: Prof. Dr. Michael Kraemer \par}
{\Large Second Referee: Prof. Dr. Felix Kahlhoefer}
\vfill
% Bottom of the page
{\large November 2020 \par}
\end{titlepage}
\pagenumbering{roman}
\thispagestyle{empty}
\null
\newpage
\setcounter{page}{1}
\pagenumbering{arabic}
\end{frame}
%from file ../knn1//data/001Thesis@ls9.txt
\begin{frame}[label=Thesis@ls9]
\frametitle{Thesis@ls9}
\begin{itemize}
\item First: Find a topic and a supervisor
\item Work one month on this, to make sure
\begin{itemize}
\item you still like your topic
\item and you are sure you can handle the topic
\end{itemize}
\item then short presentation in front of our chair (15min, relaxed)
\begin{itemize}
\item get some feedback/suggestions
\end{itemize}
\item afterwards register the thesis
\begin{itemize}
\item (different for CS/DS students)
\end{itemize}
\item Problem: We are not able to supervise more than 2 students at the same time (CS faculty rules)
\end{itemize}
\end{frame}
%from file ../knn1//data/002Today.txt
\begin{frame}[label=Today]
\frametitle{Today}
\begin{itemize}
\item First: A short summary of each Topic
\item Then time for questions/Talk with your supervisor about each topic that sounds interesting
\item Your own topics are always welcome;)
\end{itemize}
\end{frame}
%from file ../knn1//data/003Anomaly Detection.txt
\begin{frame}[label=Anomaly Detection]
\frametitle{Anomaly Detection}
\begin{columns}[c] % align columns
\begin{column}{0.48\textwidth}%.48
\begin{itemize}
\item I'm working on Anomaly Detection
\item That means characterising an often very complex distribution, to find events that don't match the expected distribution
\end{itemize}
\end{column}%
\hfill%
\begin{column}{0.48\textwidth}%.48
\begin{figure}[H]
\centering
\includegraphics[width=0.9\textwidth]{../prep/03Anomaly_Detection/circle.pdf}
\label{fig:prep03Anomaly_Detectioncirclepdf}
\end{figure}
\end{column}%
\hfill%
\end{columns}
\end{frame}
%from file ../knn1//data/004knn.txt
\begin{frame}[label=knn]
\frametitle{knn}
\begin{itemize}
\item kNN algorithm can also be used for AD
\item if the $k$-th closest point is further away, a sample is considered more anomalous
\item $r=\frac{k}{2N\cdot pdf}$
\item Powerful method, as it can model the pdf directly
\end{itemize}
\end{frame}
%from file ../knn1//data/005Better knn.txt
\begin{frame}[label=Better knn]
\frametitle{Better knn}
\begin{itemize}
\item The model (mostly) ignores every known sample except one
\item So there are extensions
\item $avg=\frac{1}{N} \sum_i knn_i(x)$
\item $wavg=\frac{1}{N} \sum_i \frac{knn_i(x)}{i}$
\end{itemize}
\end{frame}
%from file ../knn1//data/006Comparison.txt
\begin{frame}[label=Comparison]
%\frametitle{Comparison}
\begin{tabular}{llllll}
\hline
Dataset & wavg & avg & 1 & 3 & 5 \\
\hline
$vertebral$ & $\textbf{0.4506}$ & $\textbf{0.4506}$ & $\textbf{0.4667}$ & $\textbf{0.4667}$ & $\textbf{0.45}$ \\
... & & & & & \\
$thyroid$ & $\textbf{0.9138}$ & $\textbf{0.9151}$ & $\textbf{0.8763}$ & $\textbf{0.9086}$ & $\textbf{0.914}$ \\
$Iris\_setosa$ & $\textbf{0.9333}$ & $\textbf{0.9333}$ & $\textbf{0.9333}$ & $\textbf{0.9}$ & $\textbf{0.9}$ \\
$breastw$ & $\textbf{0.9361}$ & $\textbf{0.9361}$ & $\textbf{0.9211}$ & $\textbf{0.9248}$ & $\textbf{0.9286}$ \\
$wine$ & $\textbf{0.95}$ & $\textbf{0.95}$ & $\textbf{0.9}$ & $\textbf{0.95}$ & $\textbf{0.95}$ \\
$pendigits$ & $\textbf{0.9487}$ & $\textbf{0.9487}$ & $\textbf{0.9391}$ & $\textbf{0.9295}$ & $\textbf{0.9359}$ \\
$segment$ & $\textbf{0.9747}$ & $\textbf{0.9747}$ & $\textbf{0.9495}$ & $\textbf{0.9545}$ & $\textbf{0.9394}$ \\
$banknote-authentication$ & $\textbf{0.9777}$ & $\textbf{0.9776}$ & $\textbf{0.9408}$ & $\textbf{0.943}$ & $\textbf{0.9583}$ \\
$vowels$ & $\textbf{0.9998}$ & $\textbf{0.9972}$ & $\textbf{0.99}$ & $\textbf{0.92}$ & $\textbf{0.93}$ \\
$Ecoli$ & $\textbf{1.0}$ & $\textbf{1.0}$ & $\textbf{0.9}$ & $\textbf{1.0}$ & $\textbf{1.0}$ \\
$$ & $$ & $$ & $$ & $$ & $$ \\
$Average$ & $\textbf{0.7528} $ & $\textbf{0.7520} $ & $0.7325 $ & $0.7229 $ & $0.7157 $ \\
\hline
\end{tabular}
\end{frame}
%from file ../knn1//data/007What to do?.txt
\begin{frame}[label=What to do?]
\frametitle{What to do?}
\begin{itemize}
\item Evaluation as anomaly detector is complicated
\begin{itemize}
\item Requires known anomalies
\end{itemize}
\item $\Rightarrow$So evaluate as density estimator
\begin{itemize}
\item Does not require anomalies
\item Allows generating infinite amounts of training data
\end{itemize}
\end{itemize}
\end{frame}
%from file ../knn1//data/008What to do?.txt
\begin{frame}[label=What to do 2]% unique label: "What to do?" is already used above
\frametitle{What to do?}
\begin{itemize}
\item Collect Extensions of the oc-knn algorithm
\item Define some distance measure to a known pdf
\item Generate random datapoints following the pdf
\item Evaluate which algorithm finds the pdf the best
\end{itemize}
\end{frame}
%from file ../knn1//data/009Requirements.txt
\begin{frame}[label=Requirements]
\frametitle{Requirements}
\begin{itemize}
\item Knowledge of python ( sum([i for i in range(5) if i\%2]) )
\begin{itemize}
\item Ideally incl numpy
\end{itemize}
\item Basic university level Math (you could argue that $r_k \propto \frac{k}{pdf}$)
\item Ideally some experience working on a ssh server
\item $\Rightarrow$Good as a Bachelor Thesis
\item For a Master Thesis, I would extend this a bit (Could you also find $k$?)
\end{itemize}
\end{frame}
%from file ../knn1//data/010Normalising Flows.txt
\begin{frame}[label=Normalising Flows]
\frametitle{Normalising Flows}
\begin{itemize}
\item Deep Learning Method, in which the output is normalised
\item $\int f(x) dx=1 \; \forall f(x)$
\item Can be used to estimate probability density functions
\item $\Rightarrow$Thus useful for AD
\item $\int f(h(x)) \|\frac{\delta x}{\delta h}\| dh=1 \; \forall h(x)$
\end{itemize}
\end{frame}
%from file ../knn1//data/011Graph Normalising Flows.txt
\begin{frame}[label=Graph Normalising Flows]
\frametitle{Graph Normalising Flows}
\begin{itemize}
\item How to apply this to graphs?
\item One Paper (Liu 2019) uses two NN:
\item Autoencoder graph$\Rightarrow$vector
\item NF on vector data
\item which is fine, but also not really graph specific
\item No interaction between encoding and transformation
\end{itemize}
\end{frame}
%from file ../knn1//data/012Graph Normalising Flows.txt
\begin{frame}[label=Graph Normalising Flows]
\frametitle{Graph Normalising Flows}
\begin{itemize}
\item So why not do this directly?
\item $\Rightarrow$Requires differentiating a graph
\item Why not use only one Network?
\item Graph$\Rightarrow$Vector$\Rightarrow$pdf
\item $\Rightarrow$Finds trivial solution, as $<pdf> \propto \frac{1}{\sigma_{Vector}}$
\item So regularise the standard deviation of the vector space!
\begin{itemize}
\item Interplay between encoding and NF
\item Could also be useful for highdim data
\end{itemize}
\end{itemize}
\end{frame}
%from file ../knn1//data/013Requirements.txt
\begin{frame}[label=Requirements]
\frametitle{Requirements}
\begin{itemize}
\item Proficient in python ( [i for i in range(1,N) if not [j for j in range(2,i) if not i\%j]] )
\begin{itemize}
\item Ideally incl numpy, tensorflow, keras
\end{itemize}
\item Some deep learning experience
\item University level math (google Cholesky Decomposition. Why is this useful for NF?)
\item Ideally some experience working on a ssh server
\item A bit more challenging$\Rightarrow$Better as a Master thesis
\item (Still we would start very slowly of course)
\end{itemize}
\end{frame}
%from file ../knn1//data/014Old Thesis Sina.txt
\begin{frame}[label=Old Thesis Sina]
\frametitle{Sina}
\begin{columns}[c] % align columns
\begin{column}{0.48\textwidth}%.48
\begin{itemize}
\item Isolation Forest: Different Anomaly Detection Algorithm
\item Problem: Isolation Forests don't work on categorical data
\item $\Rightarrow$Extend them to categorical data
\end{itemize}
\end{column}%
\hfill%
\begin{column}{0.48\textwidth}%.48
\begin{figure}[H]
\centering
\includegraphics[width=0.9\textwidth]{../prep/20Old_Thesis_Sina/Bildschirmfoto vom 2022-09-26 16-22-30.png}
\label{fig:prep20Old_Thesis_SinaBildschirmfoto vom 2022-09-26 16-22-30png}
\end{figure}
\end{column}%
\hfill%
\end{columns}
\end{frame}
%from file ../knn1//data/015Old Thesis Britta.txt
\begin{frame}[label=Old Thesis Britta]
\frametitle{Britta}
\begin{columns}[c] % align columns
\begin{column}{0.58\textwidth}%.48
\begin{itemize}
\item Reidentification: Find known objects in new images
\item Task: Find if two images of pallet blocks are of the same pallet block
\item Use AD to represent the pallet blocks
\end{itemize}
\end{column}%
\hfill%
\begin{column}{0.38\textwidth}%.48
\begin{figure}[H]
\centering
\includegraphics[width=0.9\textwidth]{../prep/21Old_Thesis_Britta/Bildschirmfoto vom 2022-09-26 16-23-26.png}
\label{fig:prep21Old_Thesis_BrittaBildschirmfoto vom 2022-09-26 16-23-26png}
\end{figure}
\end{column}%
\hfill%
\end{columns}
\end{frame}
%from file ../knn1//data/016Old Thesis Hsin Ping.txt
\begin{frame}[label=Old Thesis Hsin Ping]
\frametitle{Hsin Ping}
\begin{columns}[c] % align columns
\begin{column}{0.48\textwidth}%.48
\begin{itemize}
\item Ensemble: Combination of multiple models
\item Task: Explain the prediction of a model using the ensemble structure
\end{itemize}
\end{column}%
\hfill%
\begin{column}{0.48\textwidth}%.48
\begin{figure}[H]
\centering
\includegraphics[width=0.9\textwidth]{../prep/22Old_Thesis_Hsin_Ping/Bildschirmfoto vom 2022-09-26 16-24-14.png}
\label{fig:prep22Old_Thesis_Hsin_PingBildschirmfoto vom 2022-09-26 16-24-14png}
\end{figure}
\end{column}%
\hfill%
\end{columns}
\end{frame}
%from file ../knn1//data/017Old Thesis Nikitha.txt
\begin{frame}[label=Old Thesis Nikitha]
\frametitle{Nikitha}
\begin{columns}[c] % align columns
\begin{column}{0.48\textwidth}%.48
\begin{itemize}
\item Task: Explore a new kind of ensemble
\item Instead of many uncorrelated models, let the models interact during training
\end{itemize}
\end{column}%
\hfill%
\begin{column}{0.48\textwidth}%.48
\begin{figure}[H]
\centering
\includegraphics[width=0.9\textwidth]{../prep/23Old_Thesis_Nikitha/Bildschirmfoto vom 2022-09-26 16-25-06.png}
\label{fig:prep23Old_Thesis_NikithaBildschirmfoto vom 2022-09-26 16-25-06png}
\end{figure}
\end{column}%
\hfill%
\end{columns}
\end{frame}
\begin{frame}
Questions?
\end{frame}
\end{document}