-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathliterature.tex
105 lines (79 loc) · 3.72 KB
/
literature.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
\documentclass{farlamp}
% latexmk -pvc -pdfxe -bibtex -interaction=nonstopmode -outdir=build literature.tex
\addbibresource{references.bib}
\DeclareBibliographyCategory{annotated}
\author{Richard Möhn}
\date{\today}
\title{Literature overview}
\addtitledatatopdf
\begin{document}
\maketitle
\tableofcontents
\section{To search}
\begin{itemize}
\item What has Paul written about RL-based IDA?
\item How are rewards determined in RL?
\item Since I mention IL, read something about IL?
\item Find something about how ML algorithms deal with faulty
data/outliers. – Noisy data with little repetition. – Mathematical
function learning with noise. – Detection/classification with
mislabelled data (credits: Logan Smith).
\item \done\ See todos in SupAmp-ReAmp and Overfail2
\item \done\ \href{https://scholar.google.com/scholar?hl=en&as_sdt=0%2C5&q=supervising+strong+learners+by+amplifying+weak+experts&btnG=}{Search
backward from \textcite{CSASupAmp} on Google Scholar}
\item Something about surrogate modelling? It appears to be related to
distillation.
\end{itemize}
\section{Potential sources}
\begin{itemize}
\item Possibly relevant works citing \textcite{CSASupAmp}, according to
Google Scholar:
\begin{itemize}
\item \href{https://scholar.google.com/scholar?hl=en&as_sdt=0%2C5&q=supervising+strong+learners+by+amplifying+weak+experts&btnG=}{Backward search on Google Scholar}
\item \href{https://arxiv.org/abs/1906.08663}{Modeling AGI Safety Frameworks
with Causal Influence Diagrams}
\item \href{https://arxiv.org/abs/1906.10189}{Evolutionary
Computation and AI Safety: Research Problems Impeding Routine
and Safe Real-world Application of Evolution}
\item \href{https://www.mdpi.com/2504-2289/3/2/21}{Multiparty
Dynamics and Failure Modes for Machine Learning and Artificial
Intelligence}
\item \href{https://arxiv.org/abs/1906.01820}{Risks from Learned Optimization in Advanced Machine Learning Systems}
\end{itemize}
\end{itemize}
\section{To skim and decide}
\begin{itemize}
\item Resources from \textcite{CSASupAmp} that I've marked with a blue cross.
\item
\href{https://ai-alignment.com/semi-supervised-reinforcement-learning-cf7d5375197f}{Semi-supervised reinforcement learning}
\item \href{https://arxiv.org/abs/1811.07871}{Scalable agent alignment via
reward modeling: a research direction}
\end{itemize}
\section{To (re-)read}
\begin{itemize}
\item \url{https://www.lesswrong.com/posts/fq7Ehb2oWwXtZic8S/reinforcement-learning-in-the-iterated-amplification}
\item \cite{ChriREngP}
\item \cite{ChriThoRewE}
\end{itemize}
\section{Annotated bibliography}
\begin{displayquote}[{\cite[102\psq]{CoR}}]
Often the assembling of an annotated bibliography is a distinct stage in a
research process […]. Each annotation is an opportunity to evaluate the
credibility of a source, summarize its argument, and explain its relevance
to your project.
[…] If you can't summarize your sources or explain their relevance, you are
likely not ready to write your paper.
\end{displayquote}
\annotitem{ChriRelAmp}
TODO: Copy summary from notes and clean up. Add relevance.
\annotitem{CSASupAmp}
TODO: Copy summary from notes and clean up. Add relevance.
\begin{FlushLeft}
\printbibliography[notcategory=annotated]
\end{FlushLeft}
\end{document}
% NEXT:
% - List more sources. – From Google Scholar, actually from all the points
% above. They're not fully expanded yet.
% - For each source, decide when it makes sense to read it. Much of it only
% makes sense after I've learned more about ML.