//! Provides common utility functions used across the framework.
use std::sync::{Arc, RwLock};
use co::prelude::*;
use coblas::plugin::*;
use conn;
use num::traits::{NumCast, cast};

/// Shared lock used for tensors that are read and written by multiple consumers.
pub type ArcLock<T> = Arc<RwLock<T>>;
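
// The sketch below (not part of the original file) shows how the alias is
// typically used: a tensor shared between several owners behind a read-write
// lock. It relies only on items defined or imported in this module.
#[cfg(test)]
mod arc_lock_example {
    use super::*;

    #[test]
    fn shares_a_tensor_between_owners() {
        let backend = native_backend();
        let tensor = SharedTensor::<f32>::new(backend.device(), &vec![1]).unwrap();
        let shared: ArcLock<SharedTensor<f32>> = Arc::new(RwLock::new(tensor));
        // Cloning the Arc creates a second owner of the same locked tensor.
        let reader = shared.clone();
        assert_eq!(reader.read().unwrap().desc().size(), 1);
    }
}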
/// Creates a simple backend for the native framework (CPU).
pub fn native_backend() -> Backend<Native> {
    let framework = Native::new();
    let hardwares = &framework.hardwares().to_vec();
    let backend_config = BackendConfig::new(framework, hardwares);
    Backend::new(backend_config).unwrap()
}
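
// A minimal usage sketch (not from the original source): build the native
// backend and grab its device, the accessor the helpers below rely on.
#[cfg(test)]
mod native_backend_example {
    use super::native_backend;

    #[test]
    fn builds_a_native_backend() {
        let backend = native_backend();
        let _device = backend.device();
    }
}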
/// Writes `data` to the beginning of a native Collenchyma memory.
pub fn write_to_memory<T: NumCast + ::std::marker::Copy>(mem: &mut MemoryType, data: &[T]) {
    write_to_memory_offset(mem, data, 0);
}
/// Writes `data` to a native Collenchyma memory, starting `offset` elements in.
///
/// The values are cast to `f32` before being written; non-native memory
/// (OpenCL, CUDA) is silently ignored.
pub fn write_to_memory_offset<T: NumCast + ::std::marker::Copy>(mem: &mut MemoryType, data: &[T], offset: usize) {
    match mem {
        &mut MemoryType::Native(ref mut mem) => {
            let mem_buffer = mem.as_mut_slice::<f32>();
            for (index, datum) in data.iter().enumerate() {
                mem_buffer[index + offset] = cast(*datum).unwrap();
            }
        },
        #[cfg(any(feature = "opencl", feature = "cuda"))]
        _ => {}
    }
}
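
// Sketch (not in the original file), assuming only the Collenchyma API already
// used in this module: allocate a small tensor on the native device and fill
// part of it through `write_to_memory_offset`.
#[cfg(test)]
mod write_to_memory_example {
    use super::*;

    #[test]
    fn fills_native_memory_at_an_offset() {
        let backend = native_backend();
        let mut tensor = SharedTensor::<f32>::new(backend.device(), &vec![4]).unwrap();
        // Write two values starting at element 2; elements 0 and 1 stay untouched.
        write_to_memory_offset(tensor.get_mut(backend.device()).unwrap(), &[1u8, 2u8], 2);
    }
}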
/// Writes the `i`-th sample of a batch into a `SharedTensor`.
///
/// The first dimension of the tensor is assumed to be the batch size, so the
/// sample size is the total element count divided by it. Allocates memory on
/// the native backend if necessary and syncs the tensor there before writing.
pub fn write_batch_sample<T: NumCast + ::std::marker::Copy>(tensor: &mut SharedTensor<f32>, data: &[T], i: usize) {
    let native_backend = native_backend();
    let tensor_size = tensor.desc().size();
    let sample_size = tensor_size / tensor.desc()[0];
    let _ = tensor.add_device(native_backend.device());
    tensor.sync(native_backend.device()).unwrap();
    write_to_memory_offset(tensor.get_mut(native_backend.device()).unwrap(), data, i * sample_size);
}
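
// Sketch (not in the original file): a 2 x 3 tensor, i.e. batch size 2 and
// sample size 3, receiving its second sample. Only APIs already used in this
// module appear here.
#[cfg(test)]
mod write_batch_sample_example {
    use super::*;

    #[test]
    fn writes_one_sample_of_a_batch() {
        let backend = native_backend();
        let mut batch = SharedTensor::<f32>::new(backend.device(), &vec![2, 3]).unwrap();
        // Fills elements 3..6, leaving the first sample (elements 0..3) alone.
        write_batch_sample(&mut batch, &[7.0f32, 8.0, 9.0], 1);
    }
}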
/// Creates a one-element Collenchyma `SharedTensor` holding a scalar value.
pub fn native_scalar<T: NumCast + ::std::marker::Copy>(scalar: T) -> SharedTensor<T> {
    let native = native_backend();
    let mut shared_scalar = SharedTensor::<T>::new(native.device(), &vec![1]).unwrap();
    write_to_memory(shared_scalar.get_mut(native.device()).unwrap(), &[scalar]);
    shared_scalar
}
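
// Sketch (not in the original file): wrap a learning-rate-style constant in a
// one-element tensor, the shape the Axpby methods below expect for their
// scalar arguments.
#[cfg(test)]
mod native_scalar_example {
    use super::*;

    #[test]
    fn wraps_a_scalar_in_a_tensor() {
        let scalar = native_scalar(0.5f32);
        assert_eq!(scalar.desc().size(), 1);
    }
}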
/// Casts a `Vec<usize>` to a `Vec<i32>`, truncating values that do not fit.
pub fn cast_vec_usize_to_i32(input: Vec<usize>) -> Vec<i32> {
    input.iter().map(|&i| i as i32).collect()
}
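
// Sketch (not in the original file): shape vectors are `usize` on the Rust
// side, while BLAS/cuDNN-style APIs commonly take `i32` dimensions.
#[cfg(test)]
mod cast_vec_example {
    use super::cast_vec_usize_to_i32;

    #[test]
    fn casts_a_shape_vector() {
        assert_eq!(cast_vec_usize_to_i32(vec![2, 3, 4]), vec![2i32, 3, 4]);
    }
}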
/// Extends the `coblas` plugin traits with a combined `axpby` operation.
pub trait Axpby<F> : Axpy<F> + Scal<F> {
    /// Performs the operation `y := a * x + b * y`,
    /// implemented as a `scal(b, y)` followed by an `axpy(a, x, y)`.
    fn axpby(&self, a: &mut SharedTensor<F>, x: &mut SharedTensor<F>, b: &mut SharedTensor<F>, y: &mut SharedTensor<F>) -> Result<(), ::co::error::Error> {
        try!(self.scal(b, y));
        try!(self.axpy(a, x, y));
        Ok(())
    }

    /// Performs the operation `y := a * x + b * y` without implicit memory
    /// syncs, implemented as a `scal_plain(b, y)` followed by an `axpy_plain(a, x, y)`.
    fn axpby_plain(&self, a: &SharedTensor<F>, x: &SharedTensor<F>, b: &SharedTensor<F>, y: &mut SharedTensor<F>) -> Result<(), ::co::error::Error> {
        try!(self.scal_plain(b, y));
        try!(self.axpy_plain(a, x, y));
        Ok(())
    }
}

impl<T: Axpy<f32> + Scal<f32>> Axpby<f32> for T {}
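
// Sketch (not in the original file): y := a*x + b*y on the native backend.
// This assumes `Backend<Native>` implements `Axpy<f32>` and `Scal<f32>`
// through the coblas plugin, which is what the blanket impl above requires.
#[cfg(test)]
mod axpby_example {
    use super::*;

    #[test]
    fn computes_a_x_plus_b_y() {
        let backend = native_backend();
        let mut a = native_scalar(2.0f32);
        let mut b = native_scalar(3.0f32);
        let mut x = SharedTensor::<f32>::new(backend.device(), &vec![2]).unwrap();
        let mut y = SharedTensor::<f32>::new(backend.device(), &vec![2]).unwrap();
        write_to_memory(x.get_mut(backend.device()).unwrap(), &[1.0f32, 1.0]);
        write_to_memory(y.get_mut(backend.device()).unwrap(), &[1.0f32, 1.0]);
        // Afterwards y should hold [5.0, 5.0]: 2 * 1 + 3 * 1.
        backend.axpby(&mut a, &mut x, &mut b, &mut y).unwrap();
    }
}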
/// Encapsulates all traits a Solver requires from a backend.
///
/// Note that `Copy` here is the BLAS copy operation from `coblas`,
/// not `std::marker::Copy`.
pub trait SolverOps<F> : LayerOps<F> + Axpby<F> + Dot<F> + Copy<F> {}

impl<T: LayerOps<f32> + Axpby<f32> + Dot<f32> + Copy<f32>> SolverOps<f32> for T {}
/// Encapsulates all operation traits used by Layers when CUDA is enabled.
#[cfg(all(feature="cuda", not(feature="native")))]
pub trait LayerOps<F> : conn::Convolution<F>
                      + conn::Pooling<F>
                      + conn::Relu<F> + conn::ReluPointwise<F>
                      + conn::Sigmoid<F> + conn::SigmoidPointwise<F>
                      + conn::Tanh<F> + conn::TanhPointwise<F>
                      + conn::Softmax<F> + conn::LogSoftmax<F>
                      + Gemm<F> {}

/// Encapsulates all operation traits used by Layers on the native backend,
/// which has no convolution, pooling, or pointwise implementations.
#[cfg(feature="native")]
pub trait LayerOps<F> : conn::Relu<F>
                      + conn::Sigmoid<F>
                      + conn::Tanh<F>
                      + conn::Softmax<F> + conn::LogSoftmax<F>
                      + Gemm<F> {}

#[cfg(all(feature="cuda", not(feature="native")))]
impl<T: conn::Convolution<f32>
      + conn::Pooling<f32>
      + conn::Relu<f32> + conn::ReluPointwise<f32>
      + conn::Sigmoid<f32> + conn::SigmoidPointwise<f32>
      + conn::Tanh<f32> + conn::TanhPointwise<f32>
      + conn::Softmax<f32> + conn::LogSoftmax<f32>
      + Gemm<f32>> LayerOps<f32> for T {}

#[cfg(feature="native")]
impl<T: conn::Relu<f32>
      + conn::Sigmoid<f32>
      + conn::Tanh<f32>
      + conn::Softmax<f32> + conn::LogSoftmax<f32>
      + Gemm<f32>> LayerOps<f32> for T {}
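
// Sketch (not in the original source): these umbrella traits let solver and
// layer code name one generic bound instead of listing every plugin trait.
// The function below is hypothetical and only checks that such a bound compiles.
#[cfg(test)]
#[allow(dead_code)]
fn _runs_on_any_solver_backend<B: SolverOps<f32>>(_backend: &B) {
    // Anything implementing LayerOps + Axpby + Dot + Copy fits here, e.g.
    // Backend<Native> or Backend<Cuda> with the matching plugins enabled.
}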