Delete ad hoc soundscape fading
Browse files
- api.py +5 -16
- audiocraft/builders.py +2 -2
- live_demo.py +2 -1
api.py
CHANGED
@@ -91,7 +91,6 @@ def overlay(x, soundscape=None):
|
|
91 |
target_rate=24000)[0, :-250] # last samples have splash sounds DISCARD 25000 last samples
|
92 |
|
93 |
|
94 |
-
k = background.shape[0]
|
95 |
|
96 |
|
97 |
|
@@ -100,21 +99,11 @@ def overlay(x, soundscape=None):
|
|
100 |
|
101 |
|
102 |
|
103 |
-
hop = int(.99 * k) # only overlap 10%
|
104 |
-
n_repeat = len(x) // hop
|
105 |
-
total = np.zeros( hop * (n_repeat + 2)) # add some extra pad space for last frame to fit
|
106 |
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
for j in range(n_repeat):
|
113 |
-
total[j*hop:j*hop + k] += m * background # the total is already smoothly fading due to the previous mask. Only new addition of signal needs to rise smoothly
|
114 |
-
print((total < -1).sum(), (total > 1).sum(), 'OUTOF BOUNDS\n\n\n\n')
|
115 |
-
# total = total.clip(-1, 1) # if too many signals were added on top of each other
|
116 |
-
# print(total[40000:70000].tolist())
|
117 |
-
print(np.logical_and(total > .1, total < .9).sum(), total.shape, 'ev')
|
118 |
|
119 |
# less periodic
|
120 |
|
@@ -124,7 +113,7 @@ def overlay(x, soundscape=None):
|
|
124 |
# amplify sounds full [-1,1]
|
125 |
|
126 |
total /= np.abs(total).max() + 1e-7
|
127 |
-
x = .
|
128 |
|
129 |
else:
|
130 |
|
|
|
91 |
target_rate=24000)[0, :-250] # last samples have splash sounds DISCARD 25000 last samples
|
92 |
|
93 |
|
|
|
94 |
|
95 |
|
96 |
|
|
|
99 |
|
100 |
|
101 |
|
|
|
|
|
|
|
102 |
|
103 |
+
|
104 |
+
n_repeat = len(x) // background.shape[0] + 1
|
105 |
+
|
106 |
+
total = np.tile(background, n_repeat)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
|
108 |
# less periodic
|
109 |
|
|
|
113 |
# amplify sounds full [-1,1]
|
114 |
|
115 |
total /= np.abs(total).max() + 1e-7
|
116 |
+
x = .5 * x + .5 * total[:len(x)]
|
117 |
|
118 |
else:
|
119 |
|
audiocraft/builders.py
CHANGED
@@ -79,8 +79,8 @@ class AudioGen(nn.Module):
|
|
79 |
conditions=attributes,
|
80 |
max_gen_len=int(self.duration * self.frame_rate)) # [bs, 4, 37 * self.lm.n_draw]
|
81 |
x = self.compression_model.decode(gen_tokens, None) #[bs, 1, 11840]
|
82 |
-
# print('______________\
|
83 |
-
|
84 |
|
85 |
return x / x.abs().max(2, keepdims=True)[0] + 1e-7
|
86 |
|
|
|
79 |
conditions=attributes,
|
80 |
max_gen_len=int(self.duration * self.frame_rate)) # [bs, 4, 37 * self.lm.n_draw]
|
81 |
x = self.compression_model.decode(gen_tokens, None) #[bs, 1, 11840]
|
82 |
+
# print('______________\nAudioGen Tokens', gen_tokens)
|
83 |
+
|
84 |
|
85 |
return x / x.abs().max(2, keepdims=True)[0] + 1e-7
|
86 |
|
live_demo.py
CHANGED
@@ -29,10 +29,11 @@ args.speed = 1.14
|
|
29 |
os.system('cls' if os.name == 'nt' else 'clear')
|
30 |
while True:
|
31 |
_str = input("\n\n\n\nDescribe Any Sound: \n\n\n\n")
|
32 |
-
|
33 |
|
34 |
_str += 'Lorem ipsum dolor sit amet, consetetur elixir sed diam nonumy eirmod tempor invidunt labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Soutet clita kasd gubergren.'
|
35 |
|
|
|
36 |
args.text = '_tmp.txt' # input -> .txt (implementation thought for audiobooks in API)
|
37 |
|
38 |
with open(args.text, 'w') as f:
|
|
|
29 |
os.system('cls' if os.name == 'nt' else 'clear')
|
30 |
while True:
|
31 |
_str = input("\n\n\n\nDescribe Any Sound: \n\n\n\n")
|
32 |
+
|
33 |
|
34 |
_str += 'Lorem ipsum dolor sit amet, consetetur elixir sed diam nonumy eirmod tempor invidunt labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Soutet clita kasd gubergren.'
|
35 |
|
36 |
+
args.soundscape = _str
|
37 |
args.text = '_tmp.txt' # input -> .txt (implementation thought for audiobooks in API)
|
38 |
|
39 |
with open(args.text, 'w') as f:
|