Dionyssos committed on
Commit
df63ff0
·
1 Parent(s): dd7320e

Delete ad hoc soundscape fading

Browse files
Files changed (3) hide show
  1. api.py +5 -16
  2. audiocraft/builders.py +2 -2
  3. live_demo.py +2 -1
api.py CHANGED
@@ -91,7 +91,6 @@ def overlay(x, soundscape=None):
91
  target_rate=24000)[0, :-250] # last samples have splash sounds DISCARD 25000 last samples
92
 
93
 
94
- k = background.shape[0]
95
 
96
 
97
 
@@ -100,21 +99,11 @@ def overlay(x, soundscape=None):
100
 
101
 
102
 
103
- hop = int(.99 * k) # only overlap 10%
104
- n_repeat = len(x) // hop
105
- total = np.zeros( hop * (n_repeat + 2)) # add some extra pad space for last frame to fit
106
 
107
- m = np.ones(k)
108
- overlap = k - hop
109
- m[hop:] = np.linspace(1, 0, overlap) # tril mask for avg sound in the interpolated hop
110
-
111
-
112
- for j in range(n_repeat):
113
- total[j*hop:j*hop + k] += m * background # the total is already smoothly fading due to the previous mask. Only new addition of signal needs to rise smoothly
114
- print((total < -1).sum(), (total > 1).sum(), 'OUTOF BOUNDS\n\n\n\n')
115
- # total = total.clip(-1, 1) # if too many signals were added on top of each other
116
- # print(total[40000:70000].tolist())
117
- print(np.logical_and(total > .1, total < .9).sum(), total.shape, 'ev')
118
 
119
  # less periodic
120
 
@@ -124,7 +113,7 @@ def overlay(x, soundscape=None):
124
  # amplify sounds full [-1,1]
125
 
126
  total /= np.abs(total).max() + 1e-7
127
- x = .4 * x + .6 * total[:len(x)]
128
 
129
  else:
130
 
 
91
  target_rate=24000)[0, :-250] # last samples have splash sounds DISCARD 25000 last samples
92
 
93
 
 
94
 
95
 
96
 
 
99
 
100
 
101
 
 
 
 
102
 
103
+
104
+ n_repeat = len(x) // background.shape[0] + 1
105
+
106
+ total = np.tile(background, n_repeat)
 
 
 
 
 
 
 
107
 
108
  # less periodic
109
 
 
113
  # amplify sounds full [-1,1]
114
 
115
  total /= np.abs(total).max() + 1e-7
116
+ x = .5 * x + .5 * total[:len(x)]
117
 
118
  else:
119
 
audiocraft/builders.py CHANGED
@@ -79,8 +79,8 @@ class AudioGen(nn.Module):
79
  conditions=attributes,
80
  max_gen_len=int(self.duration * self.frame_rate)) # [bs, 4, 37 * self.lm.n_draw]
81
  x = self.compression_model.decode(gen_tokens, None) #[bs, 1, 11840]
82
- # print('______________\nGENTOk 5', gen_tokens)
83
- print('GENAUD 5', x.sum(), x.shape)
84
 
85
  return x / x.abs().max(2, keepdims=True)[0] + 1e-7
86
 
 
79
  conditions=attributes,
80
  max_gen_len=int(self.duration * self.frame_rate)) # [bs, 4, 37 * self.lm.n_draw]
81
  x = self.compression_model.decode(gen_tokens, None) #[bs, 1, 11840]
82
+ # print('______________\nAudioGen Tokens', gen_tokens)
83
+
84
 
85
  return x / x.abs().max(2, keepdims=True)[0] + 1e-7
86
 
live_demo.py CHANGED
@@ -29,10 +29,11 @@ args.speed = 1.14
29
  os.system('cls' if os.name == 'nt' else 'clear')
30
  while True:
31
  _str = input("\n\n\n\nDescribe Any Sound: \n\n\n\n")
32
- args.soundscape = _str
33
 
34
  _str += 'Lorem ipsum dolor sit amet, consetetur elixir sed diam nonumy eirmod tempor invidunt labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Soutet clita kasd gubergren.'
35
 
 
36
  args.text = '_tmp.txt' # input -> .txt (implementation thought for audiobooks in API)
37
 
38
  with open(args.text, 'w') as f:
 
29
  os.system('cls' if os.name == 'nt' else 'clear')
30
  while True:
31
  _str = input("\n\n\n\nDescribe Any Sound: \n\n\n\n")
32
+
33
 
34
  _str += 'Lorem ipsum dolor sit amet, consetetur elixir sed diam nonumy eirmod tempor invidunt labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Soutet clita kasd gubergren.'
35
 
36
+ args.soundscape = _str
37
  args.text = '_tmp.txt' # input -> .txt (implementation thought for audiobooks in API)
38
 
39
  with open(args.text, 'w') as f: